
Scala SharedSparkContext Class Code Examples


This article collects typical usage examples of the Scala class com.holdenkarau.spark.testing.SharedSparkContext. If you are wondering what SharedSparkContext is for, how to use it, or what it looks like in real code, the curated examples below should help.



The following presents 19 code examples of the SharedSparkContext class, ordered by popularity by default.
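All of these examples follow the same basic pattern: a ScalaTest suite mixes in SharedSparkContext, which starts a single SparkContext (exposed as sc) before the suite runs and stops it when the suite finishes. As a quick orientation, here is a minimal sketch of that pattern; the suite name is made up for illustration, and the build dependency shown in the comment is an assumption whose version string must be matched to your Spark version.

// Minimal sketch of the pattern shared by the examples below (illustrative only).
// Assumes spark-testing-base is on the test classpath, e.g. in build.sbt
// (the version string is only an example; pick one matching your Spark version):
//   libraryDependencies += "com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.8.0" % "test"
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class MinimalSharedSparkContextTest extends FunSuite with SharedSparkContext {
  test("sc is provided by the trait and shared across the whole suite") {
    // SharedSparkContext creates sc once per suite, so individual tests
    // can use it directly without paying the SparkContext startup cost.
    val rdd = sc.parallelize(Seq(1, 2, 3))
    assert(rdd.count() === 3)
  }
}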

Example 1: ColumnsTest

//Set the package name and import the required dependencies
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructField, StringType, LongType, DoubleType}

class ColumnsTest extends FunSuite with SharedSparkContext {
  val numLoops = 100

  test("test StringConstant") {
    val s1 = new StringConstant("f1", "abc")
    assert("abc" === s1.getValue(1))
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }

  test("test RandomLong") {
    val s1 = new RandomLong("f1", 666666L)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666L)
    }
    assert(StructField("f1", LongType, false) == s1.getStructField)
  }

  test("test RandomDouble") {
    val s1 = new RandomDouble("f1", 666666.00)
    for (x <- 1 to numLoops) {
      assert(s1.getValue(1) >= 0)
      assert(s1.getValue(1) <= 666666.00)
    }
    assert(StructField("f1", DoubleType, false) == s1.getStructField)
  }

  test("test Categorical") {
    val list = List("a", "b", "c", "d")
    val s1 = new Categorical("f1", list)
    for (x <- 1 to numLoops) {
      val v = s1.getValue(1)
      assert(list.exists(key => v.contains(key)))
    }
    assert(StructField("f1", StringType, false) == s1.getStructField)
  }
} 
Developer: dondrake | Project: BigDataMaker | Lines: 45 | Source: TestColumns.scala


Example 2: BigDataMakerTest

//Set the package name and import the required dependencies
package com.drakeconsulting.big_data_maker

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext

class BigDataMakerTest extends FunSuite with SharedSparkContext {
  test("first") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("f2", "def"))

    val df = bd._createDataFrame
    df.show
    assert(500 === df.count)
    assert(2 === df.columns.length)
  }

  test("col names") {
    val sqlContext = new SQLContext(sc)
    val bd = new BigData(sqlContext, "/tmp/b", 5, 100)
    bd.addColumn(new StringConstant("f1", "abc"))
    bd.addColumn(new StringConstant("", "def"))

    assert("f1" === bd.cols(0).name)
    assert("f_1" === bd.cols(1).name)
  }
} 
Developer: dondrake | Project: BigDataMaker | Lines: 30 | Source: TestBigDataMaker.scala


Example 3: AppTest

//Set the package name and import the required dependencies
package com.github.dongjinleekr.spark

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
 * see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
 * see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
 */
class AppTest extends FunSuite with SharedSparkContext {
	test("test initializing spark context") {
		val list = List(1, 2, 3, 4)
		val rdd = sc.parallelize(list)

		assert(rdd.count === list.length)
	}
} 
Developer: dongjinleekr | Project: spark-sbt-quickstart | Lines: 18 | Source: AppTest.scala


Example 4: FunctionalSyntaxOWLExpressionsRDDBuilderTest

//Set the package name and import the required dependencies
package net.sansa_stack.owl.spark.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite


class FunctionalSyntaxOWLExpressionsRDDBuilderTest extends FunSuite with SharedSparkContext {
  var _rdd: OWLExpressionsRDD = null

  def rdd = {
    if (_rdd == null) {
      _rdd = FunctionalSyntaxOWLExpressionsRDDBuilder.build(
        sc, "src/test/resources/ont_functional.owl")
      _rdd.cache()
    }

    _rdd
  }

  test("There should be three annotation lines with full URIs") {
    val res = rdd.filter(line => line.startsWith("Annotation(")).collect()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")

    assert(res.length == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  
//  test("There should be an import statement") {
//    val res = rdd.filter(line => line.startsWith("Import")).collect()
//    assert(res.length == 1)
//    assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
//  }

  test("There should not be any empty lines") {
    val res = rdd.filter(line => line.trim.isEmpty).collect()
    assert(res.length == 0)
  }

  test("There should not be any comment lines") {
    val res = rdd.filter(line => line.trim.startsWith("#")).collect()
    assert(res.length == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = rdd.filter(line => line.trim.startsWith("DisjointObjectProperties")).collect()
    assert(res.length == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 - uncommented Import(...)
    assert(rdd.count() == total)
  }
} 
Developer: SANSA-Stack | Project: SANSA-OWL | Lines: 62 | Source: FunctionalSyntaxOWLExpressionsRDDBuilderTest.scala


Example 5: FunctionalSyntaxOWLExpressionsDatasetBuilderTest

//Set the package name and import the required dependencies
package net.sansa_stack.owl.spark.dataset

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SparkSession
import org.scalatest.FunSuite


class FunctionalSyntaxOWLExpressionsDatasetBuilderTest extends FunSuite with SharedSparkContext {
  lazy val spark = SparkSession.builder().appName(sc.appName).master(sc.master).getOrCreate()
  var _dataset: OWLExpressionsDataset = null
  def dataset: OWLExpressionsDataset = {
    if (_dataset == null) {
      _dataset = FunctionalSyntaxOWLExpressionsDatasetBuilder.build(
        spark, "src/test/resources/ont_functional.owl")
      _dataset.cache()
    }
    _dataset
  }

  test("There should be three annotation lines with full URIs") {
    val res = dataset.filter(line => line.startsWith("Annotation(")).collectAsList()
    val expected = List(
      "Annotation(<http://ex.com/foo#hasName> \"Name\")",
      "Annotation(<http://ex.com/bar#hasTitle> \"Title\")",
      """Annotation(<http://ex.com/default#description> "A longer
description running over
several lines")""")
    assert(res.size() == 3)
    for (e <- expected) {
      assert(res.contains(e))
    }
  }

  
  //  test("There should be an import statement") {
  //    val res = rdd.filter(line => line.startsWith("Import")).collect()
  //    assert(res.length == 1)
  //    assert(res(0) == "Import(<http://www.example.com/my/2.0>)")
  //  }

  test("There should not be any empty lines") {
    val res = dataset.filter(line => line.trim.isEmpty)
    assert(res.count() == 0)
  }

  test("There should not be any comment lines") {
    val res = dataset.filter(line => line.trim.startsWith("#"))
    assert(res.count() == 0)
  }

  test("There should be a DisjointObjectProperties axiom") {
    val res = dataset.filter(line => line.trim.startsWith("DisjointObjectProperties"))
    assert(res.count() == 1)
  }

  test("The total number of axioms should be correct") {
    val total = 70 // = 71 - uncommented Import(...)
    assert(dataset.count() == total)
  }
} 
Developer: SANSA-Stack | Project: SANSA-OWL | Lines: 61 | Source: FunctionalSyntaxOWLExpressionsDatasetBuilderTest.scala


Example 6: JoinTest

//Set the package name and import the required dependencies
package com.highperformancespark.examples.goldilocks

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.rdd.RDD
import org.scalatest.FunSuite


class JoinTest extends FunSuite with SharedSparkContext {
  test("Hash join"){
    val keySet = "a, b, c, d, e, f, g".split(",")
    val smallRDD = sc.parallelize(keySet.map(letter => (letter, letter.hashCode)))
    val largeRDD: RDD[(String, Double)] =
      sc.parallelize(keySet.flatMap{ letter =>
        Range(1, 50).map(i => (letter, letter.hashCode() / i.toDouble))})
    val result: RDD[(String, (Double, Int))] =
      RDDJoinExamples.manualBroadCastHashJoin(
        largeRDD, smallRDD)
    val nativeJoin: RDD[(String, (Double, Int))] = largeRDD.join(smallRDD)

    assert(result.subtract(nativeJoin).count == 0)
  }
} 
Developer: gourimahapatra | Project: high-performance-spark | Lines: 23 | Source: JoinTest.scala


Example 7: WordCountTest

//Set the package name and import the required dependencies
package com.highperformancespark.examples.wordcount


import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed"){
    val wordRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the #$!?ing land!"))

    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val illegalTokens: Array[Char] = "#$%?!.".toCharArray

    val wordCounts = WordCount.withStopWordsFiltered(
      wordRDD, illegalTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#$!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
} 
Developer: gourimahapatra | Project: high-performance-spark | Lines: 26 | Source: WordCountTest.scala


Example 8: SparkCassRDDFunctionsSpec

//Set the package name and import the required dependencies
package com.github.jparkie.spark.cassandra.rdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassRDDFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.rdd" must {
    "lift RDD into SparkCassRDDFunctions" in {
      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))

      // If internalSparkContext is available, RDD was lifted.
      testRDD.internalSparkContext
    }
  }
} 
Developer: jparkie | Project: Spark2Cassandra | Lines: 17 | Source: SparkCassRDDFunctionsSpec.scala


Example 9: SparkCassDataFrameFunctionsSpec

//Set the package name and import the required dependencies
package com.github.jparkie.spark.cassandra.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class SparkCassDataFrameFunctionsSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.cassandra.sql" must {
    "lift DataFrame into SparkCassDataFrameFunctions" in {
      val sqlContext = new SQLContext(sc)

      import sqlContext.implicits._

      val testRDD = sc.parallelize(1 to 25)
        .map(currentNumber => (currentNumber.toLong, s"Hello World: $currentNumber!"))
      val testDataFrame = testRDD.toDF("test_key", "test_value")

      // If internalSparkContext is available, RDD was lifted.
      testDataFrame.internalSparkContext
    }
  }
} 
Developer: jparkie | Project: Spark2Cassandra | Lines: 23 | Source: SparkCassDataFrameFunctionsSpec.scala


Example 10: TransformationTestWithSparkTestingBase

//Set the package name and import the required dependencies
package com.chapter16.SparkTesting

import org.scalatest.Assertions._
import org.apache.spark.rdd.RDD
import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class TransformationTestWithSparkTestingBase extends FunSuite with SharedSparkContext {
  def tokenize(line: RDD[String]) = {
    line.map(x => x.split(' ')).collect()
  }

  test("works, obviously!") {
    assert(1 == 1)
  }

  test("Words counting") {
    assert(sc.parallelize("Hello world My name is Reza".split("\\W")).map(_ + 1).count == 6)
  }

  test("Testing RDD transformations using a shared Spark Context") {
    val input = List("Testing", "RDD transformations", "using a shared", "Spark Context")
    val expected = Array(Array("Testing"), Array("RDD", "transformations"), Array("using", "a", "shared"), Array("Spark", "Context"))
    val transformed = tokenize(sc.parallelize(input))
    assert(transformed === expected)
  }
} 
Developer: PacktPublishing | Project: Scala-and-Spark-for-Big-Data-Analytics | Lines: 28 | Source: TransformationTestWithSparkTestingBase.scala


Example 11: WordCountTest (from a giter8 project template; $organization$ and $name$ are template placeholders filled in when the template is applied)

//Set the package name and import the required dependencies
package $organization$.$name$



import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.FunSuite

class WordCountTest extends FunSuite with SharedSparkContext {
  test("word count with Stop Words Removed"){
    val linesRDD = sc.parallelize(Seq(
      "How happy was the panda? You ask.",
      "Panda is the most happy panda in all the#!?ing land!"))

    val stopWords: Set[String] = Set("a", "the", "in", "was", "there", "she", "he")
    val splitTokens: Array[Char] = "#%?!. ".toCharArray

    val wordCounts = WordCount.withStopWordsFiltered(
      linesRDD, splitTokens, stopWords)
    val wordCountsAsMap = wordCounts.collectAsMap()
    assert(!wordCountsAsMap.contains("the"))
    assert(!wordCountsAsMap.contains("?"))
    assert(!wordCountsAsMap.contains("#!?ing"))
    assert(wordCountsAsMap.contains("ing"))
    assert(wordCountsAsMap.get("panda").get.equals(3))
  }
} 
Developer: holdenk | Project: sparkProjectTemplate.g8 | Lines: 27 | Source: WordCountTest.scala


Example 12: AppTest

//Set the package name and import the required dependencies
package com.github.dongjinleekr.spark.dataset

import org.scalatest.FunSuite
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}

/*
 * see: http://blog.cloudera.com/blog/2015/09/making-apache-spark-testing-easy-with-spark-testing-base/
 * see: https://github.com/holdenk/spark-testing-base/wiki/SharedSparkContext
 */
class AppTest extends FunSuite with SharedSparkContext {
	test("test initializing spark context") {
		val list = List(1, 2, 3, 4)
		val rdd = sc.parallelize(list)

		assert(rdd.count === list.length)
	}
} 
Developer: dongjinleekr | Project: spark-dataset | Lines: 18 | Source: AppTest.scala


Example 13: PackageSpec

//Set the package name and import the required dependencies
package com.github.jparkie.spark.elasticsearch.sql

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.SQLContext
import org.scalatest.{ MustMatchers, WordSpec }

class PackageSpec extends WordSpec with MustMatchers with SharedSparkContext {
  "Package com.github.jparkie.spark.elasticsearch.sql" must {
    "lift DataFrame into SparkEsDataFrameFunctions" in {

      val sqlContext = new SQLContext(sc)

      val inputData = Seq(
        ("TEST_VALUE_1", 1),
        ("TEST_VALUE_2", 2),
        ("TEST_VALUE_3", 3)
      )

      val outputDataFrame = sqlContext.createDataFrame(inputData)
        .toDF("key", "value")

      // If sparkContext is available, DataFrame was lifted into SparkEsDataFrameFunctions.
      outputDataFrame.sparkContext
    }
  }
} 
Developer: jparkie | Project: Spark2Elasticsearch | Lines: 27 | Source: PackageSpec.scala


Example 14: MapperSpec

//Set the package name and import the required dependencies
package com.github.log0ymxm.mapper

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.{ SparkSession, Row }
import org.apache.spark.mllib.linalg.distributed.{ CoordinateMatrix, IndexedRow, IndexedRowMatrix, MatrixEntry }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector, Vectors }

class MapperSpec extends FunSuite with SharedSparkContext {

  test("simple mapper on noisy circle") {
    val spark = SparkSession.builder().getOrCreate()

    val fileLoc = getClass.getClassLoader.getResource("circles.csv").getPath()
    val circle = spark.read
      .option("header", false)
      .option("inferSchema", true)
      .csv(fileLoc)

    assert(circle.count == 400)

    val indexedRDD = circle.rdd.zipWithIndex.map {
      case (Row(x: Double, y: Double), i) =>
        val v: Vector = new DenseVector(Array(x, y))
        IndexedRow(i, v)
    }
    val matrix = new IndexedRowMatrix(indexedRDD)
    val similarities = matrix.toCoordinateMatrix
      .transpose()
      .toIndexedRowMatrix()
      .columnSimilarities()
    val distances = new CoordinateMatrix(
      similarities
        .entries
        .map((entry) => new MatrixEntry(entry.i, entry.j, 1 - entry.value))
    )

    val filtration = new IndexedRowMatrix(indexedRDD.map({ row =>
      IndexedRow(row.index, new DenseVector(Array(
        Vectors.norm(row.vector, 2)
      )))
    }))

    //Mapper.writeAsJson(graph, "mapper-vis/circle-graph.json")
    val graph = Mapper.mapper(sc, distances, filtration, 100, 2.0)

    assert(graph.vertices.count == 160)
    assert(graph.edges.count == 327)
  }
} 
Developer: log0ymxm | Project: spark-mapper | Lines: 51 | Source: MapperSpec.scala


Example 15: CoverSpec

//Set the package name and import the required dependencies
package com.github.log0ymxm.mapper

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.linalg.distributed.{ IndexedRow, IndexedRowMatrix }

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext

class CoverSpec extends FunSuite with SharedSparkContext {
  test("cover") {
    val rdd = sc.parallelize((0 to 10).toSeq)
    val filtration = new IndexedRowMatrix(
      rdd.map({ x =>
        new IndexedRow(x, new DenseVector(Array(x * 2, scala.math.sin(x))))
      })
    )

    val cover = new Cover(filtration, 4, 0.5)

    assert(cover.numCoverSegments == 16)
    assert(cover.filterRanges(0) == NumericBoundary(0.0, 20.0))
    assert(cover.filterRanges(1).lower >= -1.0)
    assert(cover.filterRanges(1).upper <= 1.0)

    assert(cover.coverAssignment(new DenseVector(Array(8.33, 0.5))) == List(CoverSegmentKey(6), CoverSegmentKey(7)))

  }
} 
Developer: log0ymxm | Project: spark-mapper | Lines: 29 | Source: CoverSpec.scala


Example 16: SparkTestBase

//Set the package name and import the required dependencies
package comp.bio.aging.crispr

import com.holdenkarau.spark.testing.SharedSparkContext
import org.bdgenomics.formats.avro.{Contig, NucleotideContigFragment}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpec}


class SparkTestBase extends WordSpec with Matchers with BeforeAndAfterAll with SharedSparkContext{

  def sparkContext = sc

  def contig() = {
    val c = new Contig()
    c.setContigName("test")
    c
  }

  protected def makeFragment(str: String, start: Long) = {
    NucleotideContigFragment.newBuilder()
      .setContig(contig())
      .setFragmentStartPosition(start)
      .setFragmentLength(str.length: Long)
      .setFragmentSequence(str)
      .setFragmentEndPosition(start + str.length)
      .build()
  }

  def dnas2fragments(dnas: Seq[String]): List[NucleotideContigFragment] = {
    val (_, frags) = dnas.foldLeft((0L, List.empty[NucleotideContigFragment]))
    {
      case ((start, acc), str) => (start + str.length, makeFragment(str, start)::acc)
    }
    frags.reverse
  }
} 
Developer: antonkulaga | Project: CRISPR | Lines: 36 | Source: SparkTestBase.scala


Example 17: SharedConfig

//Set the package name and import the required dependencies
import com.holdenkarau.spark.testing.{LocalSparkContext, SharedSparkContext}
import org.apache.spark.SparkConf
import org.scalatest.{BeforeAndAfterAll, Suite}


trait SharedConfig extends BeforeAndAfterAll with SharedSparkContext with HiveCleanup {
  self: Suite =>

  override val conf = new SparkConf()
    .setMaster("local[*]")
    .setAppName("testApp")
    .set("spark.ui.enabled", "false")
    .set("spark.app.id", appID)
    .set("spark.driver.memory", "2G")
    .set("spark.executor.memory", "2G")
    .set("spark.scheduler.minRegisteredResourcesRatio", "1")

  override def beforeAll(): Unit = {
    super.beforeAll()
//    System.setProperty("spark.testing", "true")
  }

  override def afterAll(): Unit = {
    try {
      LocalSparkContext.stop(sc)
    } finally {
      super.afterAll()
    }
  }
} 
Developer: lordlinus | Project: sample-spark-template | Lines: 31 | Source: SharedConfig.scala


Example 18: IndexReadRDDTest

//Set the package name and import the required dependencies
package top.myetl.lucenerdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
import top.myetl.lucenerdd.rdd.IndexReadRDD
import top.myetl.lucenerdd.util.{Constants, LuceneRDDKryoRegistrator}


class IndexReadRDDTest extends FlatSpec
  with Matchers
  with BeforeAndAfterEach
  with SharedSparkContext {


  override def beforeAll(): Unit = {
    conf.set(Constants.HdfsBaseDirKey, "hdfs://ubuntu:9000/sparklu/")
    conf.setAppName("test2app")
    LuceneRDDKryoRegistrator.registerKryoClasses(conf)
    super.beforeAll()
  }

  "IndexReadRDD partitions " should "test new IndexReadRDD" in{
    val rdd = new IndexReadRDD(sc, "w1")
    rdd.cache()
    println(rdd.count())
    println(rdd.count())
  }

} 
Developer: myetl | Project: sparkLu | Lines: 30 | Source: IndexReadRDDTest.scala


Example 19: LuceneRDDQueryTest

//Set the package name and import the required dependencies
package top.myetl.lucenerdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.lucene.document.Document
import org.apache.lucene.search.ScoreDoc
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
import top.myetl.lucenerdd.convert.DocToBean
import top.myetl.lucenerdd.query.MyQuery
import top.myetl.lucenerdd.query.MyQuery.term
import top.myetl.lucenerdd.rdd.{IndexReadRDD, LuceneRDD}
import top.myetl.lucenerdd.util.{Constants, LuceneRDDKryoRegistrator}


class LuceneRDDQueryTest extends FlatSpec
  with Matchers
  with BeforeAndAfterEach
  with SharedSparkContext {


  override def beforeAll(): Unit = {
    conf.set(Constants.HdfsBaseDirKey, "hdfs://ubuntu:9000/sparklu/")
    conf.setAppName("test2app")
    LuceneRDDKryoRegistrator.registerKryoClasses(conf)
    super.beforeAll()
  }

  val convert = new DocToBean[String] {
    override def toBean(score: ScoreDoc, doc: Document): String = score.doc.toString+" -> "+doc.get("name")
  }

  "Query by step" should "query by step" in{
    val rdd = new IndexReadRDD(sc, "w1")
    rdd.cache()
    println(rdd.count())
    println(rdd.count())

    val newRDD = new LuceneRDD[String](rdd)(convert)

    newRDD.query(term("name", "Person2")).take(8).foreach(println(_))

  }

  "Simple" should "simple api for query" in{
    val rdd: LuceneRDD[String] = sc.luceneRDD("w1")(convert)
    println(rdd.count())

    rdd.query(MyQuery.matchAll).take(8).foreach(println(_))
  }

} 
Developer: myetl | Project: sparkLu | Lines: 51 | Source: LuceneRDDQueryTest.scala



Note: The com.holdenkarau.spark.testing.SharedSparkContext examples in this article were compiled from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult the corresponding project's license before redistributing or using the code; do not reproduce without permission.

