• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Scala Seconds类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Scala中org.apache.spark.streaming.Seconds的典型用法代码示例。如果您正苦于以下问题:Scala Seconds类的具体用法?Scala Seconds怎么用?Scala Seconds使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Seconds类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。

示例1: KMeansClusteringApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

/**
 * Streaming k-means over whitespace-separated records read from a text socket.
 *
 * Each batch is split at random into a test part (weight 0.3) and a training
 * part (the remainder). The model is trained on the training features and
 * used to predict a cluster for every test record.
 */
object KMeansClusteringApp {

  /**
   * Entry point.
   *
   * @param args exactly four values: application name, batch interval in
   *             seconds, socket hostname and socket port
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      System.err.println(
        "Usage: KMeansClusteringApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    // Drop lines containing "NaN" and records whose second column is "0".
    val substream = ssc.socketTextStream(hostname, port.toInt)
      .filter(!_.contains("NaN"))
      .map(_.split(" "))
      .filter(f => f(1) != "0")

    // Keep 19 columns: the first selected one (original column 1) becomes the
    // label, the remaining 18 become the feature vector. Only labels 1, 2 and 3
    // are retained.
    val orientationStream = substream
      .map(f => Seq(1, 4, 5, 6, 10, 11, 12, 20, 21, 22, 26, 27, 28, 36, 37, 38, 42, 43, 44).map(i => f(i)).toArray)
      .map(arr => arr.map(_.toDouble))
      .filter(f => f(0) == 1.0 || f(0) == 2.0 || f(0) == 3.0)
      .map(f => LabeledPoint(f(0), Vectors.dense(f.slice(1, f.length))))

    val test = orientationStream.transform(rdd => rdd.randomSplit(Array(0.3, 0.7))(0))
    // Training data is whatever is left of the batch after removing the test records.
    val train = orientationStream.transformWith(test, (r1: RDD[LabeledPoint], r2: RDD[LabeledPoint]) => r1.subtract(r2)).cache()

    // 18 matches the dimension of the feature vectors built above.
    val model = new StreamingKMeans()
      .setK(3)
      .setDecayFactor(0)
      .setRandomCenters(18, 0.0)

    model.trainOn(train.map(v => v.features))

    // A DStream is only evaluated when an output operation is registered on it;
    // without print() the predictions were defined but never computed.
    model.predictOnValues(test.map(v => (v.label, v.features))).print()

    ssc.start()
    ssc.awaitTermination()
  }

}
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:54,代码来源:L9-10KMeans.scala


示例2: Consumer

//设置package包名称以及导入依赖的类
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SparkSession

/**
 * Reads comma-separated records from the Kafka topic "streaming" and prints
 * the prediction of a previously saved SVM model for each record.
 */
object Consumer {

  /**
   * Entry point; `args` is not read.
   *
   * For every record value the first comma-separated field is discarded and
   * the remaining fields are parsed as doubles to form the feature vector.
   */
  def main(args: Array[String]): Unit = {

    val topics = Array("streaming")

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val streamingContext = new StreamingContext(
      new SparkConf().setMaster("local[8]").setAppName("KafkaTest"),
      Seconds(1))

    // Direct (receiver-less) Kafka input stream.
    val kafkaStream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    val session = SparkSession.builder().master("local[8]").appName("KafkaTest").getOrCreate()
    val model = SVMModel.load(session.sparkContext, "/home/xiaoyu/model")

    kafkaStream
      .map(record => (record.key, record.value))
      .foreachRDD { batch =>
        // Each batch is collected to the driver and scored there.
        batch.collect().foreach { case (_, payload) =>
          val features = payload.split(',').map(_.toDouble).tail
          println(model.predict(Vectors.dense(features)))
        }
      }

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
开发者ID:XiaoyuGuo,项目名称:DataFusionClass,代码行数:55,代码来源:Consumer.scala


示例3: StatefulWordcount

//设置package包名称以及导入依赖的类
package com.test.spark

import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.kafka010.ConsumerStrategies
import org.apache.spark.streaming.kafka010.LocationStrategies
import org.apache.spark.streaming.kafka010.KafkaUtils

/**
 * Running word count over the Kafka topic "widas".
 *
 * Counts are accumulated across batches with `updateStateByKey`, which is why
 * a checkpoint directory is configured before the stream is started.
 */
object StatefulWordcount {

  /**
   * Entry point; `args` is not read.
   *
   * An explicit `main` is used instead of `extends App`: the Spark
   * documentation advises against `scala.App` for Spark applications.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Stateful Wordcount").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(10))
    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "group.id" -> "mygroup",
      "auto.offset.reset" -> "earliest")
    val topics = Set("widas")
    val inputKafkaStream = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))

    // Record values are already Strings, so they can be split directly.
    val words = inputKafkaStream.flatMap(record => record.value().split(" "))
    val wordpairs = words.map(word => (word, 1))

    ssc.checkpoint("/Users/nagainelu/bigdata/jobs/WordCount_checkpoint")

    // New state = counts seen in this batch + previously accumulated count (0 if none).
    val updateFunc: (Seq[Int], Option[Int]) => Option[Int] =
      (values, state) => Some(values.sum + state.getOrElse(0))

    val wordCounts = wordpairs.reduceByKey(_ + _).updateStateByKey(updateFunc)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:malli3131,项目名称:SparkApps,代码行数:31,代码来源:StatefulWordcount.scala


示例4: T01

//设置package包名称以及导入依赖的类
package streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}


/**
 * Per-batch word count over text lines read from a socket on localhost:9999.
 *
 * To try it, start a text server with `nc -lk 9999` and then run
 * `./bin/run-example streaming.T01 localhost 9999`.
 */
object T01 {

  /** Entry point; `args` is not read (host and port are hard-coded). */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
    val ssc = new StreamingContext(conf, Seconds(1))
    val lines = ssc.socketTextStream("localhost", 9999)
    // Split each line into words
    val words = lines.flatMap(_.split(" "))
    // Count each word in each batch
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
    // Print the first ten elements of each RDD generated in this DStream to the console
    wordCounts.print()
    ssc.start() // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }

}
开发者ID:IMJIU,项目名称:Spark1.6,代码行数:27,代码来源:T01.scala


示例5: Predict

//设置package包名称以及导入依赖的类
package com.databricks.apps.twitterClassifier

import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeansModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Command-line entry point: parses the options, creates the streaming context
 * and hands over to [[Predictor.doIt]].
 */
object Predict {
  // NOTE(review): `sc` is presumably provided by SparkSetup — confirm.
  import SparkSetup._

  /**
   * An explicit `main` is used instead of `extends App`: the Spark
   * documentation advises against `scala.App` for Spark applications.
   *
   * @param args command-line arguments, forwarded to `PredictOptions.parse`
   */
  def main(args: Array[String]): Unit = {
    val options = PredictOptions.parse(args)
    val ssc = new StreamingContext(sc, Seconds(options.intervalInSecs))
    Predictor.doIt(options, sc, ssc)
  }
}


/** Filters a live Twitter stream down to the tweets assigned to one k-means cluster. */
object Predictor {

  /**
   * Loads the cluster centers, attaches the filter to the Twitter stream and
   * blocks until the streaming context terminates.
   *
   * @param options supplies the model directory and the cluster number to keep
   * @param sc      Spark context used to read the saved cluster centers
   * @param ssc     streaming context on which the Twitter stream is created
   */
  def doIt(options: PredictOptions, sc: SparkContext, ssc: StreamingContext): Unit = {
    println("Initializing the KMeans model...")
    // The model is rebuilt from the cluster centers stored as an object file.
    val model: KMeansModel = new KMeansModel(sc.objectFile[Vector](options.modelDirectory.getCanonicalPath).collect)

    println("Materializing Twitter stream...")
    TwitterUtils.createStream(ssc, maybeTwitterAuth)
      .map(_.getText)
      .foreachRDD { rdd =>
        rdd.filter(t => model.predict(featurize(t)) == options.clusterNumber)
           .foreach(print)  // register DStream as an output stream and materialize it
      }
    println("Initialization complete, starting streaming computation.")
    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:35,代码来源:Predict.scala


示例6: StreamingWordCount

//设置package包名称以及导入依赖的类
package org.examples.scala.examples

import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
import org.apache.spark._
import org.apache.spark.SparkContext._


/** Per-batch word count over a socket stream, saved as text files and printed. */
object StreamingWordCount {

  /**
   * Runs the word count until the streaming context terminates.
   *
   * @param args at least two values: the Spark master and the output prefix
   *             passed to `saveAsTextFiles`; extra values are ignored
   */
  def run(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage BasicStreamingExample <master> <output>")
      // Stop here: continuing would fail with a MatchError on the
      // destructuring below instead of a clean usage error.
      sys.exit(1)
    }
    val Array(master, output) = args.take(2)

    val conf = new SparkConf().setMaster(master).setAppName("BasicStreamingExample")
    val ssc = new StreamingContext(conf, Seconds(30))

    val lines = ssc.socketTextStream("localhost" , 7777)
    val words = lines.flatMap(_.split(" "))
    val wc = words.map(x => (x, 1)).reduceByKey((x, y) => x + y)

    wc.saveAsTextFiles(output)
    wc.print()

    println("pandas: sscstart")
    ssc.start()
    println("pandas: awaittermination")
    ssc.awaitTermination()
    println("pandas: done!")
  }
}
开发者ID:jjmleiro,项目名称:learning-spark,代码行数:33,代码来源:StreamingWordCount.scala


示例7: KafkaStreaming

//设置package包名称以及导入依赖的类
package org.myorganization.spark.streaming


import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{StreamingContext, Seconds}
import org.apache.spark.streaming.kafka._
import kafka.serializer.StringDecoder



/**
 * Counts, per batch, how often each first-column value occurs among the Kafka
 * log lines whose seventh column starts with "logger".
 */
object KafkaStreaming {

    /**
     * Entry point.
     *
     * @param args sampling period in seconds, comma-separated topic list and
     *             Kafka bootstrap servers (see [[getParams]])
     */
    def main(args: Array[String]): Unit = {
        val (batchDuration, topics, bootstrapServers) = getParams(args)

        val sparkContext     = new SparkContext(new SparkConf().setAppName("gpKafkaStreaming"))
        val streamingContext = new StreamingContext(sparkContext, Seconds(batchDuration))

        val kafkaParams = Map[String, String](
            "bootstrap.servers" -> bootstrapServers,
            "auto.offset.reset" -> "smallest")
        val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
            streamingContext, kafkaParams, topics.split(",").toSet)

        // Only the message value is used; lines with fewer than 7 fields are dropped.
        val loggerSources = messages
            .map { case (_, value) => value.split("""\s+""") }
            .filter(fields => fields.length > 6)
            .map(fields => (fields(0), fields(6)))
            .filter(filterLogLines)
            .map { case (source, _) => source }

        loggerSources
            .map(source => (source, 1L))
            .reduceByKey(_ + _)
            .print(10)

        streamingContext.start()
        streamingContext.awaitTermination()
    }


    /**
     * Tells whether the second element of the pair matches `logger.+`,
     * i.e. starts with "logger" followed by at least one more character.
     */
    def filterLogLines(line: Tuple2[String, String]): Boolean =
        line._2.matches("""logger.+""")


    /**
     * Validates and converts the command-line arguments.
     *
     * Prints a usage message and exits with status 1 unless exactly three
     * arguments are given.
     *
     * @return (sampling period in seconds, topics, bootstrap servers)
     */
    def getParams(args: Array[String]): Tuple3[Int, String, String] = {
        if (args.length != 3) {
            System.err.println(s"""
                |Usage: spark-kafka.sh <sampling-period> <topics> <bootstrap-servers>
                |  <sampling-period>    is the duration of each batch (in seconds)
                |  <topics>             is a list of one or more kafka topics to consume from
                |  <bootstrap-servers>  is a list of one or more Kafka bootstrap servers
                |
                """.stripMargin)
            System.exit(1)
        }
        (args(0).toInt, args(1), args(2))
    }
}
开发者ID:gpapag,项目名称:spark-streaming-kafka,代码行数:57,代码来源:KafkaStreaming.scala


示例8: SimpleApp

//设置package包名称以及导入依赖的类
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Counts words arriving on a local socket with Spark SQL and writes every
 * batch's counts to the Elasticsearch resource "wordcount/wc".
 */
object SimpleApp {

  /** Entry point; `args` is not read. */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Sinmple Application")
    val sc = new SparkContext(conf)
    // Reuse the existing SparkContext. Building the StreamingContext from the
    // conf would create a second SparkContext, which is what the former
    // "spark.driver.allowMultipleContexts" setting was working around.
    val ssc = new StreamingContext(sc, Seconds(5))
    val lines = ssc.socketTextStream("localhost", 9999)
    val words = lines.flatMap(_.toLowerCase.split(" "))
    words.foreachRDD { rdd =>
      val sqlContext = SQLContext.getOrCreate(rdd.sparkContext)
      import sqlContext.implicits._
      val wordsDataFrame = rdd.toDF("words")
      wordsDataFrame.registerTempTable("allwords")
      val wcdf = sqlContext.sql("select words,count(*) as total from allwords group by words")
      wcdf.show()
      import org.elasticsearch.spark.sql._
      wcdf.saveToEs("wordcount/wc")
    }
    ssc.start()             // Start the computation
    ssc.awaitTermination()  // Wait for the computation to terminate
  }
}

 
开发者ID:mykumar,项目名称:SparkScalaInternalExperiements,代码行数:26,代码来源:SimpleApp.scala


示例9: Main

//设置package包名称以及导入依赖的类
import Fqueue.{FqueueReceiver, FqueueSender}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}


/**
 * Demo driver: a background thread keeps enqueuing a test message into a
 * local Fqueue server while a Spark Streaming receiver prints what it reads.
 */
object Main {

  /**
   * Enqueues "123" under "track_BOdao2015*" once per second, forever.
   *
   * The loop only ends through an exception (including interruption of the
   * sleep); the finally block makes sure the sender is stopped in that case.
   * Previously `stop()` sat after the infinite loop and could never run.
   */
  private def sendData(): Unit = {
    val fqueuSender = new FqueueSender("localhost:18740", 4, 4000)
    fqueuSender.connect()
    try {
      while (true) {
        fqueuSender.enQueue("track_BOdao2015*", "123")
        Thread.sleep(1000)
      }
    } finally {
      fqueuSender.stop()
    }
  }

  /** Dequeues one item from "track_BOdao2015*" and prints it, or "null" if there is none. */
  private def getData(): Unit = {
    val fqueueReceiver = new FqueueReceiver("localhost:18740", 4, 4000)
    fqueueReceiver.connect()
    try {
      val data = fqueueReceiver.deQueue("track_BOdao2015*")
      println(data.getOrElse("null"))
    } finally {
      fqueueReceiver.stop()
    }
  }

  /** Entry point; `args` is not read. */
  def main(args: Array[String]): Unit = {
    new Thread("fqueue sender") {
      override def run(): Unit = sendData()
    }.start()
    val config = new SparkConf().setAppName("testfqueue").setMaster("local[2]")
    val ssc = new StreamingContext(config, Seconds(5))
    val lines = ssc.receiverStream(new FqueueStreamingReceiver("localhost:18740", 4, 4000))
    lines.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TopSpoofer,项目名称:FqueueStreamingReceiver,代码行数:38,代码来源:Main.scala


示例10: SparkStreamKinesis

//设置package包名称以及导入依赖的类
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK_2
import org.apache.spark.streaming.kinesis._
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}


/** Prints the records of the Kinesis stream "sparrow-ci" (region us-east-1). */
object SparkStreamKinesis {

  /** Entry point; `args` is not read. */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Kinesis").setMaster("local[4]")
    val ssc = new StreamingContext(conf, Seconds(1))

    println("Spark Streaming")

    val kinesisStream = KinesisUtils.createStream(ssc, "sparrow-ci",
      "sparrow-ci",
      "kinesis.us-east-1.amazonaws.com",
      "us-east-1",
      LATEST,
      Duration(2000),
      MEMORY_AND_DISK_2)

    kinesisStream.print()

    // Decode with an explicit charset instead of the platform default, and
    // keep each record as one String rather than exploding it into characters.
    // NOTE(review): UTF-8 is assumed for the payload — confirm with the producer.
    kinesisStream
      .map(bytes => new String(bytes, java.nio.charset.StandardCharsets.UTF_8))
      .foreachRDD(_.collect().foreach(print))

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:noppanit,项目名称:spark-streaming-kinesis-example,代码行数:34,代码来源:main.scala


示例11: LogAnalyzerWindowed

//设置package包名称以及导入依赖的类
package com.databricks.apps.logs

import scala.math.Ordering

import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.DStream


/**
 * Keeps the statistics of the most recent window of access logs.
 *
 * @param windowLength  window length in seconds
 * @param slideInterval slide interval in seconds
 */
class LogAnalyzerWindowed(val windowLength: Long, val slideInterval: Long) extends AnalyzeFunctions with Serializable {

  import LogStatistics.EMPTY_LOG_STATISTICS

  // Overwritten each time a window is processed.
  var logStatistics = EMPTY_LOG_STATISTICS

  /**
   * Registers the windowed computation on the given stream. For each window,
   * `logStatistics` is reset to the empty value when the window holds no
   * records and recomputed from the window's records otherwise.
   */
  def processAccessLogs(accessLogsDStream: DStream[ApacheAccessLog]): Unit = {
    val windowed = accessLogsDStream.window(Seconds(windowLength), Seconds(slideInterval))
    windowed.foreachRDD { accessLogs =>
      logStatistics =
        if (accessLogs.count() != 0) {
          // Orders (endpoint, count) pairs by their count.
          val byCount = Ordering.by[(String, Long), Long](_._2)
          LogStatistics(
            contentSizeStats(accessLogs).get,
            responseCodeCount(accessLogs).take(100).toMap,
            filterIPAddress(ipAddressCount(accessLogs)).take(100),
            endpointCount(accessLogs).top(10)(byCount).toMap)
        } else {
          EMPTY_LOG_STATISTICS
        }
    }
  }

  /** Returns the statistics of the last processed window. */
  def getLogStatistics: LogStatistics = logStatistics
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:32,代码来源:LogAnalyzerWindowed.scala


示例12: LogAnalyzerStreamingImportDirectory

//设置package包名称以及导入依赖的类
package com.databricks.apps.logs.chapter2

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

import com.databricks.apps.logs.{ApacheAccessLog, LogAnalyzerRDD}


/**
 * Analyzes access logs from files that appear in a monitored directory,
 * reporting statistics over a 30-second window every 10 seconds.
 */
object LogAnalyzerStreamingImportDirectory {
  val WINDOW_LENGTH = Seconds(30)
  val SLIDE_INTERVAL = Seconds(10)

  /**
   * Entry point. An explicit `main` is used instead of `extends App`: the
   * Spark documentation advises against `scala.App` for Spark applications.
   *
   * @param args the first value is the directory to monitor
   */
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      System.err.println("Usage: LogAnalyzerStreamingImportDirectory <directory>")
      sys.exit(1)
    }
    val directory = args(0)

    val spark = SparkSession
      .builder()
      .appName("Log Analyzer Import Streaming HDFS")
      .getOrCreate()
    val streamingContext = new StreamingContext(spark.sparkContext, SLIDE_INTERVAL)

    // This method monitors a directory for new files to read in for streaming.
    val logData: DStream[String] = streamingContext.textFileStream(directory)

    val accessLogsDStream: DStream[ApacheAccessLog] = logData.map(ApacheAccessLog.parseLogLine)
    val windowDStream: DStream[ApacheAccessLog] = accessLogsDStream.window(WINDOW_LENGTH, SLIDE_INTERVAL)

    val logAnalyzerRDD = LogAnalyzerRDD(spark)
    windowDStream.foreachRDD(accessLogs => {
      if (accessLogs.count() == 0) {
        println("No access logs received in this time interval")
      } else {
        val logStatistics = logAnalyzerRDD.processRdd(accessLogs)
        logStatistics.printToStandardOut()
      }
    })

    // Start the streaming server.
    streamingContext.start() // Start the computation
    streamingContext.awaitTermination() // Wait for the computation to terminate
  }
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:42,代码来源:LogAnalyzerStreamingImportDirectory.scala


示例13: DashboardController

//设置package包名称以及导入依赖的类
package controllers

import javax.inject.{Inject, Singleton}

import akka.actor.ActorSystem
import akka.stream.Materializer
import org.apache.spark.streaming.Seconds
import play.api.Configuration
import play.api.inject.ApplicationLifecycle
import play.api.libs.streams.ActorFlow
import play.api.mvc._
import services.{SparkService, StreamActor}

/**
 * Serves the dashboard page and a WebSocket backed by a windowed count of
 * tweets per country.
 */
@Singleton()
class DashboardController @Inject() ()(implicit system: ActorSystem, materializer: Materializer, configuration : Configuration, lifecycle: ApplicationLifecycle) extends Controller {

  val sparkService = SparkService.getInstance(configuration, lifecycle)

  /** Renders the index view. */
  def index = Action {
    Ok(views.html.index())
  }

  /**
   * Opens a WebSocket whose actor receives, for tweets filtered on "Euro",
   * the number of tweets per country over a 60-second window, sorted by
   * descending count. Tweets without a place are ignored.
   */
  def stream = WebSocket.accept[String, String] { request =>
    val tweets = sparkService.getTwitterStream(Seq("Euro"))

    // Country of each tweet that carries a place.
    val countries = tweets
      .map(tweet => Option(tweet.getPlace).map(_.getCountry))
      .filter(_.isDefined)
      .map(_.get)

    val rankedCounts = countries
      .map(country => (country, 1))
      .reduceByKeyAndWindow(_ + _, Seconds(60))
      .transform(counts => counts.sortBy(_._2, ascending = false))

    ActorFlow.actorRef(out => StreamActor.props(out, rankedCounts))
  }

}
开发者ID:OpenCompare,项目名称:live-pcm,代码行数:45,代码来源:DashboardController.scala


示例14: EnrichmentInAStream

//设置package包名称以及导入依赖的类
package com.malaska.spark.training.streaming.dstream

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Skeleton of a streaming enrichment job: words read from a socket are
 * processed partition by partition, with placeholders where a connection to
 * a storage layer would be opened and written to.
 */
object EnrichmentInAStream {

  /**
   * Entry point.
   *
   * The body used to be wrapped in a second, nested `main` that was never
   * called, so running the program did nothing.
   *
   * @param args socket host, socket port and checkpoint folder, in that order
   */
  def main(args: Array[String]): Unit = {
    val host = args(0)
    val port = args(1)
    val checkpointFolder = args(2)

    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        .master("local")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host","127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }

    // Build on the session's SparkContext. Passing its conf instead would
    // try to create a second SparkContext next to the one the session owns.
    val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(1))
    ssc.checkpoint(checkpointFolder)

    val lines = ssc.socketTextStream(host, port.toInt)
    val words = lines.flatMap(_.split(" "))
    words.foreachRDD(rdd => rdd.foreachPartition(wordIt => {
      //make connection to storage layer
      // May use static connection
      wordIt.foreach(word => {
        // Placeholder: the upper-cased word is computed but not yet stored.
        word.toUpperCase
        //write to storage location
      })
    }))

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TedBear42,项目名称:spark_training,代码行数:54,代码来源:EnrichmentInAStream.scala


示例15: CountingInAStreamExpBatchCounting

//设置package包名称以及导入依赖的类
package com.malaska.spark.training.streaming.dstream

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Per-batch word count over a socket stream; each batch's counts are
 * collected to the driver and printed between braces.
 */
object CountingInAStreamExpBatchCounting {
  // Silence the "org" and "akka" loggers.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  /**
   * Entry point.
   *
   * @param args socket host, socket port and checkpoint folder, in that order
   */
  def main(args:Array[String]): Unit = {
    val host = args(0)
    val port = args(1)
    val checkpointFolder = args(2)

    val isLocal = true

    // Settings shared by the local and the non-local session.
    val baseBuilder = SparkSession.builder
      .appName("my-spark-app")
      .config("spark.some.config.option", "config-value")
      .enableHiveSupport()

    val sparkSession =
      if (isLocal) {
        baseBuilder
          .master("local")
          .config("spark.driver.host","127.0.0.1")
          .config("spark.sql.parquet.compression.codec", "gzip")
          .master("local[3]") // set last, so this is the master that applies
          .getOrCreate()
      } else {
        baseBuilder.getOrCreate()
      }

    val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(2))
    ssc.checkpoint(checkpointFolder)

    val wordCounts = ssc.socketTextStream(host, port.toInt)
      .flatMap(_.toLowerCase.split(" "))
      .map(token => (token, 1))
      .reduceByKey(_ + _)

    wordCounts.foreachRDD { batch =>
      println("{")
      val counts = batch.collect()
      println(s"  size:${counts.length}")
      for (entry <- counts) println(s"  $entry")
      println("}")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TedBear42,项目名称:spark_training,代码行数:59,代码来源:CountingInAStreamExpBatchCounting.scala


示例16: StatefulWordCount

//设置package包名称以及导入依赖的类
package com.github.melentye.spark.streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Running word count over a socket stream on localhost:9999, with the
 * streaming context recovered from (or checkpointed to) "spark-checkpoints".
 */
object StatefulWordCount {

  private val checkpointDirectory = "spark-checkpoints"

  /** Entry point; `args` is not read. */
  def main(args: Array[String]): Unit = withStreamingContext { ssc =>
    // New running count = previous count (0 if absent) + occurrences in this batch.
    val accumulate: (Seq[Int], Option[Int]) => Option[Int] =
      (newValues, runningCount) => Some(runningCount.getOrElse(0) + newValues.sum)

    ssc.socketTextStream("localhost", 9999)
      .flatMap(_.split(" "))
      .map(word => (word, 1))
      .updateStateByKey[Int](accumulate)
      .print()
  }

  /**
   * Obtains a streaming context via `StreamingContext.getOrCreate`, starts it
   * and waits for termination.
   *
   * @param f wires the stream processing onto a context; it is invoked only
   *          from the creating function passed to `getOrCreate`
   */
  def withStreamingContext(f: StreamingContext => Unit): Unit = {
    val freshContext: () => StreamingContext = () => {
      val sparkConf = new SparkConf()
        .setMaster("local[*]")
        .setAppName("StatefulWordCount")
      val created = new StreamingContext(sparkConf, Seconds(1))
      f(created)
      created.checkpoint(checkpointDirectory)
      created
    }

    val context = StreamingContext.getOrCreate(checkpointDirectory, freshContext)
    context.start()
    context.awaitTermination()
  }
}
开发者ID:melentye,项目名称:spark-playground,代码行数:39,代码来源:StatefulWordCount.scala


示例17: RedditDemoSpark

//设置package包名称以及导入依赖的类
import com.github.catalystcode.fortis.spark.streaming.reddit.{RedditAuth, RedditUtils}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Prints Reddit posts matching the keyword "healthcare" as they are streamed.
 *
 * @param auth credentials handed to `RedditUtils.createPageStream`
 */
class RedditDemoSpark(auth: RedditAuth) {

  /** Builds the contexts, prints every post prefixed with "Post: " and blocks until termination. */
  def run(): Unit = {
    // "local[*]" is only used when no master has been configured.
    val sparkConf = new SparkConf()
      .setAppName("Reddit Application")
      .setIfMissing("spark.master", "local[*]")
    val streamingContext = new StreamingContext(new SparkContext(sparkConf), Seconds(1))

    val posts = RedditUtils.createPageStream(
      auth, List("healthcare"), streamingContext, pollingPeriodInSeconds = 10)
    posts.map(post => s"Post: $post").print()

    // run forever
    streamingContext.start()
    streamingContext.awaitTermination()
  }

}
开发者ID:CatalystCode,项目名称:streaming-reddit,代码行数:21,代码来源:RedditDemoSpark.scala


示例18: StreamingSimpleModel

//设置package包名称以及导入依赖的类
package com.bigchange.streaming

import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.streaming.{Seconds, StreamingContext}


/**
 * Online linear regression over a socket stream.
 *
 * Each input line is `<label>\t<f1>,<f2>,...`. The model is trained on every
 * batch and the MSE / RMSE of the latest model on that batch is printed.
 */
object StreamingSimpleModel {

  /** Entry point; `args` is not read. */
  def main(args: Array[String]): Unit = {

    val ssc = new StreamingContext("local","test",Seconds(10))
    val stream = ssc.socketTextStream("localhost",9999)
    val numberFeatures = 100
    // Start from an all-zero weight vector of the expected dimension.
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(numberFeatures))
      .setNumIterations(1)
      .setStepSize(0.01)

    val labeledStream = stream.map { event =>
      val split = event.split("\t")
      val y = split(0).toDouble
      val features = split(1).split(",").map(_.toDouble)
      LabeledPoint(label = y, features = Vectors.dense(features))
    }

    model.trainOn(labeledStream)

    // Per-point error (prediction - label), using the latest model available
    // when the batch is transformed.
    val predictAndTrue = labeledStream.transform { rdd =>
      val latestModel = model.latestModel()
      rdd.map { point =>
        val predict = latestModel.predict(point.features)
        predict - point.label
      }
    }

    // Report the mean squared error and its square root for each batch.
    predictAndTrue.foreachRDD { rdd =>
      val mse = rdd.map(x => x * x).mean()
      val rmse = math.sqrt(mse)
      println(s"current batch, MSE: $mse, RMSE:$rmse")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:bigchange,项目名称:AI,代码行数:51,代码来源:StreamingSimpleModel.scala


示例19: SparkFlatSpec

//设置package包名称以及导入依赖的类
package it.agilelab.bigdata.wasp.consumers

import java.nio.file.Files

import akka.actor.ActorSystem
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{ConfigMap, BeforeAndAfterAllConfigMap, BeforeAndAfter, FlatSpec}


/**
 * Base spec that creates a streaming context, Spark context, SQL context and
 * actor system before the suite runs and shuts them down afterwards.
 */
class SparkFlatSpec extends FlatSpec with BeforeAndAfterAllConfigMap with ScalaFutures  {
  private val master = "local[2]"
  private val appName = "example-spark-streaming"
  private val batchDuration = Seconds(1)
  private val checkpointDir = Files.createTempDirectory(appName).toString

  protected var ssc: StreamingContext = _
  protected var sc: SparkContext = _
  protected var sqlContext: SQLContext = _
  protected var actorSystem = ActorSystem()


  /** Creates the streaming context and derives the Spark and SQL contexts from it. */
  override def beforeAll(cm: ConfigMap): Unit = {
    // Use the `master` constant instead of repeating the "local[2]" literal.
    val sparkConf: SparkConf = new SparkConf()
      .setMaster(master).setAppName("Instant Matching")

    ssc = new StreamingContext(sparkConf, batchDuration)

    sc = ssc.sparkContext

    sqlContext = new SQLContext(sc)
  }

  /** Waits up to one second for the stream to finish, then stops the contexts and the actor system. */
  override def afterAll(cm: ConfigMap): Unit = {
    if (ssc != null) {
      ssc.awaitTerminationOrTimeout(1000)
      ssc.stop()
    }
    if (sc != null) {
      sc.stop()
      sc.clearCallSite()
    }
    actorSystem.shutdown()
  }
}
开发者ID:agile-lab-dev,项目名称:wasp,代码行数:48,代码来源:SparkFlatSpec.scala


示例20: main

//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.scripts

import java.io.File

import org.apache.hadoop.fs.FileSystem
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Minutes, Seconds}
import org.apache.spark.{SparkConf, SparkContext}


/**
 * Common scaffolding for runnable scripts: lazily created Spark, HDFS and SQL
 * handles, streaming settings read from system properties, and a `main` that
 * prepares logging.
 */
trait Script {

  /** Value of "tw.streaming.timeout" (given in seconds) in milliseconds, or -1 when unset. */
  protected lazy val streamingTimeout = sys.props.get("tw.streaming.timeout") // in seconds
    .map(_.toLong * 1000).getOrElse(-1L)

  /** Value of "tw.streaming.interval" (given in seconds) as a duration, or 5 minutes when unset. */
  protected lazy val streamingInterval = sys.props.get("tw.streaming.interval") // in seconds
    .map(s => Seconds(s.toLong)).getOrElse(Minutes(5))

  /** "Tweeather_" followed by the simple class name without a trailing "$". */
  protected lazy val scriptName = "Tweeather_" + getClass.getSimpleName.stripSuffix("$")

  /** Spark context; every setting below applies only if it is not already configured. */
  protected lazy val sc = new SparkContext(
    new SparkConf()
      .setIfMissing("spark.app.name", scriptName)
      .setIfMissing("spark.eventLog.dir", "tw/logs")
      .setIfMissing("spark.eventLog.enabled", "true")
      .setIfMissing("spark.streaming.stopGracefullyOnShutdown", "true")
      .setIfMissing("spark.streaming.blockInterval", "30s")
  )

  protected lazy val hdfs = FileSystem.get(sc.hadoopConfiguration)
  protected lazy val sqlc = new SQLContext(sc)

  /**
   * Configures log4j from the bundled properties file, when it can be found,
   * and makes sure the local "tw/logs" directory exists.
   *
   * @param args command-line arguments; not read here
   */
  def main(args: Array[String]): Unit = {
    // Log4j properties
    Option(getClass.getResource("/com/aluxian/tweeather/res/log4j.properties")) match {
      case Some(url) => PropertyConfigurator.configure(url)
      case None => System.err.println("Unable to load log4j.properties")
    }

    // Ensure the event log directory exists
    new File("tw/logs").mkdirs()
  }

}
开发者ID:cnajeefa,项目名称:Tourism-Sentiment-Analysis,代码行数:44,代码来源:Script.scala



注:本文中的org.apache.spark.streaming.Seconds类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Scala AnyContent类代码示例发布时间:2022-05-23
下一篇:
Scala ProducerRecord类代码示例发布时间:2022-05-23
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap