本文整理汇总了Scala中org.apache.spark.streaming.Seconds类的典型用法代码示例。如果您正苦于以下问题:Scala Seconds类的具体用法?Scala Seconds怎么用?Scala Seconds使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Seconds类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: KMeansClusteringApp
//设置package包名称以及导入依赖的类
package org.apress.prospark
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
/**
 * Streaming k-means example: reads space-separated records from a text socket,
 * keeps a fixed subset of columns, trains a [[StreamingKMeans]] model on ~70% of
 * every batch and predicts cluster ids for the remaining ~30%.
 *
 * Arguments: `<appname> <batchInterval> <hostname> <port>`, where `batchInterval`
 * is the batch duration in seconds.
 */
object KMeansClusteringApp {

  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      System.err.println(
        "Usage: KMeansClusteringApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    // Drop records containing NaN and records whose second field is "0".
    val substream = ssc.socketTextStream(hostname, port.toInt)
      .filter(!_.contains("NaN"))
      .map(_.split(" "))
      .filter(f => f(1) != "0")

    // Column 1 becomes the label (position 0 after selection); the other 18 columns
    // are the feature vector, matching setRandomCenters(18, ...) below.
    val selectedColumns =
      Seq(1, 4, 5, 6, 10, 11, 12, 20, 21, 22, 26, 27, 28, 36, 37, 38, 42, 43, 44)
    val orientationStream = substream
      .map(f => selectedColumns.map(i => f(i)).toArray)
      .map(arr => arr.map(_.toDouble))
      .filter(f => f(0) == 1.0 || f(0) == 2.0 || f(0) == 3.0)
      .map(f => LabeledPoint(f(0), Vectors.dense(f.slice(1, f.length))))

    // Per batch: a random ~30% sample is the test set, everything else is training data.
    val test = orientationStream.transform(rdd => rdd.randomSplit(Array(0.3, 0.7))(0))
    val train = orientationStream
      .transformWith(test, (r1: RDD[LabeledPoint], r2: RDD[LabeledPoint]) => r1.subtract(r2))
      .cache()

    val model = new StreamingKMeans()
      .setK(3)
      .setDecayFactor(0)
      .setRandomCenters(18, 0.0)
    model.trainOn(train.map(v => v.features))

    val prediction = model.predictOnValues(test.map(v => (v.label, v.features)))
    // A DStream is only computed when an output operation is registered on it;
    // without this the predictions would never be evaluated.
    prediction.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:54,代码来源:L9-10KMeans.scala
示例2: Consumer
//设置package包名称以及导入依赖的类
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SparkSession
/**
 * Consumes comma-separated records from the Kafka topic "streaming" and prints the
 * prediction of a previously saved [[SVMModel]] for each record.
 *
 * The first CSV column of every record is skipped; the remaining columns are the
 * numeric feature vector.
 */
object Consumer {

  def main(args: Array[String]): Unit = {
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("streaming")

    val sparkConf = new SparkConf().setMaster("local[8]").setAppName("KafkaTest")
    val streamingContext = new StreamingContext(sparkConf, Seconds(1))

    // Create a direct input stream
    val kafkaStream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    val spark = SparkSession.builder().master("local[8]").appName("KafkaTest").getOrCreate()
    val model = SVMModel.load(spark.sparkContext, "/home/xiaoyu/model")

    // Extract the value before collecting, so only plain strings are shipped to the driver.
    kafkaStream.map(record => record.value).foreachRDD { rdd =>
      rdd.collect().foreach { line =>
        // Drop the first column *before* parsing so that it does not have to be numeric.
        val features = line.split(',').tail.map(_.toDouble)
        println(model.predict(Vectors.dense(features)))
      }
    }

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
开发者ID:XiaoyuGuo,项目名称:DataFusionClass,代码行数:55,代码来源:Consumer.scala
示例3: StatefulWordcount
//设置package包名称以及导入依赖的类
package com.test.spark
import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.kafka010.ConsumerStrategies
import org.apache.spark.streaming.kafka010.LocationStrategies
import org.apache.spark.streaming.kafka010.KafkaUtils
/**
 * Running word count over the Kafka topic "widas": per-batch counts are merged into
 * a checkpointed running total with `updateStateByKey`.
 *
 * Uses an explicit `main` rather than `scala.App`, which the Spark documentation
 * advises against for application entry points.
 */
object StatefulWordcount {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Stateful Wordcount").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(10))
    // updateStateByKey needs a checkpoint directory to persist the running state.
    ssc.checkpoint("/Users/nagainelu/bigdata/jobs/WordCount_checkpoint")

    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
      "group.id" -> "mygroup",
      "auto.offset.reset" -> "earliest")
    val topics = Set("widas")

    val inputKafkaStream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))

    val words = inputKafkaStream.flatMap(record => record.value().split(" "))
    val wordpairs = words.map(word => (word, 1))

    // New state = sum of this batch's counts + previous running total (0 if none yet).
    val updateFunc: (Seq[Int], Option[Int]) => Option[Int] =
      (values, state) => Some(values.sum + state.getOrElse(0))

    val wordCounts = wordpairs.reduceByKey(_ + _).updateStateByKey(updateFunc)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:malli3131,项目名称:SparkApps,代码行数:31,代码来源:StatefulWordcount.scala
示例4: T01
//设置package包名称以及导入依赖的类
package streaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Minimal network word count: counts the words of every one-second batch read from
 * a text socket on localhost:9999.
 *
 * To try it, start a data server with `nc -lk 9999`, then run
 * `./bin/run-example streaming.T01 localhost 9999`.
 */
object T01 {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("NetworkWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // Split each line into words and count each word within the batch.
    val wordCounts = ssc
      .socketTextStream("localhost", 9999)
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey((left, right) => left + right)

    // Print the first ten elements of each RDD generated in this DStream to the console.
    wordCounts.print()

    ssc.start()            // start the computation
    ssc.awaitTermination() // block until the computation terminates
  }
}
开发者ID:IMJIU,项目名称:Spark1.6,代码行数:27,代码来源:T01.scala
示例5: Predict
//设置package包名称以及导入依赖的类
package com.databricks.apps.twitterClassifier
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeansModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Entry point: parses the command-line options, builds a [[StreamingContext]] on the
 * shared SparkContext from `SparkSetup` and hands over to [[Predictor.doIt]].
 *
 * Uses an explicit `main` rather than `scala.App`, which the Spark documentation
 * advises against for application entry points.
 */
object Predict {

  def main(args: Array[String]): Unit = {
    import SparkSetup._

    val options = PredictOptions.parse(args)
    val ssc = new StreamingContext(sc, Seconds(options.intervalInSecs))
    Predictor.doIt(options, sc, ssc)
  }
}
/** Filters a live Twitter stream down to the tweets assigned to one k-means cluster. */
object Predictor {

  /**
   * Loads the cluster centers stored under `options.modelDirectory`, then prints every
   * incoming tweet whose predicted cluster equals `options.clusterNumber`.
   * Starts the streaming context and blocks until it terminates.
   *
   * @param options prediction options (model directory, target cluster number)
   * @param sc      Spark context used to read the saved cluster centers
   * @param ssc     streaming context the Twitter stream is attached to
   */
  def doIt(options: PredictOptions, sc: SparkContext, ssc: StreamingContext): Unit = {
    println("Initializing the KMeans model...")
    val model: KMeansModel =
      new KMeansModel(sc.objectFile[Vector](options.modelDirectory.getCanonicalPath).collect)

    println("Materializing Twitter stream...")
    TwitterUtils.createStream(ssc, maybeTwitterAuth)
      .map(_.getText)
      .foreachRDD { rdd =>
        // foreachRDD registers the DStream as an output stream so that it is materialized.
        rdd.filter(t => model.predict(featurize(t)) == options.clusterNumber)
          .foreach(print)
      }

    println("Initialization complete, starting streaming computation.")
    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:35,代码来源:Predict.scala
示例6: StreamingWordCount
//设置package包名称以及导入依赖的类
package org.examples.scala.examples
import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
import org.apache.spark._
import org.apache.spark.SparkContext._
/**
 * Word count over a text socket (localhost:7777) with 30-second batches; the counts of
 * each batch are saved as text files and also printed.
 */
object StreamingWordCount {

  /**
   * Runs the streaming job until it is terminated.
   *
   * @param args `<master> <output>`: the Spark master URL and the prefix passed to
   *             `saveAsTextFiles`
   */
  def run(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage BasicStreamingExample <master> <output>")
      // Stop here: the destructuring below would otherwise fail with a MatchError.
      System.exit(1)
    }
    val Array(master, output) = args.take(2)

    val conf = new SparkConf().setMaster(master).setAppName("BasicStreamingExample")
    val ssc = new StreamingContext(conf, Seconds(30))

    val lines = ssc.socketTextStream("localhost", 7777)
    val words = lines.flatMap(_.split(" "))
    val wc = words.map(x => (x, 1)).reduceByKey((x, y) => x + y)
    wc.saveAsTextFiles(output)
    wc.print()

    println("pandas: sscstart")
    ssc.start()
    println("pandas: awaittermination")
    ssc.awaitTermination()
    println("pandas: done!")
  }
}
开发者ID:jjmleiro,项目名称:learning-spark,代码行数:33,代码来源:StreamingWordCount.scala
示例7: KafkaStreaming
//设置package包名称以及导入依赖的类
package org.myorganization.spark.streaming
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{StreamingContext, Seconds}
import org.apache.spark.streaming.kafka._
import kafka.serializer.StringDecoder
/**
 * Counts, per batch, the first field of whitespace-separated Kafka log lines whose
 * seventh field starts with "logger", and prints up to ten of the counts.
 */
object KafkaStreaming {

  /** Expects `<sampling-period> <topics> <bootstrap-servers>`; see [[getParams]]. */
  def main(args: Array[String]): Unit = {
    val (batchDuration, topics, bootstrapServers) = getParams(args)

    val sc = new SparkContext(new SparkConf().setAppName("gpKafkaStreaming"))
    val ssc = new StreamingContext(sc, Seconds(batchDuration))

    val kafkaParams = Map[String, String](
      "bootstrap.servers" -> bootstrapServers,
      "auto.offset.reset" -> "smallest")
    val topicsSet = topics.split(",").toSet
    val messages =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc, kafkaParams, topicsSet)

    // Keep only the message value, tokenize it and pair field 0 with field 6.
    val loggerSerializerLogs = messages
      .map { case (_, value) => value }
      .map(_.split("""\s+"""))
      .filter(_.length > 6)
      .map(fields => (fields(0), fields(6)))
      .filter(filterLogLines)
      .map { case (firstField, _) => firstField }

    val logCounts = loggerSerializerLogs
      .map(key => (key, 1L))
      .reduceByKey(_ + _)
    logCounts.print(10)

    ssc.start()
    ssc.awaitTermination()
  }

  /** True when the second element of the pair matches the regex `logger.+`. */
  def filterLogLines(line: Tuple2[String, String]): Boolean =
    line._2.matches("""logger.+""")

  /**
   * Validates and unpacks the command-line arguments.
   * Prints a usage message and exits with status 1 unless exactly three are given.
   *
   * @return (batch duration in seconds, comma-separated topics, bootstrap servers)
   */
  def getParams(args: Array[String]): Tuple3[Int, String, String] = {
    if (args.length != 3) {
      // The closing delimiter stays at column 0 so the message carries no trailing spaces.
      System.err.println(s"""
        |Usage: spark-kafka.sh <sampling-period> <topics> <bootstrap-servers>
        | <sampling-period> is the duration of each batch (in seconds)
        | <topics> is a list of one or more kafka topics to consume from
        | <bootstrap-servers> is a list of one or more Kafka bootstrap servers
        |
""".stripMargin)
      System.exit(1)
    }
    (args(0).toInt, args(1), args(2))
  }
}
开发者ID:gpapag,项目名称:spark-streaming-kafka,代码行数:57,代码来源:KafkaStreaming.scala
示例8: SimpleApp
//设置package包名称以及导入依赖的类
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Streaming word count that aggregates each batch with Spark SQL, shows the result
 * and writes it to the Elasticsearch resource "wordcount/wc".
 */
object SimpleApp {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Sinmple Application")
    val sc = new SparkContext(conf)
    // Build the streaming context on the existing SparkContext. Passing `conf` here would
    // create a second SparkContext, which previously required allowMultipleContexts.
    val ssc = new StreamingContext(sc, Seconds(5))

    val lines = ssc.socketTextStream("localhost", 9999)
    val words = lines.flatMap(_.toLowerCase.split(" "))

    words.foreachRDD { rdd =>
      val sqlContext = SQLContext.getOrCreate(rdd.sparkContext)
      import sqlContext.implicits._

      val wordsDataFrame = rdd.toDF("words")
      wordsDataFrame.registerTempTable("allwords")
      val wcdf =
        sqlContext.sql("select words,count(*) as total from allwords group by words")
      wcdf.show()

      // Brings saveToEs into scope on DataFrames.
      import org.elasticsearch.spark.sql._
      wcdf.saveToEs("wordcount/wc")
    }

    ssc.start()            // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }
}
开发者ID:mykumar,项目名称:SparkScalaInternalExperiements,代码行数:26,代码来源:SimpleApp.scala
示例9: Main
//设置package包名称以及导入依赖的类
import Fqueue.{FqueueReceiver, FqueueSender}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Demo driver for the Fqueue streaming receiver: a background thread enqueues a test
 * message once per second while a Spark Streaming job prints what the receiver reads.
 */
object Main {

  /** Enqueues "123" under the key "track_BOdao2015*" once per second, forever. */
  private def sendData(): Unit = {
    val fqueuSender = new FqueueSender("localhost:18740", 4, 4000)
    fqueuSender.connect()
    try {
      while (true) {
        fqueuSender.enQueue("track_BOdao2015*", "123")
        Thread.sleep(1000)
      }
    } finally {
      // The loop only ends via an exception (e.g. interruption); always release the sender.
      fqueuSender.stop()
    }
  }

  /** Dequeues one value for the key "track_BOdao2015*" and prints it ("null" if absent). */
  private def getData(): Unit = {
    val fqueueReceiver = new FqueueReceiver("localhost:18740", 4, 4000)
    fqueueReceiver.connect()
    try {
      val data = fqueueReceiver.deQueue("track_BOdao2015*")
      println(data.getOrElse("null"))
    } finally {
      fqueueReceiver.stop()
    }
  }

  def main(args: Array[String]): Unit = {
    val sender = new Thread("fqueue sender") {
      override def run(): Unit = sendData()
    }
    // Daemon thread: the endless sender loop must not keep the JVM alive
    // once the streaming job has terminated.
    sender.setDaemon(true)
    sender.start()

    val config = new SparkConf().setAppName("testfqueue").setMaster("local[2]")
    val ssc = new StreamingContext(config, Seconds(5))
    val lines = ssc.receiverStream(new FqueueStreamingReceiver("localhost:18740", 4, 4000))
    lines.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TopSpoofer,项目名称:FqueueStreamingReceiver,代码行数:38,代码来源:Main.scala
示例10: SparkStreamKinesis
//设置package包名称以及导入依赖的类
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK_2
import org.apache.spark.streaming.kinesis._
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}
/**
 * Reads the Kinesis stream "sparrow-ci" in us-east-1 with one-second batches and
 * prints the received records, decoded as UTF-8 text, on the driver.
 */
object SparkStreamKinesis {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Spark Kinesis").setMaster("local[4]")
    val ssc = new StreamingContext(conf, Seconds(1))
    println("Spark Streaming")

    val kinesisStream = KinesisUtils.createStream(ssc, "sparrow-ci",
      "sparrow-ci",
      "kinesis.us-east-1.amazonaws.com",
      "us-east-1",
      LATEST,
      Duration(2000),
      MEMORY_AND_DISK_2)

    kinesisStream.print()

    // Decode each record as one string with an explicit charset. The former
    // flatMap(new String(_)) split every record into single characters and
    // relied on the platform default charset.
    kinesisStream
      .map(bytes => new String(bytes, java.nio.charset.StandardCharsets.UTF_8))
      .foreachRDD(_.collect().foreach(print))

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:noppanit,项目名称:spark-streaming-kinesis-example,代码行数:34,代码来源:main.scala
示例11: LogAnalyzerWindowed
//设置package包名称以及导入依赖的类
package com.databricks.apps.logs
import scala.math.Ordering
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.DStream
/**
 * Keeps the statistics of the most recent window of access logs.
 *
 * @param windowLength  window length in seconds
 * @param slideInterval slide interval in seconds
 */
class LogAnalyzerWindowed(val windowLength: Long, val slideInterval: Long) extends AnalyzeFunctions with Serializable {

  import LogStatistics.EMPTY_LOG_STATISTICS

  // Replaced on every window by the foreachRDD callback below.
  var logStatistics = EMPTY_LOG_STATISTICS

  /**
   * Registers an output operation that recomputes [[logStatistics]] for every window
   * of `accessLogsDStream`; an empty window resets it to `EMPTY_LOG_STATISTICS`.
   */
  def processAccessLogs(accessLogsDStream: DStream[ApacheAccessLog]): Unit = {
    val windowDStream: DStream[ApacheAccessLog] = accessLogsDStream
      .window(Seconds(windowLength), Seconds(slideInterval))

    windowDStream.foreachRDD { accessLogs =>
      // isEmpty() stops at the first element instead of counting the whole window.
      if (accessLogs.isEmpty()) {
        logStatistics = EMPTY_LOG_STATISTICS
      } else {
        logStatistics = LogStatistics(contentSizeStats(accessLogs).get,
          responseCodeCount(accessLogs).take(100).toMap,
          filterIPAddress(ipAddressCount(accessLogs)).take(100),
          endpointCount(accessLogs).top(10)(Ordering.by[(String, Long), Long](_._2)).toMap)
      }
    }
  }

  /** Returns the statistics computed for the latest window. */
  def getLogStatistics: LogStatistics = logStatistics
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:32,代码来源:LogAnalyzerWindowed.scala
示例12: LogAnalyzerStreamingImportDirectory
//设置package包名称以及导入依赖的类
package com.databricks.apps.logs.chapter2
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import com.databricks.apps.logs.{ApacheAccessLog, LogAnalyzerRDD}
/**
 * Watches a directory for new log files and prints access-log statistics over a
 * 30-second window that slides every 10 seconds.
 *
 * Usage: `LogAnalyzerStreamingImportDirectory <directory>`
 *
 * Uses an explicit `main` rather than `scala.App`, which the Spark documentation
 * advises against for application entry points.
 */
object LogAnalyzerStreamingImportDirectory {
  val WINDOW_LENGTH = Seconds(30)
  val SLIDE_INTERVAL = Seconds(10)

  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      System.err.println("Usage: LogAnalyzerStreamingImportDirectory <directory>")
      System.exit(1)
    }
    val directory = args(0)

    val spark = SparkSession
      .builder()
      .appName("Log Analyzer Import Streaming HDFS")
      .getOrCreate()
    val streamingContext = new StreamingContext(spark.sparkContext, SLIDE_INTERVAL)

    // This method monitors a directory for new files to read in for streaming.
    val logData: DStream[String] = streamingContext.textFileStream(directory)

    val accessLogsDStream: DStream[ApacheAccessLog] = logData.map(ApacheAccessLog.parseLogLine)
    val windowDStream: DStream[ApacheAccessLog] =
      accessLogsDStream.window(WINDOW_LENGTH, SLIDE_INTERVAL)

    val logAnalyzerRDD = LogAnalyzerRDD(spark)
    windowDStream.foreachRDD { accessLogs =>
      // isEmpty() stops at the first element instead of counting the whole window.
      if (accessLogs.isEmpty()) {
        println("No access logs received in this time interval")
      } else {
        val logStatistics = logAnalyzerRDD.processRdd(accessLogs)
        logStatistics.printToStandardOut()
      }
    }

    // Start the streaming server.
    streamingContext.start()            // Start the computation
    streamingContext.awaitTermination() // Wait for the computation to terminate
  }
}
开发者ID:krish121,项目名称:Spark-reference-applications,代码行数:42,代码来源:LogAnalyzerStreamingImportDirectory.scala
示例13: DashboardController
//设置package包名称以及导入依赖的类
package controllers
import javax.inject.{Inject, Singleton}
import akka.actor.ActorSystem
import akka.stream.Materializer
import org.apache.spark.streaming.Seconds
import play.api.Configuration
import play.api.inject.ApplicationLifecycle
import play.api.libs.streams.ActorFlow
import play.api.mvc._
import services.{SparkService, StreamActor}
/**
 * Dashboard controller: serves the index page and a WebSocket that streams
 * per-country tweet counts computed over a 60-second window.
 */
@Singleton()
class DashboardController @Inject() ()(implicit system: ActorSystem, materializer: Materializer, configuration : Configuration, lifecycle: ApplicationLifecycle) extends Controller {

  val sparkService = SparkService.getInstance(configuration, lifecycle)

  /** Renders the index view. */
  def index = Action {
    Ok(views.html.index())
  }

  /**
   * WebSocket endpoint: follows tweets matching the filter "Euro", keeps those that
   * carry a place, counts them by country and sorts the counts in descending order.
   */
  def stream = WebSocket.accept[String, String] { request =>
    val keywords = Seq("Euro")

    // Country of every tweet that has a place attached; tweets without one are dropped.
    val countries = sparkService
      .getTwitterStream(keywords)
      .map(tweet => Option(tweet.getPlace).map(_.getCountry))
      .filter(_.isDefined)
      .map(_.get)

    val countryCounts = countries
      .map(country => (country, 1))
      .reduceByKeyAndWindow(_ + _, Seconds(60))
      // second argument `false` = sort by count, descending
      .transform(counts => counts.sortBy(_._2, false))

    ActorFlow.actorRef(out => StreamActor.props(out, countryCounts))
  }
}
开发者ID:OpenCompare,项目名称:live-pcm,代码行数:45,代码来源:DashboardController.scala
示例14: EnrichmentInAStream
//设置package包名称以及导入依赖的类
package com.malaska.spark.training.streaming.dstream
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Skeleton of a per-partition enrichment job: reads words from a text socket and
 * visits them partition by partition, which is where a connection to a storage layer
 * would be opened and each enriched word written.
 *
 * Usage: `EnrichmentInAStream <host> <port> <checkpointFolder>`
 */
object EnrichmentInAStream {

  // NOTE: the body used to be wrapped in a second, nested `def main` that was never
  // called, so running the object did nothing. The nesting is removed.
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println("Usage: EnrichmentInAStream <host> <port> <checkpointFolder>")
      System.exit(1)
    }
    val host = args(0)
    val port = args(1)
    val checkpointFolder = args(2)

    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        // At least two local threads: one is occupied by the socket receiver,
        // the other processes the batches.
        .master("local[2]")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }

    // Reuse the session's SparkContext; building the StreamingContext from a SparkConf
    // would try to create a second SparkContext in the same JVM.
    val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(1))
    ssc.checkpoint(checkpointFolder)

    val lines = ssc.socketTextStream(host, port.toInt)
    val words = lines.flatMap(_.split(" "))

    words.foreachRDD(rdd => rdd.foreachPartition(wordIt => {
      // make connection to storage layer
      // May use static connection
      wordIt.foreach(word => {
        // Placeholder enrichment; the result is meant to be written to storage.
        word.toUpperCase
        // write to storage location
      })
    }))

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TedBear42,项目名称:spark_training,代码行数:54,代码来源:EnrichmentInAStream.scala
示例15: CountingInAStreamExpBatchCounting
//设置package包名称以及导入依赖的类
package com.malaska.spark.training.streaming.dstream
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Per-batch word count over a text socket; every batch's counts are collected to the
 * driver and printed between braces together with the number of distinct words.
 *
 * Usage: `CountingInAStreamExpBatchCounting <host> <port> <checkpointFolder>`
 */
object CountingInAStreamExpBatchCounting {
  // Silence the "org" and "akka" loggers.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println(
        "Usage: CountingInAStreamExpBatchCounting <host> <port> <checkpointFolder>")
      System.exit(1)
    }
    val host = args(0)
    val port = args(1)
    val checkpointFolder = args(2)

    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        // A single master setting: the earlier .master("local") was overridden by this one.
        .master("local[3]")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }

    val ssc = new StreamingContext(sparkSession.sparkContext, Seconds(2))
    ssc.checkpoint(checkpointFolder)

    val lines = ssc.socketTextStream(host, port.toInt)
    val words = lines.flatMap(line => line.toLowerCase.split(" "))
    val wordCounts = words.map(word => (word, 1))
      .reduceByKey((a, b) => a + b)

    wordCounts.foreachRDD { rdd =>
      println("{")
      val localCollection = rdd.collect()
      println(" size:" + localCollection.length)
      localCollection.foreach(r => println(" " + r))
      println("}")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:TedBear42,项目名称:spark_training,代码行数:59,代码来源:CountingInAStreamExpBatchCounting.scala
示例16: StatefulWordCount
//设置package包名称以及导入依赖的类
package com.github.melentye.spark.streaming
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Running word count over a text socket (localhost:9999). The streaming context is
 * recovered from the checkpoint directory when one exists, otherwise created afresh.
 */
object StatefulWordCount {
  private val checkpointDirectory = "spark-checkpoints"

  def main(args: Array[String]): Unit = withStreamingContext { ssc =>
    // New running count = previous count (0 when the key is new) + this batch's values.
    def addToRunningCount(newValues: Seq[Int], runningCount: Option[Int]): Option[Int] =
      Some(runningCount.getOrElse(0) + newValues.sum)

    val runningCounts = ssc
      .socketTextStream("localhost", 9999)
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .updateStateByKey[Int](addToRunningCount _)
    runningCounts.print()
  }

  /**
   * Obtains a streaming context via `StreamingContext.getOrCreate`, starts it and blocks
   * until termination. `f` defines the job and is only applied when a new context has
   * to be created.
   */
  def withStreamingContext(f: StreamingContext => Unit): Unit = {
    val newContext: () => StreamingContext = () => {
      val sparkConf = new SparkConf()
        .setMaster("local[*]")
        .setAppName("StatefulWordCount")
      val created = new StreamingContext(sparkConf, Seconds(1))
      f(created)
      created.checkpoint(checkpointDirectory)
      created
    }

    val ssc = StreamingContext.getOrCreate(checkpointDirectory, newContext)
    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:melentye,项目名称:spark-playground,代码行数:39,代码来源:StatefulWordCount.scala
示例17: RedditDemoSpark
//设置package包名称以及导入依赖的类
import com.github.catalystcode.fortis.spark.streaming.reddit.{RedditAuth, RedditUtils}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Prints Reddit posts matching the keyword "healthcare" using Spark Streaming.
 *
 * @param auth Reddit credentials handed to the page stream
 */
class RedditDemoSpark(auth: RedditAuth) {

  /** Builds the Spark and streaming contexts, wires the Reddit stream and runs forever. */
  def run(): Unit = {
    // Spark context; falls back to a local master when none is configured.
    val sparkConf = new SparkConf()
      .setAppName("Reddit Application")
      .setIfMissing("spark.master", "local[*]")
    val sparkContext = new SparkContext(sparkConf)
    val ssc = new StreamingContext(sparkContext, Seconds(1))

    val keywordSet = List("healthcare")
    val posts =
      RedditUtils.createPageStream(auth, keywordSet, ssc, pollingPeriodInSeconds = 10)
    posts
      .map(post => s"Post: ${post}")
      .print()

    // run forever
    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:CatalystCode,项目名称:streaming-reddit,代码行数:21,代码来源:RedditDemoSpark.scala
示例18: StreamingSimpleModel
//设置package包名称以及导入依赖的类
package com.bigchange.streaming
import breeze.linalg.DenseVector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, StreamingLinearRegressionWithSGD}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Online linear regression: trains a [[StreamingLinearRegressionWithSGD]] model on
 * labeled points read from a text socket and prints the MSE / RMSE of every batch.
 *
 * Input format per line: `<label>\t<f1>,<f2>,...`.
 */
object StreamingSimpleModel {

  def main(args: Array[String]): Unit = {
    // "local[2]" rather than "local": the socket receiver permanently occupies one
    // thread, so a single-threaded master would never get to process a batch.
    val ssc = new StreamingContext("local[2]", "test", Seconds(10))
    val stream = ssc.socketTextStream("localhost", 9999)

    val numberFeatures = 100
    val model = new StreamingLinearRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(numberFeatures))
      .setNumIterations(1)
      .setStepSize(0.01)

    // Parse "<label>\t<comma-separated features>" into labeled points.
    val labeledStream = stream.map { event =>
      val split = event.split("\t")
      val y = split(0).toDouble
      val features = split(1).split(",").map(_.toDouble)
      LabeledPoint(label = y, features = Vectors.dense(features))
    }

    model.trainOn(labeledStream)

    // Prediction error (predicted - actual) of the latest model for every point.
    val predictAndTrue = labeledStream.transform { rdd =>
      val latestModel = model.latestModel()
      rdd.map { point =>
        val predict = latestModel.predict(point.features)
        predict - point.label
      }
    }

    // Mean squared error and its root for every batch.
    predictAndTrue.foreachRDD { rdd =>
      val mse = rdd.map(x => x * x).mean()
      val rmse = math.sqrt(mse)
      println(s"current batch, MSE: $mse, RMSE:$rmse")
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
开发者ID:bigchange,项目名称:AI,代码行数:51,代码来源:StreamingSimpleModel.scala
示例19: SparkFlatSpec
//设置package包名称以及导入依赖的类
package it.agilelab.bigdata.wasp.consumers
import java.nio.file.Files
import akka.actor.ActorSystem
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.scalatest.concurrent.ScalaFutures
import org.scalatest.{ConfigMap, BeforeAndAfterAllConfigMap, BeforeAndAfter, FlatSpec}
/**
 * Base spec that provides a streaming context, Spark context, SQL context and actor
 * system: the Spark contexts are created in `beforeAll`, and everything is shut down
 * in `afterAll`.
 */
class SparkFlatSpec extends FlatSpec with BeforeAndAfterAllConfigMap with ScalaFutures {

  private val master = "local[2]"
  private val appName = "example-spark-streaming"
  private val batchDuration = Seconds(1)
  private val checkpointDir = Files.createTempDirectory(appName).toString

  protected var ssc: StreamingContext = _
  protected var sc: SparkContext = _
  protected var sqlContext: SQLContext = _
  protected var actorSystem = ActorSystem()

  /** Creates the streaming context and derives the Spark and SQL contexts from it. */
  override def beforeAll(cm: ConfigMap): Unit = {
    val sparkConf: SparkConf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("Instant Matching")
    ssc = new StreamingContext(sparkConf, batchDuration)
    sc = ssc.sparkContext
    sqlContext = new SQLContext(sc)
  }

  /** Stops whichever contexts were created, then shuts the actor system down. */
  override def afterAll(cm: ConfigMap): Unit = {
    Option(ssc).foreach { streaming =>
      // Wait up to one second for the streaming job before stopping it.
      streaming.awaitTerminationOrTimeout(1000)
      streaming.stop()
    }
    Option(sc).foreach { spark =>
      spark.stop()
      spark.clearCallSite()
    }
    actorSystem.shutdown()
  }
}
开发者ID:agile-lab-dev,项目名称:wasp,代码行数:48,代码来源:SparkFlatSpec.scala
示例20: main
//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.scripts
import java.io.File
import org.apache.hadoop.fs.FileSystem
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Minutes, Seconds}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Common scaffolding for runnable scripts: lazily created Spark/SQL/HDFS handles,
 * streaming settings read from system properties, and log4j set-up in `main`.
 */
trait Script {

  // "tw.streaming.timeout" is given in seconds and stored in milliseconds; -1 when unset.
  protected lazy val streamingTimeout =
    sys.props.get("tw.streaming.timeout").fold(-1L)(_.toLong * 1000)

  // "tw.streaming.interval" is given in seconds; five minutes when unset.
  protected lazy val streamingInterval =
    sys.props.get("tw.streaming.interval").fold(Minutes(5))(s => Seconds(s.toLong))

  protected lazy val scriptName = "Tweeather_" + getClass.getSimpleName.stripSuffix("$")

  protected lazy val sc = {
    // Every setting is a default only: values supplied externally take precedence.
    val sparkConf = new SparkConf()
      .setIfMissing("spark.app.name", scriptName)
      .setIfMissing("spark.eventLog.dir", "tw/logs")
      .setIfMissing("spark.eventLog.enabled", "true")
      .setIfMissing("spark.streaming.stopGracefullyOnShutdown", "true")
      .setIfMissing("spark.streaming.blockInterval", "30s")
    new SparkContext(sparkConf)
  }

  protected lazy val hdfs = FileSystem.get(sc.hadoopConfiguration)
  protected lazy val sqlc = new SQLContext(sc)

  /** Configures log4j from the bundled properties file and creates the event-log directory. */
  def main(args: Array[String]): Unit = {
    // Log4j properties
    val log4jProperties =
      Option(getClass.getResource("/com/aluxian/tweeather/res/log4j.properties"))
    if (log4jProperties.isDefined) {
      log4jProperties.foreach(url => PropertyConfigurator.configure(url))
    } else {
      System.err.println("Unable to load log4j.properties")
    }

    // Ensure the event log directory exists
    new File("tw/logs").mkdirs()
  }
}
开发者ID:cnajeefa,项目名称:Tourism-Sentiment-Analysis,代码行数:44,代码来源:Script.scala
注:本文中的org.apache.spark.streaming.Seconds类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论