
Scala StorageLevel Class Code Examples


This article collects typical usage examples of the Scala class org.apache.spark.storage.StorageLevel. If you are wondering what StorageLevel is for, how to use it, or what real-world code that uses it looks like, the curated class examples below should help.



The sections below present 20 code examples of the StorageLevel class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
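
Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what StorageLevel controls: how Spark caches a dataset, i.e. memory vs. disk, serialized or not, and how many replicas. The app name and the toy data are placeholders chosen for illustration.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.storage.StorageLevel

object StorageLevelQuickStart {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("StorageLevelQuickStart").setMaster("local[2]"))

    // Keep the RDD serialized in memory and spill to disk if it does not fit.
    val pairs = sc.parallelize(1 to 1000000)
      .map(n => (n % 10, n.toLong))
      .persist(StorageLevel.MEMORY_AND_DISK_SER)

    // Both actions reuse the cached blocks instead of recomputing the lineage.
    println(pairs.count())
    println(pairs.reduceByKey(_ + _).collect().mkString(", "))

    pairs.unpersist()
    sc.stop()
  }
}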

Example 1: StationJourneyCountCustomApp

// Package name and imported dependencies
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.apache.spark.streaming.kafka.KafkaUtils
import kafka.serializer.StringDecoder
import org.apache.spark.storage.StorageLevel

object StationJourneyCountCustomApp {

  def main(args: Array[String]) {
    if (args.length != 7) {
      System.err.println(
        "Usage: StationJourneyCountApp <appname> <brokerUrl> <topic> <consumerGroupId> <zkQuorum> <checkpointDir> <outputPath>")
      System.exit(1)
    }

    val Seq(appName, brokerUrl, topic, consumerGroupId, zkQuorum, checkpointDir, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
      //.set("spark.streaming.receiver.writeAheadLog.enable", "true")

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    val topics = Map[String, Int](
      topic -> 1)
    val params = Map[String, String](
      "zookeeper.connect" -> zkQuorum,
      "group.id" -> consumerGroupId,
      "bootstrap.servers" -> brokerUrl)
    KafkaUtils.createStream[String, String, StringDecoder, StringDecoder](ssc, params, topics, StorageLevel.MEMORY_ONLY_SER).map(_._2)
      .map(rec => rec.split(","))
      .map(rec => ((rec(3), rec(7)), 1))
      .reduceByKey(_ + _)
      .repartition(1)
      .map(rec => (rec._2, rec._1))
      .transform(rdd => rdd.sortByKey(ascending = false))
      .saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

} 
Author: ZubairNabi | Project: prosparkstreaming | Lines: 52 | Source: L5-14KafkaCustomConf.scala


Example 2: FqueueStreamingReceiver

// Package name and imported dependencies
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket

import Fqueue.FqueueReceiver
import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver


class FqueueStreamingReceiver(val address: String, val connectionPoolSize: Int, val timeOut: Int)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {
  private var receiver: Option[FqueueReceiver] = None

  def onStart() {
    new Thread("Socket Receiver") {
      override def run() { receive() }
    }.start()
  }

  def onStop(): Unit = {
    receiver foreach { _.stop() }
  }

  private def receive(): Unit = {
    val fqueueReceiver = new FqueueReceiver(address, connectionPoolSize, timeOut)
    receiver = Some(fqueueReceiver)
    receiver foreach { _.connect() }

    try
    {
      var stop = false
      while (!isStopped() && !stop) {
        val data = fqueueReceiver.deQueue("track_BOdao2015*")
        data match {
          case Some(str) => store(str)
          case None => Thread.sleep(1000)//stop = true
        }
      }
      receiver foreach { _.stop() }
    } catch {
      case e: Exception =>
        println("get data from fqueue err! pleace sure the server is live")
        println(e.getMessage)
        println(e.getStackTraceString)
        receiver foreach { _.stop() }
    }
  }
} 
Author: TopSpoofer | Project: FqueueStreamingReceiver | Lines: 49 | Source: FqueueStreamingReceiver.scala


Example 3: SimpleDataStream

// Package name and imported dependencies
package com.fortysevendeg.log

import com.fortysevendeg.log.models._
import com.fortysevendeg.log.utils.Regex._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

import scala.language.postfixOps

object SimpleDataStream {

  def main(args: Array[String]) = {

    // run:
    // $ adb logcat -v time | nc -lk 9999

    // Spark configuration

    val conf = new SparkConf().setMaster("local[2]").setAppName("SimpleDataStream")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Milliseconds(1000))
    ssc.checkpoint("/tmp")

    val logLines = ssc.socketTextStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK_SER)

    val logs = logLines.flatMap { line =>
      for {
        typePlusAppPlusPid <- typePlusAppPlusPid.findFirstIn(line)
        data = extractTypeAppPid(typePlusAppPlusPid)
        logType = data._1
        app <- data._2
        pid <- data._3
        date <- date.findFirstIn(line)
        message <- message.findFirstIn(line)
      } yield {
        LogLine(LogInfo(app, pid, logType, date), message.substring(2))
      }
    }

    logs foreachRDD (_.foreach { log =>
      println(s"${log.info.logType}: ${log.info.app}: ${log.message}")
    })

    ssc.start()
    ssc.awaitTermination()
  }
} 
Author: javipacheco | Project: spark-android-log | Lines: 49 | Source: SimpleDataStream.scala


Example 4: appl

// Package name and imported dependencies
package model
import model.{DataFramesBuilder, IndexDataFramesSearch}
import org.apache.spark.sql.SparkSession
import org.apache.spark.storage.StorageLevel
import play.api.libs.json._
import play.api.libs.functional.syntax._



  object appl {
  case class Node(ID: Long, Name: String)
  case class fatherNode(ID:Long, Name:String, children:List[Node])



  def search(spark: SparkSession, nodeID:Long) = {
    val path = "/Users/mali/Downloads/taxdmp/"
    val edgesPath = path + "nodes.dmp"
    val verticesPath = path + "names.dmp"

    implicit val nodesWrites: Writes[Node] = (
      (JsPath \ "ID").write[Long] and
        (JsPath \ "Name").write[String]
      )(unlift(Node.unapply))

    implicit val fatherNodeWrites: Writes[fatherNode] = (
      (JsPath \ "ID").write[Long] and
        (JsPath \ "Name").write[String] and
        (JsPath \ "children").write[List[Node]]
      )(unlift(fatherNode.unapply))

    val edParentDF = DataFramesBuilder.getEdgesParentDF(edgesPath, spark)
    val veDF = DataFramesBuilder.getVerticesDF(verticesPath, spark)
    val df = edParentDF.getOrElse(spark.createDataFrame(List())).persist(StorageLevel.MEMORY_ONLY).cache()
    val bv = DataFramesBuilder.buildPathToRootDF(df, spark, 3)
    //  val edParentDF = spark.read.parquet(path + "edParentDF").persist(StorageLevel.MEMORY_ONLY).cache()
    //  val veDF = spark.read.parquet(path + "veDF").persist(StorageLevel.MEMORY_ONLY).cache()
    val indexDF = spark.read.parquet(path + "pathToRootDF").persist(StorageLevel.MEMORY_ONLY).cache()
    println(IndexDataFramesSearch.getChildren(indexDF, nodeID))
    println("success???")
    val result = IndexDataFramesSearch.getChildren(indexDF, nodeID)

    val nodeFather = new Node(nodeID, DataFramesSearch.findVNameByID(indexDF, nodeID))
    println(nodeFather.ID + " : " + nodeFather.Name)

    val childs = result.map(i => Node(i, DataFramesSearch.findVNameByID(indexDF, i)))
    println(childs)
    val tree = new fatherNode(nodeID, DataFramesSearch.findVNameByID(indexDF, nodeID), childs)
    val res = tree
    val json = Json.toJson(res)
    json

}
} 
Author: martinkr1120 | Project: finalProject_Withplay | Lines: 57 | Source: appl.scala


Example 5: streaming

// Package name and imported dependencies
package edu.uchicago.cs.data.client

import org.apache.spark.sql.SparkSession
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.storage.StorageLevel

object streaming {
    def main(args: Array[String]) {
    
      if (args.length < 2) {
        System.err.println("Usage: NetworkWorkCount <hostname> <port>")
        System.exit(1)
      }
      
      //StreamingExamples.setStreamingLogLevels()
      
      val sparkConf = new SparkConf().setAppName("NetworkWorkCount")
      val ssc = new StreamingContext(sparkConf, Seconds(1))
      
      // Create a socket stream on target ip:port and count the
      // words in input stream of \n delimited text (e.g. generated by 'nc').
      // Note that no duplication in storage level only for running locally.
      // Replication necessary in distributed scenario for fault tolerance.
      val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER)
      val words = lines.flatMap(_.split(" "))
      val wordCounts = words.map(x => (x,1)).reduceByKey(_ + _)
      wordCounts.print()
      ssc.start()
      ssc.awaitTermination()
      
  }
} 
Author: adam-dziedzic | Project: spark-client | Lines: 34 | Source: streaming.scala
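
The comment in example 5 is worth highlighting: an unreplicated level such as MEMORY_AND_DISK_SER is only appropriate when running locally, because a receiver-based stream keeps its received blocks on a single executor. A minimal sketch of the cluster-oriented variant follows; the hostname "stream-host" and port 9999 are placeholders, not values from the example.

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}

object ReplicatedSocketStream {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("ReplicatedSocketStream"), Seconds(1))

    // The *_2 level keeps two replicas of each received block, so the stream
    // can recover if the executor holding the receiver's data is lost.
    val lines = ssc.socketTextStream("stream-host", 9999, StorageLevel.MEMORY_AND_DISK_SER_2)
    lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

    ssc.start()
    ssc.awaitTermination()
  }
}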


Example 6: JMSInputDStream

// Package name and imported dependencies
package com.redhat.spark.streaming.jms

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class JMSInputDStream(
                       @transient ssc_ : StreamingContext,
                       brokerURL: String,
                       username: String,
                       password: String,
                       queuename: String,
                       selector: String,
                       storageLevel: StorageLevel
                       ) extends ReceiverInputDStream[JMSEvent](ssc_) {

  override def getReceiver(): Receiver[JMSEvent] = {
    new JMSReceiver(brokerURL, username, password, queuename, selector, storageLevel)
  }
} 
Author: xiaoJacky | Project: sparkLearning | Lines: 23 | Source: JMSInputDStream.scala


Example 7: AppConfigs

// Package name and imported dependencies
package com.groupon.dse.configs

import java.util.Properties

import org.apache.spark.storage.StorageLevel

object AppConfigs {
  val SparkReceivers = ("spark.num.receivers", "1")
  val SparkStorageLevel = ("spark.storage.level", "MEMORY_AND_DISK_SER_2")

  val Topics = ("topics", "")
  val TopicsBlackList = ("topics.blacklist", "")
  val TopicsEnableBlockingConsumption = ("topic.consumption.blocking", "false")
  val TopicConsumptionPolicy = ("topic.consumption.policy", "OFFSET")
  val TopicConsumptionOffsetThreshold = ("topic.consumption.offset.threshold", "0")
  val TopicConsumptionTimeThresholdMs = ("topic.consumption.time.threshold.ms", "1000")
  val TopicFetchSizeBytes = ("topic.fetch.size.bytes", "1048576")
  val TopicRepartitionFactor = ("topic.repartition.factor", "1")
  val TopicStartOffset = ("topic.start.offset", "-1") //-1: Max, -2: Min, Other: Actual offset value

  val PartitionRefreshIntervalMs = ("partition.refresh.interval.ms", "30000")
  val PartitionWarmUpRefreshIntervalMs = ("partition.warmup.refresh.interval.ms", "10000")
  val ReceiverRestIntervalOnFailMs = ("receiver.rest.interval.fail.ms", "2500")
  val ReceiverRestIntervalOnSuccessMs = ("receiver.rest.interval.success.ms", "100")

  val KafkaBrokerConnect = ("kafka.broker.zk.connect", "")
  val KafkaSocketTimeoutMs = ("kafka.socket.timeout", "10000")
  val KafkaSocketBufferSizeBytes = ("kafka.socket.buffer.size", "1048576")
  val KafkaZkSessionTimeoutMs = ("kafka.zk.session.timeout.ms", "10000")
  val KafkaZkConnectionTimeoutMs = ("kafka.zk.connection.timeout.ms", "10000")

  val StateControllerType = ("statecontroller.type", "MEMORY")
  val ZookeeperStateControllerConnect = ("statecontroller.zk.connect", "")
  val ZookeeperStateControllerRoot = ("statecontroller.zk.root", "/baryon")
  val ZookeeperStateControllerConnTimeoutMs = ("statecontroller.zk.conn.timeout.ms", "120000")
  val ZookeeperStateControllerSessionTimeoutMs = ("statecontroller.zk.session.timeout.ms", "60000")

  val TopicFetcherType = ("topics.fetcher.type", "LOCAL")
  val HDFSTopicSource = ("topics.fetcher.hdfs.source", "")
  val HTTPTopicSource = ("topics.fetcher.http.source", "")

  
  def validatedBooleanConfig(
                              properties: Properties,
                              propertyName: String,
                              propertyDefault: String)
  : Boolean = {
    properties.getProperty(propertyName, propertyDefault) match {
      case "true" => true
      case "false" => false
      case _ => throw InvalidConfigException(s"$propertyName should be set to true or false")
    }
  }

  case class MissingConfigException(message: String) extends Exception(message)

  case class InvalidConfigException(message: String) extends Exception(message)

} 
Author: groupon | Project: baryon | Lines: 60 | Source: AppConfigs.scala
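
AppConfigs above keeps the storage level as the string "MEMORY_AND_DISK_SER_2" rather than as a StorageLevel value. Spark's StorageLevel.fromString handles exactly this kind of configuration; the sketch below reuses the property name and default from the example, but how baryon actually consumes the setting is an assumption on my part.

import java.util.Properties
import org.apache.spark.storage.StorageLevel

object StorageLevelFromConfig {
  // Resolve a storage level from a Properties object, falling back to the
  // default used in AppConfigs ("spark.storage.level" -> "MEMORY_AND_DISK_SER_2").
  def resolve(properties: Properties): StorageLevel = {
    val levelName = properties.getProperty("spark.storage.level", "MEMORY_AND_DISK_SER_2")
    // Throws IllegalArgumentException for unknown names, surfacing misconfiguration early.
    StorageLevel.fromString(levelName)
  }

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("spark.storage.level", "MEMORY_ONLY_SER")
    println(resolve(props))            // uses the configured level
    println(resolve(new Properties())) // falls back to MEMORY_AND_DISK_SER_2
  }
}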


Example 8: NetCDF

// Package name and imported dependencies
package se.kth.climate.fast.netcdf

import se.kth.climate.fast.common.Metadata
import se.kth.climate.fast.netcdf.hadoop._
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import com.typesafe.scalalogging.LazyLogging
import scala.concurrent.Future
import scala.concurrent.ExecutionContext
import com.google.gson.Gson
import ucar.nc2.NetcdfFile

object NetCDF extends LazyLogging {
  def metaDataAsync(path: String)(implicit sc: SparkContext, ec: ExecutionContext): Future[Metadata] = {
    Future {
      NetCDF.metaData(path)
    }
  }

  def metaData(path: String)(implicit sc: SparkContext): Metadata = {
    val metaSRDD = sc.textFile(path + "/metadata.json", 1);
    val metaS = metaSRDD.collect().mkString;
    val gson = new Gson();
    gson.fromJson(metaS, classOf[Metadata]);
  }

  def rawData(path: String)(implicit sc: SparkContext): RDD[NetcdfFile] = {
    val rdd = sc.newAPIHadoopFile[Void, NCWritable, NetCDFFileFormat](path)
    val ncrdd = rdd.map {
      case (_, v) => {
        val ncfile = v.get;
        //ncfile.setImmutable(); // can't write them out, so don't let anyone mutate them
        ncfile
      }
    }
    //ncrdd.persist(StorageLevel.MEMORY_ONLY_SER);
    ncrdd.cache();
    ncrdd
  }
} 
Author: ClimateFAST | Project: FASTSpark | Lines: 42 | Source: NetCDF.scala


Example 9: RedditUtils

// Package name and imported dependencies
package com.github.catalystcode.fortis.spark.streaming.reddit

import com.github.catalystcode.fortis.spark.streaming.reddit.client.RedditClient
import com.github.catalystcode.fortis.spark.streaming.reddit.dto.RedditObject
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream

object RedditUtils {
  def createPageStream(redditAuth: RedditAuth,
                       keywords: Seq[String],
                       ssc: StreamingContext,
                       storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
                       pollingPeriodInSeconds: Int = 3,
                       subredit: Option[String] = None,
                       searchLimit: Int = 25,
                       searchResultType: Option[String] = Option("link")
  ): ReceiverInputDStream[RedditObject] = {
    return new RedditInputDStream(
      client = new RedditClient(redditAuth.applicationId, redditAuth.secret),
      keywords = keywords,
      ssc = ssc,
      storageLevel = storageLevel,
      subredit = subredit,
      searchLimit = searchLimit,
      searchResultType = searchResultType,
      pollingPeriodInSeconds = pollingPeriodInSeconds)
  }
} 
Author: CatalystCode | Project: streaming-reddit | Lines: 30 | Source: RedditUtils.scala


Example 10: GraphProviders

// Package name and imported dependencies
package ml.sparkling.graph.loaders.csv.providers

import ml.sparkling.graph.loaders.csv.types.Types
import ml.sparkling.graph.loaders.csv.types.Types.ToVertexId
import org.apache.spark.graphx.{Edge, Graph, VertexId}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.SparkSession;
import scala.reflect.ClassTag


object GraphProviders {
  val defaultStorageLevel=StorageLevel.MEMORY_ONLY
  def simpleGraphBuilder[VD: ClassTag, ED: ClassTag](defaultVertex: Option[VD]=None,
                                                     vertexProvider: Row => Seq[(VertexId, VD)],
                                                     edgeProvider: Row => Seq[Edge[ED]],
                                                     edgeStorageLevel: StorageLevel = defaultStorageLevel,
                                                     vertexStorageLevel: StorageLevel =defaultStorageLevel)
                                                    (dataFrame: DataFrame): Graph[VD, ED] = {

    def mapRows[MT: ClassTag](mappingFunction: (Row) => Seq[MT]): RDD[MT] = {
      dataFrame.rdd.mapPartitionsWithIndex((id, rowIterator) => {
        rowIterator.flatMap { case row => mappingFunction(row) }
      })
    }

    val vertices: RDD[(VertexId, VD)] = mapRows(vertexProvider)
    val edges: RDD[Edge[ED]] = mapRows(edgeProvider)
    defaultVertex match{
      case None => Graph(vertices,edges,edgeStorageLevel=edgeStorageLevel,vertexStorageLevel=vertexStorageLevel)
      case Some(defaultVertexValue)=> Graph(vertices,edges,defaultVertexValue,edgeStorageLevel,vertexStorageLevel)
    }

  }

  def indexedGraphBuilder[VD:ClassTag, ED: ClassTag](defaultVertex: Option[VD]=None,
                                                      vertexProvider: (Row, ToVertexId[VD]) => Seq[(VertexId, VD)],
                                                      edgeProvider: (Row, ToVertexId[VD]) => Seq[Edge[ED]],
                                                      columnsToIndex: Seq[Int],
                                                      edgeStorageLevel: StorageLevel = defaultStorageLevel,
                                                      vertexStorageLevel: StorageLevel = defaultStorageLevel)
                                                     (dataFrame: DataFrame): Graph[VD, ED] = {
    val index = dataFrame.rdd.flatMap(row => columnsToIndex.map(row(_))).distinct().zipWithUniqueId().collect().toMap
    def extractIdFromIndex(vertex: VD) = index(vertex)
    simpleGraphBuilder(defaultVertex,
      vertexProvider(_: Row, extractIdFromIndex _),
      edgeProvider(_: Row, extractIdFromIndex _),
      edgeStorageLevel,
      vertexStorageLevel)(dataFrame)

  }
} 
Author: sparkling-graph | Project: sparkling-graph | Lines: 54 | Source: GraphProviders.scala


Example 11: createStream

// Package name and imported dependencies
package it.agilelab.bigdata.wasp.consumers.readers

import it.agilelab.bigdata.wasp.core.WaspSystem
import it.agilelab.bigdata.wasp.core.WaspSystem._
import it.agilelab.bigdata.wasp.core.kafka.CheckOrCreateTopic
import it.agilelab.bigdata.wasp.core.logging.WaspLogger
import it.agilelab.bigdata.wasp.core.models.{DefaultConfiguration, TopicModel}
import it.agilelab.bigdata.wasp.core.utils.{AvroToJsonUtil, ConfigManager, JsonToByteArrayUtil}
import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils



// NOTE: the enclosing object declaration (which also provides the `logger` used below) was dropped
// when this excerpt was extracted; it is restored here, with the name assumed from the source file
// KafkaReader.scala, so that the braces balance.
object KafkaReader {
  //TODO: check warning (not understood)
  def createStream(group: String, topic: TopicModel)(implicit ssc: StreamingContext): DStream[String] = {
    val kafkaConfig = ConfigManager.getKafkaConfig

    val kafkaConfigMap: Map[String, String] = Map(
      "zookeeper.connect" -> kafkaConfig.zookeeper.toString,
      "zookeeper.connection.timeout.ms" -> kafkaConfig.zookeeper.timeout.getOrElse(DefaultConfiguration.timeout).toString
    )


    if (??[Boolean](WaspSystem.getKafkaAdminActor, CheckOrCreateTopic(topic.name, topic.partitions, topic.replicas))) {
      val receiver = KafkaUtils.createStream[String, Array[Byte], StringDecoder, DefaultDecoder](
        ssc,
        kafkaConfigMap + ("group.id" -> group),
        Map(topic.name -> 3),
        StorageLevel.MEMORY_AND_DISK_2
      )

      topic.topicDataType match {
        case "avro" => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
        case "json" => receiver.map(x => (x._1, JsonToByteArrayUtil.byteArrayToJson(x._2))).map(_._2)
        case _ => receiver.map(x => (x._1, AvroToJsonUtil.avroToJson(x._2))).map(_._2)
      }

    } else {
      logger.error(s"Topic not found on Kafka: $topic")
      throw new Exception(s"Topic not found on Kafka: $topic")
    }
  }
} 
Author: agile-lab-dev | Project: wasp | Lines: 47 | Source: KafkaReader.scala


Example 12: TwitterInputDStream

// Package name and imported dependencies
package com.aluxian.tweeather.streaming

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import twitter4j.auth.{Authorization, OAuthAuthorization}
import twitter4j.conf.ConfigurationBuilder
import twitter4j.{FilterQuery, Status}


class TwitterInputDStream(@transient ssc: StreamingContext,
                          twitterAuth: Option[Authorization],
                          filterQuery: Option[FilterQuery],
                          storageLevel: StorageLevel
                         ) extends ReceiverInputDStream[Status](ssc) {

  private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())

  private def createOAuthAuthorization(): Authorization = {
    new OAuthAuthorization(new ConfigurationBuilder().build())
  }

  override def getReceiver(): Receiver[Status] = {
    new TwitterReceiver(authorization, filterQuery, storageLevel)
  }

} 
Author: cnajeefa | Project: Tourism-Sentiment-Analysis | Lines: 29 | Source: TwitterInputDStream.scala


Example 13: TwitterUtils

// Package name and imported dependencies
package com.aluxian.tweeather.streaming

import java.util.Properties

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import twitter4j.auth.{AccessToken, Authorization}
import twitter4j.{FilterQuery, Status, TwitterFactory}

object TwitterUtils {

  
  def createMultiStream(ssc: StreamingContext,
                        queryBuilder: () => FilterQuery = () => null,
                        credentials: Seq[Authorization] = loadDefaultCredentials(),
                        storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER
                       ): DStream[Status] = {
    credentials
      .map(auth => createStream(ssc, Some(queryBuilder()), Some(auth)))
      .reduce { (accStream, stream) => accStream.union(stream) }
  }

  private def loadDefaultCredentials(): Seq[Authorization] = {
    val props = loadTwitterProperties()
    val num = props.getProperty("twitter.credentials").toInt
    1.to(num).map(i => {
      val twitter = new TwitterFactory().getInstance()

      twitter.setOAuthConsumer(
        props.getProperty(s"twitter.credentials.$i.consumerKey"),
        props.getProperty(s"twitter.credentials.$i.consumerSecret")
      )

      twitter.setOAuthAccessToken(new AccessToken(
        props.getProperty(s"twitter.credentials.$i.token"),
        props.getProperty(s"twitter.credentials.$i.tokenSecret")
      ))

      twitter.getAuthorization
    })
  }

  private def loadTwitterProperties(): Properties = {
    val properties = new Properties()
    val stream = getClass.getResourceAsStream("/com/aluxian/tweeather/res/twitter.properties")
    properties.load(stream)
    stream.close()
    properties
  }

} 
Author: cnajeefa | Project: Tourism-Sentiment-Analysis | Lines: 53 | Source: TwitterUtils.scala


Example 14: FacebookPostReceiver

// Package name and imported dependencies
package com.github.catalystcode.fortis.spark.streaming.facebook

import java.util.Date

import com.github.catalystcode.fortis.spark.streaming.facebook.client.FacebookPageClient
import com.github.catalystcode.fortis.spark.streaming.facebook.dto.FacebookPost
import com.github.catalystcode.fortis.spark.streaming.{PollingReceiver, PollingSchedule}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

private class FacebookPostReceiver(
  clients: Set[FacebookPageClient],
  pollingSchedule: PollingSchedule,
  storageLevel: StorageLevel,
  pollingWorkers: Int
) extends PollingReceiver[FacebookPost](pollingSchedule, pollingWorkers, storageLevel) with Logger {

  @volatile private var lastIngestedDate: Option[Date] = None

  override protected def poll(): Unit = {
    clients.par.foreach(_
      .loadNewFacebookPosts(lastIngestedDate)
      .filter(x => {
        logDebug(s"Got facebook ${x.post.getPermalinkUrl} from page ${x.pageId} time ${x.post.getCreatedTime}")
        isNew(x)
      })
      .foreach(x => {
        logInfo(s"Storing facebook ${x.post.getPermalinkUrl}")
        store(x)
        markStored(x)
      })
    )
  }

  private def isNew(item: FacebookPost) = {
    lastIngestedDate.isEmpty || item.post.getCreatedTime.after(lastIngestedDate.get)
  }

  private def markStored(item: FacebookPost): Unit = {
    if (isNew(item)) {
      lastIngestedDate = Some(item.post.getCreatedTime)
      logDebug(s"Updating last ingested date to ${item.post.getCreatedTime}")
    }
  }
}

class FacebookPostInputDStream(
  ssc: StreamingContext,
  clients: Set[FacebookPageClient],
  pollingSchedule: PollingSchedule,
  pollingWorkers: Int,
  storageLevel: StorageLevel
) extends ReceiverInputDStream[FacebookPost](ssc) {

  override def getReceiver(): Receiver[FacebookPost] = {
    logDebug("Creating facebook receiver")
    new FacebookPostReceiver(clients, pollingSchedule, storageLevel, pollingWorkers)
  }
} 
Author: CatalystCode | Project: streaming-facebook | Lines: 62 | Source: FacebookPostInputDStream.scala


Example 15: FacebookCommentsReceiver

// Package name and imported dependencies
package com.github.catalystcode.fortis.spark.streaming.facebook

import java.util.Date

import com.github.catalystcode.fortis.spark.streaming.facebook.client.FacebookPageClient
import com.github.catalystcode.fortis.spark.streaming.facebook.dto.FacebookComment
import com.github.catalystcode.fortis.spark.streaming.{PollingReceiver, PollingSchedule}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

private class FacebookCommentsReceiver(
  clients: Set[FacebookPageClient],
  pollingSchedule: PollingSchedule,
  storageLevel: StorageLevel,
  pollingWorkers: Int
) extends PollingReceiver[FacebookComment](pollingSchedule, pollingWorkers, storageLevel) with Logger {

  @volatile private var lastIngestedDate: Option[Date] = None

  override protected def poll(): Unit = {
    clients.par.foreach(_
      .loadNewFacebookComments(lastIngestedDate)
      .filter(x => {
        logDebug(s"Got comment with id ${x.comment.getId} from page ${x.pageId}")
        isNew(x)
      })
      .foreach(x => {
        logInfo(s"Storing comment ${x.comment.getId} from page ${x.pageId}")
        store(x)
        markStored(x)
      })
    )
  }

  private def isNew(item: FacebookComment) = {
    lastIngestedDate.isEmpty || item.comment.getCreatedTime.after(lastIngestedDate.get)
  }

  private def markStored(item: FacebookComment): Unit = {
    if (isNew(item)) {
      lastIngestedDate = Some(item.comment.getCreatedTime)
      logDebug(s"Updating last ingested date to ${lastIngestedDate.get}")
    }
  }
}

class FacebookCommentsInputDStream(
  ssc: StreamingContext,
  clients: Set[FacebookPageClient],
  pollingSchedule: PollingSchedule,
  pollingWorkers: Int,
  storageLevel: StorageLevel
) extends ReceiverInputDStream[FacebookComment](ssc) {

  override def getReceiver(): Receiver[FacebookComment] = {
    logDebug("Creating facebook receiver")
    new FacebookCommentsReceiver(clients, pollingSchedule, storageLevel, pollingWorkers)
  }
} 
Author: CatalystCode | Project: streaming-facebook | Lines: 62 | Source: FacebookCommentsInputDStream.scala


Example 16: PollingSchedule

// Package name and imported dependencies
package com.github.catalystcode.fortis.spark.streaming

import java.util.concurrent.{ScheduledThreadPoolExecutor, TimeUnit}

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

case class PollingSchedule(interval: Long, unit: TimeUnit, initialDelay: Long = 1)

// Taken from https://github.com/CatalystCode/streaming-instagram/blob/3873a197212ba5929dd54ec4949f3d1ac10ffc1f/src/main/scala/com/github/catalystcode/fortis/spark/streaming/PollingReceiver.scala
// Put this into a shared library at some point
abstract class PollingReceiver[T](
 pollingSchedule: PollingSchedule,
 pollingWorkers: Int,
 storageLevel: StorageLevel
) extends Receiver[T](storageLevel) {

  private var threadPool: ScheduledThreadPoolExecutor = _

  def onStart(): Unit = {
    threadPool = new ScheduledThreadPoolExecutor(pollingWorkers)

    val pollingThread = new Thread("Polling thread") {
      override def run(): Unit = {
        poll()
      }
    }

    threadPool.scheduleAtFixedRate(
      pollingThread, pollingSchedule.initialDelay,
      pollingSchedule.interval, pollingSchedule.unit)
  }

  def onStop(): Unit = {
    if (threadPool != null) {
      threadPool.shutdown()
    }
  }

  protected def poll(): Unit
} 
Author: CatalystCode | Project: streaming-facebook | Lines: 42 | Source: PollingReceiver.scala


Example 17: TwitterStream

// Package name and imported dependencies
package io.gzet.timeseries

import com.google.gson.GsonBuilder
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark.streaming.{Minutes, StreamingContext}
import org.apache.spark.{Logging, SparkConf, SparkContext}
import twitter4j.auth.OAuthAuthorization
import twitter4j.conf.ConfigurationBuilder

import scala.util.Try

object TwitterStream extends SimpleConfig with Logging {

  def getTwitterStream(ssc: StreamingContext, filters: Seq[String] = Nil) = {
    val builder = new ConfigurationBuilder()
    builder.setOAuthConsumerKey(twitterApiKey)
    builder.setOAuthConsumerSecret(twitterApiSecret)
    builder.setOAuthAccessToken(twitterTokenKey)
    builder.setOAuthAccessTokenSecret(twitterTokenSecret)
    val configuration = builder.build()
    TwitterUtils.createStream(
      ssc,
      Some(new OAuthAuthorization(configuration)),
      filters,
      StorageLevel.MEMORY_ONLY
    )
  }

  def main(args: Array[String]) = {

    val sparkConf = new SparkConf().setAppName("Twitter Extractor")
    val sc = new SparkContext(sparkConf)
    val ssc = new StreamingContext(sc, Minutes(5))

    val twitterStream = getTwitterStream(ssc, args).mapPartitions({ it =>
      val gson = new GsonBuilder().create()
      it map { s =>
        Try(gson.toJson(s))
      }
    })

    twitterStream
      .filter(_.isSuccess)
      .map(_.get)
      .saveAsTextFiles("twitter")

    // Start streaming context
    ssc.start()
    ssc.awaitTermination()

  }

} 
Author: PacktPublishing | Project: Mastering-Spark-for-Data-Science | Lines: 55 | Source: TwitterStream.scala


Example 18:

// Package name and imported dependencies
import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.SparkContext._
import collection.mutable.HashMap
import java.io._
//import org.apache.spark.graphx.VertexId


// NOTE: the enclosing object and the definitions of `sc` and `path` were dropped when this excerpt
// was extracted; a minimal wrapper with assumed names is restored here so that the snippet parses
// and the closing brace at the end has something to close.
object methodsToConstructGraph {
  val sc = new SparkContext(new SparkConf().setAppName("methodsToConstructGraph")) // assumed setup
  val path = "graph-edges.txt" // assumed: whitespace-separated edge list; lines starting with '#' are comments

  val edgesTuples = sc.textFile(path).flatMap { line =>
    if (!line.isEmpty && line(0) != '#') {
      val lineArray = line.split("\\s+")
      if (lineArray.length < 2) {
        None
      } else {
        val srcId = lineArray(0).toLong
        //val attr = // parse lineArray(1) as appropriate
        //val attr = lineArray(1)
        val dstId = lineArray(1).toLong
        Some((srcId, dstId))
      }
    } else {
      None
    }
  }
 
  // Build a graph from the edge tuples, giving every vertex the default attribute 1.
  val graph = Graph.fromEdgeTuples(edgesTuples, 1)

  // Alternative: let GraphLoader parse the edge list directly, keeping both edges and vertices
  // at MEMORY_AND_DISK so cached partitions spill to disk instead of being evicted.
  val graph2 = GraphLoader.edgeListFile(sc, path, canonicalOrientation = false, numEdgePartitions = -1,
    edgeStorageLevel = StorageLevel.MEMORY_AND_DISK, vertexStorageLevel = StorageLevel.MEMORY_AND_DISK)
  graph2.edges.mapPartitions(part => Iterator(part.flatMap(e => Iterator((e.srcId, e.dstId))).toSet)).collect
  graph2.vertices.mapPartitions(part => Iterator(part.flatMap(v => Iterator(v)).toSet)).collect
  

} 
Author: YifanLi | Project: GraphPartition | Lines: 36 | Source: methodsToConstructGraph.scala


Example 19: DailyUserTypeDistributionApp

// Package name and imported dependencies
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.apache.spark.streaming.flume.FlumeUtils

object DailyUserTypeDistributionApp {
  def main(args: Array[String]) {
    if (args.length != 5) {
      System.err.println(
        "Usage: DailyUserTypeDistributionApp <appname> <hostname> <port> <checkpointDir> <outputPath>")
      System.exit(1)
    }
    val Seq(appName, hostname, port, checkpointDir, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    FlumeUtils.createStream(ssc, hostname, port.toInt, StorageLevel.MEMORY_ONLY_SER_2)
      .map(rec => new String(rec.event.getBody().array()).split(","))
      .map(rec => ((rec(1).split(" ")(0), rec(12)), 1))
      .updateStateByKey(statefulCount)
      .repartition(1)
      .transform(rdd => rdd.sortByKey(ascending = false))
      .saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

  val statefulCount = (values: Seq[Int], state: Option[Int]) => Some(values.sum + state.getOrElse(0))

} 
Author: ZubairNabi | Project: prosparkstreaming | Lines: 43 | Source: L5-11FlumePush.scala


Example 20: YearlyDistributionApp

// Package name and imported dependencies
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.apache.spark.streaming.mqtt.MQTTUtils

object YearlyDistributionApp {
  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: YearlyDistributionApp <appname> <brokerUrl> <topic> <checkpointDir>")
      System.exit(1)
    }
    val Seq(appName, brokerUrl, topic, checkpointDir) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(10))
    ssc.checkpoint(checkpointDir)

    MQTTUtils.createStream(ssc, brokerUrl, topic, StorageLevel.MEMORY_ONLY_SER_2)
      .map(rec => rec.split(","))
      .map(rec => (rec(1).split(" ")(0), 1))
      .updateStateByKey(statefulCount)
      .map(pair => (pair._2, pair._1))
      .transform(rec => rec.sortByKey(ascending = false))
      .saveAsTextFiles("YearlyDistribution")

    ssc.start()
    ssc.awaitTermination()
  }

  val statefulCount = (values: Seq[Int], state: Option[Int]) => Some(values.sum + state.getOrElse(0))

} 
Author: ZubairNabi | Project: prosparkstreaming | Lines: 43 | Source: L5-9Mqtt.scala



Note: The org.apache.spark.storage.StorageLevel examples in this article were compiled from source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors. Consult each project's license before using or redistributing the code, and do not reproduce this compilation without permission.

