
Scala StorageLevels Class Code Examples


This article collects typical usage examples of the org.apache.spark.api.java.StorageLevels class in Scala. If you are wondering what StorageLevels is for, how to use it, or what real-world code that uses it looks like, the hand-picked class examples below should help.



Four code examples of the StorageLevels class are shown below, sorted by popularity by default.
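Before the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what StorageLevels is: a Java-friendly holder for Spark's predefined StorageLevel constants, which can be passed anywhere a StorageLevel is expected, such as RDD.persist. The dataset and application name are made up for illustration.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.StorageLevels

object StorageLevelsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("StorageLevelsDemo"))
    // StorageLevels exposes Spark's predefined StorageLevel constants;
    // any of them can be passed wherever a StorageLevel is expected, e.g. RDD.persist.
    val numbers = sc.parallelize(1 to 1000)
      .persist(StorageLevels.MEMORY_AND_DISK) // keep in memory, spill to disk if it does not fit
    println(numbers.sum())
    sc.stop()
  }
}

MEMORY_AND_DISK keeps partitions deserialized in memory and spills to disk when they do not fit; the _SER and _2 variants used in the examples below store serialized bytes and keep two replicas, respectively.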

Example 1: ADAMContextExtensions

// Set the package name and import the required dependencies
package org.bdgenomics.adam.rdd

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.converters.FastaConverter
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD
import org.bdgenomics.utils.instrumentation.Metrics
import org.apache.spark.rdd.MetricsContext._
import org.bdgenomics.adam.rdd.feature.FeatureRDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.Feature


object ADAMContextExtensions {

  implicit class spExt(val sparkContext: SparkContext) extends HDFSFilesExtensions{

    def loadFastaPersistent(
                   filePath: String,
                   fragmentLength: Long = 10000L): NucleotideContigFragmentRDD = {
      val fastaData: RDD[(LongWritable, Text)] = sparkContext.newAPIHadoopFile(
        filePath,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]
      )
      // Wrap the RDD for metrics instrumentation when recording is enabled,
      // and use the wrapped RDD downstream so the instrumentation takes effect.
      val instrumented = if (Metrics.isRecording) fastaData.instrument() else fastaData

      val remapData = instrumented.map(kv => (kv._1.get, kv._2.toString))

      // convert rdd and cache
      val fragmentRdd = FastaConverter(remapData, fragmentLength)
        .persist(StorageLevels.MEMORY_AND_DISK)

      NucleotideContigFragmentRDD(fragmentRdd)
    }

    def mergeFeatures(features: List[FeatureRDD]): Option[FeatureRDD] = features match {
      case Nil => None
      case head :: Nil => Some(head)
      case head :: tail =>
        val merged = tail.foldLeft(head){
          case (acc, feature) =>
            val joined = acc.broadcastRegionJoin(feature)
            acc.transform(_ => joined.rdd.map{
              case (one, two) =>
                one.setStart(Math.min(one.getStart, two.getStart))
                one.setEnd(Math.max(one.getEnd, two.getEnd))
                one
            })
        }
        Some(merged)
    }

  }


} 
Developer: antonkulaga, Project: adam-playground, Lines of code: 62, Source file: ADAMContextExtensions.scala
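A hedged usage sketch for Example 1 (not part of the original project): importing ADAMContextExtensions._ brings the implicit class spExt into scope, so loadFastaPersistent appears as a method on SparkContext. The FASTA path and application name are placeholders, and the sketch assumes the adam-playground code above is on the classpath.

import org.apache.spark.{SparkConf, SparkContext}
import org.bdgenomics.adam.rdd.ADAMContextExtensions._

object LoadFastaExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("LoadFastaExample"))
    // The implicit class spExt adds loadFastaPersistent to SparkContext;
    // the returned fragments are already persisted at MEMORY_AND_DISK.
    val contigs = sc.loadFastaPersistent("/path/to/reference.fasta") // placeholder path
    println(contigs.rdd.count())
    sc.stop()
  }
}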


Example 2: armsGuided

// Set the package name and import the required dependencies
package comp.bio.aging.crispr

import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.{ReferencePosition, ReferenceRegion}
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD
import comp.bio.aging.playground.extensions._
import scala.collection.immutable.{List, Nil}



trait HomologyArms {

  def armsGuided(fragmentRDD: NucleotideContigFragmentRDD,
           guidedCats: RDD[(String, List[CutDS])],
           left: Long, right: Long, avoidSites: Set[String] = Set.empty, allowOverlap: Boolean = true): RDD[KnockIn] = {
    arms(fragmentRDD, guidedCats.values.flatMap(f=>f), left, right, avoidSites, allowOverlap)
  }

  def arms(fragmentRDD: NucleotideContigFragmentRDD,
           cuts: RDD[CutDS],
           left: Long, right: Long, avoidSites: Set[String] = Set.empty, allowOverlap: Boolean = true): RDD[KnockIn] = {

    val positiveCuts: RDD[(ReferenceRegion, CutDS)] = cuts.filter(_.positive(left)).map{
      case (cut) => cut.armsRegion(left, right) -> cut
    }.persist(StorageLevels.MEMORY_AND_DISK)

    val extracted: RDD[(ReferenceRegion, String)] = fragmentRDD.extractRegions(positiveCuts.keys.collect().toList)
      .filter{
        case (_, str) => !avoidSites.exists( s=> str.contains(s))
      }
    val joined: RDD[(ReferenceRegion, (CutDS, String))] = positiveCuts.join(extracted) // key: region, value: (cut, extracted sequence)
    joined.map{
      case (region, (cut, regionSeq)) => cut.knockin(regionSeq, region, left, right, allowOverlap)
    }
  }
} 
Developer: antonkulaga, Project: CRISPR, Lines of code: 38, Source file: HomologyArms.scala
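The persist call in Example 2 matters because positiveCuts is consumed by two separate jobs: collecting its keys for extractRegions, and then joining against the extracted sequences. A minimal sketch of that persist-then-reuse pattern, with made-up data and outside the CRISPR project:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.StorageLevels

object PersistAndReuse {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("PersistAndReuse"))
    // Cache the keyed RDD because it is consumed twice: once by collect() and once by join().
    val keyed = sc.parallelize(Seq("chr1" -> 1, "chr2" -> 2))
      .persist(StorageLevels.MEMORY_AND_DISK)
    val keys = keyed.keys.collect().toList           // first job
    val other = sc.parallelize(Seq("chr1" -> "A", "chr2" -> "C"))
    val joined = keyed.join(other)                   // second job reuses the cached partitions
    joined.collect().foreach(println)
    sc.stop()
  }
}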


Example 3: SparkStreamingOnKafkaReceiver

// Set the package name and import the required dependencies
package com.jjzhk.sparkexamples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Durations, StreamingContext}


object SparkStreamingOnKafkaReceiver {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("SparkStreamingOnKafkaReceiver")
    val sc = new StreamingContext(conf, Durations.seconds(30))
    val topicMap = Map[String, Int]("HelloKafka" -> 1) // key: topic name, value: number of receiver threads for that topic
    val lines = KafkaUtils.createStream(sc, "Master:2181,Worker1:2181,Worker2:2181", "MyFirstConsumerGroup", topicMap,
      StorageLevels.MEMORY_AND_DISK_SER_2)

    val words = lines.flatMap(_._2.split(" ")).map((_, 1))

    val wordCounts = words.reduceByKey(_+_)
    wordCounts.print()

    sc.start()
    sc.awaitTermination()
  }
} 
Developer: JJZHK, Project: MySpark, Lines of code: 26, Source file: SparkStreamingOnKafkaReceiver.scala
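Example 3 stores the received Kafka blocks with MEMORY_AND_DISK_SER_2, i.e. serialized, spilling to disk, replicated twice. When no predefined constant fits, StorageLevels also offers a create factory; the sketch below (assuming a Spark version with the five-argument overload) builds a level equivalent to that constant:

import org.apache.spark.api.java.StorageLevels
import org.apache.spark.storage.StorageLevel

object CustomStorageLevel {
  def main(args: Array[String]): Unit = {
    // create(useDisk, useMemory, useOffHeap, deserialized, replication)
    val custom: StorageLevel = StorageLevels.create(true, true, false, false, 2)
    // Should match the predefined constant used in Example 3.
    println(custom == StorageLevels.MEMORY_AND_DISK_SER_2)
  }
}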


Example 4: SparkStreamPullDataFromFlume

// Set the package name and import the required dependencies
package com.jjzhk.sparkexamples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.flume.FlumeUtils
import org.apache.spark.streaming.{Durations, StreamingContext}


object SparkStreamPullDataFromFlume {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[4]").setAppName("SparkStreamPullDataFromFlume")
    val sc = new StreamingContext(conf, Durations.seconds(30))
    val lines = FlumeUtils.createPollingStream(sc, "Master", 9898, StorageLevels.MEMORY_ONLY)
    val words = lines.map(e => e.event).flatMap(event => {
      val s = new String(event.getBody.array())
      s.split(" ")
    }).map((_, 1))

    val wordCounts = words.reduceByKey(_+_)
    wordCounts.print()

    sc.start()
    sc.awaitTermination()
  }
} 
Developer: JJZHK, Project: MySpark, Lines of code: 27, Source file: SparkStreamPullDataFromFlume.scala
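Beyond the storage level handed to the receiver, intermediate DStreams can be persisted with the same constants. A hedged sketch outside the MySpark project, using a socket source as a stand-in for Flume; the hostname and port are placeholders:

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.{Durations, StreamingContext}

object PersistedStreamWordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("PersistedStreamWordCount")
    val ssc = new StreamingContext(conf, Durations.seconds(30))
    // socketTextStream also accepts a StorageLevels constant for the received blocks.
    val lines = ssc.socketTextStream("localhost", 9999, StorageLevels.MEMORY_AND_DISK_SER)
    // A derived DStream can be persisted explicitly when it feeds several outputs.
    val words = lines.flatMap(_.split(" ")).map((_, 1)).persist(StorageLevels.MEMORY_ONLY_SER)
    words.reduceByKey(_ + _).print()
    words.count().print()
    ssc.start()
    ssc.awaitTermination()
  }
}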



Note: The org.apache.spark.api.java.StorageLevels class examples in this article were collected from open-source projects hosted on GitHub, MSDocs, and similar code and documentation platforms. The code snippets were selected from projects contributed by various open-source developers; copyright remains with the original authors. Please consult each project's license before using or redistributing the code, and do not repost this article without permission.

