• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Scala ShuffledRDD类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Scala中org.apache.spark.rdd.ShuffledRDD的典型用法代码示例。如果您正苦于以下问题:Scala ShuffledRDD类的具体用法?Scala ShuffledRDD怎么用?Scala ShuffledRDD使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了ShuffledRDD类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。

示例1: PointsPartitionedByBoxesRDD

//设置package包名称以及导入依赖的类
package org.alitouka.spark.dbscan.spatial.rdd

import org.alitouka.spark.dbscan.spatial.{BoxCalculator, PointSortKey, Box, Point}
import org.apache.spark.rdd.{ShuffledRDD, RDD}
import org.alitouka.spark.dbscan._
import org.apache.spark.SparkContext
import org.alitouka.spark.dbscan.util.PointIndexer


// RDD of (PointSortKey, Point) pairs shuffled with a BoxPartitioner so that
// each partition holds the points of one Box. The box collection and the
// overall bounding box travel with the RDD as metadata for downstream stages.
// NOTE(review): assumes BoxPartitioner maps each PointSortKey to its box's
// partition — confirm against BoxPartitioner's implementation.
private [dbscan] class PointsPartitionedByBoxesRDD  (prev: RDD[(PointSortKey, Point)], val boxes: Iterable[Box], val boundingBox: Box)
  extends ShuffledRDD [PointSortKey, Point, Point] (prev, new BoxPartitioner(boxes))

/** Factory methods for [[PointsPartitionedByBoxesRDD]]. */
object PointsPartitionedByBoxesRDD {

  /** Generates density-based boxes for the raw data set and shuffles the
    * points so that each partition holds the points of one box.
    *
    * @param rawData              input data set (provides the SparkContext)
    * @param partitioningSettings controls how the boxes are generated
    * @param dbscanSettings       DBSCAN parameters; only the distance measure
    *                             is read here
    * @return points partitioned by boxes, carrying the box metadata
    */
  def apply (rawData: RawDataSet,
    partitioningSettings: PartitioningSettings = new PartitioningSettings (),
    dbscanSettings: DbscanSettings = new DbscanSettings ())
    : PointsPartitionedByBoxesRDD = {

    val sc = rawData.sparkContext
    val boxCalculator = new BoxCalculator (rawData)
    val (boxes, boundingBox) = boxCalculator.generateDensityBasedBoxes (partitioningSettings, dbscanSettings)
    val broadcastBoxes = sc.broadcast(boxes)
    // val, not var: the broadcast handle is never reassigned.
    val broadcastNumberOfDimensions = sc.broadcast (boxCalculator.numberOfDimensions)

    val pointsInBoxes = PointIndexer.addMetadataToPoints(
      rawData,
      broadcastBoxes,
      broadcastNumberOfDimensions,
      dbscanSettings.distanceMeasure)

    PointsPartitionedByBoxesRDD (pointsInBoxes, boxes, boundingBox)
  }

  /** Wraps an already-keyed RDD together with its box metadata. */
  def apply (pointsInBoxes: RDD[(PointSortKey, Point)], boxes: Iterable[Box], boundingBox: Box): PointsPartitionedByBoxesRDD = {
    new PointsPartitionedByBoxesRDD(pointsInBoxes, boxes, boundingBox)
  }

  /** Projects each keyed point down to its (pointId, coordinates) pair. */
  private [dbscan] def extractPointIdsAndCoordinates (data: RDD[(PointSortKey, Point)]): RDD[(PointId, PointCoordinates)] = {
    data.map ( x => (x._2.pointId, x._2.coordinates) )
  }

}
开发者ID:isaacboucinha,项目名称:CardioStream,代码行数:45,代码来源:PointsPartitionedByBoxesRDD.scala


示例2: TeraSort

//设置package包名称以及导入依赖的类
package com.microsoft.spark.perf.core

import scala.collection.mutable.ArrayBuffer

import com.microsoft.spark.perf.Benchmarkable
import com.microsoft.spark.perf.core.report.CoreBenchmarkResult
import com.microsoft.spark.perf.report.{BenchmarkResult, ExecutionMode, Failure}
import com.microsoft.spark.perf.report.ExecutionMode.{ForeachResults, WriteTextFile}

import org.apache.spark.SparkContext
import org.apache.spark.rdd.{RDD, ShuffledRDD}

/** TeraSort benchmark: shuffles pre-generated binary records through a
  * [[TeraSortPartitioner]] with a custom serializer and key ordering, then
  * either forces evaluation or writes the sorted keys out as text.
  */
private[core] class TeraSort (
    @transient sparkContext: SparkContext,
    inputPath: String,
    outputPath: String = "",
    inputPartitions: Int,
    outputPartitions: Int,
    override protected val executionMode: ExecutionMode = ExecutionMode.ForeachResults)
  extends Benchmarkable(sparkContext) {

  override val name = "TeraSort"

  /** Reads raw byte-array records from `inputPath` and wraps each in a
    * RecordWrapper; both the raw and wrapped RDDs are cached.
    */
  private[core] def generateInputRecords: RDD[RecordWrapper] = {
    println(s"Reading from $inputPath")
    val inputData = sparkContext.objectFile[Array[Byte]](inputPath, inputPartitions).cache()
    val dataset = inputData.map(record => new RecordWrapper(record)).cache()
    dataset
  }

  /** Runs the sort and reports the measured execution time; any exception is
    * converted into a failed [[BenchmarkResult]] rather than propagated.
    */
  override protected def doBenchmark(
    includeBreakdown: Boolean,
    description: String,
    messages: ArrayBuffer[String]): BenchmarkResult = {
    try {
      val dataset = generateInputRecords
      // NOTE(review): partition count is doubled relative to the requested
      // outputPartitions — presumably deliberate tuning; confirm.
      val partitioner = new TeraSortPartitioner(outputPartitions * 2)
      val output = new ShuffledRDD[Array[Byte], Array[Byte], Array[Byte]](dataset, partitioner).
        cache()
      output.setSerializer(new TeraSortSerializer)
      output.setKeyOrdering(new TeraSortRecordOrdering)

      val executionTime = measureTimeMs {
        executionMode match {
          case ForeachResults =>
            // Force full evaluation of the shuffle. `_ => ()` fixes the
            // original `_ => Unit`, which discarded the Unit companion
            // object instead of returning the unit value.
            output.foreach(_ => ())
          case WriteTextFile(destinationPath) =>
            // Bound under a distinct name so the extracted path does not
            // shadow the constructor parameter `outputPath`.
            output.map(result => UnsafeUtils.getUnsafeInstance.getLong(result._1, 16)).
              saveAsTextFile(destinationPath)
        }
      }
      new CoreBenchmarkResult(name, executionMode.toString, executionTime = Some(executionTime))
    } catch {
      case e: Exception =>
        e.printStackTrace()
        BenchmarkResult(name, executionMode.toString,
          failure = Some(Failure(e.getClass.getName, e.getMessage)))
    }
  }
}
开发者ID:hdinsight,项目名称:SparkPerf,代码行数:61,代码来源:TeraSort.scala



注:本文中的org.apache.spark.rdd.ShuffledRDD类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Scala Path类代码示例发布时间:2022-05-23
下一篇:
Scala OneForOneStrategy类代码示例发布时间:2022-05-23
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2023 极客世界.|Sitemap