This article collects typical usage examples of the Scala class org.apache.spark.Partitioner. If you are wondering what the Partitioner class is for, how to use it, and what working code looks like, the curated class examples below may help.
The following presents 13 code examples of the Partitioner class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help our system recommend better Scala code examples.
Example 1: ImageSectorPartitioner
// Package declaration and imported dependency classes
package org.pfcoperez.sparkmandelbrot.partitioners

import org.apache.spark.Partitioner
import org.pfcoperez.geometry.Primitives2D.{PixelFrame, Pixel, sector}

class ImageSectorPartitioner(val sectorSize: (Int, Int), val pixelFrame: PixelFrame) extends Partitioner {

  override def numPartitions: Int = {
    val sectorArea: Long = sectorSize.productIterator.reduce[Any] {
      case (a: Int, b: Int) => a.toLong * b.toLong
    }.asInstanceOf[Long]
    (pixelFrame.area.toDouble / sectorArea.toDouble).ceil.toInt
  }

  override def getPartition(key: Any): Int = key match {
    case (x: Int, y: Int) =>
      val asLongPairSectorSize = (sectorSize._1.toLong, sectorSize._2.toLong)
      val p = Pixel(x, y)
      sector(p, asLongPairSectorSize)(pixelFrame).toInt
  }

  override def equals(obj: scala.Any): Boolean = obj match {
    case that: ImageSectorPartitioner =>
      that.sectorSize == sectorSize && that.pixelFrame == pixelFrame
    case _ => false
  }
}
Developer: pfcoperez, Project: mandelbrot_spark, Lines of code: 28, Source file: ImageSectorPartitioner.scala
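A hedged usage sketch follows: the keys must be (Int, Int) pixel coordinates, so the partitioner plugs straight into partitionBy on a pair RDD. The PixelFrame value is left as a placeholder (???) because Primitives2D is project-specific and its constructors are not shown in this excerpt.

import org.apache.spark.{SparkConf, SparkContext}
import org.pfcoperez.geometry.Primitives2D.PixelFrame
import org.pfcoperez.sparkmandelbrot.partitioners.ImageSectorPartitioner

object ImageSectorPartitionerUsage {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("sector-demo"))
    val frame: PixelFrame = ???  // placeholder: depends on the project-specific Primitives2D API
    // 256x256 image, one dummy value per pixel, keyed by (x, y)
    val pixels = sc.parallelize(for (x <- 0 until 256; y <- 0 until 256) yield ((x, y), 0))
    // Each partition now corresponds to one 64x64 sector of the image
    val bySector = pixels.partitionBy(new ImageSectorPartitioner((64, 64), frame))
    println(bySector.getNumPartitions)
    sc.stop()
  }
}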
Example 2: GridPartitioner
// Package declaration and imported dependency classes
package se.kth.climate.fast.math

import org.apache.spark.Partitioner

class GridPartitioner(
    val rows: Int,
    val cols: Int,
    val rowsPerPart: Int,
    val colsPerPart: Int) extends Partitioner {

  require(rows > 0)
  require(cols > 0)
  require(rowsPerPart > 0)
  require(colsPerPart > 0)

  private val rowPartitions = math.ceil(rows * 1.0 / rowsPerPart).toInt
  private val colPartitions = math.ceil(cols * 1.0 / colsPerPart).toInt

  override val numPartitions: Int = rowPartitions * colPartitions

  // Partitioner.getPartition is abstract, but its body is missing from this excerpt.
  // The mapping below is a reconstruction in the style of Spark's own GridPartitioner:
  // an (i, j) grid coordinate falls into the surrounding block of rowsPerPart x colsPerPart cells.
  override def getPartition(key: Any): Int = key match {
    case (i: Int, j: Int) => i / rowsPerPart + (j / colsPerPart) * rowPartitions
    case _ => throw new IllegalArgumentException(s"Unrecognized key: $key")
  }
}

// The excerpt shows this factory inside the class; a companion object is the usual home for it,
// so it can be called as GridPartitioner(rows, cols, suggestedNumPartitions).
object GridPartitioner {
  def apply(rows: Int, cols: Int, suggestedNumPartitions: Int): GridPartitioner = {
    require(suggestedNumPartitions > 0)
    val scale = 1.0 / math.sqrt(suggestedNumPartitions)
    val rowsPerPart = math.round(math.max(scale * rows, 1.0)).toInt
    val colsPerPart = math.round(math.max(scale * cols, 1.0)).toInt
    new GridPartitioner(rows, cols, rowsPerPart, colsPerPart)
  }
}
Developer: ClimateFAST, Project: FASTSpark, Lines of code: 30, Source file: GridPartitioner.scala
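A brief usage sketch under the reconstruction above (it assumes the companion-object factory and the (Int, Int) key handling added there): keys are (row, column) grid coordinates, and the factory derives block sizes from a suggested partition count.

// assumes sc: org.apache.spark.SparkContext is already in scope
val cells = sc.parallelize(for (i <- 0 until 100; j <- 0 until 100) yield ((i, j), 1.0))
val grid = cells.partitionBy(GridPartitioner(rows = 100, cols = 100, suggestedNumPartitions = 16))
println(grid.getNumPartitions)  // rowPartitions * colPartitions, close to the suggested 16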
Example 3: HashAwarePartitioner
// Package declaration and imported dependency classes
package org.apache.spark.lineage

import org.apache.spark.Partitioner
import org.apache.spark.util.Utils

class HashAwarePartitioner(partitions: Int) extends Partitioner {

  def numPartitions = partitions

  def getPartition(key: Any): Int = Utils.nonNegativeMod(key.asInstanceOf[Int], numPartitions)

  override def equals(other: Any): Boolean = other match {
    case h: HashAwarePartitioner =>
      h.numPartitions == numPartitions
    case _ =>
      false
  }
}
Developer: lmd1993, Project: bigsiftParallel, Lines of code: 19, Source file: HashAwarePartitioner.scala
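A hedged usage sketch: getPartition casts the key to Int, so this partitioner is only meaningful for Int-keyed pair RDDs. Note that the class sits in the org.apache.spark.lineage package precisely so it can reach the private[spark] helper Utils.nonNegativeMod.

// assumes sc: SparkContext is in scope
val byId = sc.parallelize(Seq((1, "a"), (2, "b"), (17, "c")))
val partitioned = byId.partitionBy(new HashAwarePartitioner(4))  // key mod 4, always non-negative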
Example 4: LocalityAwarePartitioner
// Package declaration and imported dependency classes
package org.apache.spark.lineage

import org.apache.spark.Partitioner
import org.apache.spark.lineage.LineageContext._
import org.apache.spark.util.PackIntIntoLong

class LocalityAwarePartitioner(partitions: Int) extends Partitioner {

  def numPartitions = partitions

  def getPartition(key: Any): Int = key.asInstanceOf[RecordId]._1

  override def equals(other: Any): Boolean = other match {
    case h: LocalityAwarePartitioner =>
      h.numPartitions == numPartitions
    case _ =>
      false
  }
}
Developer: lmd1993, Project: bigsiftParallel, Lines of code: 20, Source file: LocalityAwarePartitioner.scala
Example 5: SimpleCustomPartitioner
// Package declaration and imported dependency classes
package com.malaska.spark.training.partitioning

import org.apache.spark.Partitioner
import org.apache.spark.sql.SparkSession

object SimpleCustomPartitioner {
  def main(args: Array[String]): Unit = {
    val jsonPath = args(0)
    val partitions = args(1).toInt

    val sparkSession = SparkSession.builder
      .master("local")
      .appName("my-spark-app")
      .config("spark.some.config.option", "config-value")
      .getOrCreate()

    val jsonDf = sparkSession.read.json(jsonPath)

    val partitionedRdd = jsonDf.rdd.map(row => {
      val group = row.getAs[String]("group")
      val time = row.getAs[Long]("time")
      val value = row.getAs[Long]("value")
      ((group, time), value) // the key is itself a tuple: ((group, time), value)
    }).repartitionAndSortWithinPartitions(new SimpleCustomPartitioner(partitions))

    val pairRdd = jsonDf.rdd.map(row => {
      val group = row.getAs[String]("group")
      val time = row.getAs[Long]("time")
      val value = row.getAs[Long]("value")
      ((group, time), value)
    })

    // These two calls only illustrate the reduceByKey overloads; their results are not used.
    pairRdd.reduceByKey(_ + _, 100)
    pairRdd.reduceByKey(new SimpleCustomPartitioner(partitions), _ + _)

    partitionedRdd.collect().foreach(r => {
      println(r)
    })

    sparkSession.stop()
  }
}

class SimpleCustomPartitioner(numOfParts: Int) extends Partitioner {
  override def numPartitions: Int = numOfParts

  // Partition only by the group part of the key; the sort within each partition then orders by time.
  override def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[(String, Long)]
    Math.abs(k._1.hashCode) % numPartitions
  }
}
Developer: TedBear42, Project: spark_training, Lines of code: 54, Source file: SimpleCustomPartitioner.scala
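A small follow-up sketch, reusing partitionedRdd from the main method above: because getPartition looks only at the group part of the key, every record of a group lands in one partition, and repartitionAndSortWithinPartitions additionally sorts each partition by the full (group, time) key. Dumping partition contents makes that visible.

partitionedRdd
  .mapPartitionsWithIndex { (idx, it) => it.map(kv => (idx, kv)) }
  .collect()
  .foreach(println)  // within each partition index, records appear ordered by (group, time)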
Example 6: AppleCustomPartitioner
// Package declaration and imported dependency classes
package com.malaska.spark.training.partitioning

import java.util.Random

import org.apache.spark.Partitioner

class AppleCustomPartitioner(numOfParts: Int) extends Partitioner {
  override def numPartitions: Int = numOfParts

  // A fresh Random on every access; a single `val random = new Random()` would work just as well.
  def random = new Random()

  override def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[(String, Long)]
    val ticker = k._1
    if (ticker.equals("apple")) {
      // Salt the hot "apple" key so its records are spread over several partitions
      // instead of all landing in one and skewing the job.
      val saltedTicker = ticker + random.nextInt(9)
      Math.abs(saltedTicker.hashCode) % numPartitions
    } else {
      Math.abs(ticker.hashCode) % numPartitions
    }
  }
}
Developer: TedBear42, Project: spark_training, Lines of code: 23, Source file: AppleCustomPartitioner.scala
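Salting inside the partitioner spreads the hot "apple" key across partitions, but any per-key aggregation then has to merge the pieces again. A hedged sketch of the usual explicit two-stage pattern (salting the key itself instead of relying on the partitioner's randomness), with trades standing in for an RDD[(String, Long)] of (ticker, value) pairs:

import scala.util.Random

// stage 1: aggregate per (ticker, salt) so the hot ticker's work is spread over up to 9 reducers
val salted  = trades.map { case (ticker, v) => ((ticker, Random.nextInt(9)), v) }
val partial = salted.reduceByKey(_ + _)
// stage 2: drop the salt and combine the partial sums into one value per ticker
val perTicker = partial.map { case ((ticker, _), v) => (ticker, v) }.reduceByKey(_ + _)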
Example 7: GainChartPartitioner
// Package declaration and imported dependency classes
package com.paypal.risk.smunf.gainchart

import com.paypal.risk.smunf.util.CommonFunction
import org.apache.spark.Partitioner

class GainChartPartitioner(partitions: Int) extends Partitioner {

  def numPartitions = partitions

  def getPartition(key: Any): Int = {
    val value = key match {
      case x: GainChartIndicator => x.hashCode
      case x: GainChartRecord => x.indicator.hashCode
      case null => 0
      case _ => key.hashCode
    }
    CommonFunction.nonNegativeMod(value, numPartitions)
  }

  override def equals(other: Any): Boolean = other match {
    case h: GainChartPartitioner => h.numPartitions == numPartitions
    case _ => false
  }

  override def hashCode: Int = numPartitions
}
Developer: yanlzhang8936, Project: Smunf, Lines of code: 27, Source file: GainChartPartitioner.scala
Example 8: VariablePartitioner
// Package declaration and imported dependency classes
package com.paypal.risk.smunf.util

import org.apache.spark.Partitioner

class VariablePartitioner(partitions: Int, useLabel: Boolean) extends Partitioner {

  def numPartitions = partitions

  def getPartition(key: Any): Int = {
    val value = key match {
      case x: VariableRecord => x.id.hashCode + (if (useLabel) x.label.hashCode else 0)
      case null => 0
      case _ => key.hashCode
    }
    CommonFunction.nonNegativeMod(value, numPartitions)
  }

  override def equals(other: Any): Boolean = other match {
    case h: VariablePartitioner => h.numPartitions == numPartitions
    case _ => false
  }

  override def hashCode: Int = numPartitions
}
Developer: yanlzhang8936, Project: Smunf, Lines of code: 25, Source file: VariablePartitioner.scala
Example 9: SizePartitioner
// Package declaration and imported dependency classes
package com.paypal.risk.smunf.math.stats

import com.paypal.risk.smunf.util.CommonFunction
import org.apache.spark.Partitioner

class SizePartitioner(partitions: Int) extends Partitioner {

  def numPartitions = partitions

  def getPartition(key: Any): Int = {
    val value = key match {
      case x: Int => x
      case null => 0
      case _ => key.hashCode
    }
    CommonFunction.nonNegativeMod(value, numPartitions)
  }

  override def equals(other: Any): Boolean = other match {
    case h: SizePartitioner => h.numPartitions == numPartitions
    case _ => false
  }

  override def hashCode: Int = numPartitions
}
Developer: yanlzhang8936, Project: Smunf, Lines of code: 26, Source file: SizePartitioner.scala
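The three Smunf partitioners above follow one pattern: reduce the key to an Int, push it through CommonFunction.nonNegativeMod, and override both equals and hashCode so Spark can recognize co-partitioned RDDs and skip shuffles. CommonFunction itself is not shown in this excerpt; presumably it mirrors Spark's Utils.nonNegativeMod, roughly as sketched below.

// a sketch of what CommonFunction.nonNegativeMod presumably does (not the project's actual source)
def nonNegativeMod(x: Int, mod: Int): Int = {
  val rawMod = x % mod
  rawMod + (if (rawMod < 0) mod else 0)  // keeps the result in [0, mod) even for negative x
}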
Example 10: parallelize
// Package declaration and imported dependency classes
package com.datawizards.sparklocal.rdd

import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

import scala.collection.{GenIterable, GenMap, Map}
import scala.reflect.ClassTag

trait PairRDDFunctionsAPI[K, V] {
  protected lazy val spark: SparkSession = SparkSession.builder().getOrCreate()

  protected def parallelize[That: ClassTag](d: Seq[That]): RDD[That] = spark.sparkContext.parallelize(d)
  protected def parallelize[That: ClassTag](d: GenIterable[That]): RDD[That] = parallelize(d.toList)

  def mapValues[U: ClassTag](f: (V) => U): RDDAPI[(K, U)]
  def keys: RDDAPI[K]
  def values: RDDAPI[V]
  def flatMapValues[U: ClassTag](f: (V) => TraversableOnce[U]): RDDAPI[(K, U)]
  def countByKey(): GenMap[K, Long]

  def reduceByKey(func: (V, V) => V): RDDAPI[(K, V)]
  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDDAPI[(K, V)]
  def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDDAPI[(K, V)]
  def reduceByKeyLocally(func: (V, V) => V): Map[K, V]

  def groupByKey(): RDDAPI[(K, GenIterable[V])]
  def groupByKey(numPartitions: Int): RDDAPI[(K, GenIterable[V])]
  def groupByKey(partitioner: Partitioner): RDDAPI[(K, GenIterable[V])]

  def foldByKey(zeroValue: V)(func: (V, V) => V): RDDAPI[(K, V)]
  def foldByKey(zeroValue: V, numPartitions: Int)(func: (V, V) => V): RDDAPI[(K, V)]
  def foldByKey(zeroValue: V, partitioner: Partitioner)(func: (V, V) => V): RDDAPI[(K, V)]

  def join[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (V, W))]
  def join[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (V, W))]
  def join[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (V, W))]
  def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (V, Option[W]))]
  def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (V, Option[W]))]
  def leftOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (V, Option[W]))]
  def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (Option[V], W))]
  def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (Option[V], W))]
  def rightOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (Option[V], W))]
  def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (Option[V], Option[W]))]
  def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (Option[V], Option[W]))]
  def fullOuterJoin[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (Option[V], Option[W]))]

  def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]
  def cogroup[W: ClassTag](other: RDDAPI[(K, W)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
  def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], partitioner: Partitioner): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
  def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)]): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]
  def cogroup[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
  def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)]): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
  def cogroup[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W]))]
  def cogroup[W1: ClassTag, W2: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2]))]
  def cogroup[W1: ClassTag, W2: ClassTag, W3: ClassTag](other1: RDDAPI[(K, W1)], other2: RDDAPI[(K, W2)], other3: RDDAPI[(K, W3)], numPartitions: Int): RDDAPI[(K, (GenIterable[V], GenIterable[W1], GenIterable[W2], GenIterable[W3]))]

  def collectAsMap(): GenMap[K, V]

  def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)]): RDDAPI[(K, V)]
  def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)], numPartitions: Int): RDDAPI[(K, V)]
  def subtractByKey[W: ClassTag](other: RDDAPI[(K, W)], p: Partitioner): RDDAPI[(K, V)]

  def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]
  def aggregateByKey[U: ClassTag](zeroValue: U, partitioner: Partitioner)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]
  def aggregateByKey[U: ClassTag](zeroValue: U, numPartitions: Int)(seqOp: (U, V) => U, combOp: (U, U) => U): RDDAPI[(K, U)]

  def partitionBy(partitioner: Partitioner): RDDAPI[(K, V)]
}
Developer: piotr-kalanski, Project: spark-local, Lines of code: 61, Source file: PairRDDFunctionsAPI.scala
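The trait mirrors Spark's PairRDDFunctions so code written against RDDAPI can run either on a real RDD or on a plain Scala collection; every partitioner overload accepts any org.apache.spark.Partitioner, including the custom ones shown elsewhere in this article. A hedged sketch, assuming a value pairs implementing PairRDDFunctionsAPI[String, Int] (for example obtained through the spark-local RDDAPI wrapper):

import org.apache.spark.HashPartitioner

val counts  = pairs.reduceByKey(new HashPartitioner(8), _ + _)  // partitioner-aware overload
val grouped = pairs.groupByKey(new HashPartitioner(8))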
Example 11: GlobalDataPartitioner
// Package declaration and imported dependency classes
package mljoin

import org.apache.spark.Partitioner

class GlobalDataPartitioner(numParts: Int, B_i_data_hash: Data2 => Long) extends Partitioner {

  def getPartition(key: Any): Int = {
    key match {
      case null => 0
      case _ =>
        val tmp: Data2 = key.asInstanceOf[Data2]
        nonNegativeMod(B_i_data_hash(tmp), numParts)
    }
  }

  def numPartitions: Int = numParts

  def nonNegativeMod(x: Long, mod: Int): Int = {
    val rawMod = (x % mod).toInt
    rawMod + (if (rawMod < 0) mod else 0)
  }
}
Developer: niketanpansare, Project: mljoin, Lines of code: 24, Source file: GlobalDataPartitioner.scala
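A hedged construction sketch: Data2 is an mljoin-specific type that is not shown here, so the hash function below is a hypothetical stand-in that simply reuses the object's hashCode; a real B_i_data_hash would hash whichever fields drive the join.

val dataPartitioner = new GlobalDataPartitioner(64, (d: Data2) => d.hashCode.toLong)
// dataRdd.partitionBy(dataPartitioner)  // assumes an RDD keyed by Data2 values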
Example 12: GlobalModelPartitioner
// Package declaration and imported dependency classes
package mljoin

import org.apache.spark.Partitioner

class GlobalModelPartitioner(numParts: Int, B_i_model_hash: Model2 => Long) extends Partitioner {

  def getPartition(key: Any): Int = {
    key match {
      case null => 0
      case _ =>
        val tmp: Model2 = key.asInstanceOf[Model2]
        nonNegativeMod(B_i_model_hash(tmp), numParts)
    }
  }

  def numPartitions: Int = numParts

  def nonNegativeMod(x: Long, mod: Int): Int = {
    val rawMod = (x % mod).toInt
    rawMod + (if (rawMod < 0) mod else 0)
  }
}
Developer: niketanpansare, Project: mljoin, Lines of code: 23, Source file: GlobalModelPartitioner.scala
Example 13: SymbolPartitioner
// Package declaration and imported dependency classes
package per.harenp.Hedgehog

import org.apache.spark.Partitioner

class SymbolPartitioner extends Partitioner {

  val numParts = 500

  // Derive the symbol from the key: take the last path segment and cut it at the first '-' or '.'
  def actualSymbol(key: Any) = {
    val fullPath = key.toString
    val namePart = fullPath.split("/").last
    namePart.split(Array('-', '.')).head
  }

  override def numPartitions: Int = numParts

  override def getPartition(key: Any): Int = {
    val code = actualSymbol(key).hashCode % numPartitions
    if (code < 0)
      code + numPartitions
    else
      code
  }

  // Java equals method to let Spark compare our Partitioner objects
  override def equals(other: Any): Boolean = other match {
    case sp: SymbolPartitioner =>
      sp.numPartitions == numPartitions
    case _ =>
      false
  }
}
Developer: lasttruffulaseed, Project: quant-spark, Lines of code: 30, Source file: SymbolPartitioner.scala
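A hedged usage sketch: keys that are file paths (for example from wholeTextFiles) fit naturally, since getPartition derives the symbol from the last path segment. The path below is made up for illustration.

// assumes sc: SparkContext is in scope; keys look like "hdfs://.../daily/AAPL-2016.csv"
val files = sc.wholeTextFiles("hdfs://market-data/daily/*")
val bySymbol = files.partitionBy(new SymbolPartitioner)  // all files of one symbol end up together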
Note: The org.apache.spark.Partitioner class examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with those authors. Please follow the corresponding project's license when redistributing or using the code; do not reproduce without permission.