
Scala Encoder Class Code Examples


This article collects typical usage examples of the org.apache.spark.sql.Encoder class in Scala. If you are wondering what the Encoder class does, how to use it, or where to find real-world examples of it, the hand-picked class examples below should help.



The sections below present 20 code examples of the Encoder class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
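Before the examples, here is a minimal sketch of what an Encoder is for: it tells the Dataset API how to serialize a JVM type into Spark's internal binary format, and one is required whenever a strongly typed Dataset is created or transformed. The case class Person and the session settings below are illustrative assumptions, not taken from the examples that follow.

import org.apache.spark.sql.{Encoder, Encoders, SparkSession}

// Illustrative case class; not part of the examples below.
case class Person(name: String, age: Int)

object EncoderBasics {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("encoder-basics").master("local[*]").getOrCreate()
    import spark.implicits._ // implicit Encoders for primitives, tuples and case classes

    // An Encoder for a case class can also be obtained explicitly:
    val personEncoder: Encoder[Person] = Encoders.product[Person]

    // The encoder is what allows a strongly typed Dataset to be built:
    val people = spark.createDataset(Seq(Person("Ana", 31), Person("Luis", 28)))(personEncoder)
    people.show()

    spark.stop()
  }
}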

Example 1: GladLogLikelihoodAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

import scala.math.log

private[crowd] class GladLogLikelihoodAggregator(params: Broadcast[GladParams]) 
  extends Aggregator[GladPartial, GladLogLikelihoodAggregatorBuffer, Double]{

  def zero: GladLogLikelihoodAggregatorBuffer = GladLogLikelihoodAggregatorBuffer(0,-1)

  def reduce(b: GladLogLikelihoodAggregatorBuffer, a: GladPartial) : GladLogLikelihoodAggregatorBuffer = {
    val alphaVal = params.value.alpha(a.annotator.toInt)
    val betaVal = a.beta
    val sig = Functions.sigmoid(alphaVal*betaVal) 
    val p0 = 1-a.est
    val p1 = a.est
    val k0 = if (a.value == 0) sig else 1-sig 
    val k1 = if (a.value == 1) sig else 1-sig 
    GladLogLikelihoodAggregatorBuffer(b.agg + Functions.prodlog(p0,k0) 
                                          + Functions.prodlog(p1,k1), p1) 
  }

  def merge(b1: GladLogLikelihoodAggregatorBuffer, b2: GladLogLikelihoodAggregatorBuffer) : GladLogLikelihoodAggregatorBuffer = { 
    GladLogLikelihoodAggregatorBuffer(b1.agg + b2.agg, if (b1.classProb == -1) b2.classProb else b1.classProb)
  }

  def finish(reduction: GladLogLikelihoodAggregatorBuffer) = {
    val w0 = params.value.w(0)
    val w1 = params.value.w(1)
    reduction.agg + Functions.prodlog(1 - reduction.classProb, w0) +
      Functions.prodlog(reduction.classProb, w1)
  }


  def bufferEncoder: Encoder[GladLogLikelihoodAggregatorBuffer] = Encoders.product[GladLogLikelihoodAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladLogLikelihoodAggregatorBuffer(agg: Double, classProb: Double) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 49, Source: GladLogLikelihoodAggregator.scala
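A typed Aggregator such as the one above is normally applied through its toColumn view. The following is a minimal usage sketch, not taken from the spark-crowd sources; it assumes a Dataset[GladPartial] and the broadcast parameters are already available (the aggregator is package-private, so such code would live in the same package).

// Hedged usage sketch (assumed, not from spark-crowd): compute the model log-likelihood
// over a whole Dataset[GladPartial] as a single Double.
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.Dataset

def modelLogLikelihood(data: Dataset[GladPartial],
                       params: Broadcast[GladParams]): Double = {
  val agg = new GladLogLikelihoodAggregator(params)
  // Aggregator#toColumn turns the typed aggregator into a TypedColumn usable in select()
  data.select(agg.toColumn).first()
}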


Example 2: dimensionTableName

// Package declaration and imported dependencies
package org.alghimo.spark.dimensionalModelling

import org.apache.spark.sql.{Column, Dataset, Encoder, Encoders}


  // NOTE: the enclosing trait/class declaration (the source file is DimensionTableProvider.scala)
  // was omitted from this excerpt; the members below assume a type parameter DIM, a `spark`
  // session and an `isCurrentColumnName` member defined in the surrounding scope.
  def dimensionTableName: String

  def tmpDimensionTableName: String = {
    val Array(dbName, tableName) = dimensionTableName.split('.')

    s"${dbName}.tmp_${tableName}"
  }

  def maxPartitionsInDimensionTable: Int = 400

  def dimensionTable(refresh: Boolean = false): Dataset[DIM] = {
    if (refresh) {
      spark.catalog.refreshTable(dimensionTableName)
    }

    spark.table(dimensionTableName).as[DIM]
  }

  def currentDimensions(refresh: Boolean = false): Dataset[DIM] = dimensionTable(refresh).filter(s"${isCurrentColumnName}")
  def notCurrentDimensions(refresh: Boolean = false): Dataset[DIM] = dimensionTable(refresh).filter(s"NOT ${isCurrentColumnName}")

  def save(ds: Dataset[DIM], useTempTable: Boolean = true): Dataset[DIM] = {
    println("Saving dimensions..")
    val toSave = if (useTempTable) {
      ds
        .coalesce(maxPartitionsInDimensionTable)
        .write
        .mode("overwrite")
        .saveAsTable(tmpDimensionTableName)

      spark.table(tmpDimensionTableName)
    } else {
      ds
    }

    toSave
      .coalesce(maxPartitionsInDimensionTable)
      .write
      .mode("overwrite")
      .saveAsTable(dimensionTableName)

    if (useTempTable) {
      spark.sql(s"DROP TABLE ${tmpDimensionTableName} PURGE")
    }

    dimensionTable(refresh = true)
  }
} 
Developer: alghimo, Project: spark-dimensional-modelling, Lines of code: 54, Source: DimensionTableProvider.scala


Example 3: enrichedWithDimensionEncoder

// Package declaration and imported dependencies
package org.alghimo.spark.dimensionalModelling

import org.apache.spark.sql.{Dataset, Encoder}


trait EnrichedDimensionWithDimensionProvider[ENRICHED_DIM <: (Product with Serializable), DIM <: (Product with Serializable)] {
  type EnrichedWithDimension = (ENRICHED_DIM, DIM)
  type DimensionWithEnriched = (DIM, ENRICHED_DIM)
  type JoinEnrichedWithDimension = Dataset[EnrichedWithDimension]
  type JoinDimensionWithEnriched = Dataset[DimensionWithEnriched]

  implicit def enrichedWithDimensionEncoder: Encoder[(ENRICHED_DIM, DIM)]
  implicit def dimensionWithEnrichedEncoder: Encoder[(DIM, ENRICHED_DIM)]

  implicit def enrichedWithDimensionToDimensionWithEnriched(e: EnrichedWithDimension): DimensionWithEnriched = e.swap
  implicit def dimensionWithEnrichedToEnrichedWithDimension(d: DimensionWithEnriched): EnrichedWithDimension = d.swap
  implicit def joinEnrichedWithDimensionToJoinDimensionWithEnriched(e: JoinEnrichedWithDimension): JoinDimensionWithEnriched = e.map(_.swap)
  implicit def joinDimensionWithEnrichedToJoinEnrichedWithDimension(d: JoinDimensionWithEnriched): JoinEnrichedWithDimension = d.map(_.swap)
} 
Developer: alghimo, Project: spark-dimensional-modelling, Lines of code: 20, Source: EnrichedDimensionWithDimensionProvider.scala
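One way an implementing class can supply the two abstract implicit encoders declared above is to combine product encoders with Encoders.tuple. The concrete dimension types below are illustrative assumptions and not part of the spark-dimensional-modelling project.

import org.apache.spark.sql.{Encoder, Encoders}

// Illustrative types; any case class satisfies the Product with Serializable bound.
case class ClientEnriched(clientId: Long, name: String)
case class ClientDim(clientSk: Long, clientId: Long, name: String, isCurrent: Boolean)

// A hedged sketch of how the trait's abstract encoders could be provided.
trait ClientDimensionEncoders
    extends EnrichedDimensionWithDimensionProvider[ClientEnriched, ClientDim] {

  override implicit def enrichedWithDimensionEncoder: Encoder[(ClientEnriched, ClientDim)] =
    Encoders.tuple(Encoders.product[ClientEnriched], Encoders.product[ClientDim])

  override implicit def dimensionWithEnrichedEncoder: Encoder[(ClientDim, ClientEnriched)] =
    Encoders.tuple(Encoders.product[ClientDim], Encoders.product[ClientEnriched])
}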


Example 4: toKeyValueGroupedDataSet

// Package declaration and imported dependencies
package com.datawizards.sparklocal.dataset

import org.apache.spark.sql.{Encoder, KeyValueGroupedDataset, SparkSession}

import scala.reflect.ClassTag

trait KeyValueGroupedDataSetAPI[K, V] {
  protected lazy val spark: SparkSession = SparkSession.builder().getOrCreate()
  private[sparklocal] def toKeyValueGroupedDataSet(implicit encK: Encoder[K], encT: Encoder[V], encKT: Encoder[(K, V)]): KeyValueGroupedDataset[K, V]

  def count(): DataSetAPI[(K, Long)]
  def mapGroups[U: ClassTag](f: (K, Iterator[V]) => U)
                            (implicit enc: Encoder[U]=null): DataSetAPI[U]
  def reduceGroups(f: (V, V) => V): DataSetAPI[(K, V)]
  def mapValues[W: ClassTag](func: V => W)
                            (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W]
  def flatMapGroups[U: ClassTag](f: (K, Iterator[V]) => TraversableOnce[U])
                                (implicit enc: Encoder[U]=null): DataSetAPI[U]
  def keys: DataSetAPI[K]
  def cogroup[U: ClassTag, R: ClassTag](other: KeyValueGroupedDataSetAPI[K, U])
                                       (f: (K, Iterator[V], Iterator[U]) => TraversableOnce[R])
                                       (implicit
                                          encK: Encoder[K]=null,
                                          encV: Encoder[V]=null,
                                          encU: Encoder[U]=null,
                                          encR: Encoder[R]=null,
                                          encKV: Encoder[(K,V)]=null,
                                          encKU: Encoder[(K,U)]=null
                                       ): DataSetAPI[R]
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 31, Source: KeyValueGroupedDataSetAPI.scala


Example 5: DataSetAPIScalaLazyImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.`lazy`.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.{GenIterable, SeqView}
import scala.reflect.ClassTag

object DataSetAPIScalaLazyImpl {
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaLazyImpl(it.toSeq.seq.view)
}

class DataSetAPIScalaLazyImpl[T: ClassTag](private[sparklocal] val data: SeqView[T, Seq[T]]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = SeqView[T, Seq[T]]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaLazyImpl.create(it)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.union(dsScala.data.toSeq))

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.intersect(dsScala.data.toSeq))

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.diff(dsScala.data.toSeq))

  override def distinct(): DataSetAPI[T] =
    create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaLazyImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 39, Source: DataSetAPIScalaLazyImpl.scala


Example 6: implicits

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.session

import com.datawizards.sparklocal.accumulator.{AccumulatorV2API, CollectionAccumulatorAPI, DoubleAccumulatorAPI, LongAccumulatorAPI}
import com.datawizards.sparklocal.broadcast.BroadcastAPI
import com.datawizards.sparklocal.impl.scala.accumulator.{CollectionAccumulatorAPIScalaImpl, DoubleAccumulatorAPIScalaImpl, LongAccumulatorAPIScalaImpl}
import com.datawizards.sparklocal.impl.scala.broadcast.BroadcastAPIScalaImpl
import com.datawizards.sparklocal.session.SparkSessionAPI
import org.apache.spark.sql.Encoder

import scala.reflect.ClassTag

trait SparkSessionAPIScalaBase extends SparkSessionAPI {

  object implicits {
    implicit def enc[T]: Encoder[T] = null
  }

  override def broadcast[T: ClassTag](value: T): BroadcastAPI[T] =
    new BroadcastAPIScalaImpl[T](value)

  override def longAccumulator: LongAccumulatorAPI =
    new LongAccumulatorAPIScalaImpl()

  override def longAccumulator(name: String): LongAccumulatorAPI =
    new LongAccumulatorAPIScalaImpl(Some(name))

  override def doubleAccumulator: DoubleAccumulatorAPI =
    new DoubleAccumulatorAPIScalaImpl()

  override def doubleAccumulator(name: String): DoubleAccumulatorAPI =
    new DoubleAccumulatorAPIScalaImpl(Some(name))

  override def collectionAccumulator[T]: CollectionAccumulatorAPI[T] =
    new CollectionAccumulatorAPIScalaImpl[T]()

  override def collectionAccumulator[T](name: String): CollectionAccumulatorAPI[T] =
    new CollectionAccumulatorAPIScalaImpl[T](Some(name))

  override def register(acc: AccumulatorV2API[_, _], name: String): Unit =
    {  }
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 42, Source: SparkSessionAPIScalaBase.scala


Example 7: KeyValueGroupedDataSetAPIScalaParallelLazyImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.parallellazy.dataset

import com.datawizards.sparklocal.dataset.KeyValueGroupedDataSetAPI
import com.datawizards.sparklocal.impl.scala.dataset.{DataSetAPIScalaBase, KeyValueGroupedDataSetAPIScalaBase}
import org.apache.spark.sql.Encoder

import scala.collection.{GenIterable, GenSeq}
import scala.reflect.ClassTag

class KeyValueGroupedDataSetAPIScalaParallelLazyImpl[K: ClassTag, T: ClassTag](private[sparklocal] val data: Map[K, GenSeq[T]]) extends KeyValueGroupedDataSetAPIScalaBase[K, T] {
  override type InternalCollection = Map[K, GenSeq[T]]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]=null): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelLazyImpl.create(it)

  override def mapValues[W: ClassTag](func: (T) => W)
                                     (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W] = {
    val mapped = data.mapValues(_.map(func))
    new KeyValueGroupedDataSetAPIScalaParallelLazyImpl(mapped)
  }

} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 23, Source: KeyValueGroupedDataSetAPIScalaParallelLazyImpl.scala


Example 8: DataSetAPIScalaParallelLazyImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.parallellazy.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.impl.scala.parallellazy.ParallelLazySeq
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.reflect.ClassTag

object DataSetAPIScalaParallelLazyImpl {
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelLazyImpl(new ParallelLazySeq(it.toSeq.par))
}

class DataSetAPIScalaParallelLazyImpl[T: ClassTag](private[sparklocal] val data: ParallelLazySeq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = ParallelLazySeq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelLazyImpl.create(it)

  private def create[U: ClassTag](data: ParallelLazySeq[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelLazyImpl(data)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.union(dsScala.data.toSeq))

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.intersect(dsScala.data.toSeq))

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.diff(dsScala.data.toSeq))

  override def distinct(): DataSetAPI[T] =
    create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaParallelLazyImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 43, Source: DataSetAPIScalaParallelLazyImpl.scala


Example 9: DataSetAPIScalaEagerImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.eager.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.reflect.ClassTag

object DataSetAPIScalaEagerImpl {
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaEagerImpl(it.toSeq.seq)
}

class DataSetAPIScalaEagerImpl[T: ClassTag](private[sparklocal] val data: Seq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = Seq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaEagerImpl.create(it)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.union(dsScala.data.toSeq))

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.intersect(dsScala.data.toSeq))

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.diff(dsScala.data.toSeq))

  override def distinct(): DataSetAPI[T] =
    create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaEagerImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 39, Source: DataSetAPIScalaEagerImpl.scala


Example 10: DataSetAPIScalaParallelImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.scala.parallel.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import com.datawizards.sparklocal.impl.scala.`lazy`.dataset.DataSetAPIScalaLazyImpl
import com.datawizards.sparklocal.impl.scala.dataset.DataSetAPIScalaBase
import com.datawizards.sparklocal.rdd.RDDAPI
import org.apache.spark.sql.Encoder

import scala.collection.GenIterable
import scala.collection.parallel.ParSeq
import scala.reflect.ClassTag

object DataSetAPIScalaParallelImpl {
  private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelImpl(it.toSeq.par)
}

class DataSetAPIScalaParallelImpl[T: ClassTag](private[sparklocal] val data: ParSeq[T]) extends DataSetAPIScalaBase[T] {
  override type InternalCollection = ParSeq[T]

  override private[sparklocal] def create[U: ClassTag](it: GenIterable[U])(implicit enc: Encoder[U]): DataSetAPIScalaBase[U] =
    DataSetAPIScalaParallelImpl.create(it)

  private def create[U: ClassTag](data: ParSeq[U]): DataSetAPIScalaBase[U] =
    new DataSetAPIScalaParallelImpl(data)

  override protected def union(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.union(dsScala.data.toSeq))

  override protected def intersect(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.intersect(dsScala.data.toSeq))

  override protected def diff(data: InternalCollection, dsScala: DataSetAPIScalaBase[T]): DataSetAPIScalaBase[T] =
    create(data.diff(dsScala.data.toSeq))

  override def distinct(): DataSetAPI[T] =
    create(data.distinct)

  override def groupByKey[K: ClassTag](func: (T) => K)(implicit enc: Encoder[K]): KeyValueGroupedDataSetAPI[K, T] =
    new KeyValueGroupedDataSetAPIScalaParallelImpl(data.groupBy(func))

  override def rdd(): RDDAPI[T] = RDDAPI(data)
} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 44, Source: DataSetAPIScalaParallelImpl.scala


Example 11: KeyValueGroupedDataSetAPISparkImpl

// Package declaration and imported dependencies
package com.datawizards.sparklocal.impl.spark.dataset

import com.datawizards.sparklocal.dataset.{DataSetAPI, KeyValueGroupedDataSetAPI}
import org.apache.spark.sql.{Encoder, KeyValueGroupedDataset}

import scala.reflect.ClassTag

class KeyValueGroupedDataSetAPISparkImpl[K: ClassTag, T: ClassTag](private[dataset] val data: KeyValueGroupedDataset[K, T]) extends KeyValueGroupedDataSetAPI[K, T] {
  private def create[U: ClassTag](data: KeyValueGroupedDataset[K,U]) = new KeyValueGroupedDataSetAPISparkImpl(data)

  override private[sparklocal] def toKeyValueGroupedDataSet(implicit encK: Encoder[K], encT: Encoder[T], encKT: Encoder[(K, T)]) = data

  override def count(): DataSetAPI[(K, Long)] =
    DataSetAPI(data.count())

  override def mapValues[W: ClassTag](func: (T) => W)
                                     (implicit enc: Encoder[W]=null): KeyValueGroupedDataSetAPI[K, W] =
    create(data.mapValues(func))

  override def mapGroups[U: ClassTag](f: (K, Iterator[T]) => U)
                                     (implicit enc: Encoder[U]=null): DataSetAPI[U] =
    DataSetAPI(data.mapGroups(f))

  override def reduceGroups(f: (T, T) => T): DataSetAPI[(K, T)] =
    DataSetAPI(data.reduceGroups(f))

  override def flatMapGroups[U: ClassTag](f: (K, Iterator[T]) => TraversableOnce[U])
                                         (implicit enc: Encoder[U]=null): DataSetAPI[U] =
    DataSetAPI(data.flatMapGroups(f))

  override def keys: DataSetAPI[K] =
    DataSetAPI(data.keys)

  override def cogroup[U: ClassTag, R: ClassTag](other: KeyValueGroupedDataSetAPI[K, U])
                                                (f: (K, Iterator[T], Iterator[U]) => TraversableOnce[R])
                                                (implicit
                                                 encK: Encoder[K]=null,
                                                 encT: Encoder[T]=null,
                                                 encU: Encoder[U]=null,
                                                 encR: Encoder[R]=null,
                                                 encKT: Encoder[(K,T)]=null,
                                                 encKU: Encoder[(K,U)]=null
                                                ): DataSetAPI[R] = {
    DataSetAPI(data.cogroup(other.toKeyValueGroupedDataSet)(f))
  }


} 
Developer: piotr-kalanski, Project: spark-local, Lines of code: 49, Source: KeyValueGroupedDataSetAPISparkImpl.scala


Example 12: GladAlphaAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

private[crowd] class GladAlphaAggregator(params: Broadcast[GladParams], learningRate: Double) 
  extends Aggregator[GladPartial, GladAlphaAggregatorBuffer, Double]{

  def zero: GladAlphaAggregatorBuffer = GladAlphaAggregatorBuffer(0,-1) 
  
  def reduce(b: GladAlphaAggregatorBuffer, a: GladPartial) : GladAlphaAggregatorBuffer = {
    val al = params.value.alpha(a.annotator)
    val bet = a.beta
    val sigmoidValue = Functions.sigmoid(al * bet)
    val p = if (a.value == 1) a.est else (1 - a.est)
    val term = (p - sigmoidValue) * bet
    GladAlphaAggregatorBuffer(b.agg + term, al)
  }

  def merge(b1: GladAlphaAggregatorBuffer, b2: GladAlphaAggregatorBuffer) : GladAlphaAggregatorBuffer = { 
    GladAlphaAggregatorBuffer(b1.agg + b2.agg, if (b1.alpha == -1) b2.alpha else b1.alpha )
  }

  def finish(reduction: GladAlphaAggregatorBuffer) = {
    reduction.alpha + learningRate * reduction.agg
  }

  def bufferEncoder: Encoder[GladAlphaAggregatorBuffer] = Encoders.product[GladAlphaAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladAlphaAggregatorBuffer(agg: Double, alpha: Double) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 40, Source: GladAlphaAggregator.scala


Example 13: MulticlassMVAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.MulticlassAnnotation
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator

private[crowd] class MulticlassMVAggregator(nClasses: Int) extends Aggregator[MulticlassAnnotation, MulticlassMVPartial, Int]{

  def sumKey(map: Map[Int,Long], pair: (Int,Long)) = {
      val key = pair._1
      val value = pair._2
      val new_value = map.get(key) match {
        case Some(x) => x + value
        case None => value 
      }
      map.updated(key, new_value)
  }

  def zero: MulticlassMVPartial = MulticlassMVPartial(Vector.fill(nClasses)(0),0)

  def reduce(b: MulticlassMVPartial, a: MulticlassAnnotation) : MulticlassMVPartial = {
    MulticlassMVPartial(b.aggVect.updated(a.value, b.aggVect(a.value) + 1), b.count + 1) 
  }

  def merge(b1: MulticlassMVPartial, b2: MulticlassMVPartial) : MulticlassMVPartial = { 
    MulticlassMVPartial(b1.aggVect.zip(b2.aggVect).map(x => x._1 + x._2), b1.count + b2.count) 
  }

  def finish(reduction: MulticlassMVPartial) = {
      reduction.aggVect.indexOf(reduction.aggVect.max)
  }


  def bufferEncoder: Encoder[MulticlassMVPartial] = Encoders.product[MulticlassMVPartial]

  def outputEncoder: Encoder[Int] = Encoders.scalaInt
} 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 38, Source: MulticlassMVAggregator.scala


Example 14: GladBetaAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

private[crowd] class GladBetaAggregator(params: Broadcast[GladParams], learningRate: Double) 
  extends Aggregator[GladPartial, GladBetaAggregatorBuffer, Double]{

  def zero: GladBetaAggregatorBuffer = GladBetaAggregatorBuffer(0,-1) 
  
  def reduce(b: GladBetaAggregatorBuffer, a: GladPartial) : GladBetaAggregatorBuffer = {
    val al = params.value.alpha(a.annotator)
    val bet = a.beta
    val sigmoidValue = Functions.sigmoid(al * bet)
    val p = if (a.value == 1) a.est else (1 - a.est)
    val term = (p - sigmoidValue) * al
    GladBetaAggregatorBuffer(b.agg + term, bet)
  }

  def merge(b1: GladBetaAggregatorBuffer, b2: GladBetaAggregatorBuffer) : GladBetaAggregatorBuffer = { 
    GladBetaAggregatorBuffer(b1.agg + b2.agg, if (b1.beta == -1) b2.beta else b1.beta)
  }

  def finish(reduction: GladBetaAggregatorBuffer) = {
    reduction.beta + learningRate * reduction.agg
  }

  def bufferEncoder: Encoder[GladBetaAggregatorBuffer] = Encoders.product[GladBetaAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladBetaAggregatorBuffer(agg: Double, beta: Double) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 40, Source: GladBetaAggregator.scala


Example 15: BinarySoftMVAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import org.apache.spark.sql.{Encoder, Encoders}
import com.enriquegrodrigo.spark.crowd.types.BinaryAnnotation
import org.apache.spark.sql.expressions.Aggregator

  private[crowd] class BinarySoftMVAggregator extends Aggregator[BinaryAnnotation, BinaryMVPartial, Double]{
  
    def zero: BinaryMVPartial = BinaryMVPartial(0,0)
  
    def reduce(b: BinaryMVPartial, a: BinaryAnnotation) : BinaryMVPartial = 
      BinaryMVPartial(b.aggValue+a.value, b.count + 1) 
  
    def merge(b1: BinaryMVPartial, b2: BinaryMVPartial) : BinaryMVPartial = 
      BinaryMVPartial(b1.aggValue + b2.aggValue, b1.count + b2.count) 
  
    def finish(reduction: BinaryMVPartial) =  {
      val numerator: Double = reduction.aggValue
      val denominator: Double = reduction.count
      if (denominator == 0) 
        throw new IllegalArgumentException() 
      else {
        (numerator / denominator)
      }
    }
  
    def bufferEncoder: Encoder[BinaryMVPartial] = Encoders.product[BinaryMVPartial]
  
    def outputEncoder: Encoder[Double] = Encoders.scalaDouble
  } 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 31, Source: BinarySoftMVAggregator.scala


Example 16: RaykarBinaryStatisticsAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.RaykarBinaryPartial
import com.enriquegrodrigo.spark.crowd.types.RaykarBinaryParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

private[crowd] class RaykarBinaryStatisticsAggregator(params: Broadcast[RaykarBinaryParams]) 
  extends Aggregator[RaykarBinaryPartial, RaykarBinaryStatisticsAggregatorBuffer, (Double,Double)]{

  def zero: RaykarBinaryStatisticsAggregatorBuffer = RaykarBinaryStatisticsAggregatorBuffer(1,1) //Binary
  
  def reduce(b: RaykarBinaryStatisticsAggregatorBuffer, a: RaykarBinaryPartial) : RaykarBinaryStatisticsAggregatorBuffer = {
    val alphaValue = params.value.alpha(a.annotator)
    val alphaTerm = if (a.value == 1) alphaValue else 1-alphaValue
    val betaValue = params.value.beta(a.annotator)
    val betaTerm = if (a.value == 0) betaValue else 1-betaValue 
    RaykarBinaryStatisticsAggregatorBuffer(b.a * alphaTerm, b.b * betaTerm)
  }

  def merge(b1: RaykarBinaryStatisticsAggregatorBuffer, b2: RaykarBinaryStatisticsAggregatorBuffer) : RaykarBinaryStatisticsAggregatorBuffer = { 
    RaykarBinaryStatisticsAggregatorBuffer(b1.a * b2.a, b1.b*b2.b)
  }

  def finish(reduction: RaykarBinaryStatisticsAggregatorBuffer) = {
    (reduction.a,reduction.b)
  }

  def bufferEncoder: Encoder[RaykarBinaryStatisticsAggregatorBuffer] = Encoders.product[RaykarBinaryStatisticsAggregatorBuffer]

  def outputEncoder: Encoder[(Double,Double)] = Encoders.product[(Double,Double)]
}

private[crowd] case class RaykarBinaryStatisticsAggregatorBuffer(a: Double, b: Double) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 37, Source: RaykarBinaryStatisticsAggregator.scala


Example 17: DawidSkeneLogLikelihoodAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.DawidSkenePartial
import com.enriquegrodrigo.spark.crowd.types.DawidSkeneParams
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

import scala.math.log

private[crowd] class DawidSkeneLogLikelihoodAggregator(params: Broadcast[DawidSkeneParams]) 
  extends Aggregator[DawidSkenePartial, DawidSkeneLogLikelihoodAggregatorBuffer, Double]{

  private def sumKey(map: Map[Int,Double], pair: (Int,Double)) = {
      val key = pair._1
      val value = pair._2
      val new_value = map.get(key) match {
        case Some(x) => x + value
        case None => value 
      }
      map.updated(key, new_value)
  }

  def zero: DawidSkeneLogLikelihoodAggregatorBuffer = DawidSkeneLogLikelihoodAggregatorBuffer(0, -1)

  def reduce(b: DawidSkeneLogLikelihoodAggregatorBuffer, a: DawidSkenePartial) : DawidSkeneLogLikelihoodAggregatorBuffer = {
    val pival = params.value.pi(a.annotator.toInt)(a.est)(a.value)
    DawidSkeneLogLikelihoodAggregatorBuffer(b.agg + log(pival), a.est) 
  }

  def merge(b1: DawidSkeneLogLikelihoodAggregatorBuffer, b2: DawidSkeneLogLikelihoodAggregatorBuffer) : DawidSkeneLogLikelihoodAggregatorBuffer = { 
    DawidSkeneLogLikelihoodAggregatorBuffer(b1.agg + b2.agg, if (b1.predClass == -1) b2.predClass else b1.predClass) 
  }

  def finish(reduction: DawidSkeneLogLikelihoodAggregatorBuffer) =  {
    reduction.agg + log(params.value.w(reduction.predClass))
  }


  def bufferEncoder: Encoder[DawidSkeneLogLikelihoodAggregatorBuffer] = Encoders.product[DawidSkeneLogLikelihoodAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class DawidSkeneLogLikelihoodAggregatorBuffer(agg: Double, predClass: Int) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 46, Source: DawidSkeneLogLikelihoodAggregator.scala


Example 18: DawidSkeneEAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.DawidSkenePartial
import com.enriquegrodrigo.spark.crowd.types.DawidSkeneParams
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

private[crowd] class DawidSkeneEAggregator(params: Broadcast[DawidSkeneParams], nClasses: Int) 
  extends Aggregator[DawidSkenePartial, DawidSkeneAggregatorBuffer, Int]{

  def zero: DawidSkeneAggregatorBuffer = DawidSkeneAggregatorBuffer(Vector.fill(nClasses)(1))
  
  def reduce(b: DawidSkeneAggregatorBuffer, a: DawidSkenePartial) : DawidSkeneAggregatorBuffer = {
    val pi = params.value.pi 
    val classCondi = Vector.range(0,nClasses).map( c => pi(a.annotator.toInt)(c)(a.value))
    val newVect = classCondi.zip(b.aggVect).map(x => x._1 * x._2)
    DawidSkeneAggregatorBuffer(newVect) 
  }

  def merge(b1: DawidSkeneAggregatorBuffer, b2: DawidSkeneAggregatorBuffer) : DawidSkeneAggregatorBuffer = { 
    val buf = DawidSkeneAggregatorBuffer(b1.aggVect.zip(b2.aggVect).map(x => x._1 * x._2))
    buf
  }

  def finish(reduction: DawidSkeneAggregatorBuffer) = {
    val result = reduction.aggVect.zipWithIndex.maxBy(x => x._1*params.value.w(x._2))._2
    result
  }

  def bufferEncoder: Encoder[DawidSkeneAggregatorBuffer] = Encoders.product[DawidSkeneAggregatorBuffer]

  def outputEncoder: Encoder[Int] = Encoders.scalaInt
}

private[crowd] case class DawidSkeneAggregatorBuffer(aggVect: scala.collection.Seq[Double]) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 37, Source: DawidSkeneEAggregator.scala


Example 19: BinaryMVAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import org.apache.spark.sql.{Encoder, Encoders}
import com.enriquegrodrigo.spark.crowd.types.BinaryAnnotation
import org.apache.spark.sql.expressions.Aggregator

private[crowd] class BinaryMVAggregator extends Aggregator[BinaryAnnotation, BinaryMVPartial, Int]{
  
    def zero: BinaryMVPartial = BinaryMVPartial(0,0)
  
    def reduce(b: BinaryMVPartial, a: BinaryAnnotation) : BinaryMVPartial = 
      BinaryMVPartial(b.aggValue+a.value, b.count + 1) 
  
    def merge(b1: BinaryMVPartial, b2: BinaryMVPartial) : BinaryMVPartial = 
      BinaryMVPartial(b1.aggValue + b2.aggValue, b1.count + b2.count) 
  
    def finish(reduction: BinaryMVPartial) =  {
      val numerator: Double = reduction.aggValue
      val denominator: Double = reduction.count
      if (denominator == 0) 
        throw new IllegalArgumentException() 
      else if ( (numerator / denominator) >= 0.5 ) 
        1 
      else 
        0
    }
  
    def bufferEncoder: Encoder[BinaryMVPartial] = Encoders.product[BinaryMVPartial]
  
    def outputEncoder: Encoder[Int] = Encoders.scalaInt
  } 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 32, Source: BinaryMVAggregator.scala
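Aggregators like this one are usually applied per group rather than over the whole dataset. A minimal sketch of that pattern follows; it is not taken from the spark-crowd sources, and the field name example on BinaryAnnotation, as well as its Long type, are assumptions used only for illustration.

// Hedged usage sketch (assumed, not from spark-crowd): one majority-vote label per example,
// obtained by grouping the annotations on their example id and aggregating each group.
import org.apache.spark.sql.Dataset

def majorityVotes(annotations: Dataset[BinaryAnnotation]): Dataset[(Long, Int)] = {
  val spark = annotations.sparkSession
  import spark.implicits._ // encoders for the grouping key and the (key, label) result

  val mv = new BinaryMVAggregator()
  annotations
    .groupByKey(_.example)              // one group per annotated example
    .agg(mv.toColumn.name("mvLabel"))   // Dataset[(Long, Int)]
}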


Example 20: GladEAggregator

// Package declaration and imported dependencies
package com.enriquegrodrigo.spark.crowd.aggregators 

import com.enriquegrodrigo.spark.crowd.types.GladPartial
import com.enriquegrodrigo.spark.crowd.types.GladParams
import com.enriquegrodrigo.spark.crowd.utils.Functions
import org.apache.spark.sql.{Encoder,Encoders}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.broadcast.Broadcast

import scala.math.{log,exp}

private[crowd] class GladEAggregator(params: Broadcast[GladParams]) 
  extends Aggregator[GladPartial, GladEAggregatorBuffer, Double]{

  def zero: GladEAggregatorBuffer = GladEAggregatorBuffer(Vector.fill(2)(1)) //Binary
  
  def reduce(b: GladEAggregatorBuffer, a: GladPartial) : GladEAggregatorBuffer = {
    val alpha = params.value.alpha
    val sigmoidValue = Functions.sigmoid(alpha(a.annotator)*a.beta)
    val p0 = if (a.value == 0) sigmoidValue else (1 - sigmoidValue)
    val p1 = if (a.value == 1) sigmoidValue else (1 - sigmoidValue) 
    GladEAggregatorBuffer(Vector(Functions.logLim(p0) + b.aggVect(0), Functions.logLim(p1) + b.aggVect(1)))
  }

  def merge(b1: GladEAggregatorBuffer, b2: GladEAggregatorBuffer) : GladEAggregatorBuffer = { 
    GladEAggregatorBuffer(b1.aggVect.zip(b2.aggVect).map(x => x._1 * x._2))
  }

  def finish(reduction: GladEAggregatorBuffer) = {
    val w = params.value.w
    val negative = exp(reduction.aggVect(0) + Functions.logLim(w(0)))
    val positive = exp(reduction.aggVect(1) + Functions.logLim(w(1)))
    val norm = negative + positive
    positive/norm
  }

  def bufferEncoder: Encoder[GladEAggregatorBuffer] = Encoders.product[GladEAggregatorBuffer]

  def outputEncoder: Encoder[Double] = Encoders.scalaDouble
}

private[crowd] case class GladEAggregatorBuffer(aggVect: scala.collection.Seq[Double]) 
Developer: enriquegrodrigo, Project: spark-crowd, Lines of code: 43, Source: GladEAggregator.scala


Note: The org.apache.spark.sql.Encoder class examples in this article were collected from GitHub, MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors; please consult the corresponding project's license before redistributing or reusing the code, and do not republish without permission.

