• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Scala avg类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Scala中org.apache.spark.sql.functions.avg函数的典型用法代码示例。如果您正苦于以下问题:Scala avg函数的具体用法?Scala avg怎么用?Scala avg使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了avg函数的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。

示例1: Spark

// Set the package name and import the required classes
package uk.co.bitcat.streaming.spark

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.avg
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._

/**
  * Spark Streaming job that reads comma-separated pollution measurements from the
  * Kafka topic "pollution" and prints an alert whenever the average pollution of a
  * batch exceeds [[Spark.AlertThreshold]].
  */
object Spark {

  import scala.util.Try

  private case class Measurement(time: String, pollution: Int)

  /** Batch average above which an alert is printed. */
  private val AlertThreshold = 75.0

  /**
    * Parses one `time,pollution[,...]` record.
    *
    * @param line raw record value; may be null or malformed
    * @return the parsed measurement, or `None` when the record is null, has fewer
    *         than two fields, or its pollution field is not an integer
    */
  private def parseMeasurement(line: String): Option[Measurement] =
    Option(line).map(_.split(",")).flatMap {
      // Only the first two fields are used; any further fields are ignored.
      case Array(time, pollution, _*) =>
        Try(pollution.trim.toInt).toOption.map(level => Measurement(time.trim, level))
      case _ =>
        None
    }

  /**
    * Entry point: builds the streaming context, subscribes to Kafka and runs until
    * the streaming context terminates.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Pollution Monitor").setMaster("local[*]")

    // Setting the batch interval over which we perform our pollution average calculation
    val streamingContext = new StreamingContext(conf, Seconds(10))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "pollution_consumer",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Creating a stream to read from Kafka
    val topics = Array("pollution")
    val stream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Calculate the pollution average over the last interval
    stream.foreachRDD { rdd =>

      val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
      import spark.implicits._

      // A single bad record must not fail the whole batch: report it and move on.
      val measurements = rdd.flatMap { record =>
        val parsed = parseMeasurement(record.value)
        if (parsed.isEmpty) {
          System.err.println(s"Skipping malformed pollution record: ${record.value}")
        }
        parsed.toList
      }

      measurements
        .toDF()
        .agg(avg($"pollution") as "pollutionAverage")
        .filter($"pollutionAverage" > AlertThreshold)
        .foreach(row => println("Raise alert for pollution level: " + row(0)))
    }

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
开发者ID:dscook,项目名称:streaming-examples,代码行数:58,代码来源:Spark.scala


示例2: NestedStructureTest

// Set the package name and import the required classes
package me.invkrh.showcase.nested

import scala.util.Random

import me.invkrh.showcase.{JsonSerde, SparkJobSpec}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.avg

/** Sample data shared with the `NestedStructureTest` spec class. */
object NestedStructureTest {

  /** Positions assigned to the sample people, looked up by enumeration id. */
  object Position extends Enumeration {
    val DEV, OPS = Value
  }

  case class Person(name: String, age: Int, position: String, employer: Option[Employer])
  case class Employer(name: String, city: String)

  // Eleven people (indices 0 to 10) alternating between the two positions; whether
  // a person has an employer is decided by a random coin flip on every run.
  private val input =
    for (i <- 0 to 10) yield {
      val maybeEmployer =
        if (Random.nextBoolean()) Some(Employer("criteo", s"Paris${i % 4}")) else None
      Person(
        name = s"Hao$i",
        age = 20,
        position = Position(i % 2).toString,
        employer = maybeEmployer
      )
    }

  // The same people, each passed through JsonSerde.serialize.
  private val ser = input.map(JsonSerde.serialize)
}

/**
  * Showcase spec: prints the sample people, their serialized form, the DataFrame
  * built from them, and the average age grouped by the nested `employer.name` field.
  */
class NestedStructureTest extends SparkJobSpec {
  import spark.implicits._
  import NestedStructureTest._

  // Serialized people are turned back into Person objects and then into a DataFrame.
  private val df = spark.sparkContext
    .makeRDD(ser)
    .map(json => JsonSerde.deserialize[Person](json))
    .toDF()

  /**
    * Prints the schema and the untruncated content of the given DataFrame.
    *
    * @param df DataFrame to display
    */
  def withJsonSerde(df: DataFrame): Unit = {
    df.printSchema()
    df.show(false)
  }

  "NestedStructure" can {
    "show the case" in {
      showCase("Nested Structure") {
        note("Input is a List of Person object")
        input.foreach(println)
        note("Serialized to string, fields of Option.None are ignored")
        ser.foreach(println)
        note("Converted to DataFrame with all fields for each row")
        df.show(false)
        note("GroupBy employer.name, if the nested field is null, the key will be null")
        // Uses the $"..." column syntax (as for $"age") instead of a symbol literal,
        // which is deprecated since Scala 2.13.
        val res = df
          .groupBy($"employer".getField("name").as("company_name"))
          .agg(avg($"age"))
        res.show(false)
      }
    }
  }

}
开发者ID:invkrh,项目名称:spark-showcase,代码行数:59,代码来源:NestedStructureTest.scala



注:本文中的org.apache.spark.sql.functions.avg函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Scala Scene类代码示例发布时间:2022-05-23
下一篇:
Scala Token类代码示例发布时间:2022-05-23
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap