• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Scala PowerIterationClustering类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Scala中org.apache.spark.mllib.clustering.PowerIterationClustering的典型用法代码示例。如果您正苦于以下问题:Scala PowerIterationClustering类的具体用法?Scala PowerIterationClustering怎么用?Scala PowerIterationClustering使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了PowerIterationClustering类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。

示例1: Clustering

//设置package包名称以及导入依赖的类
package com.paypal.risk.smunf.math.learning

import com.paypal.risk.smunf.util.Json
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD


/** Helpers around Spark MLlib's [[PowerIterationClustering]] and JSON rendering of clusters. */
object Clustering {

  /**
   * Runs power iteration clustering and returns the assignments on the driver.
   *
   * The input RDD is marked for caching before it is handed to the algorithm, and the
   * resulting assignments are collected, so the whole result is held in driver memory.
   *
   * @param similarities similarity triples in the form accepted by `PowerIterationClustering.run`
   * @param numClusters  value passed to `setK`
   * @param maxIteration value passed to `setMaxIterations`
   * @return one `(id, cluster)` pair per collected assignment
   */
  def powerIterationClustering(
      similarities: RDD[(Long, Long, Double)],
      numClusters: Int,
      maxIteration: Int)
    : Seq[(Long, Int)] = {
    val fitted = new PowerIterationClustering()
      .setK(numClusters)
      .setMaxIterations(maxIteration)
      .run(similarities.cache())
    val assigned = fitted.assignments.collect()
    for (a <- assigned) yield (a.id, a.cluster)
  }

  /**
   * Renders clusters as a pretty-printed JSON string via `Json.toPrettyJsonString`.
   *
   * Each input pair becomes a map with the keys "cluster" and "variables".
   *
   * @param clusters pairs of cluster number and the strings belonging to that cluster
   * @return the string produced by `Json.toPrettyJsonString` for the list of maps
   */
  def clustersToJson(clusters: Seq[(Int, Seq[String])]): String = {
    val entries = clusters.map { case (clusterId, members) =>
      Map("cluster" -> clusterId, "variables" -> members)
    }
    Json.toPrettyJsonString(entries)
  }
}
开发者ID:yanlzhang8936,项目名称:Smunf,代码行数:27,代码来源:Clustering.scala


示例2: PICTest

//设置package包名称以及导入依赖的类
package mllib.perf.clustering

import org.json4s.JValue
import org.json4s.JsonDSL._

import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD

import mllib.perf.PerfTest

/**
 * Performance test that times [[PowerIterationClustering]] on generated similarity data.
 *
 * Options are registered and added to the parser while the class is being constructed.
 */
class PICTest(sc: SparkContext) extends PerfTest {

  // Each option is a (command-line name, description) pair.
  val NUM_EXAMPLES = ("num-examples", "number of examples")
  val NODE_DEGREE = ("node-degree", "number of neighbors each node is connected to")
  val NUM_CENTERS = ("num-centers", "number of centers for clustering tests")
  val NUM_ITERATIONS = ("num-iterations", "number of iterations for the algorithm")

  // Registration must happen before `options` is built and before the parser is populated.
  intOptions ++= Seq(NODE_DEGREE, NUM_CENTERS, NUM_ITERATIONS)
  longOptions ++= Seq(NUM_EXAMPLES)
  val options = intOptions ++ stringOptions  ++ booleanOptions ++ longOptions ++ doubleOptions
  addOptionsToParser()

  /** Similarity triples consumed by `run()`; populated by `createInputData`. */
  var data: RDD[(Long, Long, Double)] = _

  /**
   * Builds the similarity RDD, stores it in `data`, and logs how many triples were generated.
   *
   * @param seed not used by this implementation
   */
  override def createInputData(seed: Long): Unit = {
    val exampleCount = longOptionValue(NUM_EXAMPLES)
    val degree = intOptionValue(NODE_DEGREE)
    val partitions = intOptionValue(NUM_PARTITIONS)

    // Generates a periodic banded matrix with bandwidth = nodeDegree
    val ids = sc.parallelize(0L to exampleCount, partitions)
    data = ids.flatMap { id =>
      val firstNeighbor = (id - degree / 2) % exampleCount
      // Adding exampleCount before the modulo wraps negative neighbor ids around.
      for (nbr <- firstNeighbor until id) yield (id, (nbr + exampleCount) % exampleCount, 1D)
    }
    logInfo(s"Generated ${data.count()} pairwise similarities.")
  }

  /**
   * Runs the clustering on `data` and reports the wall-clock time.
   *
   * @return a JSON value holding the elapsed time in seconds under the key "time"
   */
  override def run(): JValue = {
    val maxIterations = intOptionValue(NUM_ITERATIONS)
    val numClusters = intOptionValue(NUM_CENTERS)
    val startedAt = System.currentTimeMillis()
    val model = new PowerIterationClustering()
      .setK(numClusters)
      .setMaxIterations(maxIterations)
      .run(data)
    // Counting forces the assignments RDD to be evaluated inside the timed section.
    model.assignments.count()
    val elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1e3
    "time" -> elapsedSeconds
  }
}
开发者ID:sakgarg,项目名称:spark-perf-mine,代码行数:54,代码来源:PICTest.scala


示例3: PICTest

//设置package包名称以及导入依赖的类
package mllib.perf.clustering

import org.json4s.JValue
import org.json4s.JsonDSL._

import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.PowerIterationClustering
import org.apache.spark.rdd.RDD

import mllib.perf.PerfTest

/**
 * Performance test that times [[PowerIterationClustering]] on generated similarity data.
 *
 * Options are registered and added to the parser while the class is being constructed.
 */
class PICTest(sc: SparkContext) extends PerfTest {

  // Each option is a (command-line name, description) pair.
  val NUM_POINTS = ("num-points", "number of points")
  val NODE_DEGREE = ("node-degree", "number of neighbors each node is connected to")
  val NUM_CENTERS = ("num-centers", "number of centers for clustering tests")
  val NUM_ITERATIONS = ("num-iterations", "number of iterations for the algorithm")

  intOptions ++= Seq(NODE_DEGREE, NUM_CENTERS, NUM_ITERATIONS)
  longOptions ++= Seq(NUM_POINTS)
  val options = intOptions ++ stringOptions  ++ booleanOptions ++ longOptions ++ doubleOptions
  addOptionsToParser()

  /** Similarity triples consumed by `run()`; populated by `createInputData`. */
  var data: RDD[(Long, Long, Double)] = _

  /**
   * Builds the similarity RDD, stores it in the `data` field, and logs how many triples
   * were generated.
   *
   * @param seed not used by this implementation
   */
  override def createInputData(seed: Long): Unit = {
    val numPoints = longOptionValue(NUM_POINTS)
    val nodeDegree = intOptionValue(NODE_DEGREE)
    val numPartitions = intOptionValue(NUM_PARTITIONS)

    // Generates a periodic banded matrix with bandwidth = nodeDegree
    // Assign to the field (no `val`): a local `val data` would shadow it and leave the
    // field null, so `run()` would hand a null RDD to the algorithm.
    data = sc.parallelize(0L to numPoints, numPartitions)
      .flatMap { id =>
        (((id - nodeDegree / 2) % numPoints) until id).map { nbr =>
          // Adding numPoints before the modulo wraps negative neighbor ids around.
          (id, (nbr + numPoints) % numPoints, 1D)
        }
      }
    logInfo(s"Generated ${data.count()} pairwise similarities.")
  }

  /**
   * Runs the clustering on `data` and reports the wall-clock time.
   *
   * @return a JSON value holding the elapsed time in seconds under the key "time"
   */
  override def run(): JValue = {
    val numIterations = intOptionValue(NUM_ITERATIONS)
    val k = intOptionValue(NUM_CENTERS)
    val start = System.currentTimeMillis()
    val pic = new PowerIterationClustering()
      .setK(k)
      .setMaxIterations(numIterations)
    val model = pic.run(data)
    // Counting forces the assignments RDD to be evaluated inside the timed section.
    model.assignments.count()
    val duration = (System.currentTimeMillis() - start) / 1e3
    "time" -> duration
  }
}
开发者ID:sakgarg,项目名称:spark-perf-mine,代码行数:53,代码来源:PICTest.scala



注:本文中的org.apache.spark.mllib.clustering.PowerIterationClustering类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Scala RandomForestRegressor类代码示例发布时间:2022-05-23
下一篇:
Scala FieldSchema类代码示例发布时间:2022-05-23
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap