本文整理汇总了Scala中org.apache.spark.ml.clustering.GaussianMixture类的典型用法代码示例。如果您正苦于以下问题:Scala GaussianMixture类的具体用法?Scala GaussianMixture怎么用?Scala GaussianMixture使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了GaussianMixture类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: GMMClusteringPersist
//设置package包名称以及导入依赖的类
package org.sparksamples.gmm
import java.text.SimpleDateFormat
import org.apache.spark.SparkConf
import org.apache.spark.ml.clustering.{GaussianMixture}
import org.apache.spark.sql.SparkSession
object GMMClusteringPersist {
  // Spark install root; not referenced below but kept public for compatibility.
  val PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/"
  // Root of the 2-feature MovieLens data stored in libsvm format.
  val BASE = "./data/movie_lens_libsvm_2f"

  // Run timestamp, rendered as a path-safe string so each run writes to a
  // unique prediction output directory.
  val time = System.currentTimeMillis()
  val formatter = new SimpleDateFormat("dd_MM_yyyy_hh_mm_ss")
  import java.util.Calendar
  val calendar = Calendar.getInstance()
  calendar.setTimeInMillis(time)
  val date_time = formatter.format(calendar.getTime())

  /**
   * Fits a Gaussian Mixture Model (k = 5) to the user and item feature
   * vectors, then persists the per-row cluster predictions as text files
   * under a timestamped directory.
   */
  def main(args: Array[String]): Unit = {
    val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp").
      set("spark.driver.allowMultipleContexts", "true")
    val spark = SparkSession
      .builder()
      .appName("Spark SQL Example")
      .config(spConfig)
      .getOrCreate()

    // ---- users ----
    val datasetUsers = spark.read.format("libsvm").load(
      BASE + "/movie_lens_2f_users_libsvm/part-00000")
    datasetUsers.show(3)
    // Fluent builder chain (was a separate mutating setMaxIter call).
    val gmmUsers = new GaussianMixture().setK(5).setMaxIter(20).setSeed(1L)
    val modelUsers = gmmUsers.fit(datasetUsers)
    val predictedDataSetUsers = modelUsers.transform(datasetUsers)
    val predictionsUsers = predictedDataSetUsers.select("prediction").rdd.map(x=> x(0))
    predictionsUsers.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_users")

    // ---- items ----
    val dataSetItems = spark.read.format("libsvm").load(BASE +
      "/movie_lens_2f_items_libsvm/part-00000")
    // setMaxIter(20) added for consistency with the users model above
    // (previously the items model silently used the default iteration cap).
    val gmmItems = new GaussianMixture().setK(5).setMaxIter(20).setSeed(1L)
    val modelItems = gmmItems.fit(dataSetItems)
    val predictedDataSetItems = modelItems.transform(dataSetItems)
    val predictionsItems = predictedDataSetItems.select("prediction").rdd.map(x=> x(0))
    predictionsItems.saveAsTextFile(BASE + "/prediction/" + date_time + "/gmm_2f_items")

    spark.stop()
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:59,代码来源:GMMClusteringPersist.scala
示例2: GaussianMixtureJob
//设置package包名称以及导入依赖的类
import DTreeClassificationJob.context
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import io.hydrosphere.mist.api.ml.{LocalData, LocalDataColumn, PipelineLoader}
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.clustering.GaussianMixture
import org.apache.spark.sql.SparkSession
object GaussianMixtureJob extends MLMistJob {
  /** Obtains (or creates) the SparkSession configured from the Mist job context. */
  def session: SparkSession = SparkSession
    .builder()
    .appName(context.appName)
    .config(context.getConf)
    .getOrCreate()

  /**
   * Fits a two-component Gaussian mixture to the sample k-means data set and
   * persists the fitted pipeline to disk, overwriting any previous model.
   * Returns an empty map (this job produces no direct result payload).
   */
  def train(): Map[String, Any] = {
    val training = session.read.format("libsvm").load("jobs/data/mllib/sample_kmeans_data.txt")
    val mixture = new GaussianMixture().setK(2)
    val fitted = new Pipeline().setStages(Array(mixture)).fit(training)
    fitted.write.overwrite().save("models/gaussian_mixture")
    Map.empty[String, Any]
  }

  /**
   * Loads the persisted pipeline locally and scores the supplied rows,
   * returning the "text" column alongside the predicted cluster index.
   */
  def serve(text: List[String]): Map[String, Any] = {
    import LocalPipelineModel._
    val loaded = PipelineLoader.load("models/gaussian_mixture")
    val input = LocalData(LocalDataColumn("text", text))
    val scored: LocalData = loaded.transform(input)
    Map("result" -> scored.select("text", "prediction").toMapList)
  }
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:41,代码来源:GaussianMixtureJob.scala
注:本文中的org.apache.spark.ml.clustering.GaussianMixture类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论