本文整理汇总了Scala中org.apache.spark.ml.feature.Word2Vec类的典型用法代码示例。如果您正苦于以下问题：Scala Word2Vec类的具体用法？Scala Word2Vec怎么用？Scala Word2Vec使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Word2Vec类的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: model
//设置package包名称以及导入依赖的类
package net.koseburak.recommendation.model
import net.koseburak.recommendation.config.AppConfig._
import net.koseburak.recommendation.constant.Field.{PlaylistField, PlaylistResultField}
import net.koseburak.recommendation.util.DataUtils
import org.apache.spark.ml.feature.Word2Vec
import org.apache.spark.sql.SparkSession
/** Contract for any component that exposes a [[PlaylistModel]]. */
trait Generator {

  /** The playlist model provided by this generator. */
  def model: PlaylistModel
}
/**
 * [[Generator]] whose model is produced by fitting a Spark ML `Word2Vec` estimator
 * while the instance is being constructed.
 *
 * @param vectorSize value handed to `Word2Vec.setVectorSize`
 * @param windowSize value handed to `Word2Vec.setWindowSize`
 * @param spark      implicit session; not referenced directly in this body
 *                   (presumably consumed by `DataUtils.prepareData` -- TODO confirm)
 */
case class PlaylistGenerator(vectorSize: Int = 50, windowSize: Int = 125)
                            (implicit spark: SparkSession) extends Generator {

  // Declared before `model` because `model` reads it during initialisation.
  private val trainDF = DataUtils.prepareData(trainCompletePath)

  /** Fitted once, eagerly, when the generator is instantiated. */
  override val model: PlaylistModel = {
    val estimator = new Word2Vec()
    // The setters mutate and return the same estimator, so they can be applied one by one.
    estimator.setMinCount(1)
    estimator.setVectorSize(vectorSize)
    estimator.setWindowSize(windowSize)
    estimator.setInputCol(PlaylistField)
    estimator.setOutputCol(PlaylistResultField)

    val fitted = estimator.fit(trainDF)
    PlaylistModel(fitted, vectorSize, windowSize)
  }
}
开发者ID:burakkose,项目名称:word2vec-playlist-generation,代码行数:26,代码来源:PlaylistGenerator.scala
示例2: Word2VecMl
//设置package包名称以及导入依赖的类
import org.apache.spark.{SparkConf}
import org.apache.spark.ml.feature.Word2Vec
import org.apache.spark.sql.SparkSession
/**
 * Trains a Word2Vec model on the text files under
 * `./data/20news-bydate-train/alt.atheism/`, prints the first three document
 * vectors, and writes the five nearest synonyms of "philosophers" to a text file.
 */
object Word2VecMl {

  // Not referenced inside this object; kept so existing references to
  // `Word2VecMl.Record` elsewhere keep compiling.
  case class Record(name: String)

  /**
   * Splits one document into word tokens.
   *
   * Control characters (including line breaks and tabs) are turned into spaces
   * rather than deleted, so the last word of a line is no longer glued to the
   * first word of the next line. Every '(' character is dropped, and empty
   * tokens caused by consecutive separators are discarded.
   *
   * @param content full text of one document
   * @return the non-empty tokens of the document, in order
   */
  private def tokenize(content: String): Array[String] =
    content
      .map(c => if (c < ' ') ' ' else c)
      .filter(_ != '(')
      .split(" ")
      .filter(_.nonEmpty)

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder
      .appName("Word2Vec Sample").config(spConfig)
      .getOrCreate()
    import spark.implicits._

    // try/finally guarantees the session is released even when a step fails.
    try {
      // wholeTextFiles yields (path, content) pairs; only the content is used.
      val rawDocs = spark.sparkContext
        .wholeTextFiles("./data/20news-bydate-train/alt.atheism/*")

      val textDF = rawDocs
        .map { case (_, content) => tokenize(content) }
        .map(Tuple1.apply)
        .toDF("text")
      println(textDF.first())

      val word2Vec = new Word2Vec()
        .setInputCol("text")
        .setOutputCol("result")
        .setVectorSize(3)
        .setMinCount(0)
      val model = word2Vec.fit(textDF)

      val result = model.transform(textDF)
      result.select("result").take(3).foreach(println)

      // NOTE(review): findSynonyms fails if the word is not in the learned
      // vocabulary -- confirm the corpus contains "philosophers".
      val ds = model.findSynonyms("philosophers", 5).select("word")
      // The nanoTime suffix gives each run its own output directory.
      ds.rdd.saveAsTextFile("./output/alien-synonyms" + System.nanoTime())
      ds.show()
    } finally {
      spark.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:41,代码来源:Word2VecMl.scala
示例3: Word2VecExample
//设置package包名称以及导入依赖的类
import org.apache.spark.SparkConf
import org.apache.spark.ml.feature.Word2Vec
// $example off$
import org.apache.spark.sql.SparkSession
/**
 * Minimal Word2Vec example: fits a 3-dimensional model on three hard-coded
 * sentences and prints the resulting document vectors.
 */
object Word2VecExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder
      .appName("Word2Vec example").config(spConfig)
      .getOrCreate()

    // try/finally guarantees the session is released even when a step fails.
    try {
      // Each row is a bag of words from one sentence.
      val sentences = Seq(
        "Hi I heard about Spark".split(" "),
        "I wish Java could use case classes".split(" "),
        "Logistic regression models are neat".split(" ")
      ).map(Tuple1.apply)

      // Preview of the raw frame before the column is renamed.
      val documentDF1 = spark.createDataFrame(sentences)
      documentDF1.show(1)

      val documentDF = spark.createDataFrame(sentences).toDF("text")

      // Learn a mapping from words to Vectors.
      val word2Vec = new Word2Vec()
        .setInputCol("text")
        .setOutputCol("result")
        .setVectorSize(3)
        .setMinCount(0)
      val model = word2Vec.fit(documentDF)

      val result = model.transform(documentDF)
      result.select("result").take(3).foreach(println)
    } finally {
      spark.stop()
    }
  }
}
// scalastyle:on println
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:43,代码来源:Word2VecExample.scala
示例4: Word2Vec
//设置package包名称以及导入依赖的类
package com.lhcg.ml
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.ml.feature.Word2Vec
/**
 * Word2Vec example driven through `SparkContext` + `SQLContext`: fits a
 * 3-dimensional model on three hard-coded sentences and prints the vectors.
 *
 * NOTE(review): this object has the same simple name as the imported Spark
 * class `org.apache.spark.ml.feature.Word2Vec`; `new Word2Vec()` below is meant
 * to instantiate the Spark estimator. Consider renaming the object if callers allow.
 */
object Word2Vec {

  /**
   * Entry point. No master is set here, so it must come from the launch
   * configuration (e.g. spark-submit).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Word2Vec")
    //      .setMaster("local[2]")
    val sc = new SparkContext(conf)

    // The context was previously never stopped; release it even on failure.
    try {
      val sqlContext = new SQLContext(sc)

      // Input data: Each row is a bag of words from a sentence or document.
      val documentDF = sqlContext.createDataFrame(Seq(
        "Hi I heard about Spark".split(" "),
        "I wish Java could use case classes".split(" "),
        "Logistic regression models are neat".split(" ")
      ).map(Tuple1.apply)).toDF("text")

      // Learn a mapping from words to Vectors.
      val word2Vec = new Word2Vec()
        .setInputCol("text")
        .setOutputCol("result")
        .setVectorSize(3)
        .setMinCount(0)
      val model = word2Vec.fit(documentDF)

      val result = model.transform(documentDF)
      result.select("result").take(3).foreach(println)
    } finally {
      sc.stop()
    }
  }
}
开发者ID:lhcg,项目名称:lovespark,代码行数:33,代码来源:Word2Vec.scala
示例5: Word2VecJob
//设置package包名称以及导入依赖的类
import java.util
import org.apache.spark.ml.feature.Word2Vec
import org.apache.spark.mllib.linalg.{Vector => LVector}
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import org.apache.spark.ml.Pipeline
import org.apache.spark.sql.SparkSession
/**
 * Mist job with two entry points: `train` fits and saves a one-stage Word2Vec
 * pipeline, `serve` loads a saved pipeline and applies it to the given words.
 */
object Word2VecJob extends MLMistJob {

  /**
   * Session configured from the job `context`. Declared as a `def`, so the
   * builder chain is evaluated on every access.
   */
  def session: SparkSession = {
    val builder = SparkSession.builder()
    builder
      .appName(context.appName)
      .config(context.getConf)
      .getOrCreate()
  }

  /**
   * Fits a Word2Vec pipeline on three hard-coded sentences and saves it.
   *
   * @param savePath location the fitted pipeline is written to (overwritten if present)
   * @return always an empty map
   */
  def train(savePath: String): Map[String, Any] = {
    val sentences = Seq(
      "Hi I heard about Spark",
      "I wish Java could use case classes",
      "Logistic regression models are neat"
    )
    // One single-column row per sentence, holding that sentence's words.
    val rows = sentences.map(sentence => Tuple1(sentence.split(" ")))
    val documentDF = session.createDataFrame(rows).toDF("text")

    // Learn a mapping from words to Vectors.
    val embedder = new Word2Vec()
      .setInputCol("text")
      .setOutputCol("result")
      .setVectorSize(3)
      .setMinCount(0)

    val fitted = new Pipeline().setStages(Array(embedder)).fit(documentDF)
    fitted.write.overwrite().save(savePath)

    Map.empty
  }

  /**
   * Loads the pipeline stored at `modelPath` and applies it to `features`.
   *
   * @param modelPath location of a previously saved pipeline
   * @param features  values placed in the "text" input column
   * @return a map whose "result" entry holds one map per output row, with the
   *         row's "result" vector converted to an array
   */
  def serve(modelPath: String, features: List[String]): Map[String, Any] = {
    // Kept in scope for the local-pipeline API used below (presumably implicits -- confirm).
    import LocalPipelineModel._

    val loaded = PipelineLoader.load(modelPath)
    val input = LocalData(LocalDataColumn("text", features))
    val outputRows = loaded.transform(input).select("result").toMapList

    val response = outputRows.map { row =>
      val asArray = row("result").asInstanceOf[LVector].toArray
      row + ("result" -> asArray)
    }

    Map("result" -> response)
  }
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:53,代码来源:Word2VecJob.scala
注：本文中的org.apache.spark.ml.feature.Word2Vec类示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论