本文整理汇总了Scala中org.apache.log4j.Logger类的典型用法代码示例。如果您正苦于以下问题:Scala Logger类的具体用法?Scala Logger怎么用?Scala Logger使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Logger类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: LinearRegressionPipeline
//设置package包名称以及导入依赖的类
package org.sparksamples.regression.bikesharing
import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{DataFrame, SparkSession}
object LinearRegressionPipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def linearRegressionWithVectorFormat(vectorAssembler: VectorAssembler, vectorIndexer: VectorIndexer, dataFrame: DataFrame) = {
val lr = new LinearRegression()
.setFeaturesCol("features")
.setLabelCol("label")
.setRegParam(0.1)
.setElasticNetParam(1.0)
.setMaxIter(10)
val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))
val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)
val model = pipeline.fit(training)
val fullPredictions = model.transform(test).cache()
val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
println(s" Root mean squared error (RMSE): $RMSE")
}
def linearRegressionWithSVMFormat(spark: SparkSession) = {
// Load training data
val training = spark.read.format("libsvm")
.load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")
val lr = new LinearRegression()
.setMaxIter(10)
.setRegParam(0.3)
.setElasticNetParam(0.8)
// Fit the model
val lrModel = lr.fit(training)
// Print the coefficients and intercept for linear regression
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
// Summarize the model over the training set and print out some metrics
val trainingSummary = lrModel.summary
println(s"numIterations: ${trainingSummary.totalIterations}")
println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
trainingSummary.residuals.show()
println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
println(s"r2: ${trainingSummary.r2}")
}
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:61,代码来源:LinearRegressionPipeline.scala
示例2: MyApp
//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.log4j.{Level, Logger}
object MyApp {
def main(args: Array[String]) {
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
val file = args(0)
val conf = new SparkConf(true).setAppName("My Application")
val sc = new SparkContext(conf)
val tf = sc.textFile(file,2)
val splits = tf.flatMap(line => line.split(" ")).map(word =>(word,1))
val counts = splits.reduceByKey((x,y)=>x+y)
System.out.println(counts.collect().mkString(", "))
sc.stop()
}
}
开发者ID:gibrano,项目名称:docker-spark,代码行数:20,代码来源:myapp.scala
示例3: KMeansCases
//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
class KMeansCases(sc: SparkContext, dataFile: String, numOfCenters: Int, maxIterations:Int) {
//hide logger from console
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)
val data = sc.textFile(dataFile)
val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))).cache()
def KMeansInitialCenters() = {
val initStartTime = System.nanoTime()
val centers = new KMeansInitialization().run(sc, dataFile, numOfCenters)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization to find centers took " + "%.3f".format(initTimeInSeconds) + " seconds.")
val initStartTime1 = System.nanoTime()
val model = new KMeansModel(centers)
val clusterModel = new KMeans().setK(numOfCenters).setMaxIterations(maxIterations).setInitialModel(model).run(parsedData)
val initTimeInSeconds1 = (System.nanoTime() - initStartTime1) / 1e9
println(s"Initialization with custom took " + "%.3f".format(initTimeInSeconds1) + " seconds.")
println("\nnumber of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
def KMeansParallel() = {
val initStartTime = System.nanoTime()
val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.K_MEANS_PARALLEL)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization with KMeansParaller took " + "%.3f".format(initTimeInSeconds) + " seconds.")
println("number of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
def KMeansRandom() = {
val initStartTime = System.nanoTime()
val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.RANDOM)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization with KMeasRandom took " + "%.3f".format(initTimeInSeconds) + " seconds.")
println("number of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
}
开发者ID:AndyFou,项目名称:kmeans_contributions,代码行数:52,代码来源:KMeansCases.scala
示例4: driver
//设置package包名称以及导入依赖的类
import java.io._
import utils._
import SMOTE._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import breeze.linalg._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.{ArrayBuffer,Map}
object driver {
def main(args: Array[String]) {
val conf = new SparkConf()
val options = args.map { arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case Array(opt) => (opt -> "")
case _ => throw new IllegalArgumentException("Invalid argument: "+arg)
}
}.toMap
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
val sc = new SparkContext(conf)
// read in general inputs
val inputDirectory = options.getOrElse("inputDirectory","")
val outputDirectory = options.getOrElse("outputDirectory","")
val numFeatures = options.getOrElse("numFeatures","0").toInt
val oversamplingPctg = options.getOrElse("oversamplingPctg","1.0").toDouble
val kNN = options.getOrElse("K","5").toInt
val delimiter = options.getOrElse("delimiter",",")
val numPartitions = options.getOrElse("numPartitions","20").toInt
SMOTE.runSMOTE(sc, inputDirectory, outputDirectory, numFeatures, oversamplingPctg, kNN, delimiter, numPartitions)
println("The algorithm has finished running")
sc.stop()
}
}
开发者ID:anathan90,项目名称:SparkSMOTE,代码行数:46,代码来源:driver.scala
示例5: info
//设置package包名称以及导入依赖的类
package com.crawler.logger
import java.beans.Transient
import net.logstash.log4j.JSONEventLayoutV1
import org.apache.log4j.Logger
import org.joda.time.DateTime
import scala.collection.mutable
trait CrawlerLogger {
def info(string: Any)
def debug(string: Any)
def trace(string: Any)
def error(string: Any)
def warning(string: Any)
}
object CrawlerLoggerFactory {
private val loggers = mutable.HashMap[String, CrawlerLogger]()
def logger(appname: String, folder: String = "apps") = synchronized {
if (loggers.contains(appname)) loggers(appname)
else {
val newLogger = new CrawlerLoggerLog4j(appname, s"$folder/$appname")
loggers(appname) = newLogger
newLogger
}
}
}
class CrawlerLoggerLog4j(loggerName: String, filename: String) extends CrawlerLogger {
@Transient val logger = Logger.getLogger(loggerName)
logger.addAppender(new org.apache.log4j.RollingFileAppender(new JSONEventLayoutV1(), s"logs/$filename-${new DateTime().toString("yyyy-MM-dd-HH-mm")}"))
override def info(message: Any) = logger.info(message)
override def debug(message: Any) = logger.debug(message)
override def trace(message: Any) = logger.trace(message)
override def error(message: Any) = logger.error(message)
override def warning(message: Any) = logger.warn(message)
}
开发者ID:mxmptv,项目名称:osn.crawler,代码行数:42,代码来源:CrawlerLogger.scala
示例6: DecisionTreePipeline
//设置package包名称以及导入依赖的类
package org.stumbleuponclassifier
import org.apache.log4j.Logger
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.sql.DataFrame
import scala.collection.mutable
object DecisionTreePipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def decisionTreePipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = {
val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345)
// Set up Pipeline
val stages = new mutable.ArrayBuffer[PipelineStage]()
val labelIndexer = new StringIndexer()
.setInputCol("label")
.setOutputCol("indexedLabel")
stages += labelIndexer
val dt = new DecisionTreeClassifier()
.setFeaturesCol(vectorAssembler.getOutputCol)
.setLabelCol("indexedLabel")
.setMaxDepth(5)
.setMaxBins(32)
.setMinInstancesPerNode(1)
.setMinInfoGain(0.0)
.setCacheNodeIds(false)
.setCheckpointInterval(10)
stages += vectorAssembler
stages += dt
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
val startTime = System.nanoTime()
//val model = pipeline.fit(training)
val model = pipeline.fit(dataFrame)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
println(s"Training time: $elapsedTime seconds")
//val holdout = model.transform(test).select("prediction","label")
val holdout = model.transform(dataFrame).select("prediction","label")
// Select (prediction, true label) and compute test error
val evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")
.setMetricName("accuracy")
val mAccuracy = evaluator.evaluate(holdout)
println("Test set accuracy = " + mAccuracy)
}
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:60,代码来源:DecisionTreePipeline.scala
示例7: SVMPipeline
//设置package包名称以及导入依赖的类
package org.stumbleuponclassifier
import org.apache.log4j.Logger
import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
object SVMPipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def svmPipeline(sc: SparkContext) = {
val records = sc.textFile("/home/ubuntu/work/ml-resources/spark-ml/train_noheader.tsv").map(line => line.split("\t"))
val data = records.map { r =>
val trimmed = r.map(_.replaceAll("\"", ""))
val label = trimmed(r.size - 1).toInt
val features = trimmed.slice(4, r.size - 1).map(d => if (d == "?") 0.0 else d.toDouble)
LabeledPoint(label, Vectors.dense(features))
}
// params for SVM
val numIterations = 10
// Run training algorithm to build the model
val svmModel = SVMWithSGD.train(data, numIterations)
// Clear the default threshold.
svmModel.clearThreshold()
val svmTotalCorrect = data.map { point =>
if(svmModel.predict(point.features) == point.label) 1 else 0
}.sum()
// calculate accuracy
val svmAccuracy = svmTotalCorrect / data.count()
println(svmAccuracy)
}
}
开发者ID:PacktPublishing,项目名称:Machine-Learning-with-Spark-Second-Edition,代码行数:42,代码来源:SVMPipeline.scala
示例8: LogUtils
//设置package包名称以及导入依赖的类
package org.iamShantanu101.spark.SentimentAnalyzer.utils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{Logging, SparkContext}
object LogUtils extends Logging {
def setLogLevels(sparkContext: SparkContext) {
sparkContext.setLogLevel(Level.WARN.toString)
val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
if (!log4jInitialized) {
logInfo(
"""Setting log level to [WARN] for streaming executions.
|To override add a custom log4j.properties to the classpath.""".stripMargin)
Logger.getRootLogger.setLevel(Level.WARN)
}
}
}
开发者ID:iamShantanu101,项目名称:Twitter-Sentiment-Analyzer-Apache-Spark-Mllib,代码行数:21,代码来源:LogUtils.scala
示例9: CSVUtil
//设置package包名称以及导入依赖的类
package com.springml.spark.netsuite.util
import com.springml.spark.netsuite.model.XPathInput
import org.apache.log4j.Logger
object CSVUtil {
@transient val logger = Logger.getLogger(this.getClass.getName)
def readCSV(csvLocation : String) : Map[String, String] = {
var resultMap : Map[String, String] = Map.empty
if (csvLocation != null) {
val bufferedSource = scala.io.Source.fromFile(csvLocation)
for (line <- bufferedSource.getLines) {
if (!line.startsWith("#")) {
val cols = line.split(",").map(_.trim)
if (cols.length != 2) {
throw new Exception("Invalid Row : " + line + "\n Please make sure rows are specified as 'Key','Value' in " + csvLocation)
}
resultMap += cols(0) -> cols(1)
} else {
logger.info("Ignoring commented line " + line)
}
}
bufferedSource.close()
}
resultMap
}
}
开发者ID:springml,项目名称:spark-netsuite,代码行数:35,代码来源:CSVUtil.scala
示例10: Logging
//设置package包名称以及导入依赖的类
package org.hpi.esb.commons.util
import org.apache.log4j.{Level, Logger}
trait Logging {
var logger: Logger = Logger.getLogger("senskaLogger")
}
object Logging {
def setToInfo() {
Logger.getRootLogger.setLevel(Level.INFO)
}
def setToDebug() {
Logger.getRootLogger.setLevel(Level.DEBUG)
}
}
开发者ID:BenReissaus,项目名称:EnterpriseStreamingBenchmark,代码行数:19,代码来源:Logging.scala
示例11: PrepArgParser
//设置package包名称以及导入依赖的类
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
import utils.Utils
import scala.util.Try
import scalax.file.Path
class PrepArgParser(arguments: Seq[String]) extends org.rogach.scallop.ScallopConf(arguments) {
val dataset = opt[String](required = true, short = 'd',
descr = "absolute address of the libsvm dataset. This must be provided.")
val partitions = opt[Int](required = false, default = Some(4), short = 'p', validate = (0 <),
descr = "Number of spark partitions to be used. Optional.")
val dir = opt[String](required = true, default = Some("../results/"), short = 'w', descr = "working directory where results " +
"are stored. Default is \"../results\". ")
val method = opt[String](required = true, short = 'm',
descr = "Method can be either \"Regression\" or \"Classification\". This must be provided")
verify()
}
object PrepareData {
def main(args: Array[String]) {
//Spark conf
val conf = new SparkConf().setAppName("Distributed Machine Learning").setMaster("local[*]")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
//Turn off logs
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
//Parse arguments
val parser = new PrepArgParser(args)
val dataset = parser.dataset()
var workingDir = parser.dir()
val numPartitions = parser.partitions()
val method = parser.method()
//Load data
val (train, test) = method match {
case "Classification" => Utils.loadAbsolutLibSVMBinaryClassification(dataset, numPartitions, sc)
case "Regression" => Utils.loadAbsolutLibSVMRegression(dataset, numPartitions, sc)
case _ => throw new IllegalArgumentException("The method " + method + " is not supported.")
}
// append "/" to workingDir if necessary
workingDir = workingDir + ( if (workingDir.takeRight(1) != "/") "/" else "" )
val trainPath: Path = Path.fromString(workingDir + "train")
Try(trainPath.deleteRecursively(continueOnFailure = false))
val testPath: Path = Path.fromString(workingDir + "test")
Try(testPath.deleteRecursively(continueOnFailure = false))
MLUtils.saveAsLibSVMFile(train, workingDir + "train")
MLUtils.saveAsLibSVMFile(test, workingDir + "test")
}
}
开发者ID:mlbench,项目名称:mlbench,代码行数:56,代码来源:PrepareData.scala
示例12: SparkPi
//设置package包名称以及导入依赖的类
package com.zmyuan.spark.submit
import kafka.utils.Logging
import org.apache.log4j.Logger
import scala.math.random
import org.apache.spark.sql.SparkSession
object SparkPi {
val loggerName = this.getClass.getName
lazy val logger = Logger.getLogger(loggerName)
def main(args: Array[String]) {
val spark = SparkSession
.builder
.appName("Spark Pi")
.getOrCreate()
val slices = if (args.length > 0) args(0).toInt else 2
val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow
val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
val x = random * 2 - 1
val y = random * 2 - 1
if (x*x + y*y < 1) 1 else 0
}.reduce(_ + _)
logger.info("Pi is roughly " + 4.0 * count / (n - 1))
spark.stop()
}
}
// scalastyle:on println
开发者ID:zhudebin,项目名称:spark-submit,代码行数:32,代码来源:SparkPi.scala
示例13: Helper
//设置package包名称以及导入依赖的类
import org.apache.log4j.Logger
import org.apache.log4j.Level
import scala.collection.mutable.HashMap
object Helper {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger
.getLogger("org.apache.spark.storage.BlockManager")
.setLevel(Level.ERROR)
def configureTwitterCredentials(apiKey: String,
apiSecret: String,
accessToken: String,
accessTokenSecret: String) {
val configs = Map(
"apiKey" -> apiKey,
"apiSecret" -> apiSecret,
"accessToken" -> accessToken,
"accessTokenSecret" -> accessTokenSecret
)
println("Configuring Twitter OAuth")
configs.foreach {
case (key, value) =>
if (value.trim.isEmpty) {
throw new Exception(
"Error setting authentication - value for " + key + " not set -> value is " + value)
}
val fullKey = "twitter4j.oauth." + key.replace("api", "consumer")
System.setProperty(fullKey, value.trim)
println("\tProperty " + fullKey + " set as [" + value.trim + "]")
}
println()
}
}
开发者ID:joined,项目名称:ET4310-SupercomputingForBigData,代码行数:39,代码来源:Helper.scala
示例14: ModelBuilder
//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.sql.{DataFrame, SparkSession}
import scala.collection.parallel.mutable.ParArray
object ModelBuilder {
val log: Logger = LogManager.getLogger(ModelBuilder.getClass)
def buildModels(spark: SparkSession,
modelsOutputDir: Option[File],
sites: ParArray[String],
featureData: DataFrame): Unit = {
log.info("Building Models")
sites.foreach(target =>
try {
log.info("Building model for " + target)
log.info("Getting work data for " + target)
val workData: DataFrame = Utils.getWorkData(spark, featureData, target)
val Array(trainingData, testData) = workData.randomSplit(Array(0.7, 0.3))
log.info("Training model for " + target)
val model = Utils.REGRESSOR.fit(trainingData)
log.info("Writing model to file for " + target)
modelsOutputDir.foreach(o => model.write.save(new File(o, target).getAbsolutePath))
log.info("Testing model for " + target)
val predictions = model.transform(testData)
val rmse = Utils.EVALUATOR.evaluate(predictions)
log.info("Root Mean Squared Error (RMSE) on test data for " + target + " = " + rmse)
} catch {
case unknown: Throwable => log.error("Build model for " + target + " failed", unknown)
}
)
}
}
开发者ID:schana,项目名称:recommendation-translation,代码行数:41,代码来源:ModelBuilder.scala
示例15: ScorePredictor
//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.ml.regression.RandomForestRegressionModel
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
import scala.collection.parallel.mutable.ParArray
object ScorePredictor {
val log: Logger = LogManager.getLogger(ScorePredictor.getClass)
def predictScores(spark: SparkSession,
modelsInputDir: File,
predictionsOutputDir: Option[File],
sites: ParArray[String],
featureData: DataFrame): Unit = {
log.info("Scoring items")
val predictions: Array[DataFrame] = sites.map(target => {
try {
log.info("Scoring for " + target)
log.info("Getting work data for " + target)
val workData: DataFrame = Utils.getWorkData(spark, featureData, target, exists = false)
log.info("Loading model for " + target)
val model = RandomForestRegressionModel.load(
new File(modelsInputDir, target).getAbsolutePath)
log.info("Scoring data for " + target)
val predictions = model
.setPredictionCol(target)
.transform(workData)
.select("id", target)
predictions
} catch {
case unknown: Throwable =>
log.error("Score for " + target + " failed", unknown)
val schema = StructType(Seq(
StructField("id", StringType, nullable = false),
StructField(target, DoubleType, nullable = true)))
spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
}
}).toArray
val predictedScores = predictions.reduce((left, right) => left.join(right, Seq("id"), "outer"))
log.info("Saving predictions")
predictionsOutputDir.foreach(f = o =>
predictedScores.coalesce(1)
.write
.mode(SaveMode.ErrorIfExists)
.option("header", value = true)
.option("compression", "bzip2")
.csv(new File(o, "allPredictions").getAbsolutePath))
}
}
开发者ID:schana,项目名称:recommendation-translation,代码行数:59,代码来源:ScorePredictor.scala
示例16: JobRunner
//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import java.text.SimpleDateFormat
import java.util.Calendar
import org.apache.log4j.{Level, LogManager, Logger}
import org.apache.spark.sql._
import scala.collection.parallel.ForkJoinTaskSupport
import scala.concurrent.forkjoin.ForkJoinPool
object JobRunner {
val log: Logger = LogManager.getLogger(JobRunner.getClass)
val SITE_PARALLELISM = 8
def main(args: Array[String]): Unit = {
log.info("Starting")
val params = ArgParser.parseArgs(args)
var sparkBuilder = SparkSession
.builder()
.appName("TranslationRecommendations")
if (params.runLocally) {
sparkBuilder = sparkBuilder.master("local[*]")
}
val spark = sparkBuilder.getOrCreate()
Logger.getLogger("org").setLevel(Level.WARN)
Logger.getLogger("org.wikimedia").setLevel(Level.INFO)
val timestamp = new SimpleDateFormat("yyyy-MM-dd-HHmmss").format(Calendar.getInstance.getTime)
log.info("Timestamp for creating files: " + timestamp)
if (params.scoreItems) {
val modelsInputDir = params.modelsDir.getOrElse(modelsOutputDir.get)
val predictionsOutputDir = params.outputDir.map(o => new File(o, timestamp + "_predictions"))
predictionsOutputDir.foreach(o => o.mkdir())
ScorePredictor.predictScores(spark, modelsInputDir, predictionsOutputDir, modelSites, featureData)
}
spark.stop()
log.info("Finished")
}
}
开发者ID:schana,项目名称:recommendation-translation,代码行数:46,代码来源:JobRunner.scala
示例17: BorderFlowClustering
//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering
import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import net.sansa_stack.ml.spark.clustering.BorderFlow
object BorderFlowClustering {
def main(args: Array[String]) = {
if (args.length < 1) {
System.err.println(
"Usage: BorderFlow <input> ")
System.exit(1)
}
val input = args(0) //"src/main/resources/BorderFlow_Sample1.txt"
val optionsList = args.drop(1).map { arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
}
}
val options = mutable.Map(optionsList: _*)
options.foreach {
case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
}
println("============================================")
println("| Border Flow example |")
println("============================================")
val spark = SparkSession.builder
.master("local[*]")
.appName(" BorderFlow example (" + input + ")")
.getOrCreate()
Logger.getRootLogger.setLevel(Level.ERROR)
BorderFlow(spark, input)
spark.stop
}
}
开发者ID:SANSA-Stack,项目名称:SANSA-Examples,代码行数:42,代码来源:BorderFlowClustering.scala
示例18: RDFGraphPIClustering
//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering
import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import org.apache.spark.graphx.GraphLoader
import net.sansa_stack.ml.spark.clustering.{ RDFGraphPICClustering => RDFGraphPICClusteringAlg }
object RDFGraphPIClustering {
def main(args: Array[String]) = {
if (args.length < 3) {
System.err.println(
"Usage: RDFGraphPIClustering <input> <k> <numIterations>")
System.exit(1)
}
val input = args(0) //"src/main/resources/BorderFlow_Sample1.txt"
val k = args(1).toInt
val numIterations = args(2).toInt
val optionsList = args.drop(3).map { arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
}
}
val options = mutable.Map(optionsList: _*)
options.foreach {
case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
}
println("============================================")
println("| Power Iteration Clustering example |")
println("============================================")
val sparkSession = SparkSession.builder
.master("local[*]")
.appName(" Power Iteration Clustering example (" + input + ")")
.getOrCreate()
Logger.getRootLogger.setLevel(Level.ERROR)
// Load the graph
val graph = GraphLoader.edgeListFile(sparkSession.sparkContext, input)
val model = RDFGraphPICClusteringAlg(sparkSession, graph, k, numIterations).run()
val clusters = model.assignments.collect().groupBy(_.cluster).mapValues(_.map(_.id))
val assignments = clusters.toList.sortBy { case (k, v) => v.length }
val assignmentsStr = assignments
.map {
case (k, v) =>
s"$k -> ${v.sorted.mkString("[", ",", "]")}"
}.mkString(",")
val sizesStr = assignments.map {
_._2.size
}.sorted.mkString("(", ",", ")")
println(s"Cluster assignments: $assignmentsStr\ncluster sizes: $sizesStr")
sparkSession.stop
}
}
开发者ID:SANSA-Stack,项目名称:SANSA-Examples,代码行数:61,代码来源:RDFGraphPIClustering.scala
示例19: RDFByModularityClustering
//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering
import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import net.sansa_stack.ml.spark.clustering.{ RDFByModularityClustering => RDFByModularityClusteringAlg }
object RDFByModularityClustering {
def main(args: Array[String]) = {
if (args.length < 3) {
System.err.println(
"Usage: RDFByModularityClustering <input> <output> <numIterations>")
System.exit(1)
}
val graphFile = args(0) //"src/main/resources/Clustering_sampledata.nt",
val outputFile = args(1)
val numIterations = args(2).toInt
val optionsList = args.drop(3).map { arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
}
}
val options = mutable.Map(optionsList: _*)
options.foreach {
case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
}
println("============================================")
println("| RDF By Modularity Clustering example |")
println("============================================")
val sparkSession = SparkSession.builder
.master("local[*]")
.appName(" RDF By Modularity Clustering example (" + graphFile + ")")
.getOrCreate()
Logger.getRootLogger.setLevel(Level.ERROR)
RDFByModularityClusteringAlg(sparkSession.sparkContext, numIterations, graphFile, outputFile)
sparkSession.stop
}
}
开发者ID:SANSA-Stack,项目名称:SANSA-Examples,代码行数:47,代码来源:RDFByModularityClustering.scala
示例20: MyLog1
//设置package包名称以及导入依赖的类
package com.chapter14.Serilazition
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.log4j.LogManager
import org.apache.log4j.Level
import org.apache.log4j.Logger
object MyLog1 extends Serializable {
def main(args: Array[String]):Unit= {
// Stting logger level as WARN
val log = LogManager.getRootLogger
log.setLevel(Level.WARN)
@transient lazy val log2 = org.apache.log4j.LogManager.getLogger("myLogger")
// Creating Spark Context
val conf = new SparkConf().setAppName("My App").setMaster("local[*]")
val sc = new SparkContext(conf)
//Started the computation and printing the logging inforamtion
//log.warn("Started")
//val i = 0
val data = sc.parallelize(0 to 100000)
data.foreach(i => log.info("My number"+ i))
data.collect()
log.warn("Finished")
}
}
开发者ID:PacktPublishing,项目名称:Scala-and-Spark-for-Big-Data-Analytics,代码行数:28,代码来源:MyLog.scala
注:本文中的org.apache.log4j.Logger类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论