Scala Logger类代码示例

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› Scala›Scala教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了Scala中org.apache.log4j.Logger类的典型用法代码示例。如果您正苦于以下问题：Scala Logger类的具体用法？Scala Logger怎么用？Scala Logger使用的例子？那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了Logger类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Scala代码示例。

示例1: LinearRegressionPipeline

//设置package包名称以及导入依赖的类
package org.sparksamples.regression.bikesharing

import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{DataFrame, SparkSession}


object LinearRegressionPipeline {
  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def linearRegressionWithVectorFormat(vectorAssembler: VectorAssembler, vectorIndexer: VectorIndexer, dataFrame: DataFrame) = {
    val lr = new LinearRegression()
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setRegParam(0.1)
      .setElasticNetParam(1.0)
      .setMaxIter(10)

    val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))

    val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)

    val model = pipeline.fit(training)

    val fullPredictions = model.transform(test).cache()
    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
    val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
    val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
    println(s"  Root mean squared error (RMSE): $RMSE")
  }

  def linearRegressionWithSVMFormat(spark: SparkSession) = {
    // Load training data
    val training = spark.read.format("libsvm")
      .load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")

    println(s"r2: ${trainingSummary.r2}")
  }
}

开发者ID:PacktPublishing，项目名称:Machine-Learning-with-Spark-Second-Edition，代码行数:61，代码来源:LinearRegressionPipeline.scala

示例2: MyApp

//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.log4j.{Level, Logger}

object MyApp {
  def main(args: Array[String]) {
    val rootLogger = Logger.getRootLogger()
    rootLogger.setLevel(Level.ERROR)
    val file = args(0)
    val conf = new SparkConf(true).setAppName("My Application")
    val sc = new SparkContext(conf)
    val tf = sc.textFile(file,2)
    val splits = tf.flatMap(line => line.split(" ")).map(word =>(word,1))
    val counts = splits.reduceByKey((x,y)=>x+y)
    System.out.println(counts.collect().mkString(", "))
    sc.stop()
  }
}

开发者ID:gibrano，项目名称:docker-spark，代码行数:20，代码来源:myapp.scala

示例3: KMeansCases

//设置package包名称以及导入依赖的类
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}

class KMeansCases(sc: SparkContext, dataFile: String, numOfCenters: Int, maxIterations:Int) {
  //hide logger from console
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val data = sc.textFile(dataFile)
  val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))).cache()

  def KMeansInitialCenters() = {
    val initStartTime = System.nanoTime()
    val centers = new KMeansInitialization().run(sc, dataFile, numOfCenters)
    val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
    println(s"Initialization to find centers took " + "%.3f".format(initTimeInSeconds) + " seconds.")

    val initStartTime1 = System.nanoTime()
    val model = new KMeansModel(centers)
    val clusterModel = new KMeans().setK(numOfCenters).setMaxIterations(maxIterations).setInitialModel(model).run(parsedData)
    val initTimeInSeconds1 = (System.nanoTime() - initStartTime1) / 1e9
    println(s"Initialization with custom took " + "%.3f".format(initTimeInSeconds1) + " seconds.")

    println("\nnumber of points per cluster")
    clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))

  }

  def KMeansParallel() = {
    val initStartTime = System.nanoTime()

    val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.K_MEANS_PARALLEL)
    val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
    println(s"Initialization with KMeansParaller took " + "%.3f".format(initTimeInSeconds) + " seconds.")
    println("number of points per cluster")
    clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
  }

  def KMeansRandom() = {
    val initStartTime = System.nanoTime()

    val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.RANDOM)
    val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
    println(s"Initialization with KMeasRandom took " + "%.3f".format(initTimeInSeconds) + " seconds.")
    println("number of points per cluster")

    clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
  }
}

开发者ID:AndyFou，项目名称:kmeans_contributions，代码行数:52，代码来源:KMeansCases.scala

示例4: driver

//设置package包名称以及导入依赖的类
import java.io._
import utils._
import SMOTE._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import breeze.linalg._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.{ArrayBuffer,Map}


object driver {
	
	def main(args: Array[String]) {
			
		val conf = new SparkConf()

		val options = args.map { arg =>
			arg.dropWhile(_ == '-').split('=') match {
				case Array(opt, v) => (opt -> v)
				case Array(opt) => (opt -> "")
				case _ => throw new IllegalArgumentException("Invalid argument: "+arg)
			}
		}.toMap

        val rootLogger = Logger.getRootLogger()
        rootLogger.setLevel(Level.ERROR)

		val sc = new SparkContext(conf)	

		// read in general inputs
		val inputDirectory = options.getOrElse("inputDirectory","")
		val outputDirectory = options.getOrElse("outputDirectory","")
		val numFeatures = options.getOrElse("numFeatures","0").toInt
		val oversamplingPctg = options.getOrElse("oversamplingPctg","1.0").toDouble
        val kNN = options.getOrElse("K","5").toInt
		val delimiter = options.getOrElse("delimiter",",")
		val numPartitions = options.getOrElse("numPartitions","20").toInt

		SMOTE.runSMOTE(sc, inputDirectory, outputDirectory, numFeatures, oversamplingPctg, kNN, delimiter, numPartitions)	

		println("The algorithm has finished running")
		sc.stop()
	}
}

开发者ID:anathan90，项目名称:SparkSMOTE，代码行数:46，代码来源:driver.scala

示例5: info

//设置package包名称以及导入依赖的类
package com.crawler.logger

import java.beans.Transient
import net.logstash.log4j.JSONEventLayoutV1
import org.apache.log4j.Logger
import org.joda.time.DateTime
import scala.collection.mutable



trait CrawlerLogger {
  def info(string: Any)
  def debug(string: Any)
  def trace(string: Any)
  def error(string: Any)
  def warning(string: Any)
}

object CrawlerLoggerFactory {
  private val loggers = mutable.HashMap[String, CrawlerLogger]()

  def logger(appname: String, folder: String = "apps") = synchronized {
    if (loggers.contains(appname)) loggers(appname)
    else {
      val newLogger = new CrawlerLoggerLog4j(appname, s"$folder/$appname")
      loggers(appname) = newLogger
      newLogger
    }
  }
}

class CrawlerLoggerLog4j(loggerName: String, filename: String) extends CrawlerLogger {
  @Transient val logger = Logger.getLogger(loggerName)
  logger.addAppender(new org.apache.log4j.RollingFileAppender(new JSONEventLayoutV1(), s"logs/$filename-${new DateTime().toString("yyyy-MM-dd-HH-mm")}"))

  override def info(message: Any) = logger.info(message)
  override def debug(message: Any) = logger.debug(message)
  override def trace(message: Any) = logger.trace(message)
  override def error(message: Any) = logger.error(message)
  override def warning(message: Any) = logger.warn(message)
}

开发者ID:mxmptv，项目名称:osn.crawler，代码行数:42，代码来源:CrawlerLogger.scala

示例6: DecisionTreePipeline

//设置package包名称以及导入依赖的类
package org.stumbleuponclassifier

import org.apache.log4j.Logger
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.sql.DataFrame

import scala.collection.mutable


object DecisionTreePipeline {
  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def decisionTreePipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = {
    val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345)

    // Set up Pipeline
    val stages = new mutable.ArrayBuffer[PipelineStage]()

    val labelIndexer = new StringIndexer()
      .setInputCol("label")
      .setOutputCol("indexedLabel")
    stages += labelIndexer

    val dt = new DecisionTreeClassifier()
      .setFeaturesCol(vectorAssembler.getOutputCol)
      .setLabelCol("indexedLabel")
      .setMaxDepth(5)
      .setMaxBins(32)
      .setMinInstancesPerNode(1)
      .setMinInfoGain(0.0)
      .setCacheNodeIds(false)
      .setCheckpointInterval(10)

    stages += vectorAssembler
    stages += dt
    val pipeline = new Pipeline().setStages(stages.toArray)

    // Fit the Pipeline
    val startTime = System.nanoTime()
    //val model = pipeline.fit(training)
    val model = pipeline.fit(dataFrame)
    val elapsedTime = (System.nanoTime() - startTime) / 1e9
    println(s"Training time: $elapsedTime seconds")

    //val holdout = model.transform(test).select("prediction","label")
    val holdout = model.transform(dataFrame).select("prediction","label")

    // Select (prediction, true label) and compute test error
    val evaluator = new MulticlassClassificationEvaluator()
      .setLabelCol("label")
      .setPredictionCol("prediction")
      .setMetricName("accuracy")
    val mAccuracy = evaluator.evaluate(holdout)
    println("Test set accuracy = " + mAccuracy)
  }
}

开发者ID:PacktPublishing，项目名称:Machine-Learning-with-Spark-Second-Edition，代码行数:60，代码来源:DecisionTreePipeline.scala

示例7: SVMPipeline

//设置package包名称以及导入依赖的类
package org.stumbleuponclassifier

import org.apache.log4j.Logger
import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint


object SVMPipeline {
  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def svmPipeline(sc: SparkContext) = {
    val records = sc.textFile("/home/ubuntu/work/ml-resources/spark-ml/train_noheader.tsv").map(line => line.split("\t"))

    val data = records.map { r =>
      val trimmed = r.map(_.replaceAll("\"", ""))
      val label = trimmed(r.size - 1).toInt
      val features = trimmed.slice(4, r.size - 1).map(d => if (d == "?") 0.0 else d.toDouble)
      LabeledPoint(label, Vectors.dense(features))
    }

    // params for SVM
    val numIterations = 10

    // Run training algorithm to build the model
    val svmModel = SVMWithSGD.train(data, numIterations)

    // Clear the default threshold.
    svmModel.clearThreshold()

    val svmTotalCorrect = data.map { point =>
      if(svmModel.predict(point.features) == point.label) 1 else 0
    }.sum()

    // calculate accuracy
    val svmAccuracy = svmTotalCorrect / data.count()
    println(svmAccuracy)
  }

}

开发者ID:PacktPublishing，项目名称:Machine-Learning-with-Spark-Second-Edition，代码行数:42，代码来源:SVMPipeline.scala

示例8: LogUtils

//设置package包名称以及导入依赖的类
package org.iamShantanu101.spark.SentimentAnalyzer.utils

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{Logging, SparkContext}


object LogUtils extends Logging {

  def setLogLevels(sparkContext: SparkContext) {

    sparkContext.setLogLevel(Level.WARN.toString)
    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
    if (!log4jInitialized) {
      logInfo(
        """Setting log level to [WARN] for streaming executions.
          |To override add a custom log4j.properties to the classpath.""".stripMargin)
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}

开发者ID:iamShantanu101，项目名称:Twitter-Sentiment-Analyzer-Apache-Spark-Mllib，代码行数:21，代码来源:LogUtils.scala

示例9: CSVUtil

//设置package包名称以及导入依赖的类
package com.springml.spark.netsuite.util

import com.springml.spark.netsuite.model.XPathInput
import org.apache.log4j.Logger


object CSVUtil {
  @transient val logger = Logger.getLogger(this.getClass.getName)

  def readCSV(csvLocation : String) : Map[String, String] = {
    var resultMap : Map[String, String] = Map.empty

    if (csvLocation != null) {
      val bufferedSource = scala.io.Source.fromFile(csvLocation)

      for (line <- bufferedSource.getLines) {
        if (!line.startsWith("#")) {
          val cols = line.split(",").map(_.trim)
          if (cols.length != 2) {
            throw new Exception("Invalid Row : " + line + "\n Please make sure rows are specified as 'Key','Value' in " + csvLocation)
          }

          resultMap += cols(0) -> cols(1)
        } else {
          logger.info("Ignoring commented line " + line)
        }
      }
      bufferedSource.close()
    }

    resultMap
  }

}

开发者ID:springml，项目名称:spark-netsuite，代码行数:35，代码来源:CSVUtil.scala

示例10: Logging

//设置package包名称以及导入依赖的类
package org.hpi.esb.commons.util

import org.apache.log4j.{Level, Logger}

trait Logging {
  var logger: Logger = Logger.getLogger("senskaLogger")
}

object Logging {

  def setToInfo() {
    Logger.getRootLogger.setLevel(Level.INFO)
  }

  def setToDebug() {
    Logger.getRootLogger.setLevel(Level.DEBUG)
  }
}

开发者ID:BenReissaus，项目名称:EnterpriseStreamingBenchmark，代码行数:19，代码来源:Logging.scala

示例11: PrepArgParser

//设置package包名称以及导入依赖的类
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
import utils.Utils

import scala.util.Try
import scalax.file.Path



class PrepArgParser(arguments: Seq[String]) extends org.rogach.scallop.ScallopConf(arguments) {
  val dataset = opt[String](required = true, short = 'd',
    descr = "absolute address of the libsvm dataset. This must be provided.")
  val partitions = opt[Int](required = false, default = Some(4), short = 'p', validate = (0 <),
    descr = "Number of spark partitions to be used. Optional.")
  val dir = opt[String](required = true, default = Some("../results/"), short = 'w', descr = "working directory where results " +
    "are stored. Default is \"../results\". ")
  val method = opt[String](required = true, short = 'm',
    descr = "Method can be either \"Regression\" or \"Classification\". This must be provided")
  verify()
}

object PrepareData {
  def main(args: Array[String]) {
    //Spark conf
    val conf = new SparkConf().setAppName("Distributed Machine Learning").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    //Turn off logs
    val rootLogger = Logger.getRootLogger()
    rootLogger.setLevel(Level.ERROR)
    //Parse arguments
    val parser = new PrepArgParser(args)
    val dataset = parser.dataset()
    var workingDir = parser.dir()
    val numPartitions = parser.partitions()
    val method = parser.method()

    //Load data
    val (train, test) = method match {
      case "Classification" => Utils.loadAbsolutLibSVMBinaryClassification(dataset, numPartitions, sc)
      case "Regression" => Utils.loadAbsolutLibSVMRegression(dataset, numPartitions, sc)
      case _ => throw new IllegalArgumentException("The method " + method + " is not supported.")
    }

    // append "/" to workingDir if necessary
    workingDir = workingDir + ( if (workingDir.takeRight(1) != "/") "/" else "" )
    val trainPath: Path = Path.fromString(workingDir + "train")
    Try(trainPath.deleteRecursively(continueOnFailure = false))
    val testPath: Path = Path.fromString(workingDir + "test")
    Try(testPath.deleteRecursively(continueOnFailure = false))
    MLUtils.saveAsLibSVMFile(train, workingDir + "train")
    MLUtils.saveAsLibSVMFile(test, workingDir + "test")
  }
}

开发者ID:mlbench，项目名称:mlbench，代码行数:56，代码来源:PrepareData.scala

示例12: SparkPi

//设置package包名称以及导入依赖的类
package com.zmyuan.spark.submit

import kafka.utils.Logging
import org.apache.log4j.Logger

import scala.math.random

import org.apache.spark.sql.SparkSession


object SparkPi {
  val loggerName = this.getClass.getName
  lazy val logger = Logger.getLogger(loggerName)

  def main(args: Array[String]) {
    val spark = SparkSession
      .builder
      .appName("Spark Pi")
      .getOrCreate()
    val slices = if (args.length > 0) args(0).toInt else 2
    val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow
    val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
        val x = random * 2 - 1
        val y = random * 2 - 1
        if (x*x + y*y < 1) 1 else 0
      }.reduce(_ + _)
    logger.info("Pi is roughly " + 4.0 * count / (n - 1))
    spark.stop()
  }
}
// scalastyle:on println

开发者ID:zhudebin，项目名称:spark-submit，代码行数:32，代码来源:SparkPi.scala

示例13: Helper

//设置package包名称以及导入依赖的类
import org.apache.log4j.Logger
import org.apache.log4j.Level
import scala.collection.mutable.HashMap

object Helper {
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger
    .getLogger("org.apache.spark.storage.BlockManager")
    .setLevel(Level.ERROR)

  
  def configureTwitterCredentials(apiKey: String,
                                  apiSecret: String,
                                  accessToken: String,
                                  accessTokenSecret: String) {
    val configs = Map(
      "apiKey" -> apiKey,
      "apiSecret" -> apiSecret,
      "accessToken" -> accessToken,
      "accessTokenSecret" -> accessTokenSecret
    )

    println("Configuring Twitter OAuth")

    configs.foreach {
      case (key, value) =>
        if (value.trim.isEmpty) {
          throw new Exception(
            "Error setting authentication - value for " + key + " not set -> value is " + value)
        }
        val fullKey = "twitter4j.oauth." + key.replace("api", "consumer")
        System.setProperty(fullKey, value.trim)
        println("\tProperty " + fullKey + " set as [" + value.trim + "]")
    }

    println()
  }
}

开发者ID:joined，项目名称:ET4310-SupercomputingForBigData，代码行数:39，代码来源:Helper.scala

示例14: ModelBuilder

//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation

import java.io.File

import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.sql.{DataFrame, SparkSession}

import scala.collection.parallel.mutable.ParArray

object ModelBuilder {
  val log: Logger = LogManager.getLogger(ModelBuilder.getClass)

  def buildModels(spark: SparkSession,
                  modelsOutputDir: Option[File],
                  sites: ParArray[String],
                  featureData: DataFrame): Unit = {
    log.info("Building Models")
    sites.foreach(target =>
      try {
        log.info("Building model for " + target)
        log.info("Getting work data for " + target)
        val workData: DataFrame = Utils.getWorkData(spark, featureData, target)
        val Array(trainingData, testData) = workData.randomSplit(Array(0.7, 0.3))

        log.info("Training model for " + target)
        val model = Utils.REGRESSOR.fit(trainingData)

        log.info("Writing model to file for " + target)
        modelsOutputDir.foreach(o => model.write.save(new File(o, target).getAbsolutePath))

        log.info("Testing model for " + target)
        val predictions = model.transform(testData)
        val rmse = Utils.EVALUATOR.evaluate(predictions)
        log.info("Root Mean Squared Error (RMSE) on test data for " + target + " = " + rmse)
      } catch {
        case unknown: Throwable => log.error("Build model for " + target + " failed", unknown)
      }
    )
  }
}

开发者ID:schana，项目名称:recommendation-translation，代码行数:41，代码来源:ModelBuilder.scala

示例15: ScorePredictor

//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation

import java.io.File

import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.ml.regression.RandomForestRegressionModel
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}

import scala.collection.parallel.mutable.ParArray

object ScorePredictor {
  val log: Logger = LogManager.getLogger(ScorePredictor.getClass)

  def predictScores(spark: SparkSession,
                    modelsInputDir: File,
                    predictionsOutputDir: Option[File],
                    sites: ParArray[String],
                    featureData: DataFrame): Unit = {
    log.info("Scoring items")

    val predictions: Array[DataFrame] = sites.map(target => {
      try {
        log.info("Scoring for " + target)
        log.info("Getting work data for " + target)
        val workData: DataFrame = Utils.getWorkData(spark, featureData, target, exists = false)
        log.info("Loading model for " + target)
        val model = RandomForestRegressionModel.load(
          new File(modelsInputDir, target).getAbsolutePath)
        log.info("Scoring data for " + target)
        val predictions = model
          .setPredictionCol(target)
          .transform(workData)
          .select("id", target)

        predictions
      } catch {
        case unknown: Throwable =>
          log.error("Score for " + target + " failed", unknown)
          val schema = StructType(Seq(
            StructField("id", StringType, nullable = false),
            StructField(target, DoubleType, nullable = true)))
          spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
      }
    }).toArray

    val predictedScores = predictions.reduce((left, right) => left.join(right, Seq("id"), "outer"))

    log.info("Saving predictions")
    predictionsOutputDir.foreach(f = o =>
      predictedScores.coalesce(1)
        .write
        .mode(SaveMode.ErrorIfExists)
        .option("header", value = true)
        .option("compression", "bzip2")
        .csv(new File(o, "allPredictions").getAbsolutePath))
  }
}

开发者ID:schana，项目名称:recommendation-translation，代码行数:59，代码来源:ScorePredictor.scala

示例16: JobRunner

//设置package包名称以及导入依赖的类
package org.wikimedia.research.recommendation.job.translation

import java.io.File
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.log4j.{Level, LogManager, Logger}
import org.apache.spark.sql._

import scala.collection.parallel.ForkJoinTaskSupport
import scala.concurrent.forkjoin.ForkJoinPool

object JobRunner {
  val log: Logger = LogManager.getLogger(JobRunner.getClass)
  val SITE_PARALLELISM = 8

  def main(args: Array[String]): Unit = {
    log.info("Starting")
    val params = ArgParser.parseArgs(args)

    var sparkBuilder = SparkSession
      .builder()
      .appName("TranslationRecommendations")
    if (params.runLocally) {
      sparkBuilder = sparkBuilder.master("local[*]")
    }
    val spark = sparkBuilder.getOrCreate()
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("org.wikimedia").setLevel(Level.INFO)

    val timestamp = new SimpleDateFormat("yyyy-MM-dd-HHmmss").format(Calendar.getInstance.getTime)
    log.info("Timestamp for creating files: " + timestamp)

    
    if (params.scoreItems) {
      val modelsInputDir = params.modelsDir.getOrElse(modelsOutputDir.get)
      val predictionsOutputDir = params.outputDir.map(o => new File(o, timestamp + "_predictions"))
      predictionsOutputDir.foreach(o => o.mkdir())
      ScorePredictor.predictScores(spark, modelsInputDir, predictionsOutputDir, modelSites, featureData)
    }

    spark.stop()
    log.info("Finished")
  }
}

开发者ID:schana，项目名称:recommendation-translation，代码行数:46，代码来源:JobRunner.scala

示例17: BorderFlowClustering

//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering

import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import net.sansa_stack.ml.spark.clustering.BorderFlow

object BorderFlowClustering {
  def main(args: Array[String]) = {
    if (args.length < 1) {
      System.err.println(
        "Usage: BorderFlow <input> ")
      System.exit(1)
    }
    val input = args(0) //"src/main/resources/BorderFlow_Sample1.txt"
    val optionsList = args.drop(1).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _             => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)

    options.foreach {
      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
    }
    println("============================================")
    println("| Border Flow example                      |")
    println("============================================")

    val spark = SparkSession.builder
      .master("local[*]")
      .appName(" BorderFlow example (" + input + ")")
      .getOrCreate()
    Logger.getRootLogger.setLevel(Level.ERROR)

    BorderFlow(spark, input)

    spark.stop
  }
}

开发者ID:SANSA-Stack，项目名称:SANSA-Examples，代码行数:42，代码来源:BorderFlowClustering.scala

示例18: RDFGraphPIClustering

//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering

import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import org.apache.spark.graphx.GraphLoader
import net.sansa_stack.ml.spark.clustering.{ RDFGraphPICClustering => RDFGraphPICClusteringAlg }

object RDFGraphPIClustering {
  def main(args: Array[String]) = {
    if (args.length < 3) {
      System.err.println(
        "Usage: RDFGraphPIClustering <input> <k> <numIterations>")
      System.exit(1)
    }
    val input = args(0) //"src/main/resources/BorderFlow_Sample1.txt"
    val k = args(1).toInt
    val numIterations = args(2).toInt
    val optionsList = args.drop(3).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _             => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)

    options.foreach {
      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
    }
    println("============================================")
    println("| Power Iteration Clustering   example     |")
    println("============================================")

    val sparkSession = SparkSession.builder
      .master("local[*]")
      .appName(" Power Iteration Clustering example (" + input + ")")
      .getOrCreate()
    Logger.getRootLogger.setLevel(Level.ERROR)

    // Load the graph 
    val graph = GraphLoader.edgeListFile(sparkSession.sparkContext, input)

    val model = RDFGraphPICClusteringAlg(sparkSession, graph, k, numIterations).run()

    val clusters = model.assignments.collect().groupBy(_.cluster).mapValues(_.map(_.id))
    val assignments = clusters.toList.sortBy { case (k, v) => v.length }
    val assignmentsStr = assignments
      .map {
        case (k, v) =>
          s"$k -> ${v.sorted.mkString("[", ",", "]")}"
      }.mkString(",")
    val sizesStr = assignments.map {
      _._2.size
    }.sorted.mkString("(", ",", ")")
    println(s"Cluster assignments: $assignmentsStr\ncluster sizes: $sizesStr")

    sparkSession.stop
  }

}

开发者ID:SANSA-Stack，项目名称:SANSA-Examples，代码行数:61，代码来源:RDFGraphPIClustering.scala

示例19: RDFByModularityClustering

//设置package包名称以及导入依赖的类
package net.sansa_stack.examples.spark.ml.clustering

import scala.collection.mutable
import org.apache.spark.sql.SparkSession
import org.apache.log4j.{ Level, Logger }
import net.sansa_stack.ml.spark.clustering.{ RDFByModularityClustering => RDFByModularityClusteringAlg }

object RDFByModularityClustering {

  def main(args: Array[String]) = {
    if (args.length < 3) {
      System.err.println(
        "Usage: RDFByModularityClustering <input> <output> <numIterations>")
      System.exit(1)
    }
    val graphFile = args(0) //"src/main/resources/Clustering_sampledata.nt",
    val outputFile = args(1)
    val numIterations = args(2).toInt
    val optionsList = args.drop(3).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _             => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)

    options.foreach {
      case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
    }
    println("============================================")
    println("| RDF By Modularity Clustering example     |")
    println("============================================")

    val sparkSession = SparkSession.builder
      .master("local[*]")
      .appName(" RDF By Modularity Clustering example (" + graphFile + ")")
      .getOrCreate()
    Logger.getRootLogger.setLevel(Level.ERROR)

    RDFByModularityClusteringAlg(sparkSession.sparkContext, numIterations, graphFile, outputFile)

    sparkSession.stop

  }

}

开发者ID:SANSA-Stack，项目名称:SANSA-Examples，代码行数:47，代码来源:RDFByModularityClustering.scala

示例20: MyLog1

//设置package包名称以及导入依赖的类
package com.chapter14.Serilazition

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.log4j.LogManager
import org.apache.log4j.Level
import org.apache.log4j.Logger

object MyLog1 extends Serializable {
 def main(args: Array[String]):Unit= {
   // Stting logger level as WARN
   val log = LogManager.getRootLogger
   log.setLevel(Level.WARN)
   @transient lazy val log2 = org.apache.log4j.LogManager.getLogger("myLogger")

   // Creating Spark Context
   val conf = new SparkConf().setAppName("My App").setMaster("local[*]")
   val sc = new SparkContext(conf)

   //Started the computation and printing the logging inforamtion
   //log.warn("Started")
   //val i = 0
   val data = sc.parallelize(0 to 100000)
   data.foreach(i => log.info("My number"+ i))
   data.collect()
   log.warn("Finished")
 }
}

开发者ID:PacktPublishing，项目名称:Scala-and-Spark-for-Big-Data-Analytics，代码行数:28，代码来源:MyLog.scala

注：本文中的org.apache.log4j.Logger类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。