Scala BinaryClassificationMetrics Class Code Examples


This article collects typical usage examples of the Scala class org.apache.spark.mllib.evaluation.BinaryClassificationMetrics. If you have been wondering what BinaryClassificationMetrics is for, how to use it, or what working examples look like, the curated class examples below should help.



Five code examples of the BinaryClassificationMetrics class are shown below, ordered by popularity by default.
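
Before the full examples, here is a minimal, self-contained sketch of the core API. It assumes a local Spark session and made-up toy scores; the method names used (areaUnderROC, areaUnderPR, precisionByThreshold) are the standard BinaryClassificationMetrics API.

import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.sql.SparkSession

object MetricsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("MetricsSketch")
      .master("local[*]") // local run; the examples below target a cluster
      .getOrCreate()

    // The constructor takes an RDD of (score, label) pairs.
    val scoreAndLabels = spark.sparkContext.parallelize(Seq(
      (0.9, 1.0), (0.8, 1.0), (0.3, 0.0), (0.1, 0.0) // toy scores
    ))

    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    println("Area under ROC = " + metrics.areaUnderROC())
    println("Area under PR  = " + metrics.areaUnderPR())

    // Per-threshold curves are exposed as RDDs of (threshold, value).
    metrics.precisionByThreshold().collect().foreach { case (t, p) =>
      println(s"threshold=$t precision=$p")
    }
    spark.stop()
  }
}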

Example 1: DecisionTreeTest

// Package declaration and imported dependencies
package cn.edu.bjtu


import org.apache.spark.SparkConf
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession

object DecisionTreeTest {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("DecisionTreeTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/DecisionTree.jar"))

    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")

    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")

    // Split the data into training and test sets (30% held out for testing)
    val splits = data.randomSplit(Array(0.7, 0.3))

    val (training, test) = (splits(0), splits(1))

    // Train a DecisionTree model.
    //  Empty categoricalFeaturesInfo indicates all features are continuous.
    val numClasses = 2
    val categoricalFeaturesInfo = Map[Int, Int]()
    val impurity = "entropy" // "gini" is also supported
    val maxDepth = 14
    val maxBins = 16384

    val model = DecisionTree.trainClassifier(training, numClasses, categoricalFeaturesInfo,
      impurity, maxDepth, maxBins)

    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
      val prediction = model.predict(features)
      (prediction, label)
    }
    val metrics = new BinaryClassificationMetrics(predictionAndLabels)
    val auROC = metrics.areaUnderROC()
    println("Area under ROC = " + auROC)
    println("Sensitivity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 1.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 1.0).count().toDouble)
    println("Specificity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 0.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 0.0).count().toDouble)
    println("Accuracy = " + predictionAndLabels.filter(x => x._1 == x._2).count().toDouble / predictionAndLabels.count().toDouble)
  }
} 
Author: XiaoyuGuo | Project: DataFusionClass | Lines: 55 | Source: DecisionTreeTest.scala
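
The hand-rolled sensitivity, specificity, and accuracy computations above can also be read off a confusion matrix via MulticlassMetrics. A sketch of that alternative, assuming Spark 2.x (where the accuracy method is available) and the same predictionAndLabels RDD as in the example:

import org.apache.spark.mllib.evaluation.MulticlassMetrics

// predictionAndLabels: RDD[(Double, Double)] of (prediction, label),
// exactly as built in the example above.
val mc = new MulticlassMetrics(predictionAndLabels)
println("Sensitivity = " + mc.recall(1.0)) // recall of the positive class
println("Specificity = " + mc.recall(0.0)) // recall of the negative class
println("Accuracy = " + mc.accuracy)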


Example 2: SVMTest

// Package declaration and imported dependencies
package cn.edu.bjtu


import org.apache.spark.SparkConf
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession

object SVMTest {
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf()
      .setAppName("SVMTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/SVM.jar"))

    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")

    val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")
    // Split data into training (70%) and test (30%).
    val splits = data.randomSplit(Array(0.7, 0.3), seed = 11L)
    val training = splits(0).cache()
    val test = splits(1)

    // Run training algorithm to build the model
    val numIterations = 100
    val model = SVMWithSGD.train(training, numIterations)

    // Lower the decision threshold far below zero; predict() will then label
    // nearly every point positive. Call model.clearThreshold() instead to get
    // raw scores back from predict().
    model.setThreshold(-5000)

    // Compute raw scores on the test set.
    val scoreAndLabels = test.map { point =>
      val score = model.predict(point.features)
      (score, point.label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    val auROC = metrics.areaUnderROC()
    println("Area under ROC = " + auROC)
    println("Sensitivity = " + scoreAndLabels.filter(x => x._1 == x._2 && x._1 == 1.0).count().toDouble / scoreAndLabels.filter(x => x._2 == 1.0).count().toDouble)
    println("Specificity = " + scoreAndLabels.filter(x => x._1 == x._2 && x._1 == 0.0).count().toDouble / scoreAndLabels.filter(x => x._2 == 0.0).count().toDouble)
    println("Accuracy = " + scoreAndLabels.filter(x => x._1 == x._2).count().toDouble / scoreAndLabels.count().toDouble)
  }
} 
Author: XiaoyuGuo | Project: DataFusionClass | Lines: 52 | Source: SVMTest.scala
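
Because the threshold is forced to -5000, predict() above emits hard 0/1 labels, so the ROC curve degenerates to a single operating point. The pattern in the Spark documentation is to clear the threshold so predict() returns raw margins, which gives the metrics a full curve. A sketch under that pattern, reusing the model and test set from this example:

// Clear the threshold so predict() returns raw SVM margins.
model.clearThreshold()

val rawScoreAndLabels = test.map { point =>
  (model.predict(point.features), point.label)
}

val rawMetrics = new BinaryClassificationMetrics(rawScoreAndLabels)
println("Area under ROC (raw scores) = " + rawMetrics.areaUnderROC())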


Example 3: LogisticRegressionTest

// Package declaration and imported dependencies
package cn.edu.bjtu


import org.apache.spark.SparkConf
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession

object LogisticRegressionTest {
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf()
      .setAppName("LogisticRegressionTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/LogisticRegression.jar"))

    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")

    val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")

    val splits = data.randomSplit(Array(0.7, 0.3), seed = 11L)
    val training = splits(0).cache()
    val test = splits(1)

    // Run training algorithm to build the model
    val model = new LogisticRegressionWithLBFGS()
      .setNumClasses(2)
      .run(training)

    // Compute raw scores on the test set.
    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
      val prediction = model.predict(features)
      (prediction, label)
    }

    // Get evaluation metrics.
    val metrics = new BinaryClassificationMetrics(predictionAndLabels)
    val auROC = metrics.areaUnderROC()
    println("Area under ROC = " + auROC)
    println("Sensitivity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 1.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 1.0).count().toDouble)
    println("Specificity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 0.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 0.0).count().toDouble)
    println("Accuracy = " + predictionAndLabels.filter(x => x._1 == x._2).count().toDouble / predictionAndLabels.count().toDouble)
  }
} 
Author: XiaoyuGuo | Project: DataFusionClass | Lines: 51 | Source: LogisticRegressionTest.scala
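
As in the SVM example, predict() here returns hard labels because the model keeps its default 0.5 threshold. Clearing the threshold makes it return class-1 probabilities, which can then drive the per-threshold curves. A sketch, reusing the model and test set above:

// Probabilities instead of hard labels.
model.clearThreshold()
val probAndLabels = test.map(p => (model.predict(p.features), p.label))

val probMetrics = new BinaryClassificationMetrics(probAndLabels)
// Inspect recall at each probability threshold.
probMetrics.recallByThreshold().collect().foreach { case (t, r) =>
  println(s"threshold=$t recall=$r")
}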


Example 4: RandomForestTest

// Package declaration and imported dependencies
package cn.edu.bjtu


import org.apache.spark.SparkConf
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.RandomForest
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.SparkSession

object RandomForestTest {
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf()
      .setAppName("RandomForestTest")
      .setMaster("spark://master:7077")
      .setJars(Array("/home/hadoop/RandomForest.jar"))

    val spark = SparkSession.builder()
      .config(sparkConf)
      .getOrCreate()

    spark.sparkContext.setLogLevel("WARN")

    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(spark.sparkContext, "hdfs://master:9000/sample_formatted.txt")

    // Split the data into training and test sets (30% held out for testing)
    val splits = data.randomSplit(Array(0.7, 0.3))

    val (training, test) = (splits(0), splits(1))

    // Train a RandomForest model.
    // Empty categoricalFeaturesInfo indicates all features are continuous.
    val numClasses = 2
    val categoricalFeaturesInfo = Map[Int, Int]()
    val numTrees = 3 // Use more in practice.
    val featureSubsetStrategy = "18" // Consider 18 features per split; "auto" lets the algorithm choose.
    val impurity = "gini"
    val maxDepth = 14
    val maxBins = 16384

    val model = RandomForest.trainClassifier(training, numClasses, categoricalFeaturesInfo,
      numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins)

    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
      val prediction = model.predict(features)
      (prediction, label)
    }
    val metrics = new BinaryClassificationMetrics(predictionAndLabels)
    val auROC = metrics.areaUnderROC()
    println("Area under ROC = " + auROC)
    println("Sensitivity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 1.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 1.0).count().toDouble)
    println("Specificity = " + predictionAndLabels.filter(x => x._1 == x._2 && x._1 == 0.0).count().toDouble / predictionAndLabels.filter(x => x._2 == 0.0).count().toDouble)
    println("Accuracy = " + predictionAndLabels.filter(x => x._1 == x._2).count().toDouble / predictionAndLabels.count().toDouble)
  }
} 
Author: XiaoyuGuo | Project: DataFusionClass | Lines: 57 | Source: RandomForestTest.scala
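
If the intent is really to let the algorithm pick the number of features per split, pass "auto" rather than a fixed count. A minimal variation on the training call above:

// With several trees, "auto" resolves to sqrt(numFeatures) for classification.
val autoModel = RandomForest.trainClassifier(training, numClasses,
  categoricalFeaturesInfo, numTrees, "auto", impurity, maxDepth, maxBins)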


Example 5: BinomialValidation

// Package declaration and imported dependencies
package com.bistel.wordcount.logisticRegression

import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.LogisticRegressionModel
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.regression.LabeledPoint




class BinomialValidation(sc: SparkContext,
                         model: LogisticRegressionModel,
                         numTasks: Int) {

  // Compute validation metrics for the model over a held-out set.
  def metrics(validationSet: Array[LabeledPoint]): Quality = {
    // Split the labeled points into parallel label and feature sequences.
    val featuresLabels = validationSet.map(lbPt =>
      (lbPt.label, lbPt.features)).unzip
    val predicted_rdd = model.predict(
      sc.makeRDD(featuresLabels._2, numTasks)
    )

    // BinaryClassificationMetrics expects (score, label) pairs,
    // so zip the predictions first and the labels second.
    val scoreAndLabels = predicted_rdd.zip(
      sc.makeRDD(featuresLabels._1, numTasks))

    // A prediction within 0.1 of the true label counts as a success.
    val successes = scoreAndLabels.map {
      case (p, e) => Math.abs(e - p)
    }.filter(_ < 0.1)

    // Sum of squared errors (not averaged, despite the name).
    val msse = scoreAndLabels.map {
      case (p, e) => (e - p) * (e - p)
    }.sum

    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
    Quality(metrics.fMeasureByThreshold().collect,
      msse,
      successes.count.toDouble / validationSet.length)
  }
} 
Author: jacob119 | Project: WordCount | Lines: 40 | Source: BinomialValidation.scala
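
The Quality result type is not part of this snippet. Judging only from the call site, a compatible definition might look like the hypothetical sketch below; the field names are assumptions, not the project's actual code.

// Hypothetical reconstruction of the missing Quality type.
// fMeasureByThreshold().collect returns Array[(threshold, F-measure)].
case class Quality(fMeasures: Array[(Double, Double)],
                   msse: Double,
                   successRate: Double)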



Note: The org.apache.spark.mllib.evaluation.BinaryClassificationMetrics examples in this article were collected from open-source projects hosted on GitHub, MSDocs, and similar platforms. Copyright of the source code remains with the original authors; consult each project's license before redistributing or reusing it. Do not reproduce this article without permission.

