本文整理汇总了Scala中org.apache.spark.SparkException类的典型用法代码示例。如果您正苦于以下问题：Scala SparkException类的具体用法？Scala SparkException怎么用？Scala SparkException使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SparkException类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: LocalIndexToString
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.IndexToString
/**
 * Applies an [[IndexToString]] transformer to a [[LocalData]] instance: every numeric index
 * found in the input column is replaced by the label stored at that position.
 *
 * @param sparkTransformer transformer providing the labels and the input/output column names
 */
class LocalIndexToString(override val sparkTransformer: IndexToString) extends LocalTransformer[IndexToString] {

  /**
   * Converts the input column to labels and attaches the result as the output column.
   * When the input column is absent the data is returned unchanged.
   *
   * @throws SparkException if an index (truncated to Int) lies outside the label array
   * @throws IllegalArgumentException if a cell of the input column is not a Double
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { inputColumn =>
      val labelArray = sparkTransformer.getLabels

      // Looks up the label for one index; the fractional part is dropped by toInt.
      def labelFor(index: Double): String = {
        val position = index.toInt
        if (position < 0 || position >= labelArray.length) {
          throw new SparkException(s"Unseen index: $index ??")
        }
        labelArray(position)
      }

      val labelColumn = LocalDataColumn(sparkTransformer.getOutputCol, inputColumn.data map {
        case value: Double => labelFor(value)
        case other => throw new IllegalArgumentException(s"Unknown data to index: $other")
      })
      localData.withColumn(labelColumn)
    }.getOrElse(localData)
}
/** Companion that rebuilds an [[IndexToString]] from metadata and wraps it for local use. */
object LocalIndexToString extends LocalModel[IndexToString] {

  /**
   * Creates an IndexToString reflectively through its single-String constructor (made
   * accessible first) and configures it from the "labels", "inputCol" and "outputCol"
   * entries of the metadata parameter map. The `data` argument is not read.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): IndexToString = {
    val uidConstructor = classOf[IndexToString].getDeclaredConstructor(classOf[String])
    uidConstructor.setAccessible(true)

    val transformer = uidConstructor.newInstance(metadata.uid)
    val labelled = transformer.setLabels(metadata.paramMap("labels").asInstanceOf[List[String]].toArray)
    val withInput = labelled.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps the given Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: IndexToString): LocalTransformer[IndexToString] =
    new LocalIndexToString(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:43,代码来源:LocalIndexToString.scala
示例2: App3PublishFailAndRecover
//设置package包名称以及导入依赖的类
package com.buransky.understandingSparkStreamingState
import net.manub.embeddedkafka.EmbeddedKafka._
import BaseApp._
import org.apache.spark.SparkException
/**
 * Publishes the messages "a" to "d" to an embedded Kafka, runs the stateful stream once with
 * a failure configured on message "c", then runs it a second time with the failure switched
 * off to exercise recovery.
 */
object App3PublishFailAndRecover extends BaseApp {

  /**
   * @param args only the number of arguments is used: it controls how often the batch of
   *             four messages is published
   */
  override def main(args: Array[String]): Unit = {
    withRunningKafka {
      // NOTE(review): `0 to args.length` is inclusive, so the batch is published
      // args.length + 1 times (once when no arguments are given) — confirm this is intended.
      for (_ <- 0 to args.length) {
        Seq("a", "b", "c", "d").foreach { message =>
          publishStringMessageToKafka(kafkaTopic, message)
        }
      }

      // First step is to simulate a failure ...
      BaseApp.failOn = "c"
      BaseApp.murder = false
      try {
        withSsc() { inputStream =>
          inputStream.mapWithState(stringStateSpec)
        }
      }
      catch {
        // The cause may be null; wrapping it in Option keeps the guard from throwing a
        // NullPointerException that would hide the original exception.
        case ex: SparkException if Option(ex.getCause).exists(_.getMessage == "Fail!") =>
          // This is expected
        case other: Throwable =>
          // Anything else is unexpected: log it and let it propagate unchanged.
          log.error("WTF", other)
          throw other
      }

      // ... second step is to try to recover from it.
      BaseApp.failOn = ""
      withSsc() { inputStream =>
        inputStream.mapWithState(stringStateSpec)
      }
    }
  }
}
开发者ID:RadoBuransky,项目名称:understanding-spark-streaming-state,代码行数:41,代码来源:App3PublishFailAndRecover.scala
示例3: LocalStringIndexerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.spark_ml_serving.preprocessors
import io.hydrosphere.spark_ml_serving._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.StringIndexerModel
import scala.collection.mutable
/**
 * Applies a [[StringIndexerModel]] to a [[LocalData]] instance: every string of the input
 * column is replaced by the position of that string in the model's label array.
 *
 * @param sparkTransformer model providing the labels and the input/output column names
 */
class LocalStringIndexerModel(override val sparkTransformer: StringIndexerModel) extends LocalTransformer[StringIndexerModel] {

  /**
   * Indexes the input column and attaches the result as the output column.
   * When the input column is absent the data is returned unchanged.
   *
   * @throws SparkException if a value is not among the model's labels
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { inputColumn =>
      // label -> position; for a repeated label the later position overwrites the earlier one.
      val indexByLabel: mutable.HashMap[String, Double] = mutable.HashMap(
        sparkTransformer.labels.zipWithIndex.map { case (label, position) =>
          label -> position.toDouble
        }: _*
      )

      def indexOf(label: String): Double =
        indexByLabel.getOrElse(label, throw new SparkException(s"Unseen label: $label."))

      val indexColumn = LocalDataColumn(sparkTransformer.getOutputCol, inputColumn.data map { feature =>
        indexOf(feature.asInstanceOf[String])
      })
      localData.withColumn(indexColumn)
    }.getOrElse(localData)
}
/** Companion that rebuilds a [[StringIndexerModel]] from saved state and wraps it for local use. */
object LocalStringIndexerModel extends LocalModel[StringIndexerModel] {

  /**
   * Builds the model from the metadata uid and the "labels" entry of `data`, then applies the
   * "inputCol", "outputCol" and "handleInvalid" entries of the metadata parameter map.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): StringIndexerModel = {
    val uid = metadata.uid
    val labels = data("labels").asInstanceOf[List[String]].toArray
    val model = new StringIndexerModel(uid, labels)

    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    val withOutput = withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    withOutput.setHandleInvalid(metadata.paramMap("handleInvalid").asInstanceOf[String])
  }

  /** Wraps the given Spark model in its local counterpart. */
  override implicit def getTransformer(transformer: StringIndexerModel): LocalTransformer[StringIndexerModel] =
    new LocalStringIndexerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:spark-ml-serving,代码行数:50,代码来源:LocalStringIndexerModel.scala
示例4: ContentToHash
//设置package包名称以及导入依赖的类
package com.xuzq.hotNews
import org.apache.spark.SparkException
import org.apache.spark.unsafe.hash.Murmur3_x86_32._
import org.apache.spark.unsafe.types.UTF8String
/** Hashes values with the Murmur3 x86 32-bit functions and maps the hash into a bucket range. */
class ContentToHash {
  /** Seed passed to every Murmur3 call; also the hash returned for null. */
  val seed = 42

  /** Returns the bucket of `word` in the range defined by `mod`. */
  def getHashCode(word: String, mod: Int): Int =
    nonNegativeMod(murmur3Hash(word), mod)

  /** Remainder of `x` by `mod`, shifted up by `mod` when the raw remainder is negative. */
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val remainder = x % mod
    if (remainder < 0) remainder + mod else remainder
  }

  /**
   * Hashes a primitive or String value; null hashes to the seed itself.
   *
   * @throws SparkException for any other runtime type
   */
  def murmur3Hash(term: Any): Int = term match {
    case null => seed
    case flag: Boolean =>
      val asInt = if (flag) 1 else 0
      hashInt(asInt, seed)
    case byteValue: Byte => hashInt(byteValue.toInt, seed)
    case shortValue: Short => hashInt(shortValue.toInt, seed)
    case intValue: Int => hashInt(intValue, seed)
    case longValue: Long => hashLong(longValue, seed)
    case floatValue: Float => hashInt(java.lang.Float.floatToIntBits(floatValue), seed)
    case doubleValue: Double => hashLong(java.lang.Double.doubleToLongBits(doubleValue), seed)
    case text: String =>
      // Hash the UTF-8 bytes of the string rather than its UTF-16 chars.
      val encoded = UTF8String.fromString(text)
      hashUnsafeBytes(encoded.getBaseObject, encoded.getBaseOffset, encoded.numBytes(), seed)
    case _ =>
      throw new SparkException("HashingTF with murmur3 algorithm does not " +
        s"support type ${term.getClass.getCanonicalName} of input data.")
  }
}
开发者ID:ZanderXu,项目名称:HotNews,代码行数:39,代码来源:ContentToHash.scala
示例5: SimhashIndexing
//设置package包名称以及导入依赖的类
package io.gzet.story
import java.net.URL
import com.datastax.spark.connector._
import io.gzet.story.model.Article
import io.gzet.story.util.SimhashUtils._
import io.gzet.story.util.{HtmlFetcher, Tokenizer}
import io.gzet.utils.spark.gdelt.GKGParser
import org.apache.lucene.analysis.en.EnglishAnalyzer
import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException}
import scala.util.Try
/**
 * Spark job: reads GDELT GKG records from the directory given as first argument, fetches
 * and tokenizes the documents they reference, and saves one Article with its simhash to
 * Cassandra for every document with more than `minWords` tokens.
 */
object SimhashIndexing extends SimpleConfig with Logging {

  /**
   * @param args the first element is the GDELT input directory
   * @throws SparkException if no argument is supplied
   */
  def main(args: Array[String]): Unit = {
    // Validate the arguments first so that a usage error never creates a SparkContext.
    val gdeltInputDir = args.headOption.getOrElse(
      throw new SparkException("usage: <gdeltInputDir>")
    )

    val sc = new SparkContext(new SparkConf().setAppName("GDELT Indexing"))
    try {
      val gkgRDD = sc.textFile(gdeltInputDir)
        .map(GKGParser.toJsonGKGV2)
        .map(GKGParser.toCaseClass2)

      // Keep only distinct document ids that parse as a URL ("NA" is dropped by the filter).
      val urlRDD = gkgRDD.map(g => g.documentId.getOrElse("NA"))
        .filter(url => Try(new URL(url)).isSuccess)
        .distinct()
        .repartition(partitions)

      // One fetcher per partition instead of one per URL.
      val contentRDD = urlRDD.mapPartitions { it =>
        val html = new HtmlFetcher(gooseConnectionTimeout, gooseSocketTimeout)
        it.map(html.fetch)
      }

      // One analyzer per partition; documents with too few tokens are discarded.
      val corpusRDD = contentRDD.mapPartitions { it =>
        val analyzer = new EnglishAnalyzer()
        it.map(content => (content, Tokenizer.lucene(content.body, analyzer)))
      }.filter { case (_, corpus) =>
        corpus.length > minWords
      }

      //CREATE TABLE gzet.articles ( hash int PRIMARY KEY, url text, title text, body text );
      corpusRDD.mapValues(_.mkString(" ").simhash).map { case (content, simhash) =>
        Article(simhash, content.body, content.title, content.url)
      }.saveToCassandra(cassandraKeyspace, cassandraTable)
    } finally {
      // Stop the context whether the job succeeded or failed.
      sc.stop()
    }
  }
}
开发者ID:PacktPublishing,项目名称:Mastering-Spark-for-Data-Science,代码行数:54,代码来源:SimhashIndexing.scala
示例6: LocalIndexToString
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.IndexToString
/**
 * Applies an [[IndexToString]] transformer to a [[LocalData]] instance: every numeric index
 * found in the input column is replaced by the label stored at that position.
 *
 * @param sparkTransformer transformer providing the labels and the input/output column names
 */
class LocalIndexToString(override val sparkTransformer: IndexToString) extends LocalTransformer[IndexToString] {

  /**
   * Converts the input column to labels and attaches the result as the output column.
   * When the input column is absent the data is returned unchanged.
   *
   * @throws SparkException if an index (truncated to Int) lies outside the label array
   * @throws IllegalArgumentException if a cell of the input column is not a Double
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { inputColumn =>
      val labelArray = sparkTransformer.getLabels

      // Looks up the label for one index; the fractional part is dropped by toInt.
      def labelFor(index: Double): String = {
        val position = index.toInt
        if (position < 0 || position >= labelArray.length) {
          throw new SparkException(s"Unseen index: $index ??")
        }
        labelArray(position)
      }

      val labelColumn = LocalDataColumn(sparkTransformer.getOutputCol, inputColumn.data map {
        case value: Double => labelFor(value)
        case other => throw new IllegalArgumentException(s"Unknown data to index: $other")
      })
      localData.withColumn(labelColumn)
    }.getOrElse(localData)
}
/** Companion that rebuilds an [[IndexToString]] from metadata and wraps it for local use. */
object LocalIndexToString extends LocalModel[IndexToString] {

  /**
   * Creates an IndexToString reflectively through its single-String constructor (made
   * accessible first) and configures it from the "labels", "inputCol" and "outputCol"
   * entries of the metadata parameter map. The `data` argument is not read.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): IndexToString = {
    val uidConstructor = classOf[IndexToString].getDeclaredConstructor(classOf[String])
    uidConstructor.setAccessible(true)

    val transformer = uidConstructor.newInstance(metadata.uid)
    val labelled = transformer.setLabels(metadata.paramMap("labels").asInstanceOf[List[String]].toArray)
    val withInput = labelled.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
  }

  /** Wraps the given Spark transformer in its local counterpart. */
  override implicit def getTransformer(transformer: IndexToString): LocalTransformer[IndexToString] =
    new LocalIndexToString(transformer)
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:43,代码来源:LocalIndexToString.scala
示例7: LocalStringIndexerModel
//设置package包名称以及导入依赖的类
package io.hydrosphere.mist.api.ml.preprocessors
import io.hydrosphere.mist.api.ml._
import org.apache.spark.SparkException
import org.apache.spark.ml.feature.StringIndexerModel
import scala.collection.mutable
/**
 * Applies a [[StringIndexerModel]] to a [[LocalData]] instance: every string of the input
 * column is replaced by the position of that string in the model's label array.
 *
 * @param sparkTransformer model providing the labels and the input/output column names
 */
class LocalStringIndexerModel(override val sparkTransformer: StringIndexerModel) extends LocalTransformer[StringIndexerModel] {

  /**
   * Indexes the input column and attaches the result as the output column.
   * When the input column is absent the data is returned unchanged.
   *
   * @throws SparkException if a value is not among the model's labels
   */
  override def transform(localData: LocalData): LocalData =
    localData.column(sparkTransformer.getInputCol).map { inputColumn =>
      // label -> position; for a repeated label the later position overwrites the earlier one.
      val indexByLabel: mutable.HashMap[String, Double] = mutable.HashMap(
        sparkTransformer.labels.zipWithIndex.map { case (label, position) =>
          label -> position.toDouble
        }: _*
      )

      def indexOf(label: String): Double =
        indexByLabel.getOrElse(label, throw new SparkException(s"Unseen label: $label."))

      val indexColumn = LocalDataColumn(sparkTransformer.getOutputCol, inputColumn.data map { feature =>
        indexOf(feature.asInstanceOf[String])
      })
      localData.withColumn(indexColumn)
    }.getOrElse(localData)
}
/** Companion that rebuilds a [[StringIndexerModel]] from saved state and wraps it for local use. */
object LocalStringIndexerModel extends LocalModel[StringIndexerModel] {

  /**
   * Builds the model from the metadata uid and the "labels" entry of `data`, then applies the
   * "inputCol", "outputCol" and "handleInvalid" entries of the metadata parameter map.
   */
  override def load(metadata: Metadata, data: Map[String, Any]): StringIndexerModel = {
    val uid = metadata.uid
    val labels = data("labels").asInstanceOf[List[String]].toArray
    val model = new StringIndexerModel(uid, labels)

    val withInput = model.setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
    val withOutput = withInput.setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    withOutput.setHandleInvalid(metadata.paramMap("handleInvalid").asInstanceOf[String])
  }

  /** Wraps the given Spark model in its local counterpart. */
  override implicit def getTransformer(transformer: StringIndexerModel): LocalTransformer[StringIndexerModel] =
    new LocalStringIndexerModel(transformer)
}
开发者ID:Hydrospheredata,项目名称:mist,代码行数:50,代码来源:LocalStringIndexerModel.scala
示例8: StructTypeToMleap
//设置package包名称以及导入依赖的类
package org.apache.spark.ml.mleap.converter
import com.truecar.mleap.runtime.types
import org.apache.spark.SparkException
import org.apache.spark.mllib.linalg.VectorUDT
import org.apache.spark.sql.types._
/**
 * Converts a Spark SQL schema into the corresponding MLeap struct type.
 *
 * @param schema the Spark schema to convert
 */
case class StructTypeToMleap(schema: StructType) {

  /**
   * Maps every field of the schema to an MLeap field with the same name. Numeric and boolean
   * columns become doubles; strings, vectors and string arrays map to their MLeap equivalents.
   *
   * @throws SparkException if a field has any other data type
   */
  def toMleap: types.StructType = {
    val leapFields = schema.fields.map { sparkField =>
      val sparkType = sparkField.dataType
      val typeName = sparkType.typeName

      val leapType = sparkType match {
        case _: NumericType | BooleanType => types.DoubleType
        case _: StringType => types.StringType
        case _: VectorUDT => types.VectorType
        case array: ArrayType if array.elementType == StringType => types.StringArrayType
        case _ => throw new SparkException(s"unsupported MLeap datatype: $typeName")
      }

      types.StructField(sparkField.name, leapType)
    }

    types.StructType(leapFields)
  }
}
开发者ID:TrueCar,项目名称:mleap,代码行数:28,代码来源:StructTypeToMleap.scala
注：本文中的org.apache.spark.SparkException类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论