This article collects typical usage examples of the Scala class org.apache.hadoop.fs.FileSystem. If you have been wondering what the Scala FileSystem class is for, how to use it, or what real code that uses it looks like, the hand-picked examples below should help.
Twenty code examples of the FileSystem class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Scala code examples.
Example 1: HdfsUtils
// Set up the package name and import the required classes
package com.appleeye.spark.utils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
object HdfsUtils {
def pathExists(path: String, sc: SparkContext): Boolean = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
fs.exists(new Path(path))
}
def getFullPath(path:String, sc: SparkContext): String = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
fs.getFileStatus(new Path(path)).getPath().toString
}
def getAllFiles(path:String, sc: SparkContext): Seq[String] = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
val files = fs.listStatus(new Path(path))
files.map(_.getPath().toString)
}
}
Developer ID: MiracleZhou, Project: SparkStreaming, Lines of code: 26, Source: HdfsUtils.scala
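A minimal usage sketch (not part of the SparkStreaming project) showing how the HdfsUtils helper above might be called; the local master, application name, and /tmp/input directory are illustrative placeholders.

import com.appleeye.spark.utils.HdfsUtils
import org.apache.spark.{SparkConf, SparkContext}

object HdfsUtilsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("HdfsUtilsDemo").setMaster("local[*]"))
    val dir = "/tmp/input" // hypothetical HDFS directory
    if (HdfsUtils.pathExists(dir, sc)) {
      // Print the fully qualified path of every entry directly under the directory
      HdfsUtils.getAllFiles(dir, sc).foreach(println)
    }
    sc.stop()
  }
}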
Example 2: HdfsClientConf
// Set up the package name and import the required classes
package pub.ayada.scala.hdfsutils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
class HdfsClientConf private (val coreStiteXMLPath: String, val hdfsStiteXMLPath: String) {
private val conf = new Configuration();
conf.addResource(new Path(coreStiteXMLPath));
conf.addResource(new Path(hdfsStiteXMLPath));
def getHdfsClientConf(): Configuration = conf
def getHdpFileSystem(): FileSystem = FileSystem.get(conf);
}
object HdfsClientConf {
private var instance: HdfsClientConf = null
def getOneTimeInstance(coreStiteXMLPath: String, hdfsStiteXMLPath: String): Configuration = {
new HdfsClientConf(coreStiteXMLPath, hdfsStiteXMLPath).getHdfsClientConf()
}
def setSingltonInstance(coreStiteXMLPath: String, hdfsStiteXMLPath: String): Configuration = {
if (instance == null)
instance = new HdfsClientConf(coreStiteXMLPath, hdfsStiteXMLPath)
instance.getHdfsClientConf()
}
def getSingletonInstance(): HdfsClientConf = {
if (instance == null)
throw new NullPointerException("Instantiate HdfsClientConf before retrieving it")
instance
}
}
Developer ID: k-ayada, Project: HdfsUtils, Lines of code: 36, Source: HdfsClientConf.scala
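A brief usage sketch (not from the HdfsUtils project); the /etc/hadoop/conf site-XML locations and the /tmp existence check are placeholders. Note that setSingltonInstance must be called before getSingletonInstance, otherwise the latter throws.

import org.apache.hadoop.fs.{FileSystem, Path}
import pub.ayada.scala.hdfsutils.HdfsClientConf

object HdfsClientConfDemo {
  def main(args: Array[String]): Unit = {
    // Register the singleton from typical Hadoop client config locations (placeholders)
    HdfsClientConf.setSingltonInstance("/etc/hadoop/conf/core-site.xml", "/etc/hadoop/conf/hdfs-site.xml")
    val fs: FileSystem = HdfsClientConf.getSingletonInstance().getHdpFileSystem()
    println(fs.exists(new Path("/tmp"))) // simple connectivity check
  }
}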
Example 3: HDFS
// Set up the package name and import the required classes
package org.mireynol.util
import java.io.{BufferedInputStream, OutputStreamWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ListBuffer
import scala.io.Source
object HDFS {
def log : Logger = LoggerFactory.getLogger( HDFS.getClass )
val hadoop : FileSystem = {
val conf = new Configuration( )
conf.set( "fs.defaultFS", "hdfs://localhost:9000" )
FileSystem.get( conf )
}
def readAndMap( path : String, mapper : ( String ) => Unit ) = {
if ( hadoop.exists( new Path( path ) ) ) {
val is = new BufferedInputStream( hadoop.open( new Path( path ) ) )
Source.fromInputStream( is ).getLines( ).foreach( mapper )
}
else {
// TODO - error logic here
}
}
def write( filename : String, content : Iterator[ String ] ) = {
val path = new Path( filename )
val out = new OutputStreamWriter( hadoop.create( path, false ) )
content.foreach( str => out.write( str + "\n" ) )
out.flush( )
out.close( )
}
def ls( path : String ) : List[ String ] = {
val files = hadoop.listFiles( new Path( path ), false )
val filenames = ListBuffer[ String ]( )
while ( files.hasNext ) filenames += files.next( ).getPath( ).toString( )
filenames.toList
}
def rm( path : String, recursive : Boolean ) : Unit = {
if ( hadoop.exists( new Path( path ) ) ) {
println( "deleting file : " + path )
hadoop.delete( new Path( path ), recursive )
}
else {
println( "File/Directory " + path + " does not exist" )
log.warn( "File/Directory " + path + " does not exist" )
}
}
def cat( path : String ) = Source.fromInputStream( hadoop.open( new Path( path ) ) ).getLines( ).foreach( println )
}
Developer ID: reynoldsm88, Project: spark-drools, Lines of code: 61, Source: HDFS.scala
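A short, hypothetical driver for the HDFS object above; it assumes the namenode address hard-coded in the object (hdfs://localhost:9000) is reachable, and the /tmp/demo paths are placeholders.

import org.mireynol.util.HDFS

object HdfsObjectDemo {
  def main(args: Array[String]): Unit = {
    // Write two lines, read them back upper-cased, list the directory, then clean up
    HDFS.write("/tmp/demo/greetings.txt", Iterator("hello", "world"))
    HDFS.readAndMap("/tmp/demo/greetings.txt", line => println(line.toUpperCase))
    HDFS.ls("/tmp/demo").foreach(println)
    HDFS.rm("/tmp/demo", recursive = true)
  }
}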
Example 4: BenchmarkYarnPrepare
// Set up the package name and import the required classes
package com.microsoft.spark.perf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object BenchmarkYarnPrepare {
private def uploadFile(localPath: String, remoteWorkingDir: String): Unit = {
new Path(remoteWorkingDir).getFileSystem(new Configuration()).
copyFromLocalFile(new Path(localPath), new Path(remoteWorkingDir))
}
private def createRemoteWorkingDir(
remoteWorkingDir: String,
localJarPath: String,
sparkSubmitParamsPath: String,
benchmarkParamsPath: String): Unit = {
uploadFile(localJarPath, remoteWorkingDir + "/spark-benchmark.jar")
uploadFile(sparkSubmitParamsPath, remoteWorkingDir + "/spark.conf")
uploadFile(benchmarkParamsPath, remoteWorkingDir + "/benchmark.conf")
}
def main(args: Array[String]): Unit = {
val remoteWorkingDir = args(0)
val localJarPath = args(1)
val sparkSubmitParamsFilePath = args(2)
val benchmarkParamsFilePath = args(3)
createRemoteWorkingDir(remoteWorkingDir, localJarPath, sparkSubmitParamsFilePath,
benchmarkParamsFilePath)
}
}
Developer ID: hdinsight, Project: SparkPerf, Lines of code: 33, Source: BenchmarkYarnPrepare.scala
Example 5: BaseOutputFormat
// Set up the package name and import the required classes
package kr.acon.lib.io
import java.io.DataOutputStream
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.FileOutputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.RecordWriter
import org.apache.hadoop.util.Progressable
import org.apache.hadoop.util.ReflectionUtils
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
abstract class BaseOutputFormat extends FileOutputFormat[Long, LongOpenHashBigSet] {
@inline def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet]
@inline override def getRecordWriter(ignored: FileSystem,
job: JobConf,
name: String,
progress: Progressable) = {
val isCompressed = FileOutputFormat.getCompressOutput(job)
if (!isCompressed) {
val file = FileOutputFormat.getTaskOutputPath(job, name)
val fs = file.getFileSystem(job)
val fileOut = fs.create(file, progress)
getRecordWriter(fileOut)
} else {
val codecClass = FileOutputFormat.getOutputCompressorClass(job, classOf[GzipCodec])
val codec = ReflectionUtils.newInstance(codecClass, job)
val file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension())
val fs = file.getFileSystem(job)
val fileOut = fs.create(file, progress)
val fileOutWithCodec = new DataOutputStream(codec.createOutputStream(fileOut))
getRecordWriter(fileOutWithCodec)
}
}
}
Developer ID: chan150, Project: TrillionG, Lines of code: 39, Source: BaseOutputFormat.scala
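BaseOutputFormat is abstract: a concrete format only needs to supply a RecordWriter for the raw (possibly compression-wrapped) DataOutputStream. The subclass below is a hypothetical sketch, not part of TrillionG, that writes one tab-separated source/target pair per edge.

import java.io.DataOutputStream
import org.apache.hadoop.mapred.{RecordWriter, Reporter}
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
import kr.acon.lib.io.BaseOutputFormat

// Hypothetical concrete format that emits one "source<TAB>target" line per edge.
class TextEdgeOutputFormat extends BaseOutputFormat {
  @inline override def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet] =
    new RecordWriter[Long, LongOpenHashBigSet] {
      override def write(key: Long, value: LongOpenHashBigSet): Unit = {
        val it = value.iterator()
        while (it.hasNext) out.writeBytes(s"$key\t${it.nextLong()}\n")
      }
      override def close(reporter: Reporter): Unit = out.close()
    }
}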
Example 6: FileUtils
// Set up the package name and import the required classes
package com.asto.dmp.xxx.util
import com.asto.dmp.xxx.base.Constants
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
object FileUtils extends Logging {
private val conf = new Configuration()
conf.set("fs.defaultFS", Constants.Hadoop.DEFAULT_FS)
conf.set("mapreduce.jobtracker.address", Constants.Hadoop.JOBTRACKER_ADDRESS)
def deleteFilesInHDFS(paths: String*) = {
paths.foreach { path =>
val filePath = new Path(path)
val HDFSFilesSystem = filePath.getFileSystem(new Configuration())
if (HDFSFilesSystem.exists(filePath)) {
logInfo(s"Deleting file: $filePath")
HDFSFilesSystem.delete(filePath, true)
}
}
}
def saveAsTextFile[T <: Product](rdd: RDD[T], savePath: String) = {
deleteFilesInHDFS(savePath)
logInfo(s"Saving results to $savePath")
rdd.map(_.productIterator.mkString(Constants.OutputPath.SEPARATOR)).coalesce(1).saveAsTextFile(savePath)
}
def saveAsTextFile(text: String, savePath: String) = {
deleteFilesInHDFS(savePath)
logInfo(s"Saving results to $savePath")
val out = FileSystem.get(conf).create(new Path(savePath))
out.write(text.getBytes)
out.flush()
out.close()
}
}
Developer ID: zj-lingxin, Project: asto-sparksql, Lines of code: 42, Source: FileUtils.scala
Example 7: Converter
// Set up the package name and import the required classes
package com.dataoptimo.imgprocessing.convert
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.BytesWritable
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.hadoop.io.IOUtils
import org.apache.hadoop.io.SequenceFile
import java.io.IOException
import java.lang.IllegalArgumentException
class Converter(conf: Configuration) {
def imageToSequence(srcPath: String, dstPath: String){
try {
val fs = FileSystem.get(conf);
val inPath = new Path(srcPath);
val outPath = new Path(dstPath);
val key = new Text();
val value = new BytesWritable();
val in = fs.open(inPath);
val buffer = new Array[Byte](in.available())
in.readFully(0, buffer);
in.close();
var writer = SequenceFile.createWriter(fs, conf, outPath, key.getClass(),value.getClass());
writer.append(new Text(inPath.getName()), new BytesWritable(buffer));
IOUtils.closeStream(writer);
}
catch {
case io: IOException => println(io.getMessage)
case illegalArgument: IllegalArgumentException => println(illegalArgument.getMessage)
}
}
}
Developer ID: mfawadalam, Project: imgprocessing, Lines of code: 36, Source: Converters.scala
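A minimal caller for the converter above (not from the imgprocessing project); the namenode address and the source/destination paths are placeholders.

import org.apache.hadoop.conf.Configuration
import com.dataoptimo.imgprocessing.convert.Converter

object ConverterDemo {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    conf.set("fs.defaultFS", "hdfs://localhost:9000") // placeholder namenode address
    // Wrap a single image into a one-record SequenceFile (hypothetical paths)
    new Converter(conf).imageToSequence("/images/sample.jpg", "/sequences/sample.seq")
  }
}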
Example 8: HdfsWriteTest
// Set up the package name and import the required classes
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.scalatest._
class HdfsWriteTest extends FlatSpec with Matchers {
"Hello" should "have tests" in {
def write(uri: String, filePath: String, data: Array[Byte]) = {
// System.setProperty("HADOOP_USER_NAME", "Mariusz")
val path = new Path(filePath)
val conf = new Configuration()
conf.set("fs.defaultFS", uri)
val fs = FileSystem.get(conf)
val os = fs.create(path)
os.write(data)
os.close()
fs.close()
}
write("hdfs://0.0.0.0:19000", "hdfs://0.0.0.0:19000/user/cloudera/test.txt", "Hello World".getBytes)
}
}
Developer ID: ralreiroe, Project: embarcadero, Lines of code: 24, Source: HdfsWriteTest.scala
Example 9: main
// Set up the package name and import the required classes
package io.github.qf6101.topwords
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession
object TestTopWORDS { // enclosing object inferred from the source file name; the declaration was omitted in this excerpt
def main(args: Array[String]) {
// setup spark session
val spark = SparkSession.builder().master("local[1]").appName(this.getClass.toString).getOrCreate()
val inputFile = "test_data/story_of_stone.txt"
val outputFile = "test_data/test_output"
val files = FileSystem.get(spark.sparkContext.hadoopConfiguration)
if (files.exists(new Path(outputFile))) files.delete(new Path(outputFile), true)
val corpus = spark.sparkContext.textFile(inputFile)
new TopWORDS(
tauL = 10,
tauF = 5,
textLenThld = 2000,
useProbThld = 1E-8,
numIterations = 10,
convergeTol = 1E-3,
wordBoundaryThld = 0.0)
.run(corpus, outputFile + "/dictionary", outputFile + "/segmented_texts")
}
}
Developer ID: qf6101, Project: topwords, Lines of code: 26, Source: TestTopWORDS.scala
Example 10: SessionDataFileHDFSWriter
// Set up the package name and import the required classes
package com.malaska.spark.training.streaming.dstream.sessionization
import java.io.BufferedWriter
import java.io.FileWriter
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.io.OutputStreamWriter
import org.apache.hadoop.fs.Path
import java.util.Random
object SessionDataFileHDFSWriter {
val eol = System.getProperty("line.separator");
def main(args: Array[String]) {
if (args.length == 0) {
println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}");
return;
}
val conf = new Configuration
conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"))
conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"))
conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"))
val fs = FileSystem.get(conf)
val rootTempDir = args(0)
val rootDistDir = args(1)
val files = args(2).toInt
val loops = args(3).toInt
val waitBetweenFiles = args(4).toInt
val r = new Random
for (f <- 1 to files) {
val rootName = "/weblog." + System.currentTimeMillis()
val tmpPath = new Path(rootTempDir + rootName + ".tmp")
val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath)))
print(f + ": [")
val randomLoops = loops + r.nextInt(loops)
for (i <- 1 to randomLoops) {
writer.write(SessionDataGenerator.getNextEvent + eol)
if (i%100 == 0) {
print(".")
}
}
println("]")
writer.close
val distPath = new Path(rootDistDir + rootName + ".dat")
fs.rename(tmpPath, distPath)
Thread.sleep(waitBetweenFiles)
}
println("Done")
}
}
Developer ID: TedBear42, Project: spark_training, Lines of code: 58, Source: SessionDataFileHDFSWriter.scala
Example 11: main
// Set up the package name and import the required classes
package com.aluxian.tweeather.scripts
import java.io.File
import org.apache.hadoop.fs.FileSystem
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Minutes, Seconds}
import org.apache.spark.{SparkConf, SparkContext}
trait Script {
protected lazy val streamingTimeout = sys.props.get("tw.streaming.timeout") // in seconds
.map(_.toLong * 1000).getOrElse(-1L)
protected lazy val streamingInterval = sys.props.get("tw.streaming.interval") // in seconds
.map(s => Seconds(s.toLong)).getOrElse(Minutes(5))
protected lazy val scriptName = "Tweeather_" + getClass.getSimpleName.stripSuffix("$")
protected lazy val sc = new SparkContext(
new SparkConf()
.setIfMissing("spark.app.name", scriptName)
.setIfMissing("spark.eventLog.dir", "tw/logs")
.setIfMissing("spark.eventLog.enabled", "true")
.setIfMissing("spark.streaming.stopGracefullyOnShutdown", "true")
.setIfMissing("spark.streaming.blockInterval", "30s")
)
protected lazy val hdfs = FileSystem.get(sc.hadoopConfiguration)
protected lazy val sqlc = new SQLContext(sc)
def main(args: Array[String]) {
// Log4j properties
Option(getClass.getResource("/com/aluxian/tweeather/res/log4j.properties")) match {
case Some(url) => PropertyConfigurator.configure(url)
case None => System.err.println("Unable to load log4j.properties")
}
// Ensure the event log directory exists
new File("tw/logs").mkdirs()
}
}
Developer ID: cnajeefa, Project: Tourism-Sentiment-Analysis, Lines of code: 44, Source: Script.scala
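A hypothetical script built on the Script trait above, illustrating how the lazily created SparkContext and the super.main bootstrap are meant to be used; the WordCount object and the tw/data/tweets.txt path are assumptions, not part of the original project.

import com.aluxian.tweeather.scripts.Script

object WordCount extends Script {
  override def main(args: Array[String]): Unit = {
    super.main(args) // configure log4j and create the event-log directory
    val counts = sc.textFile("tw/data/tweets.txt") // placeholder input path
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .reduceByKey(_ + _)
    counts.take(10).foreach(println)
    sc.stop()
  }
}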
Example 12: Import
// Set up the package name and import the required classes
package spark.in.space
import java.io.File
import java.net.URI
import com.amazonaws.auth.AnonymousAWSCredentials
import com.amazonaws.event.{ProgressEvent, ProgressListener}
import com.amazonaws.services.s3.transfer.TransferManager
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
object Import {
val log = org.slf4j.LoggerFactory.getLogger("Import")
val bucket = sys.env("SPARK_S3_BUCKET_NAME")
val n1gram = "ngrams/books/20090715/eng-us-all/1gram/data"
val dlFile = "/app/googleNGram1Gram.lzo.sequenceLongString"
val publicBucket = "datasets.elasticmapreduce"
val parqFile = s"s3n://${bucket}${dlFile}.parquet"
def main(args: Array[String]): Unit = {
if(! new File(dlFile).exists()) {
println("downloading")
val tx = new TransferManager(new AnonymousAWSCredentials())
val download = tx.download(publicBucket, n1gram, new File(dlFile))
download.addProgressListener(new ProgressListener {
override def progressChanged(progressEvent: ProgressEvent): Unit = {
println(s"type: ${progressEvent.getEventType.name()} bytes: ${progressEvent.getBytes} transferred:${progressEvent.getBytesTransferred}")
}
})
download.waitForCompletion()
}
val conf = new SparkConf().setAppName("spark-singularity")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
val theRDD = sc.sequenceFile[Long,String](s"file://$dlFile", 80)
val theParsed = theRDD.map{ case (_, row) => rowToNGram(row) }
val df = theParsed.toDF
FileSystem.get(new URI(s"s3n://$bucket"), sc.hadoopConfiguration).delete(new Path(dlFile), true)
df.write.parquet(parqFile)
}
def rowToNGram(in: String) : NGram = {
val split = in.split("\t")
NGram(split(0), split(1), split(2).toInt, split(3).toInt, split(4).toInt)
}
}
case class NGram(ngram:String, year:String, occurrences:Int, pages:Int, books:Int)
Developer ID: heroku, Project: spark-singularity, Lines of code: 55, Source: Import.scala
Example 13: FileUtils
// Set up the package name and import the required classes
package com.asto.dmp.articlecate.utils
import java.io._
import com.asto.dmp.articlecate.base.Props
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
object FileUtils extends Logging {
private val conf = new Configuration()
conf.set("fs.defaultFS", Props.get("fs.defaultFS"))
conf.set("mapreduce.jobtracker.address", Props.get("mapreduce.jobtracker.address"))
private val fileSystem = FileSystem.get(conf) // HDFS file system handle
def mkdirs(folderPath: String): Unit = {
val path = new Path(folderPath)
if (!fileSystem.exists(path)) {
fileSystem.mkdirs(path)
}
}
def readContextFromHDFS(path: String) = {
readLinesFromHDFS(path).mkString("\n")
}
def readLinesFromHDFS(path: String): Array[String] = {
val pt = new Path(path)
val br: BufferedReader = new BufferedReader(new InputStreamReader(fileSystem.open(pt)))
val lines = scala.collection.mutable.ArrayBuffer[String]()
var line = br.readLine()
while (line != null) {
lines += line
line = br.readLine()
}
lines.toArray
}
}
Developer ID: luciuschina, Project: ArticleCategories, Lines of code: 42, Source: FileUtils.scala
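A small usage sketch (not part of ArticleCategories); it assumes fs.defaultFS and the jobtracker address resolve through the project's Props, and the paths are placeholders.

import com.asto.dmp.articlecate.utils.FileUtils

object FileUtilsDemo {
  def main(args: Array[String]): Unit = {
    FileUtils.mkdirs("/tmp/articles") // create the folder if it does not exist yet
    val text = FileUtils.readContextFromHDFS("/tmp/articles/sample.txt")
    println(s"read ${text.length} characters")
  }
}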
Example 14: globPath
// Set up the package name and import the required classes
package com.paypal.risk.madmen20.util
import java.io.{OutputStreamWriter, BufferedReader, InputStreamReader}
import org.apache.hadoop.fs.{FileUtil, FileSystem, Path}
import org.apache.spark.SparkContext
object HdfsUtil { // enclosing object inferred from the source file name; the declaration was omitted in this excerpt
def globPath(path: String)(implicit sc: SparkContext): Seq[String] = {
val conf = sc.hadoopConfiguration
val glob = new Path(path)
val fileSystem = FileSystem.get(conf)
val allStatus = fileSystem.globStatus(glob)
val allPath = FileUtil.stat2Paths(allStatus)
allPath.map(_.toString)
}
def deleteFolder(path: String)(implicit sc: SparkContext): Boolean = {
val conf = sc.hadoopConfiguration
val fileSystem = FileSystem.get(conf)
fileSystem.delete(new Path(path), true)
}
}
Developer ID: yanlzhang8936, Project: madmen20, Lines of code: 24, Source: HdfsUtil.scala
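A usage sketch that relies on the two functions living in an object named HdfsUtil (inferred from the source file name, as noted in the comment above); the glob pattern and output folder are placeholders.

import com.paypal.risk.madmen20.util.HdfsUtil
import org.apache.spark.{SparkConf, SparkContext}

object HdfsUtilDemo {
  def main(args: Array[String]): Unit = {
    implicit val sc: SparkContext =
      new SparkContext(new SparkConf().setAppName("HdfsUtilDemo").setMaster("local[*]"))
    // Expand a glob into concrete paths, then remove a stale output folder
    HdfsUtil.globPath("/data/events/2017-*/part-*").foreach(println)
    HdfsUtil.deleteFolder("/data/output/stale-run")
    sc.stop()
  }
}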
Example 15: BTERGeneratorTest
// Set up the package name and import the required classes
package org.dama.datasynth.common.generators.structure
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.dama.datasynth.common._
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
@RunWith(classOf[JUnitRunner])
class BTERGeneratorTest extends FlatSpec with Matchers with BeforeAndAfterAll {
"BTERGenerator " should "not crash and produce a graph in /tmp/bterEdges" in {
val currentDirectory = new java.io.File(".").getCanonicalPath
val bterGenerator = new BTERGenerator(utils.FileUtils.File("file://"+currentDirectory+"/src/main/resources/degrees/dblp"),
utils.FileUtils.File("file://"+currentDirectory+"/src/main/resources/ccs/dblp"));
bterGenerator.run(1000000, new Configuration(), "hdfs:///tmp/bterEdges")
val fileSystem = FileSystem.get(new Configuration())
fileSystem.exists(new Path("/tmp/bterEdges")) should be (true)
}
override protected def afterAll(): Unit = {
val fileSystem = FileSystem.get(new Configuration())
fileSystem.delete(new Path("/tmp/bterEdges"), true)
}
}
Developer ID: DAMA-UPC, Project: DataSynth, Lines of code: 30, Source: BTERGeneratorTest.scala
Example 16: DataProcess
// Set up the package name and import the required classes
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.spark.{SparkConf, SparkContext}
object DataProcess {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("dataProcess").setMaster(Path.localSparkMaster)
val sc = new SparkContext(conf)
val huzhouData = sc.textFile("hdfs://localhost:9000/huzhou_cell_records/HuZhou.csv").map(line => line.split(",")).collect()
val bHuzhou = sc.broadcast(huzhouData)
val config = new Configuration()
config.addResource(new Path(Path.localConfPath + "/core-site.xml"))
config.addResource(new Path(Path.localConfPath + "/hdfs-site.xml"))
config.addResource(new Path(Path.localConfPath + "/yarn-site.xml"))
val fs = FileSystem.get(config)
val list = fs.listStatus(new Path("hdfs://localhost:9000/huzhou_cell_records/"))
for (f <- list) {
if (!f.getPath().toString().endsWith(".csv")) {
val data = sc.textFile(f.getPath().toString())
val dataHuZhou = data.flatMap(line => line.split("\\|", -1))
.filter(line => line != "")
.map(line => line.split(";", -1))
.filter(city => city(34).trim != "" && city(35).trim != "")
.map(city => (city(34).trim, city(35).trim))
val dataProcess = dataHuZhou.map {
t =>
val huzhou = bHuzhou.value
var i: Int = 0
var str: String = null
var flag: Boolean = true
while (i < huzhou.length && flag) {
if (t._1 == huzhou(i)(2) && t._2 == huzhou(i)(3)) {
str = huzhou(i)(5) + "," + huzhou(i)(6)
flag = false
}
i += 1
}
str
}
val fileName = f.getPath().toString().split("/")(f.getPath().toString().split("/").length - 1)
val newPath = new Path("hdfs://localhost:9000/huzhou_cell_records_lnglat/" + fileName)
dataProcess.filter(line => line != null).saveAsTextFile(newPath.toString())
}
}
sc.stop()
}
}
Developer ID: yuanzhaokang, Project: ParallelizeHeatmap, Lines of code: 55, Source: DataProcess.scala
Example 17: ImageUtils
// Set up the package name and import the required classes
import java.awt.image.BufferedImage
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import javax.imageio.ImageIO
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object ImageUtils {
def readImage(hdfsPath: String): BufferedImage = {
val configuration = new Configuration()
configuration.addResource(new Path(Path.localConfPath + "/core-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hadoop/hdfs-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hadoop/yarn-site.xml"))
val fs = FileSystem.get(configuration)
val is = fs.open(new Path(hdfsPath))
val buffer = IOUtils.toByteArray(is)
val out = new ByteArrayOutputStream()
out.write(buffer, 0, buffer.length)
val b = out.toByteArray
out.close()
val picture = ImageIO.read(new ByteArrayInputStream(b))
is.close()
picture
}
def writeImage(image: BufferedImage, hdfsPath: String): Unit = {
val configuration = new Configuration()
configuration.addResource(new Path(Path.localConfPath + "/core-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hdfs-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/yarn-site.xml"))
val fs = FileSystem.get(configuration)
val os = fs.create(new Path(hdfsPath))
val baos = new ByteArrayOutputStream()
val ios = ImageIO.createImageOutputStream(baos)
ImageIO.write(image, "png", ios)
val is = new ByteArrayInputStream(baos.toByteArray)
IOUtils.copy(is, os)
is.close()
ios.close()
baos.close()
os.close()
}
}
Developer ID: yuanzhaokang, Project: ParallelizeHeatmap, Lines of code: 48, Source: ImageUtils.scala
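A short, hypothetical round-trip through the helpers above; the tile paths are placeholders and the cluster configuration is still resolved through the project's Path.localConfPath.

object ImageUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Read a PNG from HDFS, print its dimensions, then write it back under a new path
    val img = ImageUtils.readImage("/heatmap/input/tile_0_0.png")
    println(s"${img.getWidth} x ${img.getHeight}")
    ImageUtils.writeImage(img, "/heatmap/output/tile_0_0.png")
  }
}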
Example 18: PeapodGenerator
// Set up the package name and import the required classes
package generic
import java.io.File
import java.net.URI
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.hadoop.fs.{FileSystem, Path}
import peapod.Peapod
import sativum.{Hive, Sativum}
import scala.util.Random
object PeapodGenerator {
def peapod() = {
val sdf = new SimpleDateFormat("ddMMyy-hhmmss")
val rawPath = System.getProperty("java.io.tmpdir") + "workflow-" + sdf.format(new Date()) + Random.nextInt()
val path = new Path("file://",rawPath.replace("\\","/")).toString
val fs = FileSystem.get(new URI(path), Spark.sc.hadoopConfiguration)
fs.mkdirs(new Path(path))
fs.deleteOnExit(new Path(path))
val w = new Peapod(
path=path,
raw="")(generic.Spark.sc)
w
}
def sativum() = {
val sdf = new SimpleDateFormat("ddMMyy-hhmmss")
val rawPath = System.getProperty("java.io.tmpdir") + "workflow-" + sdf.format(new Date()) + Random.nextInt()
val path = new Path("file://",rawPath.replace("\\","/")).toString
val fs = FileSystem.get(new URI(path), Spark.sc.hadoopConfiguration)
fs.mkdirs(new Path(path))
fs.deleteOnExit(new Path(path))
val w = new Sativum(
path= path,
raw="")(generic.Spark.sc)
w
}
}
Developer ID: mindfulmachines, Project: sativum, Lines of code: 41, Source: PeapodGenerator.scala
Example 19: Configurations
// Set up the package name and import the required classes
package org.ieee.codemeow.geometric.spark
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.common.base.Preconditions
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object Configurations {
def fromFile(_path: String): MainConfiguration ={
val config = new Configuration
val path = new Path(_path)
val fs = FileSystem.get(path.toUri, config)
val file = fs.open(path)
val mapper = new ObjectMapper(new YAMLFactory())
mapper.registerModule(DefaultScalaModule)
mapper.readValue(file, classOf[MainConfiguration])
}
}
class MainConfiguration (
@JsonProperty("appName") _appName: String,
@JsonProperty("bbox") _bbox: (Double, Double, Double, Double), // lat1, lon1, lat2, lon2
@JsonProperty("layers") _layers: Array[LayerConfiguration],
@JsonProperty("sequenceFileDir") _sequenceFileDir: String
) extends Serializable{
val appName = Preconditions.checkNotNull(_appName, "appName cannot be null": Object)
val bbox = _bbox
val layers = Preconditions.checkNotNull(_layers, "layers cannot be null": Object)
val sequenceFileDir = Preconditions.checkNotNull(_sequenceFileDir, "sequenceFileDir cannot be null": Object)
}
class LayerConfiguration (
@JsonProperty("layerName") _layerName: String,
@JsonProperty("minZoom") _minZoom: Int,
@JsonProperty("maxZoom") _maxZoom: Int,
@JsonProperty("dataProvider") _dataProvider: String,
@JsonProperty("kwargs") _kwargs: Map[String, Any]
) extends Serializable{
val layerName = Preconditions.checkNotNull(_layerName, "layerName cannot be null": Object)
val minZoom = Preconditions.checkNotNull(_minZoom, "minZoom cannot be null": Object)
val maxZoom = Preconditions.checkNotNull(_maxZoom, "maxZoom cannot be null": Object)
val dataProvider = Preconditions.checkNotNull(_dataProvider, "dataProvider cannot be null": Object)
val kwargs = Preconditions.checkNotNull(_kwargs, "kwargs cannot be null": Object)
}
Developer ID: codemeow5, Project: Vector-Tile-Spark-Process, Lines of code: 52, Source: Configurations.scala
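A minimal loading sketch (not part of Vector-Tile-Spark-Process); the YAML path is a placeholder and may point at HDFS or the local file system, since fromFile resolves the FileSystem from the path's URI.

import org.ieee.codemeow.geometric.spark.Configurations

object ConfigurationsDemo {
  def main(args: Array[String]): Unit = {
    val conf = Configurations.fromFile("hdfs:///jobs/vector-tiles/config.yml") // placeholder path
    println(s"appName=${conf.appName}, layers=${conf.layers.length}")
  }
}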
Example 20: HdfsPath
// Set up the package name and import the required classes
package org.karps.ops
import scala.util.{Failure, Success, Try}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession
import karps.core.{interface => I}
import karps.core.{computation => C}
case class HdfsPath(s: String)
case class HdfsStamp(s: String)
case class HdfsResourceResult(
stampReturnPath: HdfsPath,
stampReturnError: Option[String],
stampReturn: Option[HdfsStamp])
object HdfsResourceResult {
def toProto(r: HdfsResourceResult): I.HdfsResourceStatus = {
var x = I.HdfsResourceStatus(
path = Some(C.ResourcePath(r.stampReturnPath.s)))
for (txt <- r.stampReturnError) {
x = x.withError(txt)
}
for (s <- r.stampReturn) {
x = x.withReturn(s.s)
}
x
}
}
object SourceStamps {
def getStamps(sess: SparkSession, ps: Seq[HdfsPath]): Seq[HdfsResourceResult] = {
val fs = FileSystem.get(sess.sparkContext.hadoopConfiguration)
ps.map(getStamp(fs, _)).zip(ps).map {
case (Success(s), p) => HdfsResourceResult(p, None, Option(s))
case (Failure(e), p) =>
HdfsResourceResult(p, Option(e.getMessage), None)
}
}
private def getStamp(fs: FileSystem, p: HdfsPath): Try[HdfsStamp] = {
Try {
val p2 = new Path(p.s)
val s = fs.getFileStatus(p2)
HdfsStamp(s.getModificationTime.toString)
}
}
}
Developer ID: tjhunter, Project: karps, Lines of code: 53, Source: stamps.scala
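A brief, hypothetical driver for SourceStamps above; the local Spark session and the two input paths are placeholders.

import org.apache.spark.sql.SparkSession
import org.karps.ops.{HdfsPath, SourceStamps}

object StampsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("StampsDemo").getOrCreate()
    // Each result carries either a modification-time stamp or an error message
    val results = SourceStamps.getStamps(spark, Seq(HdfsPath("/data/in.csv"), HdfsPath("/data/missing.csv")))
    results.foreach(println)
    spark.stop()
  }
}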
Note: the org.apache.hadoop.fs.FileSystem class examples in this article were compiled from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors; please consult each project's license before using or redistributing the code, and do not reproduce it without permission.