This article collects typical usage examples of the Scala class org.apache.hadoop.fs.FileSystem. If you have been wondering what the Scala FileSystem class is for, how to use it, or what real code that uses it looks like, the hand-picked examples below should help.
Twenty code examples of the FileSystem class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Scala code examples.
Example 1: HdfsUtils
// Set up the package name and import the required classes
package com.appleeye.spark.utils
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
object HdfsUtils {
def pathExists(path: String, sc: SparkContext): Boolean = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
fs.exists(new Path(path))
}
def getFullPath(path:String, sc: SparkContext): String = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
fs.getFileStatus(new Path(path)).getPath().toString
}
def getAllFiles(path:String, sc: SparkContext): Seq[String] = {
val conf = sc.hadoopConfiguration
val fs = FileSystem.get(conf)
val files = fs.listStatus(new Path(path))
files.map(_.getPath().toString)
}
}
Developer ID: MiracleZhou, Project: SparkStreaming, Lines of code: 26, Source: HdfsUtils.scala
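A minimal usage sketch (not part of the SparkStreaming project) showing how the HdfsUtils helper above might be called; the local master, application name, and /tmp/input directory are illustrative placeholders.

import com.appleeye.spark.utils.HdfsUtils
import org.apache.spark.{SparkConf, SparkContext}

object HdfsUtilsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("HdfsUtilsDemo").setMaster("local[*]"))
    val dir = "/tmp/input" // hypothetical HDFS directory
    if (HdfsUtils.pathExists(dir, sc)) {
      // Print the fully qualified path of every entry directly under the directory
      HdfsUtils.getAllFiles(dir, sc).foreach(println)
    }
    sc.stop()
  }
}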
Example 2: HdfsClientConf
// Set up the package name and import the required classes
package pub.ayada.scala.hdfsutils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
class HdfsClientConf private (val coreStiteXMLPath: String, val hdfsStiteXMLPath: String) {
private val conf = new Configuration();
conf.addResource(new Path(coreStiteXMLPath));
conf.addResource(new Path(hdfsStiteXMLPath));
def getHdfsClientConf(): Configuration = conf
def getHdpFileSystem(): FileSystem = FileSystem.get(conf);
}
object HdfsClientConf {
private var instance: HdfsClientConf = null
def getOneTimeInstance(coreStiteXMLPath: String, hdfsStiteXMLPath: String): Configuration = {
new HdfsClientConf(coreStiteXMLPath, hdfsStiteXMLPath).getHdfsClientConf()
}
def setSingltonInstance(coreStiteXMLPath: String, hdfsStiteXMLPath: String): Configuration = {
if (instance == null)
instance = new HdfsClientConf(coreStiteXMLPath, hdfsStiteXMLPath)
instance.getHdfsClientConf()
}
def getSingletonInstance(): HdfsClientConf = {
if (instance == null)
throw new NullPointerException("Instantiate HdfsClientConf before retrieving it")
instance
}
}
Developer ID: k-ayada, Project: HdfsUtils, Lines of code: 36, Source: HdfsClientConf.scala
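A brief usage sketch (not from the HdfsUtils project); the /etc/hadoop/conf site-XML locations and the /tmp existence check are placeholders. Note that setSingltonInstance must be called before getSingletonInstance, otherwise the latter throws.

import org.apache.hadoop.fs.{FileSystem, Path}
import pub.ayada.scala.hdfsutils.HdfsClientConf

object HdfsClientConfDemo {
  def main(args: Array[String]): Unit = {
    // Register the singleton from typical Hadoop client config locations (placeholders)
    HdfsClientConf.setSingltonInstance("/etc/hadoop/conf/core-site.xml", "/etc/hadoop/conf/hdfs-site.xml")
    val fs: FileSystem = HdfsClientConf.getSingletonInstance().getHdpFileSystem()
    println(fs.exists(new Path("/tmp"))) // simple connectivity check
  }
}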
Example 3: HDFS
// Set up the package name and import the required classes
package org.mireynol.util
import java.io.{BufferedInputStream, OutputStreamWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.mutable.ListBuffer
import scala.io.Source
object HDFS {
def log : Logger = LoggerFactory.getLogger( HDFS.getClass )
val hadoop : FileSystem = {
val conf = new Configuration( )
conf.set( "fs.defaultFS", "hdfs://localhost:9000" )
FileSystem.get( conf )
}
def readAndMap( path : String, mapper : ( String ) => Unit ) = {
if ( hadoop.exists( new Path( path ) ) ) {
val is = new BufferedInputStream( hadoop.open( new Path( path ) ) )
Source.fromInputStream( is ).getLines( ).foreach( mapper )
}
else {
// TODO - error logic here
}
}
def write( filename : String, content : Iterator[ String ] ) = {
val path = new Path( filename )
val out = new OutputStreamWriter( hadoop.create( path, false ) )
content.foreach( str => out.write( str + "\n" ) )
out.flush( )
out.close( )
}
def ls( path : String ) : List[ String ] = {
val files = hadoop.listFiles( new Path( path ), false )
val filenames = ListBuffer[ String ]( )
while ( files.hasNext ) filenames += files.next( ).getPath( ).toString( )
filenames.toList
}
def rm( path : String, recursive : Boolean ) : Unit = {
if ( hadoop.exists( new Path( path ) ) ) {
println( "deleting file : " + path )
hadoop.delete( new Path( path ), recursive )
}
else {
println( "File/Directory " + path + " does not exist" )
log.warn( "File/Directory " + path + " does not exist" )
}
}
def cat( path : String ) = Source.fromInputStream( hadoop.open( new Path( path ) ) ).getLines( ).foreach( println )
}
Developer ID: reynoldsm88, Project: spark-drools, Lines of code: 61, Source: HDFS.scala
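A short, hypothetical driver for the HDFS object above; it assumes the namenode address hard-coded in the object (hdfs://localhost:9000) is reachable, and the /tmp/demo paths are placeholders.

import org.mireynol.util.HDFS

object HdfsObjectDemo {
  def main(args: Array[String]): Unit = {
    // Write two lines, read them back upper-cased, list the directory, then clean up
    HDFS.write("/tmp/demo/greetings.txt", Iterator("hello", "world"))
    HDFS.readAndMap("/tmp/demo/greetings.txt", line => println(line.toUpperCase))
    HDFS.ls("/tmp/demo").foreach(println)
    HDFS.rm("/tmp/demo", recursive = true)
  }
}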
Example 4: BenchmarkYarnPrepare
// Set up the package name and import the required classes
package com.microsoft.spark.perf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object BenchmarkYarnPrepare {
private def uploadFile(localPath: String, remoteWorkingDir: String): Unit = {
new Path(remoteWorkingDir).getFileSystem(new Configuration()).
copyFromLocalFile(new Path(localPath), new Path(remoteWorkingDir))
}
private def createRemoteWorkingDir(
remoteWorkingDir: String,
localJarPath: String,
sparkSubmitParamsPath: String,
benchmarkParamsPath: String): Unit = {
uploadFile(localJarPath, remoteWorkingDir + "/spark-benchmark.jar")
uploadFile(sparkSubmitParamsPath, remoteWorkingDir + "/spark.conf")
uploadFile(benchmarkParamsPath, remoteWorkingDir + "/benchmark.conf")
}
def main(args: Array[String]): Unit = {
val remoteWorkingDir = args(0)
val localJarPath = args(1)
val sparkSubmitParamsFilePath = args(2)
val benchmarkParamsFilePath = args(3)
createRemoteWorkingDir(remoteWorkingDir, localJarPath, sparkSubmitParamsFilePath,
benchmarkParamsFilePath)
}
}
Developer ID: hdinsight, Project: SparkPerf, Lines of code: 33, Source: BenchmarkYarnPrepare.scala
Example 5: BaseOutputFormat
// Set up the package name and import the required classes
package kr.acon.lib.io
import java.io.DataOutputStream
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.FileOutputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.RecordWriter
import org.apache.hadoop.util.Progressable
import org.apache.hadoop.util.ReflectionUtils
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
abstract class BaseOutputFormat extends FileOutputFormat[Long, LongOpenHashBigSet] {
@inline def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet]
@inline override def getRecordWriter(ignored: FileSystem,
job: JobConf,
name: String,
progress: Progressable) = {
val isCompressed = FileOutputFormat.getCompressOutput(job)
if (!isCompressed) {
val file = FileOutputFormat.getTaskOutputPath(job, name)
val fs = file.getFileSystem(job)
val fileOut = fs.create(file, progress)
getRecordWriter(fileOut)
} else {
val codecClass = FileOutputFormat.getOutputCompressorClass(job, classOf[GzipCodec])
val codec = ReflectionUtils.newInstance(codecClass, job)
val file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension())
val fs = file.getFileSystem(job)
val fileOut = fs.create(file, progress)
val fileOutWithCodec = new DataOutputStream(codec.createOutputStream(fileOut))
getRecordWriter(fileOutWithCodec)
}
}
}
Developer ID: chan150, Project: TrillionG, Lines of code: 39, Source: BaseOutputFormat.scala
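BaseOutputFormat is abstract: a concrete format only needs to supply a RecordWriter for the raw (possibly compression-wrapped) DataOutputStream. The subclass below is a hypothetical sketch, not part of TrillionG, that writes one tab-separated source/target pair per edge.

import java.io.DataOutputStream
import org.apache.hadoop.mapred.{RecordWriter, Reporter}
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
import kr.acon.lib.io.BaseOutputFormat

// Hypothetical concrete format that emits one "source<TAB>target" line per edge.
class TextEdgeOutputFormat extends BaseOutputFormat {
  @inline override def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet] =
    new RecordWriter[Long, LongOpenHashBigSet] {
      override def write(key: Long, value: LongOpenHashBigSet): Unit = {
        val it = value.iterator()
        while (it.hasNext) out.writeBytes(s"$key\t${it.nextLong()}\n")
      }
      override def close(reporter: Reporter): Unit = out.close()
    }
}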
Example 6: FileUtils
// Set up the package name and import the required classes
package com.asto.dmp.xxx.util
import com.asto.dmp.xxx.base.Constants
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
object FileUtils extends Logging {
private val conf = new Configuration()
conf.set("fs.defaultFS", Constants.Hadoop.DEFAULT_FS)
conf.set("mapreduce.jobtracker.address", Constants.Hadoop.JOBTRACKER_ADDRESS)
def deleteFilesInHDFS(paths: String*) = {
paths.foreach { path =>
val filePath = new Path(path)
val HDFSFilesSystem = filePath.getFileSystem(new Configuration())
if (HDFSFilesSystem.exists(filePath)) {
logInfo(s"Deleting file: $filePath")
HDFSFilesSystem.delete(filePath, true)
}
}
}
def saveAsTextFile[T <: Product](rdd: RDD[T], savePath: String) = {
deleteFilesInHDFS(savePath)
logInfo(s"Saving results to $savePath")
rdd.map(_.productIterator.mkString(Constants.OutputPath.SEPARATOR)).coalesce(1).saveAsTextFile(savePath)
}
def saveAsTextFile(text: String, savePath: String) = {
deleteFilesInHDFS(savePath)
logInfo(s"Saving results to $savePath")
val out = FileSystem.get(conf).create(new Path(savePath))
out.write(text.getBytes)
out.flush()
out.close()
}
}
Developer ID: zj-lingxin, Project: asto-sparksql, Lines of code: 42, Source: FileUtils.scala
Example 7: Converter
// Set up the package name and import the required classes
package com.dataoptimo.imgprocessing.convert
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.BytesWritable
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.hadoop.io.IOUtils
import org.apache.hadoop.io.SequenceFile
import java.io.IOException
import java.lang.IllegalArgumentException
class Converter(conf: Configuration) {
def imageToSequence(srcPath: String, dstPath: String){
try {
val fs = FileSystem.get(conf);
val inPath = new Path(srcPath);
val outPath = new Path(dstPath);
val key = new Text();
val value = new BytesWritable();
val in = fs.open(inPath);
val buffer = new Array[Byte](in.available())
in.readFully(0, buffer);
in.close();
var writer = SequenceFile.createWriter(fs, conf, outPath, key.getClass(),value.getClass());
writer.append(new Text(inPath.getName()), new BytesWritable(buffer));
IOUtils.closeStream(writer);
}
catch {
case io: IOException => println(io.getMessage)
case illegalArgument: IllegalArgumentException => println(illegalArgument.getMessage)
}
}
}
Developer ID: mfawadalam, Project: imgprocessing, Lines of code: 36, Source: Converters.scala
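A minimal caller for the converter above (not from the imgprocessing project); the namenode address and the source/destination paths are placeholders.

import org.apache.hadoop.conf.Configuration
import com.dataoptimo.imgprocessing.convert.Converter

object ConverterDemo {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    conf.set("fs.defaultFS", "hdfs://localhost:9000") // placeholder namenode address
    // Wrap a single image into a one-record SequenceFile (hypothetical paths)
    new Converter(conf).imageToSequence("/images/sample.jpg", "/sequences/sample.seq")
  }
}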
Example 8: HdfsWriteTest
// Set up the package name and import the required classes
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.scalatest._
class HdfsWriteTest extends FlatSpec with Matchers {
"Hello" should "have tests" in {
def write(uri: String, filePath: String, data: Array[Byte]) = {
// System.setProperty("HADOOP_USER_NAME", "Mariusz")
val path = new Path(filePath)
val conf = new Configuration()
conf.set("fs.defaultFS", uri)
val fs = FileSystem.get(conf)
val os = fs.create(path)
os.write(data)
os.close()
fs.close()
}
write("hdfs://0.0.0.0:19000", "hdfs://0.0.0.0:19000/user/cloudera/test.txt", "Hello World".getBytes)
}
}
Developer ID: ralreiroe, Project: embarcadero, Lines of code: 24, Source: HdfsWriteTest.scala
Example 9: main
// Set up the package name and import the required classes
package io.github.qf6101.topwords
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession
object TestTopWORDS { // enclosing object inferred from the source file name; the declaration was omitted in this excerpt
def main(args: Array[String]) {
// setup spark session
val spark = SparkSession.builder().master("local[1]").appName(this.getClass.toString).getOrCreate()
val inputFile = "test_data/story_of_stone.txt"
val outputFile = "test_data/test_output"
val files = FileSystem.get(spark.sparkContext.hadoopConfiguration)
if (files.exists(new Path(outputFile))) files.delete(new Path(outputFile), true)
val corpus = spark.sparkContext.textFile(inputFile)
new TopWORDS(
tauL = 10,
tauF = 5,
textLenThld = 2000,
useProbThld = 1E-8,
numIterations = 10,
convergeTol = 1E-3,
wordBoundaryThld = 0.0)
.run(corpus, outputFile + "/dictionary", outputFile + "/segmented_texts")
}
}
Developer ID: qf6101, Project: topwords, Lines of code: 26, Source: TestTopWORDS.scala
Example 10: SessionDataFileHDFSWriter
// Set up the package name and import the required classes
package com.malaska.spark.training.streaming.dstream.sessionization
import java.io.BufferedWriter
import java.io.FileWriter
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.io.OutputStreamWriter
import org.apache.hadoop.fs.Path
import java.util.Random
object SessionDataFileHDFSWriter {
val eol = System.getProperty("line.separator");
def main(args: Array[String]) {
if (args.length == 0) {
println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}");
return;
}
val conf = new Configuration
conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"))
conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"))
conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"))
val fs = FileSystem.get(conf)
val rootTempDir = args(0)
val rootDistDir = args(1)
val files = args(2).toInt
val loops = args(3).toInt
val waitBetweenFiles = args(4).toInt
val r = new Random
for (f <- 1 to files) {
val rootName = "/weblog." + System.currentTimeMillis()
val tmpPath = new Path(rootTempDir + rootName + ".tmp")
val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath)))
print(f + ": [")
val randomLoops = loops + r.nextInt(loops)
for (i <- 1 to randomLoops) {
writer.write(SessionDataGenerator.getNextEvent + eol)
if (i%100 == 0) {
print(".")
}
}
println("]")
writer.close
val distPath = new Path(rootDistDir + rootName + ".dat")
fs.rename(tmpPath, distPath)
Thread.sleep(waitBetweenFiles)
}
println("Done")
}
}
Developer ID: TedBear42, Project: spark_training, Lines of code: 58, Source: SessionDataFileHDFSWriter.scala
Example 11: main
// Set up the package name and import the required classes
package com.aluxian.tweeather.scripts
import java.io.File
import org.apache.hadoop.fs.FileSystem
import org.apache.log4j.PropertyConfigurator
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.{Minutes, Seconds}
import org.apache.spark.{SparkConf, SparkContext}
trait Script {
protected lazy val streamingTimeout = sys.props.get("tw.streaming.timeout") // in seconds
.map(_.toLong * 1000).getOrElse(-1L)
protected lazy val streamingInterval = sys.props.get("tw.streaming.interval") // in seconds
.map(s => Seconds(s.toLong)).getOrElse(Minutes(5))
protected lazy val scriptName = "Tweeather_" + getClass.getSimpleName.stripSuffix("$")
protected lazy val sc = new SparkContext(
new SparkConf()
.setIfMissing("spark.app.name", scriptName)
.setIfMissing("spark.eventLog.dir", "tw/logs")
.setIfMissing("spark.eventLog.enabled", "true")
.setIfMissing("spark.streaming.stopGracefullyOnShutdown", "true")
.setIfMissing("spark.streaming.blockInterval", "30s")
)
protected lazy val hdfs = FileSystem.get(sc.hadoopConfiguration)
protected lazy val sqlc = new SQLContext(sc)
def main(args: Array[String]) {
// Log4j properties
Option(getClass.getResource("/com/aluxian/tweeather/res/log4j.properties")) match {
case Some(url) => PropertyConfigurator.configure(url)
case None => System.err.println("Unable to load log4j.properties")
}
// Ensure the event log directory exists
new File("tw/logs").mkdirs()
}
}
Developer ID: cnajeefa, Project: Tourism-Sentiment-Analysis, Lines of code: 44, Source: Script.scala
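A hypothetical script built on the Script trait above, illustrating how the lazily created SparkContext and the super.main bootstrap are meant to be used; the WordCount object and the tw/data/tweets.txt path are assumptions, not part of the original project.

import com.aluxian.tweeather.scripts.Script

object WordCount extends Script {
  override def main(args: Array[String]): Unit = {
    super.main(args) // configure log4j and create the event-log directory
    val counts = sc.textFile("tw/data/tweets.txt") // placeholder input path
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .reduceByKey(_ + _)
    counts.take(10).foreach(println)
    sc.stop()
  }
}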
Example 12: Import
// Set up the package name and import the required classes
package spark.in.space
import java.io.File
import java.net.URI
import com.amazonaws.auth.AnonymousAWSCredentials
import com.amazonaws.event.{ProgressEvent, ProgressListener}
import com.amazonaws.services.s3.transfer.TransferManager
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
object Import {
val log = org.slf4j.LoggerFactory.getLogger("Import")
val bucket = sys.env("SPARK_S3_BUCKET_NAME")
val n1gram = "ngrams/books/20090715/eng-us-all/1gram/data"
val dlFile = "/app/googleNGram1Gram.lzo.sequenceLongString"
val publicBucket = "datasets.elasticmapreduce"
val parqFile = s"s3n://${bucket}${dlFile}.parquet"
def main(args: Array[String]): Unit = {
if(! new File(dlFile).exists()) {
println("downloading")
val tx = new TransferManager(new AnonymousAWSCredentials())
val download = tx.download(publicBucket, n1gram, new File(dlFile))
download.addProgressListener(new ProgressListener {
override def progressChanged(progressEvent: ProgressEvent): Unit = {
println(s"type: ${progressEvent.getEventType.name()} bytes: ${progressEvent.getBytes} transferred:${progressEvent.getBytesTransferred}")
}
})
download.waitForCompletion()
}
val conf = new SparkConf().setAppName("spark-singularity")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.implicits._
val theRDD = sc.sequenceFile[Long,String](s"file://$dlFile", 80)
val theParsed = theRDD.map{ case (_, row) => rowToNGram(row) }
val df = theParsed.toDF
FileSystem.get(new URI(s"s3n://$bucket"), sc.hadoopConfiguration).delete(new Path(dlFile), true)
df.write.parquet(parqFile)
}
def rowToNGram(in: String) : NGram = {
val split = in.split("\t")
NGram(split(0), split(1), split(2).toInt, split(3).toInt, split(4).toInt)
}
}
case class NGram(ngram:String, year:String, occurrences:Int, pages:Int, books:Int)
Developer ID: heroku, Project: spark-singularity, Lines of code: 55, Source: Import.scala
Example 13: FileUtils
// Set up the package name and import the required classes
package com.asto.dmp.articlecate.utils
import java.io._
import com.asto.dmp.articlecate.base.Props
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
object FileUtils extends Logging {
private val conf = new Configuration()
conf.set("fs.defaultFS", Props.get("fs.defaultFS"))
conf.set("mapreduce.jobtracker.address", Props.get("mapreduce.jobtracker.address"))
private val fileSystem = FileSystem.get(conf) // HDFS file system handle
def mkdirs(folderPath: String): Unit = {
val path = new Path(folderPath)
if (!fileSystem.exists(path)) {
fileSystem.mkdirs(path)
}
}
def readContextFromHDFS(path: String) = {
readLinesFromHDFS(path).mkString("\n")
}
def readLinesFromHDFS(path: String): Array[String] = {
val pt = new Path(path)
val br: BufferedReader = new BufferedReader(new InputStreamReader(fileSystem.open(pt)))
val lines = scala.collection.mutable.ArrayBuffer[String]()
var line = br.readLine()
while (line != null) {
lines += line
line = br.readLine()
}
lines.toArray
}
}
Developer ID: luciuschina, Project: ArticleCategories, Lines of code: 42, Source: FileUtils.scala
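A small usage sketch (not part of ArticleCategories); it assumes fs.defaultFS and the jobtracker address resolve through the project's Props, and the paths are placeholders.

import com.asto.dmp.articlecate.utils.FileUtils

object FileUtilsDemo {
  def main(args: Array[String]): Unit = {
    FileUtils.mkdirs("/tmp/articles") // create the folder if it does not exist yet
    val text = FileUtils.readContextFromHDFS("/tmp/articles/sample.txt")
    println(s"read ${text.length} characters")
  }
}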
Example 14: globPath
// Set up the package name and import the required classes
package com.paypal.risk.madmen20.util
import java.io.{OutputStreamWriter, BufferedReader, InputStreamReader}
import org.apache.hadoop.fs.{FileUtil, FileSystem, Path}
import org.apache.spark.SparkContext
object HdfsUtil { // enclosing object inferred from the source file name; the declaration was omitted in this excerpt
def globPath(path: String)(implicit sc: SparkContext): Seq[String] = {
val conf = sc.hadoopConfiguration
val glob = new Path(path)
val fileSystem = FileSystem.get(conf)
val allStatus = fileSystem.globStatus(glob)
val allPath = FileUtil.stat2Paths(allStatus)
allPath.map(_.toString)
}
def deleteFolder(path: String)(implicit sc: SparkContext): Boolean = {
val conf = sc.hadoopConfiguration
val fileSystem = FileSystem.get(conf)
fileSystem.delete(new Path(path), true)
}
}
Developer ID: yanlzhang8936, Project: madmen20, Lines of code: 24, Source: HdfsUtil.scala
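A usage sketch that relies on the two functions living in an object named HdfsUtil (inferred from the source file name, as noted in the comment above); the glob pattern and output folder are placeholders.

import com.paypal.risk.madmen20.util.HdfsUtil
import org.apache.spark.{SparkConf, SparkContext}

object HdfsUtilDemo {
  def main(args: Array[String]): Unit = {
    implicit val sc: SparkContext =
      new SparkContext(new SparkConf().setAppName("HdfsUtilDemo").setMaster("local[*]"))
    // Expand a glob into concrete paths, then remove a stale output folder
    HdfsUtil.globPath("/data/events/2017-*/part-*").foreach(println)
    HdfsUtil.deleteFolder("/data/output/stale-run")
    sc.stop()
  }
}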
Example 15: BTERGeneratorTest
// Set up the package name and import the required classes
package org.dama.datasynth.common.generators.structure
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.dama.datasynth.common._
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
@RunWith(classOf[JUnitRunner])
class BTERGeneratorTest extends FlatSpec with Matchers with BeforeAndAfterAll {
"BTERGenerator " should "not crash and produce a graph in /tmp/bterEdges" in {
val currentDirectory = new java.io.File(".").getCanonicalPath
val bterGenerator = new BTERGenerator(utils.FileUtils.File("file://"+currentDirectory+"/src/main/resources/degrees/dblp"),
utils.FileUtils.File("file://"+currentDirectory+"/src/main/resources/ccs/dblp"));
bterGenerator.run(1000000, new Configuration(), "hdfs:///tmp/bterEdges")
val fileSystem = FileSystem.get(new Configuration())
fileSystem.exists(new Path("/tmp/bterEdges")) should be (true)
}
override protected def afterAll(): Unit = {
val fileSystem = FileSystem.get(new Configuration())
fileSystem.delete(new Path("/tmp/bterEdges"), true)
}
}
Developer ID: DAMA-UPC, Project: DataSynth, Lines of code: 30, Source: BTERGeneratorTest.scala
Example 16: DataProcess
// Set up the package name and import the required classes
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.spark.{SparkConf, SparkContext}
object DataProcess {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("dataProcess").setMaster(Path.localSparkMaster)
val sc = new SparkContext(conf)
val huzhouData = sc.textFile("hdfs://localhost:9000/huzhou_cell_records/HuZhou.csv").map(line => line.split(",")).collect()
val bHuzhou = sc.broadcast(huzhouData)
val config = new Configuration()
config.addResource(new Path(Path.localConfPath + "/core-site.xml"))
config.addResource(new Path(Path.localConfPath + "/hdfs-site.xml"))
config.addResource(new Path(Path.localConfPath + "/yarn-site.xml"))
val fs = FileSystem.get(config)
val list = fs.listStatus(new Path("hdfs://localhost:9000/huzhou_cell_records/"))
for (f <- list) {
if (!f.getPath().toString().endsWith(".csv")) {
val data = sc.textFile(f.getPath().toString())
val dataHuZhou = data.flatMap(line => line.split("\\|", -1))
.filter(line => line != "")
.map(line => line.split(";", -1))
.filter(city => city(34).trim != "" && city(35).trim != "")
.map(city => (city(34).trim, city(35).trim))
val dataProcess = dataHuZhou.map {
t =>
val huzhou = bHuzhou.value
var i: Int = 0
var str: String = null
var flag: Boolean = true
while (i < huzhou.length && flag) {
if (t._1 == huzhou(i)(2) && t._2 == huzhou(i)(3)) {
str = huzhou(i)(5) + "," + huzhou(i)(6)
flag = false
}
i += 1
}
str
}
val fileName = f.getPath().toString().split("/")(f.getPath().toString().split("/").length - 1)
val newPath = new Path("hdfs://localhost:9000/huzhou_cell_records_lnglat/" + fileName)
dataProcess.filter(line => line != null).saveAsTextFile(newPath.toString())
}
}
sc.stop()
}
}
Developer ID: yuanzhaokang, Project: ParallelizeHeatmap, Lines of code: 55, Source: DataProcess.scala
Example 17: ImageUtils
// Set up the package name and import the required classes
import java.awt.image.BufferedImage
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import javax.imageio.ImageIO
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object ImageUtils {
def readImage(hdfsPath: String): BufferedImage = {
val configuration = new Configuration()
configuration.addResource(new Path(Path.localConfPath + "/core-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hadoop/hdfs-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hadoop/yarn-site.xml"))
val fs = FileSystem.get(configuration)
val is = fs.open(new Path(hdfsPath))
val buffer = IOUtils.toByteArray(is)
val out = new ByteArrayOutputStream()
out.write(buffer, 0, buffer.length)
val b = out.toByteArray
out.close()
val picture = ImageIO.read(new ByteArrayInputStream(b))
is.close()
picture
}
def writeImage(image: BufferedImage, hdfsPath: String): Unit = {
val configuration = new Configuration()
configuration.addResource(new Path(Path.localConfPath + "/core-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/hdfs-site.xml"))
configuration.addResource(new Path(Path.localConfPath + "/yarn-site.xml"))
val fs = FileSystem.get(configuration)
val os = fs.create(new Path(hdfsPath))
val baos = new ByteArrayOutputStream()
val ios = ImageIO.createImageOutputStream(baos)
ImageIO.write(image, "png", ios)
val is = new ByteArrayInputStream(baos.toByteArray)
IOUtils.copy(is, os)
is.close()
ios.close()
baos.close()
os.close()
}
}
Developer ID: yuanzhaokang, Project: ParallelizeHeatmap, Lines of code: 48, Source: ImageUtils.scala
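A short, hypothetical round-trip through the helpers above; the tile paths are placeholders and the cluster configuration is still resolved through the project's Path.localConfPath.

object ImageUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Read a PNG from HDFS, print its dimensions, then write it back under a new path
    val img = ImageUtils.readImage("/heatmap/input/tile_0_0.png")
    println(s"${img.getWidth} x ${img.getHeight}")
    ImageUtils.writeImage(img, "/heatmap/output/tile_0_0.png")
  }
}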
Example 18: PeapodGenerator
// Set up the package name and import the required classes
package generic
import java.io.File
import java.net.URI
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.hadoop.fs.{FileSystem, Path}
import peapod.Peapod
import sativum.{Hive, Sativum}
import scala.util.Random
object PeapodGenerator {
def peapod() = {
val sdf = new SimpleDateFormat("ddMMyy-hhmmss")
val rawPath = System.getProperty("java.io.tmpdir") + "workflow-" + sdf.format(new Date()) + Random.nextInt()
val path = new Path("file://",rawPath.replace("\\","/")).toString
val fs = FileSystem.get(new URI(path), Spark.sc.hadoopConfiguration)
fs.mkdirs(new Path(path))
fs.deleteOnExit(new Path(path))
val w = new Peapod(
path=path,
raw="")(generic.Spark.sc)
w
}
def sativum() = {
val sdf = new SimpleDateFormat("ddMMyy-hhmmss")
val rawPath = System.getProperty("java.io.tmpdir") + "workflow-" + sdf.format(new Date()) + Random.nextInt()
val path = new Path("file://",rawPath.replace("\\","/")).toString
val fs = FileSystem.get(new URI(path), Spark.sc.hadoopConfiguration)
fs.mkdirs(new Path(path))
fs.deleteOnExit(new Path(path))
val w = new Sativum(
path= path,
raw="")(generic.Spark.sc)
w
}
}
Developer ID: mindfulmachines, Project: sativum, Lines of code: 41, Source: PeapodGenerator.scala
Example 19: Configurations
// Set up the package name and import the required classes
package org.ieee.codemeow.geometric.spark
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.common.base.Preconditions
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
object Configurations {
def fromFile(_path: String): MainConfiguration ={
val config = new Configuration
val path = new Path(_path)
val fs = FileSystem.get(path.toUri, config)
val file = fs.open(path)
val mapper = new ObjectMapper(new YAMLFactory())
mapper.registerModule(DefaultScalaModule)
mapper.readValue(file, classOf[MainConfiguration])
}
}
class MainConfiguration (
@JsonProperty("appName") _appName: String,
@JsonProperty("bbox") _bbox: (Double, Double, Double, Double), // lat1, lon1, lat2, lon2
@JsonProperty("layers") _layers: Array[LayerConfiguration],
@JsonProperty("sequenceFileDir") _sequenceFileDir: String
) extends Serializable{
val appName = Preconditions.checkNotNull(_appName, "appName cannot be null": Object)
val bbox = _bbox
val layers = Preconditions.checkNotNull(_layers, "layers cannot be null": Object)
val sequenceFileDir = Preconditions.checkNotNull(_sequenceFileDir, "sequenceFileDir cannot be null": Object)
}
class LayerConfiguration (
@JsonProperty("layerName") _layerName: String,
@JsonProperty("minZoom") _minZoom: Int,
@JsonProperty("maxZoom") _maxZoom: Int,
@JsonProperty("dataProvider") _dataProvider: String,
@JsonProperty("kwargs") _kwargs: Map[String, Any]
) extends Serializable{
val layerName = Preconditions.checkNotNull(_layerName, "layerName cannot be null": Object)
val minZoom = Preconditions.checkNotNull(_minZoom, "minZoom cannot be null": Object)
val maxZoom = Preconditions.checkNotNull(_maxZoom, "maxZoom cannot be null": Object)
val dataProvider = Preconditions.checkNotNull(_dataProvider, "dataProvider cannot be null": Object)
val kwargs = Preconditions.checkNotNull(_kwargs, "kwargs cannot be null": Object)
}
Developer ID: codemeow5, Project: Vector-Tile-Spark-Process, Lines of code: 52, Source: Configurations.scala
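A minimal loading sketch (not part of Vector-Tile-Spark-Process); the YAML path is a placeholder and may point at HDFS or the local file system, since fromFile resolves the FileSystem from the path's URI.

import org.ieee.codemeow.geometric.spark.Configurations

object ConfigurationsDemo {
  def main(args: Array[String]): Unit = {
    val conf = Configurations.fromFile("hdfs:///jobs/vector-tiles/config.yml") // placeholder path
    println(s"appName=${conf.appName}, layers=${conf.layers.length}")
  }
}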
Example 20: HdfsPath
// Set up the package name and import the required classes
package org.karps.ops
import scala.util.{Failure, Success, Try}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession
import karps.core.{interface => I}
import karps.core.{computation => C}
case class HdfsPath(s: String)
case class HdfsStamp(s: String)
case class HdfsResourceResult(
stampReturnPath: HdfsPath,
stampReturnError: Option[String],
stampReturn: Option[HdfsStamp])
object HdfsResourceResult {
def toProto(r: HdfsResourceResult): I.HdfsResourceStatus = {
var x = I.HdfsResourceStatus(
path = Some(C.ResourcePath(r.stampReturnPath.s)))
for (txt <- r.stampReturnError) {
x = x.withError(txt)
}
for (s <- r.stampReturn) {
x = x.withReturn(s.s)
}
x
}
}
object SourceStamps {
def getStamps(sess: SparkSession, ps: Seq[HdfsPath]): Seq[HdfsResourceResult] = {
val fs = FileSystem.get(sess.sparkContext.hadoopConfiguration)
ps.map(getStamp(fs, _)).zip(ps).map {
case (Success(s), p) => HdfsResourceResult(p, None, Option(s))
case (Failure(e), p) =>
HdfsResourceResult(p, Option(e.getMessage), None)
}
}
private def getStamp(fs: FileSystem, p: HdfsPath): Try[HdfsStamp] = {
Try {
val p2 = new Path(p.s)
val s = fs.getFileStatus(p2)
HdfsStamp(s.getModificationTime.toString)
}
}
}
Developer ID: tjhunter, Project: karps, Lines of code: 53, Source: stamps.scala
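A brief, hypothetical driver for SourceStamps above; the local Spark session and the two input paths are placeholders.

import org.apache.spark.sql.SparkSession
import org.karps.ops.{HdfsPath, SourceStamps}

object StampsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("StampsDemo").getOrCreate()
    // Each result carries either a modification-time stamp or an error message
    val results = SourceStamps.getStamps(spark, Seq(HdfsPath("/data/in.csv"), HdfsPath("/data/missing.csv")))
    results.foreach(println)
    spark.stop()
  }
}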
Note: the org.apache.hadoop.fs.FileSystem class examples in this article were compiled from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors; please consult each project's license before using or redistributing the code, and do not reproduce it without permission.