This article collects typical usage examples of the Java class parquet.hadoop.ParquetWriter. If you are wondering how the ParquetWriter class is used in practice, or are looking for working examples of it, the selected code samples below should help.
The ParquetWriter class belongs to the parquet.hadoop package. A total of 14 code examples are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code samples.
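Most of the examples below follow the same basic pattern: register a MessageType schema on a Hadoop Configuration, construct a ParquetWriter<Group> backed by a GroupWriteSupport, write Group records, and close the writer so the file footer gets written. For orientation, here is a minimal sketch of that pattern; the schema, field names, and method name are illustrative assumptions and are not taken from any of the quoted projects.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.column.ParquetProperties;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public static void writeSample(Path file) throws IOException {
    Configuration conf = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType(
            "message sample { required binary name; required int64 ts; }");
    GroupWriteSupport.setSchema(schema, conf);       // GroupWriteSupport reads the schema from conf
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            file,                                    // output path (caller-supplied)
            new GroupWriteSupport(),
            CompressionCodecName.SNAPPY,
            ParquetWriter.DEFAULT_BLOCK_SIZE,        // row group size in bytes
            ParquetWriter.DEFAULT_PAGE_SIZE,         // page size in bytes
            ParquetWriter.DEFAULT_PAGE_SIZE,         // dictionary page size in bytes
            true,                                    // enable dictionary encoding
            false,                                   // skip write-time validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);                                   // carries the schema registered above
    writer.write(factory.newGroup()
            .append("name", "example")
            .append("ts", System.currentTimeMillis()));
    writer.close();                                  // required: writes the Parquet footer
}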
Example 1: build
import parquet.hadoop.ParquetWriter; // import the required package/class
public PFileWriter build() {
    try {
        this.parquetWriter = new ParquetWriter<Group>(
                file,
                gws,
                CompressionCodecName.SNAPPY,
                1024,   // block (row group) size in bytes
                1024,   // page size in bytes
                512,    // dictionary page size in bytes
                true,   // enable dictionary encoding
                false,  // disable write-time validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);
    } catch (IOException ioe) {
        LOG.error(ioe.toString());
    }
    return this;
}
Author: grokcoder, Project: pbase, Lines: 19, Source: PFileWriter.java
Example 2: initWriter
import parquet.hadoop.ParquetWriter; // import the required package/class
public static ParquetWriter<Group> initWriter(String fileName, Map<String, String> metas)
        throws IOException {
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            initFile(fileName),
            new GroupWriteSupport(metas),
            CompressionCodecName.SNAPPY,
            1024,   // block (row group) size in bytes
            1024,   // page size in bytes
            512,    // dictionary page size in bytes
            true,   // enable dictionary encoding
            false,  // disable write-time validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
    return writer;
}
Author: grokcoder, Project: pbase, Lines: 22, Source: GenerateParquetFile.java
Example 3: run
import parquet.hadoop.ParquetWriter; // import the required package/class
/**
 * Write the file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.IOFileOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }
    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.OUTPUT));
    AvroParquetWriter<Stock> writer =
            new AvroParquetWriter<Stock>(outputPath, Stock.SCHEMA$,
                    CompressionCodecName.SNAPPY,
                    ParquetWriter.DEFAULT_BLOCK_SIZE,
                    ParquetWriter.DEFAULT_PAGE_SIZE,
                    true);  // enable dictionary encoding
    for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
        writer.write(stock);
    }
    writer.close();
    return 0;
}
Author: Hanmourang, Project: hiped2, Lines: 35, Source: ParquetAvroStockWriter.java
Example 4: getWriter
import parquet.hadoop.ParquetWriter; // import the required package/class
/**
 * Build a {@link ParquetWriter} of {@link Group} records for a given file path and block size.
 * @param blockSize the row group size in bytes
 * @param stagingFile the staging file to write to
 * @return a configured writer
 * @throws IOException if the writer cannot be created
 */
public ParquetWriter<Group> getWriter(int blockSize, Path stagingFile)
        throws IOException {
    State state = this.destination.getProperties();
    int pageSize = state.getPropAsInt(getProperty(WRITER_PARQUET_PAGE_SIZE), DEFAULT_PAGE_SIZE);
    int dictPageSize = state.getPropAsInt(getProperty(WRITER_PARQUET_DICTIONARY_PAGE_SIZE), DEFAULT_BLOCK_SIZE);
    boolean enableDictionary =
            state.getPropAsBoolean(getProperty(WRITER_PARQUET_DICTIONARY), DEFAULT_IS_DICTIONARY_ENABLED);
    boolean validate = state.getPropAsBoolean(getProperty(WRITER_PARQUET_VALIDATE), DEFAULT_IS_VALIDATING_ENABLED);
    String rootURI = state.getProp(WRITER_FILE_SYSTEM_URI, LOCAL_FS_URI);
    Path absoluteStagingFile = new Path(rootURI, stagingFile);
    CompressionCodecName codec = getCodecFromConfig();
    GroupWriteSupport support = new GroupWriteSupport();
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(this.schema, conf);
    ParquetProperties.WriterVersion writerVersion = getWriterVersion();
    return new ParquetWriter<>(absoluteStagingFile, support, codec, blockSize, pageSize, dictPageSize, enableDictionary,
            validate, writerVersion, conf);
}
Author: apache, Project: incubator-gobblin, Lines: 26, Source: ParquetDataWriterBuilder.java
Example 5: main
import parquet.hadoop.ParquetWriter; // import the required package/class
public static void main(String[] args) throws IOException {
    int fileNum = 10;        // number of files to generate
    int fileRecordNum = 50;  // number of records in each file
    int rowKey = 0;
    for (int i = 0; i < fileNum; ++i) {
        Map<String, String> metas = new HashMap<>();
        metas.put(HConstants.START_KEY, genRowKey("%10d", rowKey + 1));
        metas.put(HConstants.END_KEY, genRowKey("%10d", rowKey + fileRecordNum));
        ParquetWriter<Group> writer = initWriter("pfile/scanner_test_file" + i, metas);
        for (int j = 0; j < fileRecordNum; ++j) {
            rowKey++;
            Group group = sfg.newGroup().append("rowkey", genRowKey("%10d", rowKey))
                    .append("cf:name", "wangxiaoyi" + rowKey)
                    .append("cf:age", String.format("%10d", rowKey))
                    .append("cf:job", "student")
                    .append("timestamp", System.currentTimeMillis());
            writer.write(group);
        }
        writer.close();
    }
}
Author: grokcoder, Project: pbase, Lines: 28, Source: GenerateParquetFile.java
Example 6: main
import parquet.hadoop.ParquetWriter; // import the required package/class
public static void main(String[] args) throws IOException {
    Path root = new Path("hdfs://10.214.208.11:9000/parquet/");  // output directory
    Configuration configuration = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType(     // Parquet file schema
            " message people { " +
                "required binary rowkey;" +
                "required binary cf:name;" +
                "required binary cf:age;" +
                "required int64 timestamp;" +
            " }");
    GroupWriteSupport.setSchema(schema, configuration);
    SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
    Path file = new Path(root, "people002.parquet");
    Map<String, String> meta = new HashMap<String, String>();
    meta.put("startkey", "1");
    meta.put("endkey", "2");
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            file,
            new GroupWriteSupport(meta),
            CompressionCodecName.UNCOMPRESSED,
            1024,   // block (row group) size in bytes
            1024,   // page size in bytes
            512,    // dictionary page size in bytes
            true,   // enable dictionary encoding
            false,  // disable write-time validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            configuration);
    for (int i = 0; i < 10000; ++i) {
        // every written record must supply all four required fields declared in the schema
        Group group = sfg.newGroup().append("rowkey", String.format("%10d", i))
                .append("cf:name", "wangxiaoyi" + i)
                .append("cf:age", Integer.toString(i))
                .append("timestamp", System.currentTimeMillis());
        writer.write(group);
    }
    writer.close();
}
Author: grokcoder, Project: pbase, Lines: 56, Source: TestParquetWrite.java
Example 7: writeToFile
import parquet.hadoop.ParquetWriter; // import the required package/class
public static void writeToFile(File f, List<User> users) throws IOException {
    Configuration conf = new Configuration();
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getAbsolutePath()), conf, new GroupWriteSupport(null));
    for (User u : users) {
        writer.write(groupFromUser(u));
    }
    writer.close();
}
Author: grokcoder, Project: pbase, Lines: 11, Source: PhoneBookWriter.java
Example 8: getDefaultBlockSize
import parquet.hadoop.ParquetWriter; // import the required package/class
public String getDefaultBlockSize() {
    int defaultBlockSize = ParquetWriter.DEFAULT_BLOCK_SIZE;
    boolean bigDataPluginInstalled = false;
    try {
        bigDataPluginInstalled = isBigDataPluginInstalled();
    } catch ( KettleException ex ) {
        logError( ex.getMessage() );
    }
    if ( bigDataPluginInstalled ) {
        /* try {
             int hdfsBlockSize = BigDataHelper.getHdfsConfiguredBlockSize();
             if ( hdfsBlockSize > 0 ) {
                 defaultBlockSize = hdfsBlockSize;
             }
           } catch ( KettleException ex ) {
             logDebug( "Error fetching Hadoop configuration " + ex.getMessage() );
           }
        */
    }
    int mbBlockSize = defaultBlockSize / 1024 / 1024;  // bytes -> megabytes
    return Integer.toString( mbBlockSize );
}
Author: inquidia, Project: ParquetPlugin, Lines: 28, Source: ParquetOutputMeta.java
Example 9: closeFile
import parquet.hadoop.ParquetWriter; // import the required package/class
private boolean closeFile() {
    boolean retval = false;
    if ( data.parquetWriters != null ) {
        Iterator<ParquetWriter> openFiles = data.parquetWriters.iterator();
        while ( openFiles.hasNext() ) {
            ParquetWriter writer = openFiles.next();
            if ( writer != null ) {
                try {
                    writer.close();
                } catch ( Exception e ) {
                    logBasic( "Error trying to close file. This may not be a problem." );
                    logDetailed( "Stack trace from error trying to close file:", e );
                }
                writer = null;
            }
        }
        if ( log.isDebug() ) {
            logDebug( "Closed all open parquet writers." );
        }
    }
    return retval;
}
Author: inquidia, Project: ParquetPlugin, Lines: 29, Source: ParquetOutput.java
Example 10: open
import parquet.hadoop.ParquetWriter; // import the required package/class
@Override
public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
        "Unable to open a writer from state:%s", state);
    logger.debug(
        "Opening data file with pathTmp:{} (final path will be path:{})",
        pathTmp, path);
    try {
        CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
        if (enableCompression) {
            if (SnappyCodec.isNativeCodeLoaded()) {
                codecName = CompressionCodecName.SNAPPY;
            } else {
                logger.warn("Compression enabled, but Snappy native code not loaded. " +
                    "Parquet file will not be compressed.");
            }
        }
        avroParquetWriter = new AvroParquetWriter<E>(fileSystem.makeQualified(pathTmp),
            schema, codecName, DEFAULT_BLOCK_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE);
    } catch (IOException e) {
        throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
    }
    state = ReaderWriterState.OPEN;
}
Author: cloudera, Project: cdk, Lines: 29, Source: ParquetFileSystemDatasetWriter.java
Example 11: main
import parquet.hadoop.ParquetWriter; // import the required package/class
public static void main(String[] args) {
    String inputFile = null;
    String outputFile = null;
    HelpFormatter formatter = new HelpFormatter();
    // create the Options object
    Options options = new Options();
    // register the -i (input) and -o (output) options
    options.addOption("i", true, "input avro file");
    options.addOption("o", true, "output Parquet file");
    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        inputFile = cmd.getOptionValue("i");
        if (inputFile == null) {
            formatter.printHelp("AvroToParquet", options);
            return;
        }
        outputFile = cmd.getOptionValue("o");
    } catch (ParseException exc) {
        System.err.println("Problem with command line parameters: " + exc.getMessage());
        return;
    }
    File avroFile = new File(inputFile);
    if (!avroFile.exists()) {
        System.err.println("Could not open file: " + inputFile);
        return;
    }
    try {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> dataFileReader;
        dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
        Schema avroSchema = dataFileReader.getSchema();
        // choose compression scheme
        CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;
        // set Parquet file block size and page size values
        int blockSize = 256 * 1024 * 1024;
        int pageSize = 64 * 1024;
        String base = FilenameUtils.removeExtension(avroFile.getAbsolutePath()) + ".parquet";
        if (outputFile != null) {
            File file = new File(outputFile);
            base = file.getAbsolutePath();
        }
        Path outputPath = new Path("file:///" + base);
        // the ParquetWriter object that will consume Avro GenericRecords
        ParquetWriter<GenericRecord> parquetWriter;
        parquetWriter = new AvroParquetWriter<GenericRecord>(outputPath, avroSchema, compressionCodecName, blockSize, pageSize);
        for (GenericRecord record : dataFileReader) {
            parquetWriter.write(record);
        }
        dataFileReader.close();
        parquetWriter.close();
    } catch (IOException e) {
        System.err.println("Caught exception: " + e.getMessage());
    }
}
Author: CohesionForce, Project: avroToParquet, Lines: 68, Source: AvroToParquet.java
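Given the options registered above, the tool would be invoked along the lines of java AvroToParquet -i input.avro -o output.parquet (the file names here are placeholders); if -o is omitted, the output path defaults to the input path with its extension replaced by .parquet.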
Example 12: getDefaultPageSize
import parquet.hadoop.ParquetWriter; // import the required package/class
public String getDefaultPageSize() {
    int defaultPageSize = ParquetWriter.DEFAULT_PAGE_SIZE;  // 1 MB (1,048,576 bytes)
    int kbPageSize = defaultPageSize / 1024;                // bytes -> kilobytes
    return Integer.toString( kbPageSize );
}
Author: inquidia, Project: ParquetPlugin, Lines: 7, Source: ParquetOutputMeta.java
Example 13: getDefaultIsDictionaryEnabled
import parquet.hadoop.ParquetWriter; // import the required package/class
public boolean getDefaultIsDictionaryEnabled() {
    return ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED;
}
Author: inquidia, Project: ParquetPlugin, Lines: 5, Source: ParquetOutputMeta.java
Example 14: ParquetOutputData
import parquet.hadoop.ParquetWriter; // import the required package/class
public ParquetOutputData() {
    super();
    daf = new SimpleDateFormat();
    dafs = new DateFormatSymbols();
    defaultDateFormat = new SimpleDateFormat();
    defaultDateFormatSymbols = new DateFormatSymbols();
    openFiles = new ArrayList<String>();
    parquetWriters = new ArrayList<ParquetWriter>();
}
Author: inquidia, Project: ParquetPlugin, Lines: 14, Source: ParquetOutputData.java
Note: The parquet.hadoop.ParquetWriter examples in this article were collected from source-code and documentation hosting platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their respective authors; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not republish without permission.