This article collects typical usage examples of the Java class parquet.schema.MessageTypeParser. If you have been wondering what MessageTypeParser is for and how to use it, the curated class examples below should help.
MessageTypeParser belongs to the parquet.schema package. Twenty code examples of the class are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
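Before the examples, here is a minimal, self-contained sketch of the class's core entry point, parseMessageType, which turns a textual schema into a MessageType (the schema string and class name below are purely illustrative):

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class ParseSchemaExample {
    public static void main(String[] args) {
        // parse a Parquet message schema from its textual representation
        MessageType schema = MessageTypeParser.parseMessageType(
            "message example { required binary id; optional int64 ts; }");
        // MessageType.toString() round-trips back to the textual form
        System.out.println(schema);
    }
}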
Example 1: initScanFilter
import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * Init the scan filter with the read schema.
 * @param scan the scan carrying the table schema attribute
 */
public void initScanFilter(Scan scan){
    try {
        // guard against a missing scan or schema attribute before dereferencing either
        byte[] schemaBytes = (scan == null) ? null : scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
        String schema = (schemaBytes == null) ? null : new String(schemaBytes);
        if (schema != null && !schema.isEmpty()) {
            MessageType readSchema = MessageTypeParser.parseMessageType(schema);
            List<Type> types = readSchema.getFields();
            for (Type type : types) {
                String columnName = type.getName();
                if (columnName.startsWith("cf")) { // strip the "cf:" prefix to fetch the real column name
                    columnName = columnName.substring(3);
                }
                filterColumns.add(columnName.getBytes());
            }
        }
    } catch (Exception e) {
        // TODO: send the exception back to the client
        LOG.error("Failed to parse the message schema", e);
    }
}
Developer: grokcoder, Project: pbase, Lines: 25, Source: PMemStoreImpl.java
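Note: in this project the scan schema's field names carry a column-family prefix such as "cf:" (compare the schema in Example 7 below, e.g. required binary cf:name;), so substring(3) strips the three prefix characters to recover the raw column qualifier.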
Example 2: runMapReduceJob
import parquet.schema.MessageTypeParser; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        jobConf.setInputFormat(TextInputFormat.class);
        TextInputFormat.addInputPath(jobConf, inputPath);
        jobConf.setNumReduceTasks(0);
        jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
        DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
        DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
        DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
        GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);
        jobConf.setMapperClass(DeprecatedMapper.class);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
Developer: grokcoder, Project: pbase, Lines: 21, Source: DeprecatedOutputFormatTest.java
Example 3: run
import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }
    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
    Configuration conf = super.getConf();
    Job job = new Job(conf);
    job.setJarByClass(ExampleParquetMapReduce.class);
    job.setInputFormatClass(ExampleInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputFormatClass(ExampleOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    ExampleOutputFormat.setSchema(
        job,
        MessageTypeParser.parseMessageType(writeSchema));
    return job.waitForCompletion(true) ? 0 : 1;
}
Developer: Hanmourang, Project: hiped2, Lines: 42, Source: ExampleParquetMapReduce.java
Example 4: createParquetWriter
import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * Create a writer for the Parquet data in a PStoreFile.
 * @return {@link org.apache.hadoop.hbase.regionserver.PStoreFile.Writer}, or null if no schema is available
 */
public PStoreFile.Writer createParquetWriter(Map<String, String> meta){
    PStoreFile.Writer writer = null;
    MessageType schema = null;
    String schemaStr = Bytes.toString(getHRegion().getTableDesc().getValue(HConstants.SCHEMA.getBytes()));
    if (schemaStr != null) {
        schema = MessageTypeParser.parseMessageType(schemaStr);
    } else {
        LOG.error("No schema found for " + this.getTableName());
        return null;
    }
    Path filePath = new Path(fs.createTempName() + ".parquet");
    try {
        writer = new PStoreFile.WriterBuilder(conf, getFileSystem(), schema, filePath)
            .addMetaData(meta)
            .build();
    } catch (IOException ioe) {
        LOG.error(ioe);
    }
    return writer;
}
Developer: grokcoder, Project: pbase, Lines: 32, Source: HStore.java
Example 5: getParquetInputSplit
import parquet.schema.MessageTypeParser; // import the required package/class
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;
    for (BlockMetaData block : this.getRowGroups()) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }
    BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();
    long[] rowGroupOffsets = new long[this.getRowGroupCount()];
    for (int i = 0; i < rowGroupOffsets.length; i++) {
        rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
    }
    return new ParquetInputSplit(
        fileStatus.getPath(),
        hdfsBlock.getOffset(),
        end,
        length,
        hdfsBlock.getHosts(),
        rowGroupOffsets
    );
}
Developer: grokcoder, Project: pbase, Lines: 31, Source: ParquetInputFormat.java
Example 6: end
import parquet.schema.MessageTypeParser; // import the required package/class
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;
    for (BlockMetaData block : blocks) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }
    return length;
}
Developer: grokcoder, Project: pbase, Lines: 15, Source: ParquetInputSplit.java
Example 7: main
import parquet.schema.MessageTypeParser; // import the required package/class
public static void main(String[] args) throws IOException {
    Path root = new Path("hdfs://10.214.208.11:9000/parquet/"); // directory path
    Configuration configuration = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType( // the Parquet file schema
        " message people { " +
            "required binary rowkey;" +
            "required binary cf:name;" +
            "required binary cf:age;" +
            "required int64 timestamp;" +
        " }");
    GroupWriteSupport.setSchema(schema, configuration);
    SimpleGroupFactory sfg = new SimpleGroupFactory(schema);
    Path file = new Path(root, "people002.parquet");
    Map<String, String> meta = new HashMap<String, String>();
    meta.put("startkey", "1");
    meta.put("endkey", "2");
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
        file,
        new GroupWriteSupport(meta),
        CompressionCodecName.UNCOMPRESSED,
        1024,
        1024,
        512,
        true,
        false,
        ParquetProperties.WriterVersion.PARQUET_1_0,
        configuration);
    Group group = sfg.newGroup().append("rowkey", "1")
        .append("cf:name", "wangxiaoyi")
        .append("cf:age", "24")
        .append("timestamp", System.currentTimeMillis());
    writer.write(group);
    for (int i = 0; i < 10000; ++i) {
        // every group must populate the required fields declared in the schema
        writer.write(
            sfg.newGroup()
                .append("rowkey", String.valueOf(i))
                .append("cf:name", "wangxiaoyi" + i)
                .append("cf:age", String.valueOf(i))
                .append("timestamp", System.currentTimeMillis()));
    }
    writer.close();
}
Developer: grokcoder, Project: pbase, Lines: 56, Source: TestParquetWrite.java
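For completeness, a minimal read-back sketch for the file written above. This is an illustration under assumptions, not part of the original project: it uses the old parquet.hadoop.ParquetReader constructor together with GroupReadSupport (shown in Examples 8 and 9), and assumes the 'file' Path and schema from Example 7:

import parquet.example.data.Group;
import parquet.hadoop.ParquetReader;
import parquet.hadoop.example.GroupReadSupport;

// read every record back and print one field; 'file' is the Path written above
ParquetReader<Group> reader = new ParquetReader<Group>(file, new GroupReadSupport());
try {
    Group g;
    while ((g = reader.read()) != null) {
        System.out.println(g.getBinary("cf:name", 0).toStringUsingUTF8());
    }
} finally {
    reader.close();
}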
Example 8: testInitWithoutSpecifyingRequestSchema
import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    assertEquals(context.getRequestedSchema(), fileSchema);
}
Developer: grokcoder, Project: pbase, Lines: 11, Source: GroupReadSupportTest.java
Example 9: testInitWithPartialSchema
import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testInitWithPartialSchema() {
    GroupReadSupport s = new GroupReadSupport();
    Configuration configuration = new Configuration();
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
    MessageType partialSchema = MessageTypeParser.parseMessageType(partialSchemaStr);
    configuration.set(ReadSupport.PARQUET_READ_SCHEMA, partialSchemaStr);
    ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
    assertEquals(context.getRequestedSchema(), partialSchema);
}
Developer: grokcoder, Project: pbase, Lines: 13, Source: GroupReadSupportTest.java
Example 10: setUp
import parquet.schema.MessageTypeParser; // import the required package/class
@Before
public void setUp() {
    blocks = new ArrayList<BlockMetaData>();
    for (int i = 0; i < 10; i++) {
        blocks.add(newBlock(i * 10, 10));
    }
    schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
    fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "parquet-mr");
}
Developer: grokcoder, Project: pbase, Lines: 10, Source: TestInputFormat.java
Example 11: testWriteMode
import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testWriteMode() throws Exception {
    File testDir = new File("target/test/TestParquetFileWriter/");
    testDir.mkdirs();
    File testFile = new File(testDir, "testParquetFile");
    testFile = testFile.getAbsoluteFile();
    testFile.createNewFile();
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m { required group a {required binary b;} required group "
        + "c { required int64 d; }}");
    Configuration conf = new Configuration();
    ParquetFileWriter writer = null;
    boolean exceptionThrown = false;
    Path path = new Path(testFile.toURI());
    try {
        writer = new ParquetFileWriter(conf, schema, path,
            ParquetFileWriter.Mode.CREATE);
    } catch (IOException ioe1) {
        exceptionThrown = true;
    }
    assertTrue(exceptionThrown); // CREATE must fail because the file already exists
    exceptionThrown = false;
    try {
        writer = new ParquetFileWriter(conf, schema, path,
            ParquetFileWriter.Mode.OVERWRITE);
    } catch (IOException ioe2) {
        exceptionThrown = true;
    }
    assertTrue(!exceptionThrown); // OVERWRITE succeeds on an existing file
    testFile.delete();
}
Developer: grokcoder, Project: pbase, Lines: 33, Source: TestParquetFileWriter.java
Example 12: testWriteReadStatisticsAllNulls
import parquet.schema.MessageTypeParser; // import the required package/class
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {
    File testFile = new File("target/test/TestParquetFileWriter/testParquetFile").getAbsoluteFile();
    testFile.delete();
    writeSchema = "message example {\n" +
        "required binary content;\n" +
        "}";
    Path path = new Path(testFile.toURI());
    MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
    Configuration configuration = new Configuration();
    GroupWriteSupport.setSchema(schema, configuration);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport(null));
    Group r1 = new SimpleGroup(schema); // a record whose only field is left null
    writer.write(r1);
    writer.close();
    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
    // assert the statistics object is not empty
    assertFalse(readFooter.getBlocks().get(0).getColumns().get(0).getStatistics().isEmpty());
    // assert the number of nulls is correct for the first block
    assertEquals(1, readFooter.getBlocks().get(0).getColumns().get(0).getStatistics().getNumNulls());
}
Developer: grokcoder, Project: pbase, Lines: 30, Source: TestParquetFileWriter.java
Example 13: setUp
import parquet.schema.MessageTypeParser; // import the required package/class
@Before
public void setUp() {
    GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), conf);
    expectPoolSize = Math.round(ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax()
        * MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
    rowGroupSize = (int) Math.floor(expectPoolSize / 2);
    conf.setInt(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
    codec = CompressionCodecName.UNCOMPRESSED;
}
Developer: grokcoder, Project: pbase, Lines: 10, Source: TestMemoryManager.java
Example 14: runMapReduceJob
import parquet.schema.MessageTypeParser; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        writeJob = new Job(conf, "write");
        TextInputFormat.addInputPath(writeJob, inputPath);
        writeJob.setInputFormatClass(TextInputFormat.class);
        writeJob.setNumReduceTasks(0);
        ExampleOutputFormat.setCompression(writeJob, codec);
        ExampleOutputFormat.setOutputPath(writeJob, parquetPath);
        writeJob.setOutputFormatClass(ExampleOutputFormat.class);
        writeJob.setMapperClass(ReadMapper.class);
        ExampleOutputFormat.setSchema(
            writeJob,
            MessageTypeParser.parseMessageType(writeSchema));
        writeJob.submit();
        waitForJob(writeJob);
    }
    {
        jobConf.set(ReadSupport.PARQUET_READ_SCHEMA, readSchema);
        jobConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, GroupReadSupport.class.getCanonicalName());
        jobConf.setInputFormat(MyDeprecatedInputFormat.class);
        MyDeprecatedInputFormat.setInputPaths(jobConf, parquetPath);
        jobConf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
        org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(jobConf, outputPath);
        jobConf.setMapperClass(DeprecatedWriteMapper.class);
        jobConf.setNumReduceTasks(0);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
Developer: grokcoder, Project: pbase, Lines: 34, Source: DeprecatedInputFormatTest.java
Example 15: testTajoToParquetConversion
import parquet.schema.MessageTypeParser; // import the required package/class
private void testTajoToParquetConversion(
        Schema tajoSchema, String schemaString) throws Exception {
    TajoSchemaConverter converter = new TajoSchemaConverter();
    MessageType schema = converter.convert(tajoSchema);
    MessageType expected = MessageTypeParser.parseMessageType(schemaString);
    assertEquals("converting " + schema + " to " + schemaString,
        expected.toString(), schema.toString());
}
Developer: gruter, Project: tajo-cdh, Lines: 9, Source: TestSchemaConverter.java
Example 16: testParquetToTajoConversion
import parquet.schema.MessageTypeParser; // import the required package/class
private void testParquetToTajoConversion(
        Schema tajoSchema, String schemaString) throws Exception {
    TajoSchemaConverter converter = new TajoSchemaConverter();
    Schema schema = converter.convert(
        MessageTypeParser.parseMessageType(schemaString));
    assertEquals("converting " + schemaString + " to " + tajoSchema,
        tajoSchema.toString(), schema.toString());
}
Developer: gruter, Project: tajo-cdh, Lines: 9, Source: TestSchemaConverter.java
Example 17: getSchema
import parquet.schema.MessageTypeParser; // import the required package/class
public static MessageType getSchema(Configuration configuration) {
    return MessageTypeParser.parseMessageType(configuration.get(PARQUET_SCHEMA_PROPERTY_NAME));
}
Developer: EXASOL, Project: hadoop-etl-udfs, Lines: 4, Source: TupleWriteSupport.java
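The symmetric setter is not part of this excerpt; a plausible counterpart — hypothetical, assuming the same PARQUET_SCHEMA_PROPERTY_NAME constant is in scope — relies on MessageType.toString() producing the parseable textual form that getSchema consumes:

// hypothetical counterpart to getSchema above; PARQUET_SCHEMA_PROPERTY_NAME is assumed
public static void setSchema(Configuration configuration, MessageType schema) {
    configuration.set(PARQUET_SCHEMA_PROPERTY_NAME, schema.toString());
}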
Example 18: getResultFromGroup
import parquet.schema.MessageTypeParser; // import the required package/class
public List<Cell> getResultFromGroup(Group group){
    List<Cell> cells = new LinkedList<>();
    byte[] row = group.getBinary("rowkey", 0).getBytes();
    GroupType type = group.getType();
    for (Type t : type.getFields()) {
        byte[] value = group.getBinary(t.getName(), 0).getBytes();
        Cell cell = new KeyValue(row, "cf".getBytes(), t.getName().getBytes(), value);
        cells.add(cell);
    }
    return cells;
}
Developer: grokcoder, Project: pbase, Lines: 23, Source: RSRpcServices.java
Example 19: getStoreScanner
import parquet.schema.MessageTypeParser; // import the required package/class
/**
 * Get the store scanner for the Parquet-backed store.
 * @return a scanner over the memstore, snapshot, and Parquet file records
 */
public RecordScanner getStoreScanner(Scan scan, long readPt){
    RecordScanner scanner = null;
    lock.readLock().lock();
    try {
        List<RecordScanner> scanners = new LinkedList<>();
        byte[] startRow = scan.getStartRow();
        // add memstore scanner
        RecordScanner memScanner = this.pMemStore.getScanner(scan);
        if (pMemStore.size() > 0 && pMemStore.getEndKey() != null && Bytes.compareTo(startRow, this.pMemStore.getEndKey()) <= 0) {
            memScanner.seek(startRow);
            if (memScanner.hasNext()) {
                scanners.add(memScanner);
            }
        }
        // add memstore snapshot scanner
        RecordScanner snapshotScanner = pMemStore.getSnapshotScanner(scan);
        snapshotScanner.seek(startRow);
        if (snapshotScanner.hasNext()) {
            scanners.add(snapshotScanner);
        }
        MessageType readSchema = null;
        byte[] schemaBytes = scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
        // TODO: verify the schema
        if (schemaBytes != null) {
            readSchema = MessageTypeParser.parseMessageType(new String(schemaBytes));
        }
        scanners.addAll(loadPFileScanner(startRow, readSchema));
        scanner = new PStoreScanner(this, scan, readPt, scanners);
    } finally {
        lock.readLock().unlock();
    }
    return scanner;
}
Developer: grokcoder, Project: pbase, Lines: 49, Source: HStore.java
Example 20: runMapReduceJob
import parquet.schema.MessageTypeParser; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec, Map<String, String> extraConf) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration(this.conf);
    for (Map.Entry<String, String> entry : extraConf.entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        writeJob = new Job(conf, "write");
        TextInputFormat.addInputPath(writeJob, inputPath);
        writeJob.setInputFormatClass(TextInputFormat.class);
        writeJob.setNumReduceTasks(0);
        ParquetOutputFormat.setCompression(writeJob, codec);
        ParquetOutputFormat.setOutputPath(writeJob, parquetPath);
        writeJob.setOutputFormatClass(ParquetOutputFormat.class);
        writeJob.setMapperClass(readMapperClass);
        ParquetOutputFormat.setWriteSupportClass(writeJob, MyWriteSupport.class);
        GroupWriteSupport.setSchema(
            MessageTypeParser.parseMessageType(writeSchema),
            writeJob.getConfiguration());
        writeJob.submit();
        waitForJob(writeJob);
    }
    {
        conf.set(ReadSupport.PARQUET_READ_SCHEMA, readSchema);
        readJob = new Job(conf, "read");
        readJob.setInputFormatClass(ParquetInputFormat.class);
        ParquetInputFormat.setReadSupportClass(readJob, MyReadSupport.class);
        ParquetInputFormat.setInputPaths(readJob, parquetPath);
        readJob.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(readJob, outputPath);
        readJob.setMapperClass(writeMapperClass);
        readJob.setNumReduceTasks(0);
        readJob.submit();
        waitForJob(readJob);
    }
}
Developer: grokcoder, Project: pbase, Lines: 42, Source: TestInputOutputFormat.java
Note: the parquet.schema.MessageTypeParser examples in this article were collected from GitHub, MSDocs, and other source-code and documentation platforms; the snippets were selected from open-source projects contributed by various developers. Copyright of the source code remains with the original authors; consult each project's License before distributing or using it. Do not reproduce without permission.