This article collects typical usage examples of the Java class parquet.hadoop.metadata.ColumnChunkMetaData. If you are unsure what ColumnChunkMetaData is for or how to use it, the curated class examples below may help.
The ColumnChunkMetaData class belongs to the parquet.hadoop.metadata package. Twenty code examples of the class are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
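As a quick orientation before the examples: ColumnChunkMetaData describes one column chunk inside a row group of a Parquet file (its path, value count, sizes, codec, encodings, and statistics). The following is a minimal sketch of how instances are typically obtained from a file footer; the file path argument and the printed fields are illustrative, and the readFooter call should be checked against the parquet-mr version you use.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;
public class ColumnChunkMetaDataTour {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // path to a .parquet file (illustrative)
    // Read only the footer; it carries per-row-group, per-column chunk metadata.
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);
    for (BlockMetaData block : footer.getBlocks()) {          // one block == one row group
      for (ColumnChunkMetaData chunk : block.getColumns()) {  // one entry per column chunk
        System.out.println(chunk.getPath()
            + " values=" + chunk.getValueCount()
            + " size=" + chunk.getTotalSize()
            + " codec=" + chunk.getCodec()
            + " encodings=" + chunk.getEncodings());
      }
    }
  }
}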
Example 1: PageReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
PageReader(ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
    throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  long start = columnChunkMetaData.getFirstDataPageOffset();
  try {
    FSDataInputStream f = fs.open(path);
    this.dataReader = new ColumnDataReader(f, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, f);
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
Developer: skhalifa, Project: QDrill, Lines: 19, Source: PageReader.java
Example 2: ColumnReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
protected ColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;
  this.pageReader = new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);
  if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
    if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
      dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
    } else {
      dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }
}
Developer: skhalifa, Project: QDrill, Lines: 20, Source: ColumnReader.java
Example 3: add
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private static void add(ParquetMetadata footer) {
  for (BlockMetaData blockMetaData : footer.getBlocks()) {
    ++blockCount;
    MessageType schema = footer.getFileMetaData().getSchema();
    recordCount += blockMetaData.getRowCount();
    List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
    for (ColumnChunkMetaData columnMetaData : columns) {
      ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
      add(
          desc,
          columnMetaData.getValueCount(),
          columnMetaData.getTotalSize(),
          columnMetaData.getTotalUncompressedSize(),
          columnMetaData.getEncodings(),
          columnMetaData.getStatistics());
    }
  }
}
Developer: grokcoder, Project: pbase, Lines: 19, Source: PrintFooter.java
Example 4: endColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
/**
 * end a column (once all rep, def and data have been written)
 *
 * @throws IOException
 */
public void endColumn() throws IOException {
  state = state.endColumn();
  if (DEBUG) LOG.debug(out.getPos() + ": end column");
  currentBlock.addColumn(ColumnChunkMetaData.get(
      currentChunkPath,
      currentChunkType,
      currentChunkCodec,
      currentEncodings,
      currentStatistics,
      currentChunkFirstDataPage,
      currentChunkDictionaryPageOffset,
      currentChunkValueCount,
      compressedLength,
      uncompressedLength));
  if (DEBUG) LOG.info("ended Column chunk: " + currentColumn);
  currentColumn = null;
  this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength);
  this.uncompressedLength = 0;
  this.compressedLength = 0;
}
Developer: grokcoder, Project: pbase, Lines: 26, Source: ParquetFileWriter.java
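For context, endColumn() is one step in ParquetFileWriter's low-level write protocol. Below is a rough, hedged sketch of the surrounding call sequence; conf, schema, outputPath, rowCount, columnDescriptor, valueCount, codec, and extraMetaData are placeholders, and the exact overloads should be verified against your parquet-mr version.
// Assumed ParquetFileWriter call sequence (sketch only, not taken from the example above).
ParquetFileWriter writer = new ParquetFileWriter(conf, schema, outputPath);
writer.start();                                          // write the leading magic bytes
writer.startBlock(rowCount);                             // begin a row group
writer.startColumn(columnDescriptor, valueCount, codec); // begin one column chunk
// ... write the dictionary/data pages for this column ...
writer.endColumn();                                      // records the ColumnChunkMetaData as shown above
writer.endBlock();                                       // finish the row group
writer.end(extraMetaData);                               // write the footer (ParquetMetadata)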
Example 5: visit
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Override
public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
  Column<T> filterColumn = lt.getColumn();
  T value = lt.getValue();
  ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
  Statistics<T> stats = columnChunk.getStatistics();
  if (stats.isEmpty()) {
    // we have no statistics available, we cannot drop any chunks
    return false;
  }
  if (isAllNulls(columnChunk)) {
    // we are looking for records where v < someValue
    // this chunk is all nulls, so we can drop it
    return true;
  }
  // drop if value <= min
  return value.compareTo(stats.genericGetMin()) <= 0;
}
Developer: grokcoder, Project: pbase, Lines: 22, Source: StatisticsFilter.java
Example 6: testClearExceptionForNots
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Test
public void testClearExceptionForNots() {
  List<ColumnChunkMetaData> columnMetas = Arrays.asList(
      getDoubleColumnMeta(new DoubleStatistics(), 0L),
      getIntColumnMeta(new IntStatistics(), 0L));
  FilterPredicate pred = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));
  try {
    canDrop(pred, columnMetas);
    fail("This should throw");
  } catch (IllegalArgumentException e) {
    assertEquals("This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
        + " not(eq(double.column, 12.0))", e.getMessage());
  }
}
Developer: grokcoder, Project: pbase, Lines: 17, Source: TestStatisticsFilter.java
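The test above shows that canDrop rejects predicates that still contain not(...). A minimal sketch of the intended workflow follows, assuming the parquet.filter2 API used in that test; the "price" column, the value 12.0, and the helper method name are hypothetical, and the row-group metadata is passed in by the caller.
// Assumed imports (fragment style, matching the examples above):
//   import java.util.List;
//   import static parquet.filter2.predicate.FilterApi.*;
//   import static parquet.filter2.statisticslevel.StatisticsFilter.canDrop;
//   import parquet.filter2.predicate.FilterPredicate;
//   import parquet.filter2.predicate.LogicalInverseRewriter;
//   import parquet.filter2.predicate.Operators.DoubleColumn;
//   import parquet.hadoop.metadata.ColumnChunkMetaData;
static boolean shouldSkipRowGroup(List<ColumnChunkMetaData> columnMetas) {
  DoubleColumn price = doubleColumn("price");                     // hypothetical column
  FilterPredicate raw = not(eq(price, 12.0));
  // canDrop() throws for predicates that still contain not(); rewrite them first.
  FilterPredicate rewritten = LogicalInverseRewriter.rewrite(raw);
  return canDrop(rewritten, columnMetas);                         // true => the row group can be skipped
}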
Example 7: ParquetFileReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public ParquetFileReader(
    Configuration configuration,
    Path file,
    List<BlockMetaData> blocks,
    List<ColumnDescriptor> columns)
    throws IOException
{
  this.file = file;
  this.inputStream = file.getFileSystem(configuration).open(file);
  this.blocks = blocks;
  if (!blocks.isEmpty()) {
    for (ColumnDescriptor columnDescriptor : columns) {
      for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
        if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
          columnMetadata.put(columnDescriptor, metadata);
        }
      }
    }
  }
  this.codecFactory = new ParquetCodecFactory(configuration);
}
Developer: y-lan, Project: presto, Lines: 22, Source: ParquetFileReader.java
Example 8: showDetails
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
  Map<String, Object> chunks = new LinkedHashMap<String, Object>();
  for (ColumnChunkMetaData cmeta : ccmeta) {
    String[] path = cmeta.getPath().toArray();
    Map<String, Object> current = chunks;
    for (int i = 0; i < path.length - 1; ++i) {
      String next = path[i];
      if (!current.containsKey(next)) {
        current.put(next, new LinkedHashMap<String, Object>());
      }
      current = (Map<String, Object>) current.get(next);
    }
    current.put(path[path.length - 1], cmeta);
  }
  showColumnChunkDetails(out, chunks, 0);
}
Developer: wesleypeck, Project: parquet-tools, Lines: 21, Source: MetadataUtils.java
Example 9: VarCharColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varCharVector = v;
  mutator = v.getMutator();
}
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java
Example 10: NullableVarCharColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarCharVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  vector = v;
  this.mutator = vector.getMutator();
}
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java
Example 11: VarBinaryColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varBinaryVector = v;
  mutator = v.getMutator();
}
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java
Example 12: NullableVarBinaryColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarBinaryVector v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  nullableVarBinaryVector = v;
  mutator = v.getMutator();
}
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java
Example 13: VarLengthColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarLengthColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
  } else {
    usingDictionary = false;
  }
}
Developer: skhalifa, Project: QDrill, Lines: 12, Source: VarLengthColumn.java
Example 14: loadDictionaryIfExists
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
    final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
  if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
    f.seek(columnChunkMetaData.getDictionaryPageOffset());
    final PageHeader pageHeader = Util.readPageHeader(f);
    assert pageHeader.type == PageType.DICTIONARY_PAGE;
    final DrillBuf dictionaryData = allocateDictionaryBuffer(pageHeader.getUncompressed_page_size());
    if (parentColumnReader.columnChunkMetaData.getCodec() == CompressionCodecName.UNCOMPRESSED) {
      dataReader.loadPage(dictionaryData, pageHeader.compressed_page_size);
    } else {
      final DrillBuf compressedData = allocateTemporaryBuffer(pageHeader.compressed_page_size);
      try {
        dataReader.loadPage(compressedData, pageHeader.compressed_page_size);
        DirectBytesDecompressor decompressor = codecFactory.getDecompressor(parentColumnReader.columnChunkMetaData
            .getCodec());
        decompressor.decompress(
            compressedData,
            pageHeader.compressed_page_size,
            dictionaryData,
            pageHeader.getUncompressed_page_size());
      } finally {
        compressedData.release();
      }
    }
    DictionaryPage page = new DictionaryPage(
        asBytesInput(dictionaryData, 0, pageHeader.uncompressed_page_size),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        parquet.column.Encoding.valueOf(pageHeader.dictionary_page_header.encoding.name())
    );
    this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
  }
}
Developer: skhalifa, Project: QDrill, Lines: 38, Source: PageReader.java
Example 15: FixedWidthRepeatedReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
FixedWidthRepeatedReader(ParquetRecordReader parentReader, ColumnReader dataReader, int dataTypeLengthInBytes,
    int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength,
    RepeatedValueVector valueVector, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, valueVector, schemaElement);
  this.castedRepeatedVector = valueVector;
  this.dataTypeLengthInBytes = dataTypeLengthInBytes;
  this.dataReader = dataReader;
  this.dataReader.pageReader.clear();
  this.dataReader.pageReader = this.pageReader;
  // this is not in the reset method because it needs to be initialized only for the very first page read
  // in all other cases, if a read ends at a page boundary we will need to keep track of this flag and not
  // clear it at the start of the next read loop
  notFishedReadingList = false;
}
Developer: skhalifa, Project: QDrill, Lines: 13, Source: FixedWidthRepeatedReader.java
Example 16: VarLengthValuesColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarLengthValuesColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  variableWidthVector = (VariableWidthVector) valueVec;
  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
  } else {
    usingDictionary = false;
  }
}
Developer: skhalifa, Project: QDrill, Lines: 13, Source: VarLengthValuesColumn.java
Example 17: NullableColumnReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
    SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  castedBaseVector = (BaseDataValueVector) v;
  castedVectorMutator = (NullableVectorDefinitionSetter) v.getMutator();
  totalDefinitionLevelsRead = 0;
}
Developer: skhalifa, Project: QDrill, Lines: 8, Source: NullableColumnReader.java
Example 18: ColumnChunkIncPageReader
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public ColumnChunkIncPageReader(ColumnChunkMetaData metaData, ColumnDescriptor columnDescriptor, FSDataInputStream in) {
  this.metaData = metaData;
  this.columnDescriptor = columnDescriptor;
  this.size = metaData.getTotalSize();
  this.fileOffset = metaData.getStartingPos();
  this.in = in;
  this.decompressor = codecFactory.getDecompressor(metaData.getCodec());
}
Developer: skhalifa, Project: QDrill, Lines: 9, Source: ColumnChunkIncReadStore.java
Example 19: addColumn
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public void addColumn(ColumnDescriptor descriptor, ColumnChunkMetaData metaData) throws IOException {
  FSDataInputStream in = fs.open(path);
  streams.add(in);
  in.seek(metaData.getStartingPos());
  ColumnChunkIncPageReader reader = new ColumnChunkIncPageReader(metaData, descriptor, in);
  columns.put(descriptor, reader);
}
Developer: skhalifa, Project: QDrill, Lines: 9, Source: ColumnChunkIncReadStore.java
Example 20: readNextRowGroup
import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
/**
 * Reads all the columns requested from the row group at the current file position.
 *
 * @return the PageReadStore which can provide PageReaders for each column.
 * @throws IOException if an error occurs while reading
 */
public PageReadStore readNextRowGroup() throws IOException {
  if (currentBlock == blocks.size()) {
    return null;
  }
  BlockMetaData block = blocks.get(currentBlock);
  if (block.getRowCount() == 0) {
    throw new RuntimeException("Illegal row group of 0 rows");
  }
  ColumnChunkPageReadStore columnChunkPageReadStore = new ColumnChunkPageReadStore(block.getRowCount());
  // prepare the list of consecutive chunks to read them in one scan
  List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
  ConsecutiveChunkList currentChunks = null;
  for (ColumnChunkMetaData mc : block.getColumns()) {
    ColumnPath pathKey = mc.getPath();
    BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
    ColumnDescriptor columnDescriptor = paths.get(pathKey);
    if (columnDescriptor != null) {
      long startingPos = mc.getStartingPos();
      // first chunk or not consecutive => new list
      if (currentChunks == null || currentChunks.endPos() != startingPos) {
        currentChunks = new ConsecutiveChunkList(startingPos);
        allChunks.add(currentChunks);
      }
      currentChunks.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int) mc.getTotalSize()));
    }
  }
  // actually read all the chunks
  for (ConsecutiveChunkList consecutiveChunks : allChunks) {
    final List<Chunk> chunks = consecutiveChunks.readAll(f);
    for (Chunk chunk : chunks) {
      columnChunkPageReadStore.addColumn(chunk.descriptor.col, chunk.readAllPages());
    }
  }
  ++currentBlock;
  return columnChunkPageReadStore;
}
Developer: grokcoder, Project: pbase, Lines: 43, Source: ParquetFileReader.java
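readNextRowGroup() is intended to be called in a loop until it returns null. A minimal sketch of that loop follows, assuming a ParquetFileReader instance (`reader`) that has already been set up with the blocks and column paths it should read; what happens inside the loop is illustrative only.
// Hedged sketch: iterate row groups until the reader is exhausted.
PageReadStore rowGroup;
while ((rowGroup = reader.readNextRowGroup()) != null) {
  long rows = rowGroup.getRowCount();
  // Hand each requested column's PageReader to a ColumnReader/record assembler here.
  System.out.println("row group with " + rows + " rows");
}
reader.close();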
Note: the parquet.hadoop.metadata.ColumnChunkMetaData class examples in this article are compiled from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by various developers, and copyright remains with the original authors. Please consult the corresponding project's license before distributing or reusing the code; do not republish without permission.