This article collects typical usage examples of the Java class parquet.schema.Type: what the class is for, and how it is used in real code.
The Type class belongs to the parquet.schema package. Twenty code examples are presented below, sorted by popularity.
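Before the examples, here is a minimal orientation sketch of the Type API that recurs throughout them: parse a message schema, walk its fields, branch on isPrimitive(), and recurse into nested groups via asGroupType(). The class name and schema string are invented for illustration; the API calls themselves all appear in the examples below.

import java.util.List;

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.Type;

public class TypeTour {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; optional group name { optional binary first (UTF8); } }");
    printFields(schema.getFields(), "");
  }

  // Print each field's repetition and name, recursing into nested group types.
  private static void printFields(List<Type> fields, String indent) {
    for (Type field : fields) {
      System.out.println(indent + field.getRepetition() + " " + field.getName());
      if (!field.isPrimitive()) {
        printFields(field.asGroupType().getFields(), indent + "  ");
      }
    }
  }
}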
Example 1: newSchema

import parquet.schema.Type; // import the dependent package/class

private void newSchema() throws IOException {
  List<Type> types = Lists.newArrayList();
  for (MaterializedField field : batchSchema) {
    // skip the internal partition-comparator column
    if (field.getPath().equals(SchemaPath.getSimplePath(WriterPrel.PARTITION_COMPARATOR_FIELD))) {
      continue;
    }
    types.add(getType(field));
  }
  schema = new MessageType("root", types);

  int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / this.schema.getColumns().size() / 5);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(this.oContext,
      codecFactory.getCompressor(codec, pageSize),
      schema,
      initialBlockBufferSize);
  int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize));
  store = new ColumnWriteStoreV1(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}

Developer: skhalifa | Project: QDrill | Lines: 22 | Source: ParquetRecordWriter.java
Example 2: getType

import parquet.schema.Type; // import the dependent package/class

private parquet.schema.Type getType(MaterializedField field) {
  MinorType minorType = field.getType().getMinorType();
  DataMode dataMode = field.getType().getMode();
  switch (minorType) {
    case MAP:
      // a Drill map becomes a Parquet group type; convert each child field recursively
      List<parquet.schema.Type> types = Lists.newArrayList();
      for (MaterializedField childField : field.getChildren()) {
        types.add(getType(childField));
      }
      return new GroupType(dataMode == DataMode.REPEATED ? Repetition.REPEATED : Repetition.OPTIONAL, field.getLastName(), types);
    case LIST:
      throw new UnsupportedOperationException("Unsupported type " + minorType);
    default:
      return getPrimitiveType(field);
  }
}

Developer: skhalifa | Project: QDrill | Lines: 17 | Source: ParquetRecordWriter.java
Example 3: writeTuple

import parquet.schema.Type; // import the dependent package/class

private void writeTuple(Tuple tuple, GroupType type) {
  for (int index = 0; index < type.getFieldCount(); index++) {
    Type fieldType = type.getType(index);
    String fieldName = fieldType.getName();
    // null fields must be omitted entirely (no startField/endField events)
    if (tuple.isNull(index)) {
      continue;
    }
    recordConsumer.startField(fieldName, index);
    if (fieldType.isPrimitive()) {
      tuple.writePrimitiveValue(recordConsumer, index, (PrimitiveType) fieldType);
    } else {
      recordConsumer.startGroup();
      writeTuple(tuple.getTuple(index), fieldType.asGroupType());
      recordConsumer.endGroup();
    }
    recordConsumer.endField(fieldName, index);
  }
}

Developer: EXASOL | Project: hadoop-etl-udfs | Lines: 20 | Source: TupleWriter.java
Example 4: initScanFilter

import parquet.schema.Type; // import the dependent package/class

/**
 * Initialize the scan filter from the read schema attached to the scan.
 * @param scan the scan whose SCAN_TABLE_SCHEMA attribute carries the Parquet message schema
 */
public void initScanFilter(Scan scan) {
  try {
    // the scan or its schema attribute may be absent
    byte[] schemaBytes = scan == null ? null : scan.getAttribute(HConstants.SCAN_TABLE_SCHEMA);
    String schema = schemaBytes == null ? null : new String(schemaBytes);
    if (schema != null && !schema.isEmpty()) {
      MessageType readSchema = MessageTypeParser.parseMessageType(schema);
      List<Type> types = readSchema.getFields();
      for (Type type : types) {
        String columnName = type.getName();
        if (columnName.startsWith("cf")) { // strip the column-family prefix (e.g. "cf:") to get the real column name
          columnName = columnName.substring(3);
        }
        filterColumns.add(columnName.getBytes());
      }
    }
  } catch (Exception e) {
    // TODO: send the exception back to the client
    LOG.error("Failed to parse the message schema", e);
  }
}

Developer: grokcoder | Project: pbase | Lines: 25 | Source: PMemStoreImpl.java
Example 5: groupToCells

import parquet.schema.Type; // import the dependent package/class

/**
 * Transform the data in a group into cells (the List<Cell> backing an
 * {@link org.apache.hadoop.hbase.client.Result}).
 * @param group the Parquet group to convert
 * @return the cells extracted from the group; empty if the group is null
 */
public static List<Cell> groupToCells(Group group) {
  List<Cell> cells = new LinkedList<>();
  if (group != null) {
    GroupType groupType = group.getType();
    List<Type> types = groupType.getFields();
    byte[] rowKey = group.getBinary(HConstants.ROW_KEY, 0).getBytes();
    long timestamp = group.getLong(HConstants.TIME_STAMP, 0);
    for (Type t : types) {
      if (!t.getName().equals(HConstants.ROW_KEY) && !t.getName().equals(HConstants.TIME_STAMP)) {
        String name = t.getName();
        String[] names = name.split(":"); // field names are "family:qualifier"
        if (names.length == 2) {
          byte[] value = group.getBinary(name, 0).getBytes();
          Cell cell = new KeyValue(rowKey, names[0].getBytes(), names[1].getBytes(), timestamp, value);
          cells.add(cell);
        }
      }
    }
  }
  return cells;
}

Developer: grokcoder | Project: pbase | Lines: 31 | Source: PFileReader.java
Example 6: writeRecordFields

import parquet.schema.Type; // import the dependent package/class

private void writeRecordFields(GroupType schema, Schema tajoSchema,
    Tuple tuple) {
  List<Type> fields = schema.getFields();
  // Parquet ignores Tajo NULL_TYPE columns, so the index may differ.
  int index = 0;
  for (int tajoIndex = 0; tajoIndex < tajoSchema.size(); ++tajoIndex) {
    Column column = tajoSchema.getColumn(tajoIndex);
    if (column.getDataType().getType() == TajoDataTypes.Type.NULL_TYPE) {
      continue;
    }
    Datum datum = tuple.get(tajoIndex);
    Type fieldType = fields.get(index);
    if (!tuple.isNull(tajoIndex)) {
      recordConsumer.startField(fieldType.getName(), index);
      writeValue(fieldType, column, datum);
      recordConsumer.endField(fieldType.getName(), index);
    } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
      throw new RuntimeException("Null-value for required field: " +
          column.getSimpleName());
    }
    ++index;
  }
}

Developer: gruter | Project: tajo-cdh | Lines: 24 | Source: TajoWriteSupport.java
Example 7: toString

import parquet.schema.Type; // import the dependent package/class

public String toString(String indent) {
  StringBuilder result = new StringBuilder();
  int i = 0;
  for (Type field : this.schema.getFields()) {
    String name = field.getName();
    List<Object> values = this.data[i];
    for (Object value : values) {
      result.append(indent).append(name);
      if (value == null) {
        result.append(": NULL\n");
      } else if (value instanceof Group) {
        // nested groups are rendered recursively with a deeper indent
        result.append("\n").append(((ParquetGroup) value).toString(indent + "  "));
      } else {
        result.append(": ").append(value.toString()).append("\n");
      }
    }
    i++;
  }
  return result.toString();
}

Developer: apache | Project: incubator-gobblin | Lines: 21 | Source: ParquetGroup.java
Example 8: convertField

import parquet.schema.Type; // import the dependent package/class

@Override
Object convertField(JsonElement value) {
  ParquetGroup r1 = new ParquetGroup((GroupType) schema());
  JsonObject inputRecord = value.getAsJsonObject();
  for (Map.Entry<String, JsonElement> entry : inputRecord.entrySet()) {
    String key = entry.getKey();
    JsonElementConverter converter = this.converters.get(key);
    Object convertedValue = converter.convert(entry.getValue());
    boolean valueIsNull = convertedValue == null;
    Type.Repetition repetition = converter.jsonSchema.optionalOrRequired();
    // null values for optional fields are simply omitted from the group
    if (valueIsNull && repetition.equals(OPTIONAL)) {
      continue;
    }
    r1.add(key, convertedValue);
  }
  return r1;
}

Developer: apache | Project: incubator-gobblin | Lines: 18 | Source: JsonElementConversionFactory.java
Example 9: buildSchema

import parquet.schema.Type; // import the dependent package/class

private Type buildSchema() {
  JsonArray inputSchema = this.jsonSchema.getDataTypeValues();
  List<Type> parquetTypes = new ArrayList<>();
  for (JsonElement element : inputSchema) {
    JsonObject map = (JsonObject) element;
    JsonSchema elementSchema = new JsonSchema(map);
    String columnName = elementSchema.getColumnName();
    JsonElementConverter converter = JsonElementConversionFactory.getConverter(elementSchema, false);
    Type schemaType = converter.schema();
    this.converters.put(columnName, converter);
    parquetTypes.add(schemaType);
  }
  String docName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case ROOT:
      return new MessageType(docName, parquetTypes);
    case CHILD:
      return new GroupType(this.jsonSchema.optionalOrRequired(), docName, parquetTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}

Developer: apache | Project: incubator-gobblin | Lines: 23 | Source: JsonElementConversionFactory.java
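The ROOT/CHILD branch above reflects a Parquet rule visible across these examples: only the top level of a schema is a MessageType, while any nested record is an ordinary GroupType. A minimal hand-built equivalent, using the same constructors as Examples 1 and 20 (the class and field names here are invented):

import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.PrimitiveType;
import parquet.schema.Type;

public class SchemaByHand {
  public static void main(String[] args) {
    // Only the root is a MessageType; nested records are plain GroupTypes.
    PrimitiveType id = new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.INT64, "id");
    PrimitiveType city = new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "city");
    GroupType address = new GroupType(Type.Repetition.OPTIONAL, "address", city);
    MessageType doc = new MessageType("doc", id, address);
    System.out.println(doc); // toString() pretty-prints the schema
  }
}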
Example 10: AvroUnionConverter

import parquet.schema.Type; // import the dependent package/class

public AvroUnionConverter(ParentValueContainer parent, Type parquetSchema,
    Schema avroSchema) {
  this.parent = parent;
  GroupType parquetGroup = parquetSchema.asGroupType();
  this.memberConverters = new Converter[parquetGroup.getFieldCount()];
  int parquetIndex = 0;
  for (int index = 0; index < avroSchema.getTypes().size(); index++) {
    Schema memberSchema = avroSchema.getTypes().get(index);
    if (!memberSchema.getType().equals(Schema.Type.NULL)) {
      Type memberType = parquetGroup.getType(parquetIndex);
      memberConverters[parquetIndex] = newConverter(memberSchema, memberType, new ParentValueContainer() {
        @Override
        void add(Object value) {
          Preconditions.checkArgument(memberValue == null, "Union is resolving to more than one type");
          memberValue = value;
        }
      });
      parquetIndex++; // note: for null members the parquetIndex is not increased
    }
  }
}

Developer: Datasio | Project: cascalog-avro-parquet | Lines: 23 | Source: HMAvroConverter.java
Example 11: writeRecordFields

import parquet.schema.Type; // import the dependent package/class

private void writeRecordFields(GroupType schema, Schema avroSchema,
    Map record) {
  List<Type> fields = schema.getFields();
  List<Schema.Field> avroFields = avroSchema.getFields();
  int index = 0; // Parquet ignores Avro nulls, so the index may differ
  for (int avroIndex = 0; avroIndex < avroFields.size(); avroIndex++) {
    Schema.Field avroField = avroFields.get(avroIndex);
    if (avroField.schema().getType().equals(Schema.Type.NULL)) {
      continue;
    }
    Type fieldType = fields.get(index);
    Object value = record.get(fieldKeyword.invoke(avroField));
    if (value != null) {
      recordConsumer.startField(fieldType.getName(), index);
      writeValue(fieldType, avroField.schema(), value);
      recordConsumer.endField(fieldType.getName(), index);
    } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
      throw new RuntimeException("Null-value for required field: " + avroField.name());
    }
    index++;
  }
}

Developer: Datasio | Project: cascalog-avro-parquet | Lines: 23 | Source: HMAvroWriteSupport.java
Example 12: convertField

import parquet.schema.Type; // import the dependent package/class

private static SchemaDescription.Field convertField( SchemaDescription schema, Type t ) {
  boolean allowNull = t.getRepetition() != Repetition.REQUIRED;
  switch ( t.asPrimitiveType().getPrimitiveTypeName() ) {
    case BINARY:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_STRING, allowNull );
    case BOOLEAN:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_BOOLEAN, allowNull );
    case DOUBLE:
    case FLOAT:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_NUMBER, allowNull );
    case INT32:
    case INT64:
      // date-like logical annotations map to TYPE_DATE; plain integers to TYPE_INTEGER
      if ( t.getOriginalType() == OriginalType.DATE || t.getOriginalType() == OriginalType.TIME_MILLIS
          || t.getOriginalType() == OriginalType.TIMESTAMP_MILLIS ) {
        return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_DATE, allowNull );
      } else {
        return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_INTEGER, allowNull );
      }
    case INT96:
      return schema.new Field( t.getName(), t.getName(), ValueMetaInterface.TYPE_DATE, allowNull );
    default:
      throw new RuntimeException( "Undefined type: " + t );
  }
}

Developer: pentaho | Project: pentaho-hadoop-shims | Lines: 25 | Source: ParquetConverter.java
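The mapping above keys on two properties of a primitive Type: the physical storage type (getPrimitiveTypeName()) and the optional logical annotation (getOriginalType(), which is null when absent). A small sketch reading both, plus nullability from the repetition; the class name and schema string are invented for illustration:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.Type;

public class InspectPrimitives {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m { required int64 ts; optional binary name (UTF8); }");
    for (Type t : schema.getFields()) {
      boolean allowNull = t.getRepetition() != Type.Repetition.REQUIRED;
      System.out.println(t.getName()
          + " physical=" + t.asPrimitiveType().getPrimitiveTypeName()
          + " logical=" + t.getOriginalType() // null for the plain int64 field
          + " nullable=" + allowNull);
    }
  }
}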
Example 13: init

import parquet.schema.Type; // import the dependent package/class

@Override
public ReadContext init( InitContext context ) {
  String schemaStr = context.getConfiguration().get( ParquetConverter.PARQUET_SCHEMA_CONF_KEY );
  if ( schemaStr == null ) {
    throw new RuntimeException( "Schema not defined in the PentahoParquetSchema key" );
  }
  schema = SchemaDescription.unmarshall( schemaStr );
  converter = new ParquetConverter( schema );

  // get all fields from the file's schema
  MessageType fileSchema = context.getFileSchema();
  List<Type> newFields = new ArrayList<>();
  // keep only the requested fields (projection pushdown)
  for ( SchemaDescription.Field f : schema ) {
    Type origField = fileSchema.getFields().get( fileSchema.getFieldIndex( f.formatFieldName ) );
    newFields.add( origField );
  }
  if ( newFields.isEmpty() ) {
    throw new RuntimeException( "Fields should be declared" );
  }
  MessageType newSchema = new MessageType( fileSchema.getName(), newFields );
  return new ReadContext( newSchema, new HashMap<>() );
}

Developer: pentaho | Project: pentaho-hadoop-shims | Lines: 26 | Source: PentahoParquetReadSupport.java
Example 14: createConverter

import parquet.schema.Type; // import the dependent package/class

private Converter createConverter(Type field) {
  if (field.isPrimitive()) {
    OriginalType otype = field.getOriginalType();
    if (otype != null) {
      switch (otype) {
        case MAP: break;
        case LIST: break;
        case UTF8: return new StringConverter(field.getName());
        case MAP_KEY_VALUE: break;
        case ENUM: break;
      }
    }
    return new SimplePrimitiveConverter(field.getName());
  }
  return new SimpleRecordConverter(field.asGroupType(), field.getName(), this);
}

Developer: wesleypeck | Project: parquet-tools | Lines: 19 | Source: SimpleRecordConverter.java
Example 15: getParquetType

import parquet.schema.Type; // import the dependent package/class

private Type getParquetType(HDFSColumnHandle column, MessageType messageType)
{
    if (messageType.containsField(column.getName())) {
        return messageType.getType(column.getName());
    }
    // HDFS column names are lowercased, while the file schema may use mixed case, so fall back to a case-insensitive match
    for (Type type : messageType.getFields()) {
        if (type.getName().equalsIgnoreCase(column.getName())) {
            return type;
        }
    }
    return null;
}

Developer: dbiir | Project: paraflow | Lines: 14 | Source: HDFSPageSourceProvider.java
Example 16: getFieldNames

import parquet.schema.Type; // import the dependent package/class

/**
 * Get the field (column) names.
 * @return the list of field names
 */
@Override
public List<String> getFieldNames ()
{
    List<String> names = new ArrayList<String>();
    for (Type type : this.fields)
    {
        names.add(type.getName());
    }
    return names;
}

Developer: dbiir | Project: rainbow | Lines: 15 | Source: ParquetMetadataStat.java
Example 17: getType

import parquet.schema.Type; // import the dependent package/class

private static Type getType(String[] pathSegments, int depth, MessageType schema) {
  Type type = schema.getType(Arrays.copyOfRange(pathSegments, 0, depth + 1));
  if (depth + 1 == pathSegments.length) {
    return type;
  } else {
    Preconditions.checkState(!type.isPrimitive());
    // rebuild the enclosing group around the single child resolved one level deeper
    return new GroupType(type.getRepetition(), type.getName(), getType(pathSegments, depth + 1, schema));
  }
}

Developer: skhalifa | Project: QDrill | Lines: 10 | Source: DrillParquetReader.java
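Example 17 relies on MessageType resolving a path of segment names in one call, as shown by its use of Arrays.copyOfRange. A minimal usage sketch of that lookup; the class name and schema string are invented for illustration:

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.schema.Type;

public class PathLookup {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m { optional group a { optional int32 b; } }");
    // resolve the nested field "a.b" by its path segments
    Type nested = schema.getType(new String[] {"a", "b"});
    System.out.println(nested.getName() + ": " + nested.asPrimitiveType().getPrimitiveTypeName());
  }
}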
Example 18: getOriginalType

import parquet.schema.Type; // import the dependent package/class

private OriginalType getOriginalType(Type type, String[] path, int depth) {
  if (type.isPrimitive()) {
    return type.getOriginalType();
  }
  Type t = ((GroupType) type).getType(path[depth]);
  return getOriginalType(t, path, depth + 1);
}

Developer: skhalifa | Project: QDrill | Lines: 8 | Source: Metadata.java
Example 19: isComplex

import parquet.schema.Type; // import the dependent package/class

private static boolean isComplex(ParquetMetadata footer) {
  MessageType schema = footer.getFileMetaData().getSchema();
  // a schema is complex if any top-level field is non-primitive...
  for (Type type : schema.getFields()) {
    if (!type.isPrimitive()) {
      return true;
    }
  }
  // ...or if any column is repeated
  for (ColumnDescriptor col : schema.getColumns()) {
    if (col.getMaxRepetitionLevel() > 0) {
      return true;
    }
  }
  return false;
}

Developer: skhalifa | Project: QDrill | Lines: 16 | Source: ParquetScanBatchCreator.java
Example 20: testExportParquetBoolean

import parquet.schema.Type; // import the dependent package/class

@Test
public void testExportParquetBoolean() throws Exception {
  List<Integer> dynamicCols = new ArrayList<>();
  List<Type> schemaTypes = new ArrayList<>();
  schemaTypes.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BOOLEAN, "b1", null));
  schemaTypes.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BOOLEAN, "b2", null));
  schemaTypes.add(new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BOOLEAN, "booleannull", null));

  List<List<Object>> dataSet = new ArrayList<>();
  List<Object> row = new ArrayList<>();
  row.add(Boolean.TRUE);
  row.add(Boolean.FALSE);
  row.add(null);
  addRow(dataSet, row);
  ExaIterator iter = new ExaIteratorDummy(dataSet);

  File tempFile = new File(testFolder.getRoot(), UUID.randomUUID().toString().replaceAll("-", "") + ".parq");
  HdfsSerDeExportService.exportToParquetTable(testFolder.getRoot().toString(), "hdfs", false, null, tempFile.getName(), null, "uncompressed", schemaTypes, FIRST_DATA_COLUMN, dynamicCols, iter);

  ExaIterator ctx = mock(ExaIterator.class);
  List<HCatTableColumn> columns = new ArrayList<>();
  columns.add(new HCatTableColumn("b1", "boolean"));
  columns.add(new HCatTableColumn("b2", "boolean"));
  columns.add(new HCatTableColumn("booleannull", "boolean"));
  List<HCatTableColumn> partitionColumns = null;
  importFile(ctx, columns, partitionColumns, tempFile.getCanonicalPath(), PARQUET_INPUT_FORMAT_CLASS_NAME, PARQUET_SERDE_CLASS_NAME);

  int expectedNumRows = 1;
  verify(ctx, times(expectedNumRows)).emit(anyVararg());
  verify(ctx, times(1)).emit(
      eq(Boolean.TRUE),
      eq(Boolean.FALSE),
      eq(null)
  );
}

Developer: EXASOL | Project: hadoop-etl-udfs | Lines: 39 | Source: HdfsSerDeExportServiceTest.java
Note: the parquet.schema.Type examples in this article were collected from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and redistribution or reuse is subject to each project's license. Do not reproduce this article without permission.