This article collects typical usage examples of the Java class parquet.io.api.Binary. If you are unsure what the Binary class does and how to use it, the hand-picked class code examples below should help.
The Binary class belongs to the parquet.io.api package. Twenty code examples of the Binary class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java code examples.
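Before the examples, here is a minimal sketch of the Binary factory and accessor methods that recur throughout the snippets below (fromString, fromByteArray, getBytes, length, toByteBuffer, toStringUsingUTF8). The class name BinaryBasics is made up for illustration.

import java.nio.ByteBuffer;

import parquet.io.api.Binary;

public class BinaryBasics {
  public static void main(String[] args) {
    // Build a Binary from a String (UTF-8 encoded) or from raw bytes.
    Binary fromText = Binary.fromString("AUTOMOBILE");
    Binary fromBytes = Binary.fromByteArray(new byte[] { 0x01, 0x02, 0x03 });

    // Inspect length and contents.
    int len = fromText.length();                 // length in bytes
    byte[] bytes = fromBytes.getBytes();         // the value as a byte array
    ByteBuffer buffer = fromText.toByteBuffer(); // a ByteBuffer over the value
    String text = fromText.toStringUsingUTF8();  // decode back to a Java String

    System.out.println(len + " " + text + " " + bytes.length + " " + buffer.remaining());
  }
}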
Example 1: readField

import parquet.io.api.Binary; // import the required package/class

@Override
protected void readField(long recordsToReadInThisPass) {
  this.bytebuf = pageReader.pageData;
  if (usingDictionary) {
    NullableVarBinaryVector.Mutator mutator = castedVector.getMutator();
    Binary currDictValToWrite;
    for (int i = 0; i < recordsReadInThisIteration; i++) {
      currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
      mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
          currDictValToWrite.length());
    }
    // Set the write index. The next page that gets read might be a page that does not use dictionary encoding,
    // and we will go into the else condition below. The readField method of the parent class requires the
    // writer index to be set correctly.
    int writerIndex = castedBaseVector.getBuffer().writerIndex();
    castedBaseVector.getBuffer().setIndex(0, writerIndex + (int) readLength);
  } else {
    super.readField(recordsToReadInThisPass);
    // TODO - replace this with a fixed binary type in Drill
    // for now we need to write the lengths of each value
    int byteLength = dataTypeLengthInBits / 8;
    for (int i = 0; i < recordsToReadInThisPass; i++) {
      castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
    }
  }
}

Developer: skhalifa, Project: QDrill, Lines: 27, Source: NullableFixedByteAlignedReaders.java
Example 2: getCustomerDataSet

import parquet.io.api.Binary; // import the required package/class

private static DataSet<Tuple2<Void, CustomerTable>> getCustomerDataSet(ExecutionEnvironment env) throws
    IOException {
  Job job = Job.getInstance();
  ParquetInputFormat.setReadSupportClass(job, ThriftReadSupport.class);
  job.getConfiguration().set("parquet.thrift.column.filter", "ID;MKTSEGMENT");
  HadoopInputFormat hadoopInputFormat = new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class,
      CustomerTable.class, job);

  // Filter on market segment "AUTOMOBILE"
  BinaryColumn mktsegment = binaryColumn("MKTSEGMENT");
  FilterPredicate mktsegmentPred = eq(mktsegment, Binary.fromString("AUTOMOBILE"));
  ParquetInputFormat.setFilterPredicate(job.getConfiguration(), mktsegmentPred);

  ParquetThriftInputFormat.addInputPath(job, new Path(customerPath));

  DataSet<Tuple2<Void, CustomerTable>> data = env.createInput(hadoopInputFormat);

  return data;
}

Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 22, Source: TPCHQuery3Parquet.java
Example 3: readThrift

import parquet.io.api.Binary; // import the required package/class

public static DataSet<Tuple2<Void, Person>> readThrift(ExecutionEnvironment env, String inputPath) throws
    IOException {
  Job job = Job.getInstance();

  HadoopInputFormat hadoopInputFormat = new HadoopInputFormat(new ParquetThriftInputFormat(), Void.class, Person
      .class, job);

  // schema projection: read only the name, id, email and phone/number columns
  job.getConfiguration().set("parquet.thrift.column.filter", "name;id;email;phone/number");

  FileInputFormat.addInputPath(job, new Path(inputPath));

  // predicate push down: read only persons with name = "Felix"
  BinaryColumn name = binaryColumn("name");
  FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
  ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

  DataSet<Tuple2<Void, Person>> data = env.createInput(hadoopInputFormat);

  return data;
}

Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 22, Source: ParquetThriftExample.java
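As a hypothetical usage note (not part of the original project), the method above could be driven from a Flink batch job roughly as follows; the input path and the print sink are assumptions.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical driver for readThrift; path and sink are illustrative only.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Void, Person>> persons = readThrift(env, "hdfs:///tmp/parquet/persons");
persons.print(); // collects the filtered records and triggers execution of the batch job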
Example 4: addBinary

import parquet.io.api.Binary; // import the required package/class

@Override
final public void addBinary(Binary value) {
  if (fixedClass == null) {
    parent.add(value.getBytes());
  } else {
    if (fixedClassCtor == null) {
      throw new IllegalArgumentException(
          "fixedClass specified but fixedClassCtor is null.");
    }
    try {
      Object fixed = fixedClassCtor.newInstance(value.getBytes());
      parent.add(fixed);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}

Developer: Datasio, Project: cascalog-avro-parquet, Lines: 18, Source: HMAvroConverter.java
Example 5: dateFromInt96

import parquet.io.api.Binary; // import the required package/class

private static long dateFromInt96( Binary value ) {
  byte[] readBuffer = value.getBytes();
  if ( readBuffer.length != 12 ) {
    throw new RuntimeException( "Invalid byte array length for INT96" );
  }
  // INT96 is little endian: bytes 0-7 hold the time-of-day in nanos, bytes 8-11 the Julian day.
  long timeOfDayNanos =
      ( ( (long) readBuffer[7] << 56 ) + ( (long) ( readBuffer[6] & 255 ) << 48 )
          + ( (long) ( readBuffer[5] & 255 ) << 40 ) + ( (long) ( readBuffer[4] & 255 ) << 32 )
          + ( (long) ( readBuffer[3] & 255 ) << 24 ) + ( ( readBuffer[2] & 255 ) << 16 )
          + ( ( readBuffer[1] & 255 ) << 8 ) + ( ( readBuffer[0] & 255 ) << 0 ) );
  int julianDay =
      ( (int) ( readBuffer[11] & 255 ) << 24 ) + ( ( readBuffer[10] & 255 ) << 16 )
          + ( ( readBuffer[9] & 255 ) << 8 ) + ( ( readBuffer[8] & 255 ) << 0 );
  return ( julianDay - JULIAN_DAY_OF_EPOCH ) * 24L * 60L * 60L * 1000L + timeOfDayNanos / 1000000;
}

Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 19, Source: ParquetConverter.java
Example 6: addBinary

import parquet.io.api.Binary; // import the required package/class

@Override
public void addBinary(Binary value) {
  byte[] data = value.getBytes();
  if (data == null) {
    record.add(name, null);
    return;
  }

  try {
    // Try to decode the value as UTF-8 text first.
    CharBuffer buffer = UTF8_DECODER.decode(value.toByteBuffer());
    record.add(name, buffer.toString());
    return;
  } catch (Throwable th) {
    // not valid UTF-8; fall through and store the raw bytes
  }

  record.add(name, data);
}

Developer: wesleypeck, Project: parquet-tools, Lines: 20, Source: SimpleRecordConverter.java
Example 7: addBinary

import parquet.io.api.Binary; // import the required package/class

@Override
public void addBinary(Binary value) {
  holder.buffer = buf = buf.reallocIfNeeded(value.length());
  buf.setBytes(0, value.toByteBuffer());
  holder.start = 0;
  holder.end = value.length();
  writer.write(holder);
}

Developer: skhalifa, Project: QDrill, Lines: 9, Source: DrillParquetGroupConverter.java
Example 8: readField

import parquet.io.api.Binary; // import the required package/class

@Override
protected void readField(long recordsToReadInThisPass) {

  recordsReadInThisIteration = Math.min(pageReader.currentPageCount
      - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass);

  readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits;
  readLength = (int) Math.ceil(readLengthInBits / 8.0);

  if (usingDictionary) {
    VarBinaryVector.Mutator mutator = castedVector.getMutator();
    Binary currDictValToWrite = null;
    for (int i = 0; i < recordsReadInThisIteration; i++) {
      currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
      mutator.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0,
          currDictValToWrite.length());
    }
    // Set the write index. The next page that gets read might be a page that does not use dictionary encoding,
    // and we will go into the else condition below. The readField method of the parent class requires the
    // writer index to be set correctly.
    int writerIndex = castedVector.getBuffer().writerIndex();
    castedVector.getBuffer().setIndex(0, writerIndex + (int) readLength);
  } else {
    super.readField(recordsToReadInThisPass);
  }

  // TODO - replace this with a fixed binary type in Drill
  // now we need to write the lengths of each value
  int byteLength = dataTypeLengthInBits / 8;
  for (int i = 0; i < recordsToReadInThisPass; i++) {
    castedVector.getMutator().setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
  }
}

Developer: skhalifa, Project: QDrill, Lines: 33, Source: ParquetFixedWidthDictionaryReaders.java
Example 9: testAllFilter

import parquet.io.api.Binary; // import the required package/class

@Test
public void testAllFilter() throws Exception {
  BinaryColumn name = binaryColumn("name");

  FilterPredicate pred = eq(name, Binary.fromString("no matches"));

  List<Group> found = PhoneBookWriter.readFile(phonebookFile, FilterCompat.get(pred));
  assertEquals(new ArrayList<Group>(), found);
}

Developer: grokcoder, Project: pbase, Lines: 10, Source: TestRecordLevelFilters.java
Example 10: keep

import parquet.io.api.Binary; // import the required package/class

@Override
public boolean keep(Binary value) {
  if (value == null) {
    return false;
  }
  return value.toStringUsingUTF8().startsWith("p");
}

Developer: grokcoder, Project: pbase, Lines: 8, Source: TestRecordLevelFilters.java
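The keep method above presumably overrides parquet.filter2.predicate.UserDefinedPredicate. A minimal sketch of how such a record-level predicate might be wired into the filter2 API is shown below; the class name StartsWithP and the column name "name" are assumptions for illustration, and the statistics methods simply decline to skip row groups.

import java.io.Serializable;

import parquet.filter2.predicate.FilterApi;
import parquet.filter2.predicate.FilterPredicate;
import parquet.filter2.predicate.Statistics;
import parquet.filter2.predicate.UserDefinedPredicate;
import parquet.io.api.Binary;

// Hypothetical predicate that keeps records whose value starts with "p".
public class StartsWithP extends UserDefinedPredicate<Binary> implements Serializable {
  @Override
  public boolean keep(Binary value) {
    if (value == null) {
      return false;
    }
    return value.toStringUsingUTF8().startsWith("p");
  }

  @Override
  public boolean canDrop(Statistics<Binary> statistics) {
    return false; // conservative: never skip a row group based on min/max statistics
  }

  @Override
  public boolean inverseCanDrop(Statistics<Binary> statistics) {
    return false;
  }
}

// Usage sketch (column name is an assumption):
// FilterPredicate pred = FilterApi.userDefined(FilterApi.binaryColumn("name"), StartsWithP.class);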
Example 11: readValues

import parquet.io.api.Binary; // import the required package/class

@Override
public void readValues(ValuesReader valuesReader, int valueNumber, ParquetLevelReader definitionReader)
{
  for (int i = 0; i < valueNumber; i++) {
    if (definitionReader.readLevel() == descriptor.getMaxDefinitionLevel()) {
      Binary binary = valuesReader.readBytes();
      if (binary.length() != 0) {
        VARCHAR.writeSlice(blockBuilder, Slices.wrappedBuffer(binary.getBytes()));
        continue;
      }
    }
    blockBuilder.appendNull();
  }
}

Developer: y-lan, Project: presto, Lines: 15, Source: ParquetBinaryBuilder.java
Example 12: addBinary

import parquet.io.api.Binary; // import the required package/class

@Override
public void addBinary(Binary value)
{
  nulls[fieldIndex] = false;
  if (types[fieldIndex] == TIMESTAMP) {
    longs[fieldIndex] = ParquetTimestampUtils.getTimestampMillis(value);
  }
  else {
    slices[fieldIndex] = wrappedBuffer(value.getBytes());
  }
}

Developer: y-lan, Project: presto, Lines: 12, Source: ParquetHiveRecordCursor.java
Example 13: getTimestampMillis

import parquet.io.api.Binary; // import the required package/class

/**
 * Returns a GMT timestamp from a binary-encoded Parquet timestamp (12 bytes: Julian day + time-of-day nanos).
 *
 * @param timestampBinary INT96 Parquet timestamp
 * @return timestamp in millis, GMT timezone
 */
public static long getTimestampMillis(Binary timestampBinary)
{
  if (timestampBinary.length() != 12) {
    throw new PrestoException(HIVE_BAD_DATA, "Parquet timestamp must be 12 bytes, actual " + timestampBinary.length());
  }
  byte[] bytes = timestampBinary.getBytes();

  // little endian encoding - need to invert byte order
  long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
  int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]);

  return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND);
}

Developer: y-lan, Project: presto, Lines: 20, Source: ParquetTimestampUtils.java
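getTimestampMillis relies on two helpers that are not part of the excerpt. A sketch of their likely shape, reconstructed as an assumption from the arithmetic in Example 5 (2440588 is the Julian day number of the Unix epoch), is:

// Assumed helpers; reconstructed from the decoding in Example 5, not taken from the original file.
private static final int JULIAN_DAY_OF_EPOCH = 2440588;       // 1970-01-01 as a Julian day number
private static final long NANOS_PER_MILLISECOND = 1_000_000L;

private static long julianDayToMillis(int julianDay) {
  return (julianDay - JULIAN_DAY_OF_EPOCH) * 24L * 60L * 60L * 1000L;
}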
Example 14: testInvalidBinaryLength

import parquet.io.api.Binary; // import the required package/class

@Test
public void testInvalidBinaryLength()
{
  try {
    byte[] invalidLengthBinaryTimestamp = new byte[8];
    getTimestampMillis(Binary.fromByteArray(invalidLengthBinaryTimestamp));
  }
  catch (PrestoException e) {
    assertEquals(e.getErrorCode(), HIVE_BAD_DATA.toErrorCode());
    assertEquals(e.getMessage(), "Parquet timestamp must be 12 bytes, actual 8");
  }
}

Developer: y-lan, Project: presto, Lines: 13, Source: TestParquetTimestampUtils.java
Example 15: assertTimestampCorrect

import parquet.io.api.Binary; // import the required package/class

private static void assertTimestampCorrect(String timestampString)
{
  Timestamp timestamp = Timestamp.valueOf(timestampString);
  Binary timestampBytes = NanoTimeUtils.getNanoTime(timestamp, false).toBinary();
  long decodedTimestampMillis = getTimestampMillis(timestampBytes);
  assertEquals(decodedTimestampMillis, timestamp.getTime());
}

Developer: y-lan, Project: presto, Lines: 8, Source: TestParquetTimestampUtils.java
Example 16: readProtobuf

import parquet.io.api.Binary; // import the required package/class

public static DataSet<Tuple2<Void, Person.Builder>> readProtobuf(ExecutionEnvironment env, String inputPath)
    throws IOException {
  Job job = Job.getInstance();

  HadoopInputFormat hadoopInputFormat = new HadoopInputFormat(new ProtoParquetInputFormat(), Void.class, Person
      .Builder.class, job);

  FileInputFormat.addInputPath(job, new Path(inputPath));

  // native predicate push down: read only records which satisfy a given constraint
  BinaryColumn name = binaryColumn("name");
  FilterPredicate namePred = eq(name, Binary.fromString("Felix"));
  ParquetInputFormat.setFilterPredicate(job.getConfiguration(), namePred);

  // schema projection: don't read the type attribute of the phone field
  String projection = "message Person {\n" +
      "  required binary name (UTF8);\n" +
      "  required int32 id;\n" +
      "  optional binary email (UTF8);\n" +
      "  repeated group phone {\n" +
      "    required binary number (UTF8);\n" +
      "  }\n" +
      "}";
  ProtoParquetInputFormat.setRequestedProjection(job, projection);

  DataSet<Tuple2<Void, Person.Builder>> data = env.createInput(hadoopInputFormat);

  return data;
}

Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 30, Source: ParquetProtobufExample.java
Example 17: writeValue

import parquet.io.api.Binary; // import the required package/class

private void writeValue(Type fieldType, Column column, Datum datum) {
  switch (column.getDataType().getType()) {
    case BOOLEAN:
      recordConsumer.addBoolean((Boolean) datum.asBool());
      break;
    case BIT:
    case INT2:
    case INT4:
      recordConsumer.addInteger(datum.asInt4());
      break;
    case INT8:
      recordConsumer.addLong(datum.asInt8());
      break;
    case FLOAT4:
      recordConsumer.addFloat(datum.asFloat4());
      break;
    case FLOAT8:
      recordConsumer.addDouble(datum.asFloat8());
      break;
    case CHAR:
    case TEXT:
      recordConsumer.addBinary(Binary.fromString(datum.asChars()));
      break;
    case PROTOBUF:
    case BLOB:
    case INET4:
    case INET6:
      recordConsumer.addBinary(Binary.fromByteArray(datum.asByteArray()));
      break;
    default:
      break;
  }
}

Developer: gruter, Project: tajo-cdh, Lines: 34, Source: TajoWriteSupport.java
Example 18: addBinary

import parquet.io.api.Binary; // import the required package/class

@Override
final public void addBinary(Binary value) {
  try {
    ProtobufDatumFactory factory =
        ProtobufDatumFactory.get(dataType.getCode());
    Message.Builder builder = factory.newBuilder();
    builder.mergeFrom(value.getBytes());
    parent.add(factory.createDatum(builder));
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(e);
  }
}

Developer: gruter, Project: tajo-cdh, Lines: 13, Source: TajoRecordConverter.java
Example 19: add

import parquet.io.api.Binary; // import the required package/class

public void add(int fieldIndex, Binary value) {
  switch (this.getType().getType(fieldIndex).asPrimitiveType().getPrimitiveTypeName()) {
    case BINARY:
      this.add(fieldIndex, new BinaryValue(value));
      break;
    case INT96:
      this.add(fieldIndex, new Int96Value(value));
      break;
    default:
      throw new UnsupportedOperationException(
          this.getType().asPrimitiveType().getName() + " not supported for Binary");
  }
}

Developer: apache, Project: incubator-gobblin, Lines: 14, Source: ParquetGroup.java
Example 20: writeValue

import parquet.io.api.Binary; // import the required package/class

@SuppressWarnings("unchecked")
private void writeValue(Type type, Schema avroSchema, Object value) {
  Schema nonNullAvroSchema = AvroSchemaConverter.getNonNull(avroSchema);
  Schema.Type avroType = nonNullAvroSchema.getType();
  if (avroType.equals(Schema.Type.BOOLEAN)) {
    recordConsumer.addBoolean((Boolean) value);
  } else if (avroType.equals(Schema.Type.INT)) {
    recordConsumer.addInteger(((Number) value).intValue());
  } else if (avroType.equals(Schema.Type.LONG)) {
    recordConsumer.addLong(((Number) value).longValue());
  } else if (avroType.equals(Schema.Type.FLOAT)) {
    recordConsumer.addFloat(((Number) value).floatValue());
  } else if (avroType.equals(Schema.Type.DOUBLE)) {
    recordConsumer.addDouble(((Number) value).doubleValue());
  } else if (avroType.equals(Schema.Type.BYTES)) {
    recordConsumer.addBinary(Binary.fromByteArray((byte[]) value));
  } else if (avroType.equals(Schema.Type.STRING)) {
    recordConsumer.addBinary(fromAvroString(value));
  } else if (avroType.equals(Schema.Type.RECORD)) {
    writeRecord((GroupType) type, nonNullAvroSchema, (Map) value);
  } else if (avroType.equals(Schema.Type.ENUM)) {
    recordConsumer.addBinary(Binary.fromString(value.toString()));
  } else if (avroType.equals(Schema.Type.ARRAY)) {
    writeArray((GroupType) type, nonNullAvroSchema, (List) value);
  } else if (avroType.equals(Schema.Type.MAP)) {
    writeMap((GroupType) type, nonNullAvroSchema, (Map) value);
  } else if (avroType.equals(Schema.Type.UNION)) {
    writeUnion((GroupType) type, nonNullAvroSchema, value);
  } else if (avroType.equals(Schema.Type.FIXED)) {
    recordConsumer.addBinary(Binary.fromByteArray((byte[]) value));
  }
}

Developer: Datasio, Project: cascalog-avro-parquet, Lines: 33, Source: HMAvroWriteSupport.java
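The fromAvroString helper referenced above is not part of the excerpt. A plausible sketch, assuming it follows the common parquet-avro handling of Avro's Utf8 wrapper versus plain Strings, is:

// Assumed helper; requires org.apache.avro.util.Utf8 and parquet.io.api.Binary imports.
private Binary fromAvroString(Object value) {
  if (value instanceof Utf8) {
    Utf8 utf8 = (Utf8) value;
    return Binary.fromByteArray(utf8.getBytes(), 0, utf8.getByteLength());
  }
  return Binary.fromString(value.toString());
}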
Note: The parquet.io.api.Binary class examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and further distribution or use must follow the corresponding project's license. Do not republish without permission.