This article collects typical usage examples of the Java class org.apache.orc.OrcFile. If you have been wondering what exactly OrcFile is for, how to use it, or where to find real-world examples, the curated class examples below should help.
The OrcFile class belongs to the org.apache.orc package. 20 code examples are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
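Before turning to the excerpts, here is a minimal, self-contained sketch of the core OrcFile workflow: writerOptions/createWriter to write a file through a VectorizedRowBatch, and readerOptions/createReader to read it back. The path, schema, and row values are illustrative assumptions, not drawn from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcFileQuickStart {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/quickstart.orc"); // illustrative path
        TypeDescription schema = TypeDescription.fromString("struct<x:bigint>");

        // Write ten rows through a VectorizedRowBatch.
        Writer writer = OrcFile.createWriter(path,
            OrcFile.writerOptions(conf).setSchema(schema));
        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector x = (LongColumnVector) batch.cols[0];
        for (long value = 0; value < 10; value++) {
            x.vector[batch.size++] = value;
            if (batch.size == batch.getMaxSize()) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        if (batch.size != 0) {
            writer.addRowBatch(batch);
        }
        writer.close();

        // Read the file footer back to verify.
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        System.out.println("rows written: " + reader.getNumberOfRows());
    }
}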
Example 1: open
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcWriter<T> open(Path path) {
    if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(configuration);
    }
    if (compressionKind != null) {
        writerOptions.compress(compressionKind);
    }
    if (bufferSize != 0) {
        writerOptions.bufferSize(bufferSize);
    }
    // Add the schema to the writer options.
    TypeDescription schema = getTypeDescription();
    writerOptions.setSchema(schema);
    try {
        writer = OrcFile.createWriter(path, writerOptions);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    vectorizedRowBatch = schema.createRowBatch(batchSize);
    specialCaseSetup();
    return this;
}
Developer ID: eclecticlogic, Project: eclectic-orc, Lines: 24, Source: AbstractOrcWriter.java
Example 2: writePostScript
import org.apache.orc.OrcFile; // import the required package/class
private int writePostScript(int footerLength, int metadataLength) throws IOException {
    OrcProto.PostScript.Builder builder = OrcProto.PostScript.newBuilder()
        .setCompression(writeCompressionKind(compress)).setFooterLength(footerLength)
        .setMetadataLength(metadataLength).setMagic(OrcFile.MAGIC).addVersion(version.getMajor())
        .addVersion(version.getMinor()).setWriterVersion(OrcFile.CURRENT_WRITER.getId());
    if (compress != CompressionKind.NONE) {
        builder.setCompressionBlockSize(bufferSize);
    }
    OrcProto.PostScript ps = builder.build();
    // need to write this uncompressed
    long startPosn = rawWriter.getPos();
    ps.writeTo(rawWriter);
    long length = rawWriter.getPos() - startPosn;
    if (length > 255) {
        throw new IllegalArgumentException("PostScript too large at " + length);
    }
    return (int) length;
}
Developer ID: ampool, Project: monarch, Lines: 19, Source: AWriterImpl.java
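The 255-byte cap above is not arbitrary: the ORC format stores the PostScript length in the final byte of the file, so it can never exceed 255. A reader can locate the PostScript with plain tail reads, roughly like this sketch (the file name is an assumption):

import java.io.RandomAccessFile;

public class OrcTailPeek {
    public static void main(String[] args) throws Exception {
        try (RandomAccessFile file = new RandomAccessFile("/tmp/quickstart.orc", "r")) {
            file.seek(file.length() - 1);
            int psLength = file.read() & 0xff; // one-byte PostScript length field
            byte[] ps = new byte[psLength];
            file.seek(file.length() - 1 - psLength);
            file.readFully(ps);
            // ps now holds the serialized OrcProto.PostScript bytes
            System.out.println("PostScript length: " + psLength);
        }
    }
}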
Example 3: JsonORCFileWriter
import org.apache.orc.OrcFile; // import the required package/class
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> fieldTypes = schema.getChildren();
    converters = new JsonConverter[fieldTypes.size()];
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = VectorColumnFiller.createConverter(fieldTypes.get(c));
    }
    writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
        .compress(resolveCompression(codec)).setSchema(schema));
    batch = schema.createRowBatch();
}
Developer ID: pinterest, Project: secor, Lines: 18, Source: JsonORCFileReaderWriterFactory.java
Example 4: PentahoOrcRecordWriter
import org.apache.orc.OrcFile; // import the required package/class
public PentahoOrcRecordWriter( SchemaDescription schemaDescription, TypeDescription schema, String filePath,
                               Configuration conf ) {
    this.schemaDescription = schemaDescription;
    this.schema = schema;
    final AtomicInteger fieldNumber = new AtomicInteger(); // mutable field count
    schemaDescription.forEach( field -> setOutputMeta( fieldNumber, field ) );
    outputRowMetaAndData = new RowMetaAndData( outputRowMeta, new Object[ fieldNumber.get() ] );
    try {
        writer = OrcFile.createWriter( new Path( filePath ),
            OrcFile.writerOptions( conf )
                .setSchema( schema ) );
        batch = schema.createRowBatch();
    } catch ( IOException e ) {
        logger.error( e );
    }
    // Write the additional metadata for the fields
    new OrcMetaDataWriter( writer ).write( schemaDescription );
}
Developer ID: pentaho, Project: pentaho-hadoop-shims, Lines: 22, Source: PentahoOrcRecordWriter.java
Example 5: test
import org.apache.orc.OrcFile; // import the required package/class
@Test
public void test() throws IOException, Descriptors.DescriptorValidationException
{
    Configuration conf = new Configuration();
    System.setProperty("hadoop.home.dir", "/");
    FileSystem fileSystem = FileSystem.get(URI.create("hdfs://presto00:9000"), conf);
    Path hdfsDirPath = new Path("/rainbow2/orc_new_compress");
    System.out.println(fileSystem.isFile(hdfsDirPath));
    FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
    System.out.println(fileStatuses.length);
    for (FileStatus status : fileStatuses)
    {
        System.out.println(status.getPath() + ", " + status.getLen());
    }
    Reader reader = OrcFile.createReader(fileStatuses[0].getPath(),
        OrcFile.readerOptions(conf));
    System.out.println("file length:" + reader.getFileTail().getFileLength());
    List<String> columnNames = new ArrayList<>();
    columnNames.add("samplepercent");
    System.out.println(reader.getRawDataSizeOfColumns(columnNames));
    System.out.println(reader.getFileTail().getFooter().getTypes(0).getFieldNames(0));
    System.out.println(reader.getTypes().get(0).getSerializedSize());
    List<Reader> readers = new ArrayList<>();
    for (FileStatus fileStatus : fileStatuses)
    {
        Reader reader1 = OrcFile.createReader(fileStatus.getPath(),
            OrcFile.readerOptions(conf));
        readers.add(reader1);
        System.out.println("content size: " + reader1.getContentLength() + ", raw size: "
            + reader1.getRawDataSize());
    }
    for (String columnName : reader.getSchema().getFieldNames())
    {
        System.out.println(columnName);
    }
}
Developer ID: dbiir, Project: rainbow, Lines: 41, Source: TestOrcMetadata.java
Example 6: createStream
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Create a stream to store part of a column.
 *
 * @param column the column id for the stream
 * @param kind the kind of stream
 * @return the output stream that the section needs to be written to
 * @throws IOException
 */
public OutStream createStream(int column, OrcProto.Stream.Kind kind) throws IOException {
    final StreamName name = new StreamName(column, kind);
    final EnumSet<CompressionCodec.Modifier> modifiers;
    switch (kind) {
        case BLOOM_FILTER:
        case DATA:
        case DICTIONARY_DATA:
            if (getCompressionStrategy() == OrcFile.CompressionStrategy.SPEED) {
                modifiers = EnumSet.of(CompressionCodec.Modifier.FAST, CompressionCodec.Modifier.TEXT);
            } else {
                modifiers = EnumSet.of(CompressionCodec.Modifier.DEFAULT, CompressionCodec.Modifier.TEXT);
            }
            break;
        case LENGTH:
        case DICTIONARY_COUNT:
        case PRESENT:
        case ROW_INDEX:
        case SECONDARY:
            // easily compressed using the fastest modes
            modifiers = EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY);
            break;
        default:
            LOG.warn("Missing ORC compression modifiers for " + kind);
            modifiers = null;
            break;
    }
    BufferedStream result = streams.get(name);
    if (result == null) {
        result = new BufferedStream(name.toString(), bufferSize,
            codec == null ? codec : codec.modify(modifiers));
        streams.put(name, result);
    }
    return result.outStream;
}
Developer ID: ampool, Project: monarch, Lines: 47, Source: AWriterImpl.java
Example 7: createIntegerWriter
import org.apache.orc.OrcFile; // import the required package/class
IntegerWriter createIntegerWriter(PositionedOutputStream output, boolean signed,
                                  boolean isDirectV2, StreamFactory writer) {
    if (isDirectV2) {
        boolean alignedBitpacking = false;
        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
            alignedBitpacking = true;
        }
        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
    } else {
        return new RunLengthIntegerWriter(output, signed);
    }
}
Developer ID: ampool, Project: monarch, Lines: 13, Source: AWriterImpl.java
Example 8: getStream
import org.apache.orc.OrcFile; // import the required package/class
@VisibleForTesting
public ADataOutputStream getStream() throws IOException {
    if (rawWriter == null) {
        // final OutputStream os = new FileOutputStream("/tmp/abc.orc");
        rawWriter = new ADataOutputStream(null);
        // rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
        //     fs.getDefaultReplication(path), blockSize);
        rawWriter.writeBytes(OrcFile.MAGIC);
        headerLength = rawWriter.getPos();
        writer = new OutStream("metadata", bufferSize, codec, new DirectStream(rawWriter));
        protobufWriter = CodedOutputStream.newInstance(writer);
    }
    return rawWriter;
}
Developer ID: ampool, Project: monarch, Lines: 15, Source: AWriterImpl.java
Example 9: initialize
import org.apache.orc.OrcFile; // import the required package/class
@Override
public void initialize(Map<String, Object> metaData) {
    try {
        Configuration conf = new Configuration();
        // conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");
        processor = new OrcEntityProcessor(OrcFile.createWriter(new Path(filename),
            OrcFile.writerOptions(conf).setSchema(SCHEMA)), SCHEMA.createRowBatch());
    } catch (IOException e) {
        throw new OsmosisRuntimeException(e);
    }
}
Developer ID: mojodna, Project: osm2orc, Lines: 12, Source: OrcWriter.java
Example 10: JsonORCFileReader
import org.apache.orc.OrcFile; // import the required package/class
@SuppressWarnings("deprecation")
public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    Path path = new Path(logFilePath.getLogFilePath());
    Reader reader = OrcFile.createReader(path,
        OrcFile.readerOptions(new Configuration(true)));
    offset = logFilePath.getOffset();
    rows = reader.rows();
    batch = reader.getSchema().createRowBatch();
    rows.nextBatch(batch);
}
Developer ID: pinterest, Project: secor, Lines: 14, Source: JsonORCFileReaderWriterFactory.java
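The constructor above only pulls the first batch; a complete read path normally loops over RecordReader.nextBatch until it returns false. A minimal sketch of that loop, assuming a struct<x:bigint> schema rather than secor's actual log schema:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class OrcReadLoop {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(new Path("/tmp/quickstart.orc"),
            OrcFile.readerOptions(conf));
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        LongColumnVector x = (LongColumnVector) batch.cols[0]; // assumes struct<x:bigint>
        try (RecordReader rows = reader.rows()) {
            // nextBatch refills the batch and returns false at end of file
            while (rows.nextBatch(batch)) {
                for (int row = 0; row < batch.size; row++) {
                    System.out.println(x.vector[row]);
                }
            }
        }
    }
}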
Example 11: getReader
import org.apache.orc.OrcFile; // import the required package/class
private Reader getReader() throws Exception {
    return inClassloader( () -> {
        checkNullFileName();
        Path filePath;
        FileSystem fs;
        Reader orcReader;
        try {
            filePath = new Path( fileName );
            fs = FileSystem.get( filePath.toUri(), conf );
            if ( !fs.exists( filePath ) ) {
                throw new NoSuchFileException( fileName );
            }
            if ( fs.getFileStatus( filePath ).isDirectory() ) {
                // if a directory was given, read the first .orc file inside it
                PathFilter pathFilter = new PathFilter() {
                    public boolean accept( Path file ) {
                        return file.getName().endsWith( ".orc" );
                    }
                };
                FileStatus[] fileStatuses = fs.listStatus( filePath, pathFilter );
                if ( fileStatuses.length == 0 ) {
                    throw new NoSuchFileException( fileName );
                }
                filePath = fileStatuses[0].getPath();
            }
            orcReader = OrcFile.createReader( filePath,
                OrcFile.readerOptions( conf ).filesystem( fs ) );
        } catch ( IOException e ) {
            throw new RuntimeException( "Unable to read data from file " + fileName, e );
        }
        return orcReader;
    } );
}
Developer ID: pentaho, Project: pentaho-hadoop-shims, Lines: 37, Source: PentahoOrcInputFormat.java
Example 12: withOptions
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcHandle<T> withOptions(OrcFile.WriterOptions writerOptions) {
    this.writerOptions = writerOptions;
    return this;
}
Developer ID: eclecticlogic, Project: eclectic-orc, Lines: 6, Source: AbstractOrcWriter.java
Example 13: flush
import org.apache.orc.OrcFile; // import the required package/class
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
{
    Configuration conf = new Configuration();
    try {
        Writer writer = OrcFile.createWriter(new Path(path),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .stripeSize(orcFileStripeSize)
                .bufferSize(orcFileBufferSize)
                .blockSize(orcFileBlockSize)
                .compress(CompressionKind.ZLIB)
                .version(OrcFile.Version.V_0_12));
        VectorizedRowBatch batch = schema.createRowBatch();
        while (segment.hasNext()) {
            String[] contents = segment.getNext();
            int rowCount = batch.size++;
            System.out.println("contents.length : " + contents.length);
            for (int i = 0; i < contents.length; i++) {
                ((BytesColumnVector) batch.cols[i]).setVal(rowCount, contents[i].getBytes());
            }
            // flush the batch once it is full
            if (batch.size == batch.getMaxSize()) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        // flush any remaining rows, then close the writer once all rows are written
        if (batch.size != 0) {
            writer.addRowBatch(batch);
            batch.reset();
        }
        writer.close();
        segment.setFilePath(path);
        System.out.println("path : " + path);
        return true;
    }
    catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
Developer ID: dbiir, Project: paraflow, Lines: 42, Source: OrcFlushThread.java
Example 14: AWriterImpl
import org.apache.orc.OrcFile; // import the required package/class
public AWriterImpl(FileSystem fs, Path path, OrcFile.WriterOptions opts) throws IOException {
    this.fs = fs;
    this.path = path;
    this.conf = opts.getConfiguration();
    this.callback = opts.getCallback();
    this.schema = opts.getSchema();
    if (callback != null) {
        callbackContext = new OrcFile.WriterContext() {
            @Override
            public Writer getWriter() {
                return AWriterImpl.this;
            }
        };
    } else {
        callbackContext = null;
    }
    this.adjustedStripeSize = opts.getStripeSize();
    this.defaultStripeSize = opts.getStripeSize();
    this.version = opts.getVersion();
    this.encodingStrategy = opts.getEncodingStrategy();
    this.compressionStrategy = opts.getCompressionStrategy();
    this.addBlockPadding = opts.getBlockPadding();
    this.blockSize = opts.getBlockSize();
    this.paddingTolerance = opts.getPaddingTolerance();
    this.compress = opts.getCompress();
    this.rowIndexStride = opts.getRowIndexStride();
    this.memoryManager = opts.getMemoryManager();
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress);
    int numColumns = schema.getMaximumId() + 1;
    if (opts.isEnforceBufferSize()) {
        this.bufferSize = opts.getBufferSize();
    } else {
        this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
    }
    if (version == OrcFile.Version.V_0_11) {
        /* do not write bloom filters for ORC v11 */
        this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
    } else {
        this.bloomFilterColumns = OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
    }
    this.bloomFilterFpp = opts.getBloomFilterFpp();
    treeWriter = createTreeWriter(schema, streamFactory, false);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
        throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE);
    }
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, opts.getStripeSize(), this);
    // LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
    //     " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
    //     compress, bufferSize);
}
Developer ID: ampool, Project: monarch, Lines: 55, Source: AWriterImpl.java
Example 15: getVersion
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Get the version of the file to write.
 */
public OrcFile.Version getVersion() {
    return version;
}
Developer ID: ampool, Project: monarch, Lines: 7, Source: AWriterImpl.java
Example 16: isNewWriteFormat
import org.apache.orc.OrcFile; // import the required package/class
boolean isNewWriteFormat(StreamFactory writer) {
    return writer.getVersion() != OrcFile.Version.V_0_11;
}
Developer ID: ampool, Project: monarch, Lines: 4, Source: AWriterImpl.java
Example 17: setup
import org.apache.orc.OrcFile; // import the required package/class
@Before
public void setup() throws Exception {
    PowerMockito.mockStatic(FileSystem.class);
    when(FileSystem.get(configuration)).thenReturn(fileSystem);
    when(FileSystem.get(any(URI.class), any(Configuration.class))).thenReturn(fileSystem);
    when(fileStatus.getPath()).thenReturn(path);
    when(fileStatus.isDirectory()).thenReturn(false);
    FileStatus[] fileStatuses = {fileStatus};
    when(fileSystem.listStatus(any(Path.class))).thenReturn(fileStatuses);
    when(fileStatus2.getPath()).thenReturn(path2);
    when(fileStatus2.isDirectory()).thenReturn(true);
    when(fileSystem.listStatus(path2)).thenReturn(fileStatuses);
    final FSDataInputStream fsDataInputStream = mock(FSDataInputStream.class);
    when(fileSystem.open(any(Path.class))).thenReturn(fsDataInputStream);
    mockStatic(Job.class);
    when(Job.getInstance(configuration)).thenReturn(job);
    when(job.getConfiguration()).thenReturn(configuration);
    when(path.getFileSystem(configuration)).thenReturn(fileSystem);
    when(fileSystem.makeQualified(path)).thenReturn(path);
    final UUID uuid = UUID.randomUUID();
    mockStatic(UUID.class);
    whenNew(UUID.class).withAnyArguments().thenReturn(uuid);
    when(UUID.randomUUID()).thenReturn(uuid);
    whenNew(Path.class).withArguments("/apps/datasqueeze/staging/tmp-" + uuid.toString()).thenReturn(path);
    whenNew(Path.class).withArguments("/source/path").thenReturn(path);
    whenNew(Path.class).withArguments("s3/source/path").thenReturn(path);
    whenNew(Path.class).withArguments("/source/path/dir").thenReturn(path2);
    when(configuration.get("mapreduce.multipleoutputs", "")).thenReturn("");
    whenNew(ReaderImpl.class).withArguments(any(Path.class), any(OrcFile.ReaderOptions.class)).thenReturn(reader);
    final TypeDescription schema = TypeDescription.createStruct()
        .addField("field1", TypeDescription.createInt());
    when(reader.getSchema()).thenReturn(schema);
    when(reader.getCompressionKind()).thenReturn(CompressionKind.SNAPPY);
    CompactionManagerFactory.DEFAULT_THRESHOLD_IN_BYTES = 1234L;
    whenNew(JobRunner.class).withArguments(job).thenReturn(jobRunner);
    whenNew(SequenceFile.Reader.class).withArguments(any(Configuration.class), any(Path.class)).thenReturn(seqReader);
    when(seqReader.isCompressed()).thenReturn(true);
    CompressionCodec compressionCodec = mock(CompressionCodec.class);
    when(seqReader.getCompressionCodec()).thenReturn(compressionCodec);
    when(seqReader.getCompressionType()).thenReturn(SequenceFile.CompressionType.BLOCK);
}
Developer ID: ExpediaInceCommercePlatform, Project: dataSqueeze, Lines: 49, Source: CompactionManagerImplTest.java
Example 18: open
import org.apache.orc.OrcFile; // import the required package/class
public void open(Configuration conf) throws IOException {
    this.reader = OrcFile.createReader(new Path(this.filePath), OrcFile.readerOptions(conf));
    this.schema = this.reader.getSchema();
}
Developer ID: nqbao, Project: python-orc, Lines: 5, Source: SimplifiedOrcReader.java
Example 19: open
import org.apache.orc.OrcFile; // import the required package/class
@Override
public void open(FileInputSplit fileSplit) throws IOException {
    LOG.debug("Opening ORC file {}", fileSplit.getPath());
    // open ORC file and create reader
    org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(fileSplit.getPath().getPath());
    Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));
    // get offset and length for the stripes that start in the split
    Tuple2<Long, Long> offsetAndLength = getOffsetAndLengthForSplit(fileSplit, getStripes(orcReader));
    // create ORC row reader configuration
    Reader.Options options = getOptions(orcReader)
        .schema(schema)
        .range(offsetAndLength.f0, offsetAndLength.f1)
        .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
        .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
        .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
    // configure filters
    if (!conjunctPredicates.isEmpty()) {
        SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
        b = b.startAnd();
        for (Predicate predicate : conjunctPredicates) {
            predicate.add(b);
        }
        b = b.end();
        options.searchArgument(b.build(), new String[]{});
    }
    // configure selected fields
    options.include(computeProjectionMask());
    // create ORC row reader
    this.orcRowsReader = orcReader.rows(options);
    // assign ids
    this.schema.getId();
    // create row batch
    this.rowBatch = schema.createRowBatch(batchSize);
    rowsInBatch = 0;
    nextRow = 0;
}
Developer ID: axbaretto, Project: flink, Lines: 45, Source: OrcRowInputFormat.java
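The SearchArgument assembled above pushes the conjunctive filters down into the ORC reader so that non-matching stripes and row groups can be skipped. As a standalone illustration — the column names and literal are assumptions, not Flink's — a filter could be built and attached to reader options like this:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.orc.Reader;

public class SargExample {
    // Build "x = 42 AND y IS NOT NULL" and attach it as a pushed-down filter.
    static Reader.Options withFilter(Reader.Options options) {
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
            .startAnd()
            .equals("x", PredicateLeaf.Type.LONG, 42L)
            .startNot().isNull("y", PredicateLeaf.Type.STRING).end()
            .end()
            .build();
        return options.searchArgument(sarg, new String[]{"x", "y"});
    }
}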
Example 20: getEncodingStrategy
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Get the encoding strategy to use.
 *
 * @return encoding strategy
 */
public OrcFile.EncodingStrategy getEncodingStrategy() {
    return encodingStrategy;
}
Developer ID: ampool, Project: monarch, Lines: 9, Source: AWriterImpl.java
Note: the org.apache.orc.OrcFile class examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms, with snippets selected from community-contributed open-source projects. Copyright of the source code remains with the original authors; for redistribution and use, refer to the License of the corresponding project. Do not reproduce without permission.