This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch. If you have been wondering what VectorizedRowBatch is for and how to use it, the curated examples below should help.
VectorizedRowBatch belongs to the org.apache.hadoop.hive.ql.exec.vector package. Twenty code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
Example 1: addRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  if (buildIndex) {
    // Batch the writes up to the rowIndexStride so that we can get the
    // right size indexes.
    int posn = 0;
    while (posn < batch.size) {
      int chunkSize = Math.min(batch.size - posn, rowIndexStride - rowsInIndex);
      treeWriter.writeRootBatch(batch, posn, chunkSize);
      posn += chunkSize;
      rowsInIndex += chunkSize;
      rowsInStripe += chunkSize;
      if (rowsInIndex >= rowIndexStride) {
        createRowIndexEntry();
      }
    }
  } else {
    rowsInStripe += batch.size;
    treeWriter.writeRootBatch(batch, 0, batch.size);
  }
  memoryManager.addedRow(batch.size);
}
Developer: ampool, Project: monarch, Lines: 23, Source: AWriterImpl.java
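For context, here is a minimal write-side sketch of how a caller typically fills a VectorizedRowBatch and hands it to addRowBatch through the standard org.apache.orc writer API. The file path and the struct<id:bigint,name:string> schema are made-up placeholders, not taken from the monarch project:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class AddRowBatchSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
    Writer writer = OrcFile.createWriter(new Path("/tmp/sketch.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch(); // default capacity is 1024 rows
    LongColumnVector id = (LongColumnVector) batch.cols[0];
    BytesColumnVector name = (BytesColumnVector) batch.cols[1];
    for (long i = 0; i < 10000; i++) {
      int row = batch.size++;
      id.vector[row] = i;
      name.setVal(row, ("row-" + i).getBytes(StandardCharsets.UTF_8));
      if (batch.size == batch.getMaxSize()) {
        writer.addRowBatch(batch); // hand a full batch to the writer
        batch.reset();             // then reuse the same batch object
      }
    }
    if (batch.size != 0) {
      writer.addRowBatch(batch); // flush the final partial batch
    }
    writer.close();
  }
}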
Example 2: convertFromOrc
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@VisibleForTesting
RowMetaAndData convertFromOrc( RowMetaAndData rowMetaAndData, VectorizedRowBatch batch, int currentBatchRow,
                               SchemaDescription schemaDescription, TypeDescription typeDescription,
                               Map<String, Integer> schemaToOrcSubcripts,
                               SchemaDescription orcSchemaDescription ) {
  for ( SchemaDescription.Field field : schemaDescription ) {
    SchemaDescription.Field orcField = orcSchemaDescription.getField( field.formatFieldName );
    if ( orcField != null ) { // skip fields that are missing from the ORC schema
      ColumnVector columnVector = batch.cols[ schemaToOrcSubcripts.get( field.pentahoFieldName ) ];
      Object orcToPentahoValue = convertFromSourceToTargetDataType( columnVector, currentBatchRow, orcField.pentahoValueMetaType );
      Object convertToSchemaValue = null;
      try {
        convertToSchemaValue = valueMetaConverter.convertFromSourceToTargetDataType( orcField.pentahoValueMetaType, field.pentahoValueMetaType, orcToPentahoValue );
      } catch ( ValueMetaConversionException e ) {
        logger.error( e );
      }
      rowMetaAndData.addValue( field.pentahoFieldName, field.pentahoValueMetaType, convertToSchemaValue );
    }
  }
  return rowMetaAndData;
}
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 26, Source: OrcConverter.java
Example 3: next
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public boolean next( final NullWritable key, final VectorizedRowBatch outputBatch ) throws IOException {
  outputBatch.reset();
  setting.setPartitionValues( outputBatch );
  if( indexSize <= currentIndex ){
    if( ! currentReader.hasNext() ){
      updateCounter( currentReader.getReadStats() );
      outputBatch.endOfFile = true;
      isEnd = true;
      return false;
    }
    while( ! setSpread() ){
      if( ! currentReader.hasNext() ){
        updateCounter( currentReader.getReadStats() );
        outputBatch.endOfFile = true;
        isEnd = true;
        return false;
      }
    }
  }
  int maxSize = outputBatch.getMaxSize();
  if( indexSize < currentIndex + maxSize ){
    maxSize = indexSize - currentIndex;
  }
  for( int colIndex : needColumnIds ){
    assignors[colIndex].setColumnVector( outputBatch.cols[colIndex] , currentIndexList , currentIndex , maxSize );
  }
  outputBatch.size = maxSize;
  currentIndex += maxSize;
  if( indexSize <= currentIndex && ! currentReader.hasNext() ){
    outputBatch.endOfFile = true;
  }
  return outputBatch.size > 0;
}
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 39, Source: MDSHiveDirectVectorizedReader.java
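A hedged sketch of how a consumer might drive such a reader through the Hadoop mapred RecordReader contract, pairing createValue() (see Example 8 below) with next(); the countRows helper and its wrapper class are hypothetical names, not part of the project:

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.RecordReader;

public class BatchReaderLoop {
  // Drains any RecordReader<NullWritable, VectorizedRowBatch>, such as the
  // MDSHiveDirectVectorizedReader above, and returns the total row count.
  static long countRows(RecordReader<NullWritable, VectorizedRowBatch> reader) throws IOException {
    NullWritable key = reader.createKey();
    VectorizedRowBatch batch = reader.createValue();
    long rows = 0;
    while (reader.next(key, batch)) {
      rows += batch.size; // each call yields at most batch.getMaxSize() rows
    }
    reader.close();
    return rows;
  }
}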
Example 4: writeRootBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
void writeRootBatch(VectorizedRowBatch batch, int offset, int length) throws IOException {
  // update the statistics for the root column
  indexStatistics.increment(length);
  // I'm assuming that the root column isn't nullable so that I don't need
  // to update isPresent.
  for (int i = 0; i < childrenWriters.length; ++i) {
    childrenWriters[i].writeBatch(batch.cols[i], offset, length);
  }
}
Developer: ampool, Project: monarch, Lines: 11, Source: AWriterImpl.java
Example 5: fillRows
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to fill.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The list of selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {
  int rowsToRead = Math.min((int) batch.count(), rows.length);
  List<TypeDescription> fieldTypes = schema.getChildren();
  // read each selected field
  for (int rowIdx = 0; rowIdx < selectedFields.length; rowIdx++) {
    int orcIdx = selectedFields[rowIdx];
    readField(rows, rowIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], null, rowsToRead);
  }
  return rowsToRead;
}
Developer: axbaretto, Project: flink, Lines: 22, Source: OrcUtils.java
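For comparison, a minimal self-contained sketch of producing such batches with the core ORC reader API and pulling typed values out of the column vectors, including the isRepeating and null checks that readField-style helpers must perform. The file path and the assumption that column 0 is a bigint are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class ReadBatchSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/sketch.orc"),
        OrcFile.readerOptions(new Configuration()));
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows();
    long sum = 0;
    while (rows.nextBatch(batch)) {
      LongColumnVector col0 = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; r++) {
        int idx = col0.isRepeating ? 0 : r; // a repeating vector stores one value at index 0
        if (col0.noNulls || !col0.isNull[idx]) {
          sum += col0.vector[idx];
        }
      }
    }
    rows.close();
    System.out.println("sum of column 0: " + sum);
  }
}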
Example 6: processRow
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
                              TypeDescription schema, int row) throws JSONException {
  if (schema.getCategory() == TypeDescription.Category.STRUCT) {
    List<TypeDescription> fieldTypes = schema.getChildren();
    List<String> fieldNames = schema.getFieldNames();
    writer.object();
    for (int c = 0; c < batch.cols.length; ++c) {
      writer.key(fieldNames.get(c));
      setValue(writer, batch.cols[c], fieldTypes.get(c), row);
    }
    writer.endObject();
  } else {
    setValue(writer, batch.cols[0], schema, row);
  }
}
Developer: pinterest, Project: secor, Lines: 16, Source: JsonFieldFiller.java
Example 7: fillRow
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
public static void fillRow(int rowIndex, JsonConverter[] converters,
                           TypeDescription schema, VectorizedRowBatch batch, JsonObject data) {
  List<String> fieldNames = schema.getFieldNames();
  for (int c = 0; c < converters.length; ++c) {
    JsonElement field = data.get(fieldNames.get(c));
    if (field == null) {
      batch.cols[c].noNulls = false;
      batch.cols[c].isNull[rowIndex] = true;
    } else {
      converters[c].convert(field, batch.cols[c], rowIndex);
    }
  }
}
Developer: pinterest, Project: secor, Lines: 14, Source: VectorColumnFiller.java
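A compact sketch of the same null-marking contract outside Secor: when a JSON field is absent, the column's batch-wide noNulls flag must be cleared and isNull must be set for that row. The single-column schema and the Gson parsing are illustrative only (JsonParser.parseString requires Gson 2.8.6 or newer):

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.TypeDescription;

public class NullMarkingSketch {
  public static void main(String[] args) {
    TypeDescription schema = TypeDescription.fromString("struct<id:bigint>");
    VectorizedRowBatch batch = schema.createRowBatch();
    JsonObject data = JsonParser.parseString("{}").getAsJsonObject(); // "id" is absent
    int row = batch.size++;
    LongColumnVector id = (LongColumnVector) batch.cols[0];
    JsonElement field = data.get("id");
    if (field == null) {
      id.noNulls = false;    // batch-wide hint: this vector now contains nulls
      id.isNull[row] = true; // per-row null marker
    } else {
      id.vector[row] = field.getAsLong();
    }
  }
}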
Example 8: createValue
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public VectorizedRowBatch createValue() {
  return setting.createVectorizedRowBatch();
}
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 5, Source: MDSHiveDirectVectorizedReader.java
Example 9: createVectorizedRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public VectorizedRowBatch createVectorizedRowBatch(){
  return rbCtx.createVectorizedRowBatch( projectionColumn );
}
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 5, Source: HiveVectorizedReaderSetting.java
Example 10: setPartitionValues
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void setPartitionValues( final VectorizedRowBatch outputBatch ){
  if( 0 < partitionValues.length ){
    rbCtx.addPartitionColsToBatch( outputBatch , partitionValues );
  }
}
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 7, Source: HiveVectorizedReaderSetting.java
Example 11: flush
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
{
    Configuration conf = new Configuration();
    try {
        Writer writer = OrcFile.createWriter(new Path(path),
                OrcFile.writerOptions(conf)
                        .setSchema(schema)
                        .stripeSize(orcFileStripeSize)
                        .bufferSize(orcFileBufferSize)
                        .blockSize(orcFileBlockSize)
                        .compress(CompressionKind.ZLIB)
                        .version(OrcFile.Version.V_0_12));
        VectorizedRowBatch batch = schema.createRowBatch();
        while (segment.hasNext()) {
            String[] contents = segment.getNext();
            int rowCount = batch.size++;
            for (int i = 0; i < contents.length; i++) {
                ((BytesColumnVector) batch.cols[i]).setVal(rowCount, contents[i].getBytes());
            }
            // flush once the batch is full, after the whole row has been written
            if (batch.size == batch.getMaxSize()) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        // write any remaining rows, then close the writer exactly once
        if (batch.size != 0) {
            writer.addRowBatch(batch);
            batch.reset();
        }
        writer.close();
        segment.setFilePath(path);
        return true;
    }
    catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
Developer: dbiir, Project: paraflow, Lines: 42, Source: OrcFlushThread.java
Example 12: addRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  flushInternalBatch();
  super.addRowBatch(batch);
}
Developer: ampool, Project: monarch, Lines: 6, Source: AWriter.java
Example 13: serializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector) throws SerDeException {
  throw new UnsupportedOperationException("serializeVector not supported");
}
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 5, Source: EthereumBlockSerde.java
Example 14: deserializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
  // nothing to do here
}
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 6, Source: EthereumBlockSerde.java
Example 15: deserializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
/** VectorizedSerde **/
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
  // nothing to do here
}
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 6, Source: BitcoinBlockSerde.java
Example 16: serializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector) throws SerDeException {
  throw new UnsupportedOperationException("serializeVector not supported");
}
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 5, Source: BitcoinBlockSerde.java
Example 17: OrcEntityProcessor
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
OrcEntityProcessor(Writer writer, VectorizedRowBatch batch) {
  this.writer = writer;
  this.batch = batch;
}
Developer: mojodna, Project: osm2orc, Lines: 5, Source: OrcWriter.java
Example 18: compareFrameContents
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
static int compareFrameContents(String fileName, Set<String> failedFiles, Frame h2oFrame, Reader orcReader,
                                String[] colTypes, String[] colNames, boolean[] toInclude) {
  List<StripeInformation> stripesInfo = orcReader.getStripes(); // get all stripe info
  int wrongTests = 0;
  if (stripesInfo.size() == 0) { // Orc file contains no data
    assertEquals("Orc file is empty. H2O frame row number should be zero: ", 0, h2oFrame.numRows());
  } else {
    Long startRowIndex = 0L; // row index into H2O frame
    for (StripeInformation oneStripe : stripesInfo) {
      try {
        RecordReader perStripe = orcReader.rows(oneStripe.getOffset(), oneStripe.getDataLength(), toInclude,
            null, colNames);
        VectorizedRowBatch batch = perStripe.nextBatch(null); // read orc file stripes in vectorizedRowBatch
        boolean done = false;
        Long rowCounts = 0L;
        Long rowNumber = oneStripe.getNumberOfRows(); // row count of current stripe
        while (!done) {
          long currentBatchRow = batch.count(); // row count of current batch
          ColumnVector[] dataVectors = batch.cols;
          int colIndex = 0;
          for (int cIdx = 0; cIdx < batch.numCols; cIdx++) { // read one column at a time
            if (toInclude[cIdx + 1]) {
              compare1Cloumn(dataVectors[cIdx], colTypes[colIndex].toLowerCase(), colIndex, currentBatchRow,
                  h2oFrame.vec(colNames[colIndex]), startRowIndex);
              colIndex++;
            }
          }
          rowCounts = rowCounts + currentBatchRow; // number of rows of data actually read
          startRowIndex = startRowIndex + currentBatchRow;
          if (rowCounts >= rowNumber) { // all rows of the stripe have been read
            done = true;
          }
          if (!done) { // not done yet, get next batch
            batch = perStripe.nextBatch(batch);
          }
        }
        perStripe.close();
      } catch (Throwable e) {
        failedFiles.add(fileName);
        e.printStackTrace();
        wrongTests += 1;
      }
    }
  }
  return wrongTests;
}
Developer: h2oai, Project: h2o-3, Lines: 55, Source: OrcTestUtils.java
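The snippet above uses the older Hive ORC reader API (rows(offset, length, include, sarg, colNames) and a batch-returning nextBatch). As a hedged sketch, the equivalent per-stripe scan with the core org.apache.orc API might look like the following, using Reader.Options.range to restrict the reader to one stripe's byte range; the file path is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.StripeInformation;

public class StripeScanSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/sketch.orc"),
        OrcFile.readerOptions(new Configuration()));
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    for (StripeInformation stripe : reader.getStripes()) {
      // restrict the record reader to this stripe's byte range
      RecordReader rows = reader.rows(
          reader.options().range(stripe.getOffset(), stripe.getLength()));
      long stripeRows = 0;
      while (rows.nextBatch(batch)) {
        stripeRows += batch.size;
      }
      rows.close();
      System.out.println("stripe at " + stripe.getOffset() + ": " + stripeRows + " rows");
    }
  }
}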
Example 19: createVectorizedRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
VectorizedRowBatch createVectorizedRowBatch();
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 2, Source: IVectorizedReaderSetting.java
Example 20: setPartitionValues
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
void setPartitionValues( final VectorizedRowBatch outputBatch );
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 2, Source: IVectorizedReaderSetting.java
Note: the org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch examples in this article were collected from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from projects contributed by open-source developers; copyright in the source code remains with the original authors, and distribution and use are subject to each project's license. Do not republish without permission.