This article collects typical usage examples of the Java class org.apache.hive.hcatalog.data.HCatRecord. If you have been wondering what HCatRecord is for, how it is used, or where to find usage examples, the curated class examples below should help.
The HCatRecord class belongs to the org.apache.hive.hcatalog.data package. Twenty code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
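Before the examples, here is a minimal sketch of how an HCatRecord is typically built. It is not taken from any of the projects below; it simply uses the same DefaultHCatRecord API that appears in Example 1, and the two-column schema and column names are assumptions for illustration.

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

// Build a record for a hypothetical table with two columns: (id INT, msg STRING).
HCatRecord buildRecord(HCatSchema schema, int id, String msg) throws Exception {
  DefaultHCatRecord record = new DefaultHCatRecord(2); // 2 = number of columns
  record.set("id", schema, id);                        // set a field by column name and schema
  record.set("msg", schema, msg);
  return record;
}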
Example 1: generateHCatRecords

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

private List<HCatRecord> generateHCatRecords(int numRecords,
    HCatSchema hCatTblSchema, ColumnGenerator... extraCols) throws Exception {
  List<HCatRecord> records = new ArrayList<HCatRecord>();
  List<HCatFieldSchema> hCatTblCols = hCatTblSchema.getFields();
  int size = hCatTblCols.size();
  for (int i = 0; i < numRecords; ++i) {
    DefaultHCatRecord record = new DefaultHCatRecord(size);
    record.set(hCatTblCols.get(0).getName(), hCatTblSchema, i);
    record.set(hCatTblCols.get(1).getName(), hCatTblSchema, "textfield" + i);
    int idx = 0;
    for (int j = 0; j < extraCols.length; ++j) {
      // Skip static partition key columns; their values are not set on the record.
      if (extraCols[j].getKeyType() == KeyType.STATIC_KEY) {
        continue;
      }
      record.set(hCatTblCols.get(idx + 2).getName(), hCatTblSchema,
          extraCols[j].getHCatValue(i));
      ++idx;
    }
    records.add(record);
  }
  return records;
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 24, Source: HCatalogTestUtils.java
Example 2: split

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/**
 * Calculates the 'desired' number of splits based on desiredBundleSizeBytes, which is passed as
 * a hint to the native API. Retrieves the actual splits generated by the native API, which could
 * be different from the 'desired' split count calculated using desiredBundleSizeBytes.
 */
@Override
public List<BoundedSource<HCatRecord>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  int desiredSplitCount = 1;
  long estimatedSizeBytes = getEstimatedSizeBytes(options);
  if (desiredBundleSizeBytes > 0 && estimatedSizeBytes > 0) {
    desiredSplitCount = (int) Math.ceil((double) estimatedSizeBytes / desiredBundleSizeBytes);
  }
  ReaderContext readerContext = getReaderContext(desiredSplitCount);
  // process the splits returned by the native API;
  // this could be different from 'desiredSplitCount' calculated above
  LOG.info(
      "Splitting into bundles of {} bytes: "
          + "estimated size {}, desired split count {}, actual split count {}",
      desiredBundleSizeBytes,
      estimatedSizeBytes,
      desiredSplitCount,
      readerContext.numSplits());
  List<BoundedSource<HCatRecord>> res = new ArrayList<>();
  for (int split = 0; split < readerContext.numSplits(); split++) {
    res.add(new BoundedHCatalogSource(spec.withContext(readerContext).withSplitId(split)));
  }
  return res;
}

Developer: apache, Project: beam, Lines of code: 31, Source: HCatalogIO.java
Example 3: testReadFromSource

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/** Test of Read using SourceTestUtils.readFromSource(..). */
@Test
@NeedsTestData
public void testReadFromSource() throws Exception {
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);
  List<String> records = new ArrayList<>();
  for (int i = 0; i < context.numSplits(); i++) {
    BoundedHCatalogSource source = new BoundedHCatalogSource(spec.withSplitId(i));
    for (HCatRecord record : SourceTestUtils.readFromSource(source, OPTIONS)) {
      records.add(record.get(0).toString());
    }
  }
  assertThat(records, containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT).toArray()));
}

Developer: apache, Project: beam, Lines of code: 21, Source: HCatalogIOTest.java
Example 4: testSourceEqualsSplits

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/** Test of Read using SourceTestUtils.assertSourcesEqualReferenceSource(..). */
@Test
@NeedsTestData
public void testSourceEqualsSplits() throws Exception {
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 15;
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);
  BoundedHCatalogSource source = new BoundedHCatalogSource(spec);
  List<BoundedSource<HCatRecord>> unSplitSource = source.split(-1, OPTIONS);
  assertEquals(1, unSplitSource.size());
  List<BoundedSource<HCatRecord>> splits =
      source.split(numRows * bytesPerRow / numSamples, OPTIONS);
  assertTrue(splits.size() >= 1);
  SourceTestUtils.assertSourcesEqualReferenceSource(unSplitSource.get(0), splits, OPTIONS);
}

Developer: apache, Project: beam, Lines of code: 25, Source: HCatalogIOTest.java
Example 5: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(T key, HCatRecord value, Context context) throws IOException, InterruptedException {
  HCatFieldSchema field;
  Object fieldValue;
  for (int m = 0; m < columnSize; m++) {
    field = schema.get(m);
    fieldValue = value.get(field.getName(), schema);
    if (fieldValue == null)
      fieldValue = "NULL";
    if (counter < 5 && m < 10) {
      System.out.println("Get row " + counter + " column '" + field.getName() + "' value: " + fieldValue);
    }
    if (fieldValue != null)
      getHllc(m).add(Bytes.toBytes(fieldValue.toString()));
  }
  counter++;
}

Developer: KylinOLAP, Project: Kylin, Lines of code: 22, Source: ColumnCardinalityMapper.java
Example 6: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
  try {
    int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
    HCatFieldSchema fieldSchema = null;
    for (int i : factDictCols) {
      outputKey.set((short) i);
      fieldSchema = schema.get(flatTableIndexes[i]);
      Object fieldValue = record.get(fieldSchema.getName(), schema);
      if (fieldValue == null)
        continue;
      byte[] bytes = Bytes.toBytes(fieldValue.toString());
      outputValue.set(bytes, 0, bytes.length);
      context.write(outputKey, outputValue);
    }
  } catch (Exception ex) {
    handleErrorRecord(record, ex);
  }
}

Developer: KylinOLAP, Project: Kylin, Lines of code: 23, Source: FactDistinctColumnsMapper.java
Example 7: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
  HCatFieldSchema fieldSchema = null;
  for (short i = 0; i < columnSize; i++) {
    outputKey.set(i);
    fieldSchema = schema.get(i);
    Object fieldValue = record.get(fieldSchema.getName(), schema);
    if (fieldValue == null)
      continue;
    byte[] bytes = Bytes.toBytes(fieldValue.toString());
    outputValue.set(bytes, 0, bytes.length);
    context.write(outputKey, outputValue);
  }
}

Developer: KylinOLAP, Project: Kylin, Lines of code: 17, Source: IIDistinctColumnsMapper.java
Example 8: call

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public LabeledPoint call(Tuple2<WritableComparable, HCatRecord> tuple) throws Exception {
  HCatRecord record = tuple._2();
  if (record == null) {
    log.info("@@@ Null record");
    return defaultLabeledPoint;
  }
  double[] features = new double[numFeatures];
  for (int i = 0; i < numFeatures; i++) {
    int featurePos = featurePositions[i];
    features[i] = featureValueMappers[i].call(record.get(featurePos));
  }
  double label = featureValueMappers[labelColumnPos].call(record.get(labelColumnPos));
  return new LabeledPoint(label, Vectors.dense(features));
}

Developer: apache, Project: lens, Lines of code: 20, Source: ColumnFeatureFunction.java
Example 9: insert

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

private void insert(Map<String, String> partitionSpec, Iterable<HCatRecord> rows) {
  WriteEntity entity = new WriteEntity.Builder()
      .withDatabase(databaseName)
      .withTable(tableName)
      .withPartition(partitionSpec)
      .build();
  try {
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = master.prepareWrite();
    HCatWriter writer = DataTransferFactory.getHCatWriter(context);
    writer.write(rows.iterator());
    master.commit(context);
  } catch (HCatException e) {
    throw new RuntimeException("An error occurred while inserting data to " + databaseName + "." + tableName, e);
  }
}

Developer: klarna, Project: HiveRunner, Lines of code: 18, Source: TableDataInserter.java
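Since insert(..) above is private to TableDataInserter, the following is only an illustrative sketch of how such a helper might be driven from within the same class or an equivalent test utility; the column layout, partition column name, and values are placeholders, and the imports match those already used in the examples above.

// Build one single-column row and insert it into the partition (part_col=partition_value).
List<HCatRecord> rows = new ArrayList<>();
DefaultHCatRecord row = new DefaultHCatRecord(1);
row.set(0, "value");                              // positional set(int, Object)
rows.add(row);

Map<String, String> partitionSpec = new HashMap<>();
partitionSpec.put("part_col", "partition_value");

insert(partitionSpec, rows);                      // writes the rows via HCatWriter and commits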
Example 10: testPartitionedSimple

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Test
public void testPartitionedSimple() {
  HCatTable table = table().cols(columns(COLUMN_1)).partCols(columns(PARTITION_COLUMN_1));
  Multimap<Map<String, String>, HCatRecord> data = new TableDataBuilder(table)
      .addRow("value", "partition_value")
      .build();
  assertEquals(1, data.size());
  Map<String, String> partitionSpec = new HashMap<>();
  partitionSpec.put(PARTITION_COLUMN_1, "partition_value");
  Collection<HCatRecord> rows = data.get(partitionSpec);
  assertEquals(1, rows.size());
  HCatRecord row = rows.iterator().next();
  assertEquals(Arrays.asList((Object) "value", "partition_value"), row.getAll());
}

Developer: klarna, Project: HiveRunner, Lines of code: 19, Source: TableDataBuilderTest.java
Example 11: HCatalogRootParser

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public HCatalogRootParser( final HCatRecord record , final HCatSchema schema ) throws IOException{
  this.record = record;
  fieldIndexMap = new HashMap<String,Integer>();
  converterList = new ArrayList<IHCatalogPrimitiveConverter>();
  schemaList = new ArrayList<HCatFieldSchema>();
  for( int i = 0 ; i < schema.size() ; i++ ){
    HCatFieldSchema fieldSchema = schema.get(i);
    fieldIndexMap.put( fieldSchema.getName() , Integer.valueOf(i) );
    converterList.add( HCatalogPrimitiveConverterFactory.get( fieldSchema ) );
    schemaList.add( schema.get(i) );
  }
}

Developer: yahoojapan, Project: dataplatform-schema-lib, Lines of code: 15, Source: HCatalogRootParser.java
Example 12: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(LongWritable key, HCatRecord hcr, Context context)
    throws IOException, InterruptedException {
  SqoopRecord sqr = helper.convertToSqoopRecord(hcr);
  writeSqoopRecord(sqr);
  context.progress();
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 8, Source: NetezzaExternalTableHCatExportMapper.java
Example 13: convertToSqoopRecord

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public SqoopRecord convertToSqoopRecord(HCatRecord hcr)
    throws IOException {
  Text key = new Text();
  for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
    String colName = e.getKey();
    String hfn = colName.toLowerCase();
    key.set(hfn);
    Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
    if (!isOdps) {
      String javaColType = colTypesJava.get(key).toString();
      int sqlType = ((IntWritable) colTypesSql.get(key)).get();
      HCatFieldSchema field = hCatFullTableSchema.get(hfn);
      HCatFieldSchema.Type fieldType = field.getType();
      String hCatTypeString = field.getTypeString();
      Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
      if (debugHCatExportMapper) {
        LOG.debug("hCatVal " + hCatVal + " of type "
            + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal
            + " of type " + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type "
            + javaColType + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
      }
      sqoopRecord.setField(colName, sqlVal);
    } else {
      sqoopRecord.setField(colName, hCatVal == null ? null : hCatVal.toString());
    }
  }
  return sqoopRecord;
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 29, Source: SqoopHCatExportHelper.java
Example 14: createRecordReader

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public RecordReader<WritableComparable, HCatRecord>
    createRecordReader(InputSplit split,
        TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a SqoopHCatRecordReader");
  return new SqoopHCatRecordReader(split, taskContext, this);
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 9, Source: SqoopHCatExportFormat.java
Example 15: createHCatRecordReader

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public RecordReader<WritableComparable, HCatRecord>
    createHCatRecordReader(InputSplit split,
        TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a base HCatRecordReader");
  return super.createRecordReader(split, taskContext);
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 8, Source: SqoopHCatExportFormat.java
Example 16: runHCatImport

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

protected void runHCatImport(List<String> addlArgsArray,
    int totalRecords, String table, ColumnGenerator[] cols,
    String[] cNames, boolean dontCreate, boolean isQuery) throws Exception {
  CreateMode mode = CreateMode.CREATE;
  if (dontCreate) {
    mode = CreateMode.NO_CREATION;
  }
  HCatSchema tblSchema =
      utils.createHCatTable(mode, totalRecords, table, cols);
  utils.createSqlTable(getConnection(), false, totalRecords, table, cols);
  addlArgsArray.add("-m");
  addlArgsArray.add("1");
  addlArgsArray.add("--hcatalog-table");
  addlArgsArray.add(table);
  String[] colNames = null;
  if (cNames != null) {
    colNames = cNames;
  } else {
    colNames = new String[2 + cols.length];
    colNames[0] = "ID";
    colNames[1] = "MSG";
    for (int i = 0; i < cols.length; ++i) {
      colNames[2 + i] = cols[i].getName().toUpperCase();
    }
  }
  String[] importArgs;
  if (isQuery) {
    importArgs = getQueryArgv(true, colNames, new Configuration());
  } else {
    importArgs = getArgv(true, colNames, new Configuration());
  }
  LOG.debug("Import args = " + Arrays.toString(importArgs));
  SqoopHCatUtilities.instance().setConfigured(false);
  runImport(new ImportTool(), importArgs);
  List<HCatRecord> recs = utils.readHCatRecords(null, table, null);
  LOG.debug("HCat records ");
  LOG.debug(utils.hCatRecordDump(recs, tblSchema));
  validateHCatRecords(recs, tblSchema, 10, cols);
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 40, Source: HCatalogImportTest.java
Example 17: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(LongWritable key, Text value,
    Context context)
    throws IOException, InterruptedException {
  try {
    HCatRecord rec = recsToLoad.get(writtenRecordCount);
    context.write(null, rec);
    writtenRecordCount++;
  } catch (Exception e) {
    if (LOG.isDebugEnabled()) {
      e.printStackTrace(System.err);
    }
    throw new IOException(e);
  }
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 16, Source: HCatalogTestUtils.java
Example 18: readHCatRecords

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/**
 * Run a local map reduce job to read records from an HCatalog table.
 * @param dbName database containing the table
 * @param tableName table to read from
 * @param filter optional partition filter
 * @return the records read by the mapper
 * @throws Exception
 */
public List<HCatRecord> readHCatRecords(String dbName,
    String tableName, String filter) throws Exception {
  HCatReaderMapper.setReadRecordCount(0);
  recsRead.clear();
  // Configuration conf = new Configuration();
  Job job = new Job(conf, "HCatalog reader job");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatReaderMapper.class);
  job.getConfiguration()
      .setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  Path path = new Path(fs.getWorkingDirectory(),
      "mapreduce/HCatTableIndexOutput");
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  FileOutputFormat.setOutputPath(job, path);
  job.waitForCompletion(true);
  LOG.info("Read " + HCatReaderMapper.readRecordCount + " records");
  return recsRead;
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 44, Source: HCatalogTestUtils.java
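The HCatReaderMapper referenced by the job above is not reproduced in this listing. A plausible minimal sketch, assuming it only counts the incoming records and adds them to the shared recsRead list, could look like the following; the actual implementation in HCatalogTestUtils may differ.

public static class HCatReaderMapper
    extends Mapper<WritableComparable, HCatRecord, BytesWritable, Text> {

  static int readRecordCount = 0;

  public static void setReadRecordCount(int count) {
    readRecordCount = count;
  }

  @Override
  public void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    recsRead.add(value); // recsRead is assumed to be a static list on the enclosing test utility
    readRecordCount++;
  }
}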
Example 19: hCatRecordDump

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public String hCatRecordDump(List<HCatRecord> recs,
    HCatSchema schema) throws Exception {
  List<String> fields = schema.getFieldNames();
  int count = 0;
  StringBuilder sb = new StringBuilder(1024);
  for (HCatRecord rec : recs) {
    sb.append("HCat Record : " + ++count).append('\n');
    for (String field : fields) {
      sb.append('\t').append(field).append('=');
      sb.append(rec.get(field, schema)).append('\n');
      sb.append("\n\n");
    }
  }
  return sb.toString();
}

Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines of code: 16, Source: HCatalogTestUtils.java
Example 20: expand

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public PCollection<HCatRecord> expand(PBegin input) {
  checkArgument(getTable() != null, "withTable() is required");
  checkArgument(getConfigProperties() != null, "withConfigProperties() is required");
  return input.apply(org.apache.beam.sdk.io.Read.from(new BoundedHCatalogSource(this)));
}

Developer: apache, Project: beam, Lines of code: 8, Source: HCatalogIO.java
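For context, a read transform whose expand(..) looks like the one above is normally attached to a pipeline roughly as follows. This is only a sketch: the metastore URI, table name, and options variable are placeholders, and the builder methods are the same withConfigProperties(..)/withTable(..) calls already shown in Examples 3 and 4.

Map<String, String> configProperties = new HashMap<>();
configProperties.put("hive.metastore.uris", "thrift://metastore-host:9083"); // placeholder metastore URI

Pipeline pipeline = Pipeline.create(options);
PCollection<HCatRecord> records =
    pipeline.apply(
        HCatalogIO.read()
            .withConfigProperties(configProperties)
            .withTable("my_table"));
pipeline.run().waitUntilFinish();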
Note: The org.apache.hive.hcatalog.data.HCatRecord examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are taken from open-source projects contributed by many developers; copyright in the source code remains with its original authors, and any distribution or use should follow the corresponding project's license. Do not republish without permission.