This article collects and organizes typical usage examples of the Java class org.apache.avro.mapred.Pair. If you are wondering what the Pair class is for, how to use it, or what real-world code that uses it looks like, the curated class examples below should help.
The Pair class belongs to the org.apache.avro.mapred package. Twenty code examples of the Pair class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
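Before the project-specific examples, here is a minimal standalone sketch of the Pair API itself: constructing a pair, reading its key and value with key()/value(), replacing both with set(), and building the pair schema with Pair.getPairSchema, which is the same call the MapReduce examples below pass to AvroJob.setOutputSchema. The class name PairQuickStart and the sample values are illustrative only.
import org.apache.avro.Schema;
import org.apache.avro.mapred.Pair;

public class PairQuickStart {
  public static void main(String[] args) {
    // A Pair wraps a (key, value) tuple in a single Avro record.
    Pair<String, Integer> pair = new Pair<String, Integer>("blue", 1);
    System.out.println(pair.key() + " -> " + pair.value());

    // Both slots can be replaced in place, which is how the mapper/reducer
    // examples below reuse a single Pair instance across records.
    pair.set("green", 3);
    System.out.println(pair.key() + " -> " + pair.value());

    // The schema of a Pair is derived from its key and value schemas;
    // this is the call used with AvroJob.setOutputSchema in Examples 6 and 11.
    Schema pairSchema = Pair.getPairSchema(
        Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT));
    System.out.println(pairSchema.toString(true));
  }
}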
Example 1: setup
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
protected void setup(Context context) throws InterruptedException, IOException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  final String userClassName = conf.get(MergeJob.MERGE_SQOOP_RECORD_KEY);
  try {
    final Class<? extends Object> clazz = Class.forName(userClassName, true,
        Thread.currentThread().getContextClassLoader());
    sqoopRecordImpl = (SqoopRecord) ReflectionUtils.newInstance(clazz, conf);
    // Index the user record's fields by lower-cased name -> (field name, field type name).
    for (final Field field : clazz.getDeclaredFields()) {
      final String fieldName = field.getName();
      final String fieldTypeName = field.getType().getName();
      sqoopRecordFields.put(fieldName.toLowerCase(), new Pair<String, String>(fieldName,
          fieldTypeName));
    }
  } catch (ClassNotFoundException e) {
    throw new IOException("Cannot find the user record class with class name: "
        + userClassName, e);
  }
}
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 21, Source: MergeAvroMapper.java
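The snippet relies on two mapper fields that the excerpt does not show (sqoopRecordImpl and the sqoopRecordFields map). As a hedged, self-contained illustration of the same indexing idea, the sketch below reflects over a hypothetical Car class and builds the lower-cased-name -> (field name, field type name) map the way setup() does; Example 7 later shows how such a map is consumed. All names in the sketch are mine.
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.mapred.Pair;

public class FieldIndexSketch {
  // A stand-in for a generated record class; Sqoop-generated classes are
  // shaped similarly but carry additional plumbing.
  static class Car {
    private String modelName;
    private Integer modelYear;
  }

  public static void main(String[] args) {
    // Walk the declared fields with reflection, exactly as setup() does above.
    Map<String, Pair<String, String>> fields = new HashMap<String, Pair<String, String>>();
    for (Field field : Car.class.getDeclaredFields()) {
      fields.put(field.getName().toLowerCase(),
          new Pair<String, String>(field.getName(), field.getType().getName()));
    }
    // Look a field up by lower-cased name, the way toSqoopRecord() does in Example 7.
    Pair<String, String> modelName = fields.get("modelname");
    System.out.println(modelName.key() + " has type " + modelName.value());
  }
}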
Example 2: write
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public void write(AvroWrapper<Pair<Utf8, T>> record, NullWritable n) throws IOException, InterruptedException {
  String key = record.datum().key().toString();
  if (!key.equals(currentKey)) {
    // The key changed: close the writer for the previous key and open a new
    // Parquet writer under a sub-directory named after the new key.
    if (currentWriter != null) {
      currentWriter.close(null); // TaskAttemptContext not used for close
    }
    currentKey = key;
    Path dir = new Path(basePath, key);
    FileSystem fs = dir.getFileSystem(conf);
    if (!fs.exists(dir)) {
      fs.mkdirs(dir);
    }
    currentWriter = (RecordWriter<Void, T>)
        new AvroParquetFileTarget.CrunchAvroParquetOutputFormat().getRecordWriter(conf,
            new Path(dir, uniqueFileName),
            AvroParquetFileTarget.CrunchAvroParquetOutputFormat.getCompression(conf));
  }
  currentWriter.write(null, record.datum().value());
}
Developer: cloudera, Project: quince, Lines: 22, Source: AvroParquetPathPerKeyOutputFormat.java
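The write() method above keys everything off record.datum(): the Utf8 key of the Pair selects the per-key output sub-directory, and the value is the record that actually gets written. The small sketch below shows that unwrapping step in isolation; the class name and the Utf8 payload standing in for the record type T are illustrative assumptions.
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;

public class PairUnwrapSketch {
  public static void main(String[] args) {
    // A wrapped (key, payload) pair, as the output format receives it.
    Pair<Utf8, Utf8> pair = new Pair<Utf8, Utf8>(new Utf8("2017-01-01"), new Utf8("payload"));
    AvroWrapper<Pair<Utf8, Utf8>> record = new AvroWrapper<Pair<Utf8, Utf8>>(pair);

    // write() uses the key to pick the output sub-directory and writes only the value.
    String key = record.datum().key().toString();
    System.out.println(key + " -> " + record.datum().value());
  }
}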
Example 3: reduce
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void reduce(Utf8 key,
                   Iterable<RegressionTestLoglikOutput> values,
                   AvroCollector<Pair<Utf8, RegressionTestLoglikOutput>> collector,
                   Reporter reporter) throws IOException
{
  // Sum the per-record test log-likelihoods and counts for this key.
  double sumLoglik = 0;
  double n = 0;
  for (RegressionTestLoglikOutput value : values)
  {
    float loglik = value.testLoglik;
    sumLoglik += loglik;
    n += value.count;
  }
  RegressionTestLoglikOutput output = new RegressionTestLoglikOutput();
  output.key = key;
  output.testLoglik = (float) sumLoglik;
  output.count = n;
  collector.collect(new Pair<Utf8, RegressionTestLoglikOutput>(key, output));
}
Developer: linkedin, Project: ml-ease, Lines: 21, Source: ItemModelTestLoglik.java
Example 4: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(RegressionPrepareOutput data,
                AvroCollector<Pair<Integer, RegressionPrepareOutput>> collector,
                Reporter reporter) throws IOException
{
  Integer key = Integer.parseInt(data.key.toString());
  // Emit one copy of the record per lambda, with a composite partition key.
  for (int i = 0; i < _lambdaRhoConsumer.get().size(); i++)
  {
    int newkey = key * _lambdaRhoConsumer.get().size() + i;
    // String newkey = String.valueOf(lambda)+"#"+key;
    data.key = String.valueOf(newkey);
    Pair<Integer, RegressionPrepareOutput> outPair =
        new Pair<Integer, RegressionPrepareOutput>(newkey, data);
    collector.collect(outPair);
  }
}
Developer: linkedin, Project: ml-ease, Lines: 17, Source: RegressionAdmmTrain.java
Example 5: reduce
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void reduce(Utf8 key,
                   Iterable<RegressionTestLoglikOutput> values,
                   AvroCollector<Pair<Utf8, RegressionTestLoglikOutput>> collector,
                   Reporter reporter) throws IOException
{
  double sumLoglik = 0;
  double n = 0;
  for (RegressionTestLoglikOutput value : values)
  {
    float loglik = value.testLoglik;
    sumLoglik += loglik;
    n += value.count;
  }
  RegressionTestLoglikOutput output = new RegressionTestLoglikOutput();
  output.key = "averageTestLoglik";
  output.testLoglik = (float) sumLoglik;
  output.count = n;
  collector.collect(new Pair<Utf8, RegressionTestLoglikOutput>(key, output));
}
Developer: linkedin, Project: ml-ease, Lines: 21, Source: RegressionTestLoglik.java
Example 6: run
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public int run(String[] args) throws Exception {
  JobConf job = new JobConf(getConf(), ModelNameCountApp.class);
  job.setJobName(JOB_NAME);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  AvroJob.setMapperClass(job, ModelCountMapper.class);
  AvroJob.setReducerClass(job, ModelCountReducer.class);
  AvroJob.setInputSchema(job, Automobile.getClassSchema());
  // The job output is a Pair<string, int>: model name -> count.
  AvroJob.setOutputSchema(
      job,
      Pair.getPairSchema(Schema.create(Type.STRING),
          Schema.create(Type.INT)));
  JobClient.runJob(job);
  return 0;
}
Developer: nathanagood, Project: examples-apahce-avro, Lines: 24, Source: ModelNameCountApp.java
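ModelCountMapper and ModelCountReducer are referenced by the job setup above but not shown in this article. Given the Automobile input schema and the Pair<string, int> output schema, they would plausibly look like the sketch below. To keep the sketch self-contained it works on generic records; the real project presumably uses the generated Automobile class, and the "modelName" field name is an assumption.
import java.io.IOException;
import org.apache.avro.generic.GenericData;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroMapper;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.mapred.Reporter;

public class ModelCountSketch {
  // Emit (model name, 1) for every input record.
  public static class ModelCountMapper
      extends AvroMapper<GenericData.Record, Pair<Utf8, Integer>> {
    @Override
    public void map(GenericData.Record car, AvroCollector<Pair<Utf8, Integer>> collector,
        Reporter reporter) throws IOException {
      collector.collect(new Pair<Utf8, Integer>(new Utf8(car.get("modelName").toString()), 1));
    }
  }

  // Sum the counts per model name.
  public static class ModelCountReducer
      extends AvroReducer<Utf8, Integer, Pair<Utf8, Integer>> {
    @Override
    public void reduce(Utf8 modelName, Iterable<Integer> counts,
        AvroCollector<Pair<Utf8, Integer>> collector, Reporter reporter) throws IOException {
      int sum = 0;
      for (Integer count : counts) {
        sum += count;
      }
      collector.collect(new Pair<Utf8, Integer>(modelName, sum));
    }
  }
}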
Example 7: toSqoopRecord
import org.apache.avro.mapred.Pair; // import the required package/class
private SqoopRecord toSqoopRecord(GenericRecord genericRecord) throws IOException {
  Schema avroSchema = genericRecord.getSchema();
  for (Schema.Field field : avroSchema.getFields()) {
    Pair<String, String> sqoopRecordField = sqoopRecordFields.get(field.name().toLowerCase());
    if (null == sqoopRecordField) {
      throw new IOException("Cannot find field '" + field.name() + "' in fields of user class "
          + sqoopRecordImpl.getClass().getName() + ". Fields are: "
          + Arrays.deepToString(sqoopRecordFields.values().toArray()));
    }
    Object avroObject = genericRecord.get(field.name());
    Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), sqoopRecordField.value());
    sqoopRecordImpl.setField(sqoopRecordField.key(), fieldVal);
  }
  return sqoopRecordImpl;
}
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 16, Source: MergeAvroMapper.java
Example 8: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(GenericData.Record user,
    AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
    throws IOException {
  CharSequence color = (CharSequence) user.get("favorite_color");
  // We need this check because the User.favorite_color field has
  // type ["string", "null"]
  if (color == null) {
    color = "none";
  }
  PAIR.key(color);
  collector.collect(PAIR);
}
Developer: cloudera, Project: RecordServiceClient, Lines: 15, Source: MapredColorCount.java
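Both MapredColorCount methods (Examples 8 and 9) reuse a single static PAIR instance instead of allocating a new Pair per record; the field itself is not shown in the snippets. The sketch below illustrates that reuse pattern with a plausible declaration. The initial ("none", 1) values are an assumption, consistent with the fact that map() only ever updates the key before collecting.
import org.apache.avro.mapred.Pair;

public class ReusedPairSketch {
  // Plausible declaration of the PAIR field held by the mapper/reducer above
  // (an assumption; the real field is not shown in the snippets).
  private static final Pair<CharSequence, Integer> PAIR =
      new Pair<CharSequence, Integer>("none", 1);

  public static void main(String[] args) {
    for (String color : new String[] {"blue", "green", "blue"}) {
      PAIR.key(color);              // what map() does per record; the value stays 1
      System.out.println(PAIR.key() + " -> " + PAIR.value());
    }
    PAIR.set("blue", 2);            // what reduce() does with the summed count
    System.out.println(PAIR.key() + " -> " + PAIR.value());
  }
}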
Example 9: reduce
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void reduce(CharSequence key, Iterable<Integer> values,
    AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
    throws IOException {
  int sum = 0;
  for (Integer value : values) {
    sum += value;
  }
  PAIR.set(key, sum);
  collector.collect(PAIR);
}
Developer: cloudera, Project: RecordServiceClient, Lines: 13, Source: MapredColorCount.java
Example 10: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(GenericData.Record user,
    AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
    throws IOException {
  CharSequence color = (CharSequence) user.get("favorite_color");
  if (color == null) {
    color = "none";
  }
  PAIR.key(color);
  collector.collect(PAIR);
}
Developer: cloudera, Project: RecordServiceClient, Lines: 13, Source: ColorCount.java
Example 11: countColors
import org.apache.avro.mapred.Pair; // import the required package/class
/**
 * Run the MR1 color count with generic records, and return a map of favorite colors to
 * the number of users.
 */
public static java.util.Map<String, Integer> countColors() throws IOException {
  String output = TestUtil.getTempDirectory();
  Path outputPath = new Path(output);
  JobConf conf = new JobConf(ColorCount.class);
  conf.setJobName("MR1 Color Count With Generic Records");
  conf.setInt("mapreduce.job.reduces", 1);
  conf.setBoolean(
      com.cloudera.recordservice.avro.AvroJob.USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY,
      true);
  com.cloudera.recordservice.avro.AvroJob.setInputFormat(conf,
      org.apache.avro.mapred.AvroInputFormat.class);
  RecordServiceConfig.setInputTable(conf, "rs", "users");
  FileOutputFormat.setOutputPath(conf, outputPath);
  AvroJob.setMapperClass(conf, Map.class);
  AvroJob.setReducerClass(conf, Reduce.class);
  AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.STRING),
      Schema.create(Schema.Type.INT)));
  JobClient.runJob(conf);
  // Read the result and return it. Since we set the number of reducers to 1,
  // there is always just one file containing the value.
  SeekableInput input = new FsInput(new Path(output + "/part-00000.avro"), conf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
  java.util.Map<String, Integer> colorMap = new HashMap<String, Integer>();
  for (GenericRecord datum : fileReader) {
    colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString()));
  }
  return colorMap;
}
Developer: cloudera, Project: RecordServiceClient, Lines: 40, Source: ColorCount.java
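countColors() reads the reducer output back as generic records and relies on the layout of the pair schema: position 0 is the key and position 1 is the value, which is why the loop can use datum.get(0) and datum.get(1). The local round-trip below (no cluster required; class name, file name, and values are illustrative) writes one Pair<string, int> record with the same schema and reads it back exactly the way that loop does.
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.Pair;

public class PairFileRoundTrip {
  public static void main(String[] args) throws IOException {
    Schema pairSchema = Pair.getPairSchema(
        Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT));

    // Write one pair record, standing in for a reducer output file.
    File file = File.createTempFile("colors", ".avro");
    DataFileWriter<GenericRecord> writer =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(pairSchema));
    writer.create(pairSchema, file);
    GenericRecord rec = new GenericData.Record(pairSchema);
    rec.put("key", "blue");   // field 0 of the pair schema
    rec.put("value", 2);      // field 1 of the pair schema
    writer.append(rec);
    writer.close();

    // Read it back the same way countColors() does.
    Map<String, Integer> colorMap = new HashMap<String, Integer>();
    DataFileReader<GenericRecord> fileReader =
        new DataFileReader<GenericRecord>(file, new GenericDatumReader<GenericRecord>());
    for (GenericRecord datum : fileReader) {
      colorMap.put(datum.get(0).toString(), Integer.parseInt(datum.get(1).toString()));
    }
    fileReader.close();
    System.out.println(colorMap);
  }
}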
Example 12: getRecordWriter
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public RecordWriter<AvroWrapper<Pair<Utf8, T>>, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Configuration conf = taskAttemptContext.getConfiguration();
  Path basePath = new Path(getOutputPath(taskAttemptContext), conf.get("mapreduce.output.basename", "part"));
  return new AvroParquetFilePerKeyRecordWriter<T>(basePath,
      getUniqueFile(taskAttemptContext, "part", ".parquet"), conf);
}
Developer: cloudera, Project: quince, Lines: 9, Source: AvroParquetPathPerKeyOutputFormat.java
Example 13: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(GenericData.Record data,
                AvroCollector<Pair<String, GenericData.Record>> collector,
                Reporter reporter) throws IOException
{
  if (data.get(_itemKey) == null)
  {
    throw new IOException("data does not contain the column " + _itemKey);
  }
  String itemKey = data.get(_itemKey).toString();
  collector.collect(new Pair<String, GenericData.Record>(itemKey, data));
}
Developer: linkedin, Project: ml-ease, Lines: 13, Source: ItemModelTest.java
Example 14: createJobConf
import org.apache.avro.mapred.Pair; // import the required package/class
private JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                              Class<? extends AvroReducer> reducerClass) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  Schema inputSchema = Util.removeUnion(AvroUtils.getAvroInputSchema(conf));
  if (inputSchema == null)
  {
    throw new IllegalStateException("Input does not have schema info and/or input is missing.");
  }
  _logger.info("Input Schema=" + inputSchema.toString());
  List<Schema.Field> inputFields = inputSchema.getFields();
  Schema.Field predField =
      new Schema.Field("pred", Schema.create(Type.FLOAT), "", null);
  // The output schema is the input schema plus a "pred" column for the prediction.
  List<Schema.Field> outputFields = new LinkedList<Schema.Field>();
  for (Schema.Field field : inputFields)
  {
    outputFields.add(new Schema.Field(field.name(),
                                      field.schema(),
                                      field.doc(),
                                      null));
  }
  outputFields.add(predField);
  Schema outputSchema =
      Schema.createRecord("PerItemTestOutput",
                          "Test output for PerItemTest",
                          "com.linkedin.lab.regression.avro",
                          false);
  outputSchema.setFields(outputFields);
  AvroJob.setOutputSchema(conf, outputSchema);
  AvroJob.setMapOutputSchema(conf,
                             Pair.getPairSchema(Schema.create(Type.STRING), inputSchema));
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  return conf;
}
Developer: linkedin, Project: ml-ease, Lines: 36, Source: ItemModelTest.java
Example 15: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(GenericData.Record data,
                AvroCollector<Pair<Float, GenericData.Record>> collector,
                Reporter reporter) throws IOException
{
  LinearModel model;
  if (_lambda >= 0)
  {
    model = _modelConsumer.get().get(String.valueOf(_lambda));
  }
  else
  {
    // lambda should be -1 and it should include only 1 model which is the best-model
    // found in train
    Iterator<LinearModel> iter = _modelConsumer.get().values().iterator();
    model = iter.next();
  }
  float pred = (float) model.evalInstanceAvro(data, false, _ignoreValue);
  GenericData.Record output = new GenericData.Record(_outputSchema);
  List<Schema.Field> inputFields = data.getSchema().getFields();
  for (Schema.Field field : inputFields)
  {
    output.put(field.name(), data.get(field.name()));
    _logger.info(field.name() + ": " + data.get(field.name()));
  }
  output.put("pred", pred);
  Pair<Float, GenericData.Record> outPair =
      new Pair<Float, GenericData.Record>(pred, output);
  collector.collect(outPair);
}
Developer: linkedin, Project: ml-ease, Lines: 31, Source: RegressionTest.java
Example 16: createJobConf
import org.apache.avro.mapred.Pair; // import the required package/class
private JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                              Class<? extends AvroReducer> reducerClass) throws IOException, URISyntaxException
{
  JobConf conf = createJobConf();
  Schema inputSchema = Util.removeUnion(AvroUtils.getAvroInputSchema(conf));
  if (inputSchema == null)
  {
    throw new IllegalStateException("Input does not have schema info and/or input is missing.");
  }
  _logger.info("Input Schema=" + inputSchema.toString());
  List<Schema.Field> inputFields = inputSchema.getFields();
  Schema.Field predField =
      new Schema.Field("pred", Schema.create(Type.FLOAT), "", null);
  List<Schema.Field> outputFields = new LinkedList<Schema.Field>();
  for (Schema.Field field : inputFields)
  {
    outputFields.add(new Schema.Field(field.name(),
                                      field.schema(),
                                      field.doc(),
                                      null));
  }
  outputFields.add(predField);
  Schema outputSchema =
      Schema.createRecord("AdmmTestOutput",
                          "Test output for AdmmTest",
                          "com.linkedin.lab.regression.avro",
                          false);
  outputSchema.setFields(outputFields);
  AvroJob.setOutputSchema(conf, outputSchema);
  // Here the map output key is the prediction itself (FLOAT), paired with the full output record.
  AvroJob.setMapOutputSchema(conf,
                             Pair.getPairSchema(Schema.create(Type.FLOAT), outputSchema));
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  return conf;
}
Developer: linkedin, Project: ml-ease, Lines: 36, Source: RegressionTest.java
Example 17: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(RegressionPrepareOutput data,
                AvroCollector<Pair<String, RegressionPrepareOutput>> collector,
                Reporter reporter) throws IOException
{
  String key = data.key.toString();
  // Emit one copy of the record per lambda, keyed by "<lambda>#<original key>".
  for (float lambda : _lambdaSet)
  {
    String newkey = String.valueOf(lambda) + "#" + key;
    data.key = newkey;
    Pair<String, RegressionPrepareOutput> outPair =
        new Pair<String, RegressionPrepareOutput>(newkey, data);
    collector.collect(outPair);
  }
}
Developer: linkedin, Project: ml-ease, Lines: 16, Source: RegressionNaiveTrain.java
Example 18: run
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void run() throws Exception
{
  JobConfig props = super.getJobConfig();
  JobConf conf = super.createJobConf(ItemModelTestLoglikMapper.class,
                                     ItemModelTestLoglikReducer.class,
                                     ItemModelTestLoglikCombiner.class,
                                     Pair.getPairSchema(Schema.create(Type.STRING),
                                         RegressionTestLoglikOutput.SCHEMA$),
                                     RegressionTestLoglikOutput.SCHEMA$);
  AvroUtils.runAvroJob(conf);
}
Developer: linkedin, Project: ml-ease, Lines: 13, Source: ItemModelTestLoglik.java
Example 19: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(GenericData.Record data,
                AvroCollector<Pair<String, RegressionTestLoglikOutput>> collector,
                Reporter reporter) throws IOException
{
  int response = Util.getIntAvro(data, "response");
  Map<Utf8, Float> pred = (Map<Utf8, Float>) data.get("pred");
  double weight = 1;
  if (data.get("weight") != null)
  {
    weight = Util.getDoubleAvro(data, "weight");
  }
  if (response != 1 && response != 0 && response != -1)
  {
    throw new IOException("response should be 1, 0 or -1!");
  }
  for (Utf8 k : pred.keySet())
  {
    double loglik = 0;
    if (response == 1)
    {
      loglik = -Math.log1p(Math.exp(-pred.get(k))) * weight;
    }
    else
    {
      loglik = -Math.log1p(Math.exp(pred.get(k))) * weight;
    }
    RegressionTestLoglikOutput output = new RegressionTestLoglikOutput();
    output.key = k;
    output.testLoglik = (float) loglik;
    output.count = weight;
    collector.collect(new Pair<String, RegressionTestLoglikOutput>(k.toString(), output));
  }
}
Developer: linkedin, Project: ml-ease, Lines: 35, Source: ItemModelTestLoglik.java
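For reference, the per-instance quantity computed inside the loop above is the weighted logistic log-likelihood: -log(1 + exp(-pred)) * weight for a positive response and -log(1 + exp(pred)) * weight otherwise. The standalone helper below restates just that arithmetic outside the MapReduce plumbing; the class and method names are mine.
public class LoglikSketch {
  // Same arithmetic as the map() loop above: a response of 1 is treated as
  // positive, anything else (0 or -1) as negative.
  static double logLikelihood(int response, double pred, double weight) {
    double margin = (response == 1) ? pred : -pred;
    return -Math.log1p(Math.exp(-margin)) * weight;
  }

  public static void main(String[] args) {
    System.out.println(logLikelihood(1, 2.0, 1.0));   // well-classified positive: about -0.13
    System.out.println(logLikelihood(-1, 2.0, 1.0));  // misclassified negative: about -2.13
  }
}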
Example 20: map
import org.apache.avro.mapred.Pair; // import the required package/class
@Override
public void map(RegressionPrepareOutput data,
                AvroCollector<Pair<String, Integer>> collector,
                Reporter reporter) throws IOException
{
  String key = data.key.toString();
  for (float lambda : _lambdaSet)
  {
    String newkey = String.valueOf(lambda) + "#" + key;
    data.key = newkey;
    Pair<String, Integer> outPair = new Pair<String, Integer>(newkey, 1);
    collector.collect(outPair);
  }
}
Developer: linkedin, Project: ml-ease, Lines: 15, Source: PartitionIdAssigner.java
Note: the org.apache.avro.mapred.Pair examples in this article were collected from source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their original authors, and copyright in the source code remains with those authors; please consult each project's License before redistributing or reusing the code. Do not reproduce this article without permission.