本文整理汇总了Java中org.apache.crunch.types.PType类的典型用法代码示例。如果您正苦于以下问题：Java PType类的具体用法？Java PType怎么用？Java PType使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
PType类属于org.apache.crunch.types包，在下文中一共展示了PType类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: loadKeyedRecords
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Loads variants through {@code readVariants} and turns each one into a keyed
 * {@link SpecificRecord} using an {@code ADAMToKeyedSpecificRecordFn}.
 *
 * @param inputFormat  format name forwarded to {@code readVariants}
 * @param inputPath    location of the input data
 * @param conf         configuration forwarded to {@code readVariants}
 * @param pipeline     pipeline the input is read on
 * @param variantsOnly forwarded to the converter function
 * @param flatten      forwarded to the converter function
 * @param sampleGroup  forwarded to both {@code readVariants} and the converter function
 * @param samples      forwarded to the converter function
 * @return a table of converted records keyed with {@code KEY_PTYPE}
 * @throws IOException declared by this method and by {@code readVariants}
 */
@Override
public PTable<Tuple3<String, Long, String>, SpecificRecord>
loadKeyedRecords(String inputFormat, Path inputPath, Configuration conf,
    Pipeline pipeline, boolean variantsOnly, boolean flatten, String sampleGroup,
    Set<String> samples)
    throws IOException {
  // Step 1: read the input as (variant, genotypes) pairs.
  PCollection<Pair<org.bdgenomics.formats.avro.Variant, Collection<Genotype>>> variantsWithGenotypes =
      readVariants(inputFormat, inputPath, conf, pipeline, sampleGroup);

  // Step 2: build the function that maps those pairs to keyed SpecificRecords.
  ADAMToKeyedSpecificRecordFn toKeyedRecordFn =
      new ADAMToKeyedSpecificRecordFn(variantsOnly, flatten, sampleGroup, samples);

  // The converter reports which record class it emits; that class determines the value PType.
  @SuppressWarnings("unchecked")
  PType<SpecificRecord> recordType = Avros.specifics(toKeyedRecordFn.getSpecificRecordType());

  return variantsWithGenotypes.parallelDo(
      "Convert to keyed SpecificRecords",
      toKeyedRecordFn,
      Avros.tableOf(KEY_PTYPE, recordType));
}
开发者ID:cloudera,项目名称:quince,代码行数:17,代码来源:ADAMVariantsLoader.java
示例2: readVariants
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Reads the input as a collection of (variant, genotypes) pairs.
 *
 * <p>Only the {@code "VCF"} format is implemented. {@code "AVRO"} and {@code "PARQUET"}
 * are recognized but rejected, and any other value is treated as unrecognized.
 * The {@code conf} and {@code sampleGroup} arguments are not used by this method.
 *
 * @param inputFormat name of the input format; compared case-sensitively
 * @param inputPath   location of the input data
 * @param conf        unused here
 * @param pipeline    pipeline used to read the source
 * @param sampleGroup unused here
 * @return the converted variant/genotype pairs
 * @throws UnsupportedOperationException if {@code inputFormat} is {@code "AVRO"} or {@code "PARQUET"}
 * @throws IllegalStateException         if {@code inputFormat} is any other non-VCF value
 * @throws IOException                   declared for callers; nothing in this body throws it directly
 */
private static PCollection<Pair<Variant, Collection<Genotype>>>
readVariants(String inputFormat, Path inputPath, Configuration conf,
    Pipeline pipeline, String sampleGroup) throws IOException {
  // Reject everything that is not VCF up front so the happy path reads straight through.
  if (!inputFormat.equals("VCF")) {
    if (inputFormat.equals("AVRO") || inputFormat.equals("PARQUET")) {
      throw new UnsupportedOperationException("Unsupported input format: " + inputFormat);
    }
    throw new IllegalStateException("Unrecognized input format: " + inputFormat);
  }

  TableSource<LongWritable, VariantContextWritable> source = From.formattedFile(
      inputPath, VCFInputFormat.class, LongWritable.class, VariantContextWritable.class);
  // Only the values of the (LongWritable, VariantContextWritable) table are needed.
  PCollection<VariantContextWritable> contexts = pipeline.read(source).values();

  PType<Pair<Variant, Collection<Genotype>>> pairType = Avros.pairs(
      Avros.specifics(org.bdgenomics.formats.avro.Variant.class),
      Avros.collections(Avros.specifics(Genotype.class)));

  return contexts.parallelDo("VCF to ADAM Variant", new VCFToADAMVariantFn(), pairType);
}
开发者ID:cloudera,项目名称:quince,代码行数:24,代码来源:ADAMVariantsLoader.java
示例3: apply
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Pairs every element of the input with an integer drawn from a seeded random generator.
 *
 * <p>The integer is {@code rand.nextInt(numFolds)}, where {@code numFolds} and {@code seed}
 * come from the enclosing instance.
 *
 * @param pcollect the collection whose elements are to be labelled
 * @param <T>      element type of the input
 * @return a collection of (random integer, element) pairs
 */
public <T> PCollection<Pair<Integer, T>> apply(PCollection<T> pcollect) {
  PTypeFamily family = pcollect.getTypeFamily();
  PType<Pair<Integer, T>> labelledType = family.pairs(family.ints(), pcollect.getPType());

  MapFn<T, Pair<Integer, T>> assignFold = new MapFn<T, Pair<Integer, T>>() {
    // Transient: rebuilt in initialize() rather than carried along with the function.
    private transient RandomGenerator rand;

    @Override
    public void initialize() {
      if (rand != null) {
        return;
      }
      rand = RandomManager.getSeededRandom(seed);
    }

    @Override
    public Pair<Integer, T> map(T element) {
      int fold = rand.nextInt(numFolds);
      return Pair.of(fold, element);
    }
  };

  return pcollect.parallelDo("crossfold", assignFold, labelledType);
}
开发者ID:apsaltis,项目名称:oryx,代码行数:21,代码来源:Crossfold.java
示例4: groupedWeightedSample
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Draws a weighted sample of values for each key of the input table.
 *
 * <p>The work is done in three stages: a {@code SampleFn} pass that emits (weight, value)
 * pairs, a single-partition group-and-combine with {@code WRSCombineFn}, and a final pass
 * that drops the weights again.
 *
 * @param input      table of (value, weight) pairs per key
 * @param sampleSize sample size handed to {@code SampleFn} and {@code WRSCombineFn}
 * @param random     random generator handed to {@code SampleFn}
 * @param <K>        key type
 * @param <T>        sampled value type
 * @param <N>        numeric weight type
 * @return a table of sampled values per key, without weights
 */
public static <K, T, N extends Number> PTable<K, T> groupedWeightedSample(
    PTable<K, Pair<T, N>> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily family = input.getTypeFamily();
  PType<K> keyPType = input.getPTableType().getKeyType();
  // The value type is a pair; its first sub-type is the type of the sampled element.
  @SuppressWarnings("unchecked")
  PType<T> elementPType = (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);
  PTableType<K, Pair<Double, T>> weightedType =
      family.tableOf(keyPType, family.pairs(family.doubles(), elementPType));

  // Stage 1: fill reservoirs by mapping over the input and re-emitting it; per the original
  // note, each map task emits at most sampleSize entries and later stages pare them down.
  PTable<K, Pair<Double, T>> perTaskSamples = input.parallelDo(
      "reservoirSampling",
      new SampleFn<K, T, N>(sampleSize, random, elementPType),
      weightedType);

  // Stage 2: merge everything into a single reservoir (one partition).
  PTable<K, Pair<Double, T>> mergedReservoir = perTaskSamples
      .groupByKey(1)
      .combineValues(new WRSCombineFn<K, T>(sampleSize, elementPType));

  // Stage 3: strip the sampling weights off the result.
  MapFn<Pair<K, Pair<Double, T>>, Pair<K, T>> dropWeight =
      new MapFn<Pair<K, Pair<Double, T>>, Pair<K, T>>() {
        @Override
        public Pair<K, T> map(Pair<K, Pair<Double, T>> entry) {
          K key = entry.first();
          T value = entry.second().second();
          return Pair.of(key, value);
        }
      };
  return mergedReservoir.parallelDo(
      "strippingSamplingWeights", dropWeight, family.tableOf(keyPType, elementPType));
}
开发者ID:apsaltis,项目名称:oryx,代码行数:27,代码来源:ReservoirSampling.java
示例5: accept
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Decides whether this target can take output of the given type and, if so, configures
 * the handler for it.
 *
 * <p>The type is accepted only when it is a {@link PTableType} whose key class is
 * {@link ByteBuffer} and whose value class is {@link Collection} with exactly one
 * sub-type of class {@code Mutation}.
 *
 * @param handler handler to configure when the type is accepted
 * @param pType   candidate output type
 * @return {@code true} if the handler was configured for this target, {@code false} otherwise
 */
@Override
public boolean accept(final OutputHandler handler, final PType<?> pType) {
  if (!(pType instanceof PTableType)) {
    return false;
  }
  // Wildcard-parameterized instead of a raw PTableType: avoids raw-type warnings and
  // keeps getKeyType()/getValueType() typed as PType<?>.
  final PTableType<?, ?> pTableType = (PTableType<?, ?>) pType;
  final PType<?> keyType = pTableType.getKeyType();
  final PType<?> valueType = pTableType.getValueType();
  // Element type left exactly as the original declared it.
  final List<PType> subTypes = valueType.getSubTypes();

  final boolean matches = ByteBuffer.class.equals(keyType.getTypeClass())
      && Collection.class.equals(valueType.getTypeClass())
      && subTypes.size() == 1 // checked before get(0) so the index is always valid
      && Mutation.class.equals(subTypes.get(0).getTypeClass());
  if (!matches) {
    return false;
  }
  handler.configure(this, pType);
  return true;
}
开发者ID:spotify,项目名称:hdfs2cass,代码行数:19,代码来源:ThriftTarget.java
示例6: accept
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Accepts the given type only when it is both a {@code PTableType} and an {@code AvroType}
 * and its key class is {@link String}; on acceptance the handler is configured with this target.
 *
 * @param handler handler to configure when the type is accepted
 * @param ptype   candidate output type
 * @return {@code true} if the handler was configured, {@code false} otherwise
 */
@Override
public boolean accept(OutputHandler handler, PType<?> ptype) {
  if (!(ptype instanceof PTableType) || !(ptype instanceof AvroType)) {
    return false;
  }
  Class<?> keyClass = ((PTableType) ptype).getKeyType().getTypeClass();
  if (!String.class.equals(keyClass)) {
    return false;
  }
  handler.configure(this, ptype);
  return true;
}
开发者ID:cloudera,项目名称:quince,代码行数:11,代码来源:AvroParquetPathPerKeyTarget.java
示例7: configureForMapReduce
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Configures the job to write with {@code AvroParquetPathPerKeyOutputFormat}.
 *
 * <p>The Avro schema of the table's value type is stored in the format bundle under
 * {@code parquet.avro.schema}, suffixed with {@code "." + name} when a name is given.
 * The work is then delegated to the overload that takes explicit key and value classes,
 * with {@code Void.class} as the key class.
 *
 * @param job        job being configured
 * @param ptype      output type; cast to {@code PTableType} and its value type to {@code AvroType}
 * @param outputPath output location, passed through
 * @param name       optional output name; {@code null} selects the un-suffixed schema key
 */
@Override
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
  AvroType<?> valueType = (AvroType) ((PTableType) ptype).getValueType();
  String schemaKey = (name == null)
      ? "parquet.avro.schema"
      : "parquet.avro.schema" + "." + name;

  FormatBundle bundle = FormatBundle.forOutput(AvroParquetPathPerKeyOutputFormat.class);
  bundle.set(schemaKey, valueType.getSchema().toString());
  configureForMapReduce(job, Void.class, valueType.getTypeClass(), bundle, outputPath, name);
}
开发者ID:cloudera,项目名称:quince,代码行数:14,代码来源:AvroParquetPathPerKeyTarget.java
示例8: loadKeyedRecords
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Loads variants through {@code readVariants} and turns each one into a keyed
 * {@link SpecificRecord} using a {@code GA4GHToKeyedSpecificRecordFn}.
 *
 * @param inputFormat  format name forwarded to {@code readVariants}
 * @param inputPath    location of the input data
 * @param conf         configuration forwarded to {@code readVariants}
 * @param pipeline     pipeline the input is read on
 * @param variantsOnly forwarded to the converter function
 * @param flatten      forwarded to the converter function
 * @param sampleGroup  forwarded to both {@code readVariants} and the converter function
 * @param samples      forwarded to the converter function
 * @return a table of converted records keyed with {@code KEY_PTYPE}
 * @throws IOException declared by this method and passed on from {@code readVariants}
 */
@Override
public PTable<Tuple3<String, Long, String>, SpecificRecord>
loadKeyedRecords(String inputFormat, Path inputPath, Configuration conf,
    Pipeline pipeline, boolean variantsOnly, boolean flatten, String sampleGroup,
    Set<String> samples)
    throws IOException {
  PCollection<Variant> loadedVariants =
      readVariants(inputFormat, inputPath, conf, pipeline, sampleGroup);

  GA4GHToKeyedSpecificRecordFn toKeyedRecordFn =
      new GA4GHToKeyedSpecificRecordFn(variantsOnly, flatten, sampleGroup, samples);

  // The converter reports which record class it emits; that class determines the value PType.
  @SuppressWarnings("unchecked")
  PType<SpecificRecord> recordType = Avros.specifics(toKeyedRecordFn.getSpecificRecordType());

  return loadedVariants.parallelDo(
      "Convert to keyed SpecificRecords",
      toKeyedRecordFn,
      Avros.tableOf(KEY_PTYPE, recordType));
}
开发者ID:cloudera,项目名称:quince,代码行数:18,代码来源:GA4GHVariantsLoader.java
示例9: createPipeline
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Builds the pipeline for this step.
 *
 * <p>The pipeline reads (integer, weighted vector) pairs from
 * {@code <prefix>weighted/weightedKSketchVectors/}, replicates them, groups them, runs
 * {@code KMeansClusteringFn} on each group, and writes two text outputs under
 * {@code <prefix>eval/}: {@code replicaCenters/} and {@code replicaStats/}.
 *
 * @return the configured pipeline, or {@code null} when {@code validOutputPath} rejects the
 *         evaluation output location
 * @throws IOException declared for callers
 */
@Override
protected MRPipeline createPipeline() throws IOException {
  JobStepConfig jobConfig = getConfig();
  Config defaults = ConfigUtils.getDefaultConfig();
  EvaluationSettings evalSettings = EvaluationSettings.create(defaults);

  String instanceDir = jobConfig.getInstanceDir();
  long generationID = jobConfig.getGenerationID();
  String generationPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID);

  String evalDir = generationPrefix + "eval/";
  if (!validOutputPath(evalDir)) {
    return null;
  }

  String sketchDir = generationPrefix + "weighted/weightedKSketchVectors/";
  MRPipeline pipeline = createBasicPipeline(ClosestSketchVectorFn.class);

  PType<Pair<Integer, WeightedRealVector>> sketchType = KMeansTypes.FOLD_WEIGHTED_VECTOR;
  PCollection<Pair<Integer, WeightedRealVector>> sketches =
      pipeline.read(avroInput(sketchDir, sketchType));

  ReplicateValuesFn<Pair<Integer, WeightedRealVector>> replicateFn =
      new ReplicateValuesFn<Pair<Integer, WeightedRealVector>>(
          evalSettings.getKValues(), evalSettings.getReplications());

  PCollection<KMeansEvaluationData> evaluation = sketches
      .parallelDo(
          "replicate",
          replicateFn,
          Avros.tableOf(
              Avros.pairs(Avros.ints(), Avros.ints()),
              Avros.pairs(Avros.ints(), MLAvros.weightedVector())))
      .groupByKey(evalSettings.getParallelism())
      .parallelDo(
          "cluster",
          new KMeansClusteringFn(evalSettings),
          Serializables.avro(KMeansEvaluationData.class));

  // Output 1: the centers themselves, as text.
  PCollection<String> centerLines =
      evaluation.parallelDo("replicaCenters", new CentersOutputFn(generationPrefix), Avros.strings());
  centerLines.write(compressedTextOutput(pipeline.getConfiguration(), evalDir + "replicaCenters/"));

  // Output 2: the per-replica statistics, as text.
  PCollection<String> statLines =
      evaluation.parallelDo("replicaStats", new StatsOutputFn(), Avros.strings());
  statLines.write(compressedTextOutput(pipeline.getConfiguration(), evalDir + "replicaStats/"));

  return pipeline;
}
开发者ID:apsaltis,项目名称:oryx,代码行数:39,代码来源:ClusteringStep.java
示例10: inputPairs
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Reads (integer, vector) pairs from the Avro input identified by {@code inputKey}.
 *
 * @param p        pipeline to read on
 * @param inputKey key passed to {@code avroInput} to locate the data
 * @param ptype    type of the vector half of each pair
 * @param <V>      concrete vector type
 * @return the collection of pairs read from the input
 */
protected final <V extends RealVector> PCollection<Pair<Integer, V>> inputPairs(
    Pipeline p,
    String inputKey,
    PType<V> ptype) {
  PType<Integer> intType = Avros.ints();
  PType<Pair<Integer, V>> pairType = Avros.pairs(intType, ptype);
  return p.read(avroInput(inputKey, pairType));
}
开发者ID:apsaltis,项目名称:oryx,代码行数:8,代码来源:KMeansJobStep.java
示例11: vectorRecord
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Derives a {@code PType<Record>} from a vector type.
 *
 * <p>Vectors are wrapped in a {@code VectorRecord} on the way in and converted back with a
 * {@code Record2VectorFn} on the way out.
 *
 * @param ptype  underlying vector type the derived type is based on
 * @param sparse flag handed to {@code Record2VectorFn}
 * @return the derived record type, created through the type family of {@code ptype}
 */
public static PType<Record> vectorRecord(PType<RealVector> ptype, boolean sparse) {
  MapFn<RealVector, Record> wrapAsRecord = new MapFn<RealVector, Record>() {
    @Override
    public Record map(RealVector vector) {
      return new VectorRecord(vector);
    }
  };
  return ptype.getFamily().derived(
      Record.class, wrapAsRecord, new Record2VectorFn(sparse), ptype);
}
开发者ID:apsaltis,项目名称:oryx,代码行数:12,代码来源:MLRecords.java
示例12: sample
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Samples from the input by giving every element a weight of 1 and delegating to
 * {@code weightedSample}.
 *
 * @param input      collection to sample from
 * @param sampleSize sample size passed to {@code weightedSample}
 * @param random     random generator passed to {@code weightedSample}
 * @param <T>        element type
 * @return whatever {@code weightedSample} returns for the unit-weighted input
 */
public static <T> PCollection<T> sample(
    PCollection<T> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily family = input.getTypeFamily();
  PType<Pair<T, Integer>> unitWeightedType = family.pairs(input.getPType(), family.ints());

  MapFn<T, Pair<T, Integer>> attachUnitWeight = new MapFn<T, Pair<T, Integer>>() {
    @Override
    public Pair<T, Integer> map(T element) {
      return Pair.of(element, 1);
    }
  };

  PCollection<Pair<T, Integer>> unitWeighted = input.parallelDo(attachUnitWeight, unitWeightedType);
  return weightedSample(unitWeighted, sampleSize, random);
}
开发者ID:apsaltis,项目名称:oryx,代码行数:15,代码来源:ReservoirSampling.java
示例13: accept
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Accepts the given type only when it is a {@link PTableType} whose key class is
 * {@link ByteBuffer} and whose value class is {@code CQLRecord}; on acceptance the handler
 * is configured with this target.
 *
 * @param handler handler to configure when the type is accepted
 * @param pType   candidate output type
 * @return {@code true} if the handler was configured, {@code false} otherwise
 */
@Override
public boolean accept(final OutputHandler handler, final PType<?> pType) {
  if (!(pType instanceof PTableType)) {
    return false;
  }
  final PTableType<?, ?> tableType = (PTableType<?, ?>) pType;
  final PType<?> keyPType = tableType.getKeyType();
  final PType<?> valuePType = tableType.getValueType();

  final boolean keyIsByteBuffer = ByteBuffer.class.equals(keyPType.getTypeClass());
  if (!keyIsByteBuffer || !CQLRecord.class.equals(valuePType.getTypeClass())) {
    return false;
  }
  handler.configure(this, pType);
  return true;
}
开发者ID:spotify,项目名称:hdfs2cass,代码行数:15,代码来源:CQLTarget.java
示例14: DetachFunction
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Creates a function that keeps a reference to the given type in {@code pType}.
 *
 * @param initializedPType the type to store; NOTE(review): the parameter name suggests it
 *                         must already be initialized by the caller, since nothing here
 *                         initializes it — confirm against call sites
 */
public DetachFunction(PType<T> initializedPType) {
this.pType = initializedPType;
}
开发者ID:spotify,项目名称:crunch-lib,代码行数:4,代码来源:DoFns.java
示例15: DetachingDoFn
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Creates a wrapper that stores the given function and value type for later use.
 *
 * @param reduceFn  function over (key, values) pairs that is stored in {@code reduceFn}
 * @param valueType type of the individual values, stored in {@code valueType}
 */
public DetachingDoFn(DoFn<Pair<K, Iterable<V>>, T> reduceFn, PType<V> valueType) {
this.reduceFn = reduceFn;
this.valueType = valueType;
}
开发者ID:spotify,项目名称:crunch-lib,代码行数:5,代码来源:DoFns.java
示例16: detachIterable
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Returns the given iterable transformed element-by-element through a
 * {@code DetachFunction} built from {@code pType}.
 *
 * @param iterable the values to transform
 * @param pType    type handed to the {@code DetachFunction}
 * @return the result of {@code Iterables.transform} over the input
 */
public Iterable<V> detachIterable(Iterable<V> iterable, final PType<V> pType) {
  DetachFunction<V> detacher = new DetachFunction<V>(pType);
  return Iterables.transform(iterable, detacher);
}
开发者ID:spotify,项目名称:crunch-lib,代码行数:4,代码来源:DoFns.java
示例17: BAMFileSource
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Creates a source for the given path and record type, passing
 * {@code AnySAMInputFormat.class} as the input format to the superclass constructor.
 *
 * @param path  location handed to the superclass
 * @param ptype type of the {@code SAMRecord} elements, handed to the superclass
 */
public BAMFileSource(Path path, PType<SAMRecord> ptype) {
super(path, ptype, AnySAMInputFormat.class);
}
开发者ID:arahuja,项目名称:varcrunch,代码行数:4,代码来源:BAMFileSource.java
示例18: YState
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Creates a state object that keeps the given {@code MatrixRow} type in {@code ptype}.
 *
 * @param ptype the type to store
 */
YState(PType<MatrixRow> ptype) {
this.ptype = ptype;
}
开发者ID:apsaltis,项目名称:oryx,代码行数:4,代码来源:YState.java
示例19: CoMomentKeyFn
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Creates a function that keeps the given key type in {@code keyType}.
 *
 * @param keyType the key type to store
 */
CoMomentKeyFn(PType<K> keyType) {
this.keyType = keyType;
}
开发者ID:apsaltis,项目名称:oryx,代码行数:4,代码来源:CoMomentKeyFn.java
示例20: inputVectors
import org.apache.crunch.types.PType; //导入依赖的package包/类
/**
 * Reads (integer, vector) pairs via {@code inputPairs} and returns only the vectors.
 *
 * @param p        pipeline to read on
 * @param inputKey key identifying the input, passed through to {@code inputPairs}
 * @param ptype    type of the vectors
 * @param <V>      concrete vector type
 * @return the values of the pairs, viewed as a table through {@code PTables.asPTable}
 */
protected final <V extends RealVector> PCollection<V> inputVectors(Pipeline p, String inputKey, PType<V> ptype) {
  PCollection<Pair<Integer, V>> pairs = inputPairs(p, inputKey, ptype);
  return PTables.asPTable(pairs).values();
}
开发者ID:apsaltis,项目名称:oryx,代码行数:4,代码来源:KMeansJobStep.java
注：本文中的org.apache.crunch.types.PType类示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论