本文整理汇总了Java中org.apache.pig.data.NonSpillableDataBag类的典型用法代码示例。如果您正苦于以下问题:Java NonSpillableDataBag类的具体用法?Java NonSpillableDataBag怎么用?Java NonSpillableDataBag使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
NonSpillableDataBag类属于org.apache.pig.data包,在下文中一共展示了NonSpillableDataBag类的18个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Collects the values of the map found in the first field of {@code input}
 * into a bag, adding one tuple (built by {@code TUPLE_FACTORY.newTuple})
 * per map value.
 *
 * @param input tuple whose first field is cast to a map
 * @return the bag of value tuples, or {@code null} when {@code input} is
 *         null, has no fields, or its first field is null
 * @throws IOException propagated from the tuple accessors, if they fail
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    // Input must be of type Map. This is verified at compile time
    final Map<String, Object> map = (Map<String, Object>) input.get(0);
    if (map == null) {
        return null;
    }
    final Collection<Object> values = map.values();
    // Size the bag up front from the number of values.
    final DataBag result = new NonSpillableDataBag(values.size());
    for (Object value : values) {
        result.add(TUPLE_FACTORY.newTuple(value));
    }
    return result;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:24,代码来源:VALUELIST.java
示例2: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Collects the keys of the map found in the first field of {@code input}
 * into a bag, adding one tuple (built by {@code TUPLE_FACTORY.newTuple})
 * per key.
 *
 * @param input tuple whose first field is cast to a map
 * @return the bag of key tuples, or {@code null} when {@code input} is
 *         null, has no fields, or its first field is null
 * @throws IOException propagated from the tuple accessors, if they fail
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    final boolean nothingToRead = (input == null) || (input.size() == 0);
    if (nothingToRead) {
        return null;
    }
    // Input must be of type Map. This is verified at compile time
    final Map<String, Object> map = (Map<String, Object>) input.get(0);
    if (map == null) {
        return null;
    }
    // Size the bag up front from the number of entries.
    final DataBag keys = new NonSpillableDataBag(map.size());
    for (String key : map.keySet()) {
        keys.add(TUPLE_FACTORY.newTuple(key));
    }
    return keys;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:23,代码来源:KEYSET.java
示例3: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Turns the fields of {@code input} into a bag: a field that already is a
 * {@code Tuple} is added as-is, any other field is first placed into a new
 * one-field tuple.
 *
 * @param input tuple whose fields become the bag's tuples
 * @return a bag with one tuple per input field
 * @throws RuntimeException wrapping any exception raised while building
 *         the bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        // The assumption is that if the bag contents fits into
        // an input tuple, it will not need to be spilled.
        final int fieldCount = input.size();
        final DataBag result = new NonSpillableDataBag(fieldCount);
        for (int pos = 0; pos < fieldCount; pos++) {
            final Object field = input.get(pos);
            if (field instanceof Tuple) {
                result.add((Tuple) field);
                continue;
            }
            // Non-tuple field: wrap it in a single-field tuple first.
            final Tuple wrapper = TupleFactory.getInstance().newTuple(1);
            wrapper.set(0, field);
            result.add(wrapper);
        }
        return result;
    } catch (Exception ee) {
        throw new RuntimeException("Error while creating a bag", ee);
    }
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:24,代码来源:TOBAG.java
示例4: testNonSpillableDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Adds three two-string tuples to a {@code NonSpillableDataBag} and checks
 * that iterating the bag yields equal tuples in the same order, and
 * exactly as many as were added.
 */
@Test
public void testNonSpillableDataBag() throws Exception {
    final String[][] expectedRows = {{"a", "b"}, {"c", "d"}, {"e", "f"}};
    final NonSpillableDataBag bag = new NonSpillableDataBag();
    for (String[] row : expectedRows) {
        bag.add(Util.createTuple(row));
    }

    int seen = 0;
    for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext(); seen++) {
        final Tuple actual = cursor.next();
        assertEquals(Util.createTuple(expectedRows[seen]), actual);
    }
    // Every tuple that was added must have been visited.
    assertEquals(expectedRows.length, seen);
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:17,代码来源:TestDataBagAccess.java
示例5: tuple
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Builds a nested tuple with {@code TOP_LEVEL_COLS} fields. Each field is
 * an inner ten-field tuple laid out as: fields 0-3 strings of the form
 * {@code "foo<i>,<j>"}, fields 4-7 the long values 0..3, and fields 8-9
 * bags that each hold ten tuples created from the integers 0..9.
 *
 * @return the populated top-level tuple
 * @throws ExecException declared for the tuple setters used here
 */
private static Tuple tuple() throws ExecException {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t = tf.newTuple(TOP_LEVEL_COLS);
    for (int i = 0; i < TOP_LEVEL_COLS; i++) {
        Tuple ti = tf.newTuple(10);
        // fields 0-3: strings
        for (int j = 0; j < 4; j++) {
            ti.set(j, "foo" + i + "," + j);
        }
        // fields 4-7: longs
        for (int k = 0; k < 4; k++) {
            ti.set(4 + k, (long) k);
        }
        // fields 8-9: bags of integer tuples
        for (int l = 0; l < 2; l++) {
            DataBag bag = new NonSpillableDataBag();
            for (int m = 0; m < 10; m++) {
                // Integer.valueOf replaces the deprecated Integer(int)
                // constructor; the Object cast keeps the original
                // newTuple(Object) overload selected.
                bag.add(tf.newTuple((Object) Integer.valueOf(m)));
            }
            ti.set(8 + l, bag);
        }
        t.set(i, ti);
    }
    return t;
}
开发者ID:apache,项目名称:parquet-mr,代码行数:23,代码来源:TupleConsumerPerfTest.java
示例6: POFRJoin
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the join operator: stores the plan lists, fragment index, key
 * types and replicated-file specs, allocates one slot per entry of
 * {@code ppLists} in the per-input arrays, builds the join plans, and
 * prepares a bag holding {@code nullTuple}.
 *
 * @param k           operator key, passed to the superclass and to
 *                    {@code createJoinPlans}
 * @param rp          passed to the superclass constructor
 * @param inp         passed to the superclass constructor
 * @param ppLists     plan lists; its size fixes the per-input array lengths
 * @param keyTypes    stored as-is
 * @param replFiles   stored as-is
 * @param fragment    stored as-is
 * @param isLeftOuter stored as {@code isLeftOuterJoin}
 * @param nullTuple   the single tuple placed in {@code nullBag}
 * @throws ExecException declared by this constructor
 */
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp,
        List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes,
        FileSpec[] replFiles, int fragment, boolean isLeftOuter,
        Tuple nullTuple)
        throws ExecException {
    super(k, rp, inp);
    final int inputCount = ppLists.size();

    this.phyPlanLists = ppLists;
    this.fragment = fragment;
    this.keyTypes = keyTypes;
    this.replFiles = replFiles;
    this.replicates = new Map[inputCount];
    this.LRs = new POLocalRearrange[inputCount];
    this.constExps = new ConstantExpression[inputCount];
    // Runs only after the fields above are populated, as in the original order.
    createJoinPlans(k);

    this.processingPlan = false;
    this.mTupleFactory = TupleFactory.getInstance();
    final List<Tuple> nullTuples = new ArrayList<Tuple>();
    nullTuples.add(nullTuple);
    this.nullBag = new NonSpillableDataBag(nullTuples);
    this.isLeftOuterJoin = isLeftOuter;
}
开发者ID:PonIC,项目名称:PonIC,代码行数:23,代码来源:POFRJoin.java
示例7: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Collects the distinct values of the map found in the first field of
 * {@code input} into a bag. A value is added (as a tuple built by
 * {@code TUPLE_FACTORY.newTuple}) only the first time it is encountered.
 *
 * @param input tuple whose first field is cast to a map
 * @return the bag of distinct value tuples, or {@code null} when
 *         {@code input} is null, has no fields, or its first field is null
 * @throws IOException propagated from the tuple accessors, if they fail
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    // Input must be of type Map. This is verified at compile time
    final Map<String, Object> map = (Map<String, Object>) input.get(0);
    if (map == null) {
        return null;
    }
    final Set<Object> seen = new HashSet<Object>(getInitialSetSize(map.values()));
    final DataBag distinct = new NonSpillableDataBag();
    for (Object value : map.values()) {
        // Set.add reports whether the value was absent, which is exactly
        // the "not seen before" condition.
        if (seen.add(value)) {
            distinct.add(TUPLE_FACTORY.newTuple(value));
        }
    }
    return distinct;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:32,代码来源:VALUESET.java
示例8: getNextDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Processes the input bag and returns the consumed result. When the input
 * status is EOP and {@code sendEmptyBagOnEOP} is set, an empty bag with
 * STATUS_OK is substituted before consumption; any other non-OK input is
 * returned unchanged. {@code sendEmptyBagOnEOP} is cleared on every path
 * that returns.
 *
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared by the overridden method
 */
@Override
public Result getNextDataBag() throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable; STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        final boolean substituteEmptyBag =
                input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!substituteEmptyBag) {
            // A result is being sent down, so stop substituting an empty
            // bag on EOP until new inputs are processed (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor sent EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless reset() re-arms the flag for new inputs.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result output = consumeInputBag(input);
    // Same rule as above: no more empty bags on EOP until reset().
    sendEmptyBagOnEOP = false;
    return output;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:37,代码来源:PORelationToExprProject.java
示例9: createDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the bag implementation selected by the {@code pig.cachedbag.type}
 * setting of the internal job configuration: a {@code NonSpillableDataBag}
 * when the setting equals "default" (ignoring case), otherwise an
 * {@code InternalCachedBag}. A missing configuration or setting also gives
 * an {@code InternalCachedBag}.
 *
 * @param numBags passed to the {@code InternalCachedBag} constructor
 * @return the newly created bag
 */
private DataBag createDataBag(int numBags) {
    final String bagType = (PigMapReduce.sJobConfInternal.get() == null)
            ? null
            : PigMapReduce.sJobConfInternal.get().get("pig.cachedbag.type");
    // Constant-first comparison also covers a null setting.
    if ("default".equalsIgnoreCase(bagType)) {
        return new NonSpillableDataBag();
    }
    return new InternalCachedBag(numBags);
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:12,代码来源:POCombinerPackage.java
示例10: POFRJoin
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the join operator: stores the plan lists, fragment index, key
 * types and replicated-file specs, allocates one slot per entry of
 * {@code ppLists} in the per-input arrays, builds the join plans, prepares
 * a bag holding {@code nullTuple}, and records the input and key schemas.
 *
 * @param k            operator key, passed to the superclass and to
 *                     {@code createJoinPlans}
 * @param rp           passed to the superclass constructor
 * @param inp          passed to the superclass constructor
 * @param ppLists      plan lists; its size fixes the per-input array lengths
 * @param keyTypes     stored as-is
 * @param replFiles    stored as-is; its length sizes the fallback schema
 *                     arrays (0 when null)
 * @param fragment     stored as-is
 * @param isLeftOuter  stored as {@code isLeftOuterJoin}
 * @param nullTuple    the single tuple placed in {@code nullBag}
 * @param inputSchemas stored when non-null, otherwise replaced by a new
 *                     array with one (null) slot per replicated file
 * @param keySchemas   stored when non-null, otherwise replaced by a new
 *                     array with one (null) slot per replicated file
 * @throws ExecException declared by this constructor
 */
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp,
        List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes,
        FileSpec[] replFiles, int fragment, boolean isLeftOuter,
        Tuple nullTuple,
        Schema[] inputSchemas,
        Schema[] keySchemas)
        throws ExecException {
    super(k, rp, inp);
    final int inputCount = ppLists.size();

    this.phyPlanLists = ppLists;
    this.fragment = fragment;
    this.keyTypes = keyTypes;
    this.replFiles = replFiles;
    this.replicates = new TupleToMapKey[inputCount];
    this.LRs = new POLocalRearrange[inputCount];
    this.constExps = new ConstantExpression[inputCount];
    // Runs only after the fields above are populated, as in the original order.
    createJoinPlans(k);

    this.processingPlan = false;
    this.mTupleFactory = TupleFactory.getInstance();
    final List<Tuple> nullTuples = new ArrayList<Tuple>();
    nullTuples.add(nullTuple);
    this.nullBag = new NonSpillableDataBag(nullTuples);
    this.isLeftOuterJoin = isLeftOuter;

    // Missing schema arrays fall back to one slot per replicated file.
    final int replCount = (replFiles == null) ? 0 : replFiles.length;
    this.inputSchemas = (inputSchemas != null) ? inputSchemas : new Schema[replCount];
    this.keySchemas = (keySchemas != null) ? keySchemas : new Schema[replCount];
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:35,代码来源:POFRJoin.java
示例11: getNextDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Processes the input bag and returns the consumed result. A STATUS_NULL
 * input is returned immediately. When the input status is EOP and
 * {@code sendEmptyBagOnEOP} is set, an empty bag with STATUS_OK is
 * substituted before consumption; any other non-OK input is returned
 * unchanged after clearing {@code sendEmptyBagOnEOP}.
 *
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared by the overridden method
 */
@Override
public Result getNextDataBag() throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable; STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        if (input.returnStatus == POStatus.STATUS_NULL) {
            // Null input goes straight back; the flag is left untouched.
            return input;
        }
        final boolean substituteEmptyBag =
                input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!substituteEmptyBag) {
            // A result is being sent down, so stop substituting an empty
            // bag on EOP until new inputs are processed (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor sent EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless reset() re-arms the flag for new inputs.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result output = consumeInputBag(input);
    // Same rule as above: no more empty bags on EOP until reset().
    sendEmptyBagOnEOP = false;
    return output;
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:39,代码来源:PORelationToExprProject.java
示例12: createDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates a bag, choosing the implementation from the
 * {@code PigConfiguration.PIG_CACHEDBAG_TYPE} setting of the internal job
 * configuration. The setting is read only on the first call; the outcome
 * is kept in {@code useDefaultBag} and reused afterwards.
 *
 * @param numBags passed to the {@code InternalCachedBag} constructor
 * @return a {@code NonSpillableDataBag} when {@code useDefaultBag} is set,
 *         otherwise an {@code InternalCachedBag}
 */
private DataBag createDataBag(int numBags) {
    if (!initialized) {
        initialized = true;
        // Constant-first comparison also covers a missing setting.
        if (PigMapReduce.sJobConfInternal.get() != null
                && "default".equalsIgnoreCase(
                        PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_TYPE))) {
            useDefaultBag = true;
        }
    }
    if (useDefaultBag) {
        return new NonSpillableDataBag();
    }
    return new InternalCachedBag(numBags);
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:13,代码来源:CombinerPackager.java
示例13: generateRandomSortedSamples
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Builds a bag of {@code numSamples} single-field tuples holding
 * pseudo-random ints in {@code [0, max)}, sorted with
 * {@code Collections.sort}. The generator uses the fixed seed 1000, so
 * the sequence is the same on every call.
 *
 * @param numSamples number of tuples to generate
 * @param max        exclusive upper bound passed to {@code Random.nextInt}
 * @return a bag wrapping the sorted sample list
 * @throws Exception declared for the tuple setter used here
 */
private DataBag generateRandomSortedSamples(int numSamples, int max) throws Exception {
    final Random generator = new Random(1000);
    final List<Tuple> sampleTuples = new ArrayList<Tuple>();
    int remaining = numSamples;
    while (remaining-- > 0) {
        final Tuple sample = tFact.newTuple(1);
        sample.set(0, generator.nextInt(max));
        sampleTuples.add(sample);
    }
    Collections.sort(sampleTuples);
    return new NonSpillableDataBag(sampleTuples);
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:12,代码来源:TestFindQuantiles.java
示例14: getNext
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Processes the input bag and returns the consumed result. When the input
 * status is EOP and {@code sendEmptyBagOnEOP} is set, an empty bag with
 * STATUS_OK is substituted before consumption; any other non-OK input is
 * returned unchanged. {@code sendEmptyBagOnEOP} is cleared on every path
 * that returns.
 *
 * @param db not read by this method
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared by the overridden method
 */
@Override
public Result getNext(DataBag db) throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable; STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        final boolean substituteEmptyBag =
                input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!substituteEmptyBag) {
            // A result is being sent down, so stop substituting an empty
            // bag on EOP until new inputs are processed (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor sent EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless reset() re-arms the flag for new inputs.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result output = consumeInputBag(input);
    // Same rule as above: no more empty bags on EOP until reset().
    sendEmptyBagOnEOP = false;
    return output;
}
开发者ID:PonIC,项目名称:PonIC,代码行数:37,代码来源:PORelationToExprProject.java
示例15: getNext
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Processes the input bag and returns the consumed result. When the input
 * status is EOP and {@code sendEmptyBagOnEOP} is set, an empty bag with
 * STATUS_OK is substituted before consumption; any other non-OK input is
 * returned unchanged. {@code sendEmptyBagOnEOP} is cleared on every path
 * that returns.
 *
 * @param db not read by this method
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared by the overridden method
 */
@Override
public Result getNext(DataBag db) throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable; STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != SOStatus.STATUS_OK) {
        final boolean substituteEmptyBag =
                input.returnStatus == SOStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!substituteEmptyBag) {
            // A result is being sent down, so stop substituting an empty
            // bag on EOP until new inputs are processed (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor sent EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless reset() re-arms the flag for new inputs.
        input.result = new NonSpillableDataBag();
        input.returnStatus = SOStatus.STATUS_OK;
    }
    final Result output = consumeInputBag(input);
    // Same rule as above: no more empty bags on EOP until reset().
    sendEmptyBagOnEOP = false;
    return output;
}
开发者ID:PonIC,项目名称:PonIC,代码行数:37,代码来源:PORelationToExprProject.java
示例16: addEmptyBagOuterJoin
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Extends {@code fePlan} with a POBinCond that substitutes a one-tuple bag
 * of nulls for an empty bag. The bincond's condition is an IsEmpty user
 * function applied to a clone of the plan's first root, its lhs is a
 * constant bag holding a single tuple with {@code inputSchema.size()} null
 * fields, and its rhs is the original root projection.
 *
 * @param fePlan      plan whose first root is cast to {@code POProject};
 *                    the new operators are added to and connected in it
 * @param inputSchema its size gives the number of null fields in the
 *                    substitute tuple
 * @throws PlanException wrapping any exception raised while the operators
 *                       are created and wired up
 */
public static void addEmptyBagOuterJoin(PhysicalPlan fePlan, Schema inputSchema) throws PlanException {
// we currently have POProject[bag] as the only operator in the plan
// If the bag is an empty bag, we should replace
// it with a bag with one tuple with null fields so that when we flatten
// we do not drop records (flatten will drop records if the bag is left
// as an empty bag) and actually project nulls for the fields in
// the empty bag
// So we need to get to the following state:
// POProject[Bag]
// \
// POUserFunc["IsEmpty()"] Const[Bag](bag with null fields)
// \ | POProject[Bag]
// \ | /
// POBinCond
POProject relationProject = (POProject) fePlan.getRoots().get(0);
try {
// condition of the bincond: IsEmpty applied to a clone of the root projection
POProject relationProjectForIsEmpty = relationProject.clone();
fePlan.add(relationProjectForIsEmpty);
// every new operator key below is created in the root projection's scope
String scope = relationProject.getOperatorKey().scope;
FuncSpec isEmptySpec = new FuncSpec(IsEmpty.class.getName());
Object f = PigContext.instantiateFuncFromSpec(isEmptySpec);
POUserFunc isEmpty = new POUserFunc(new OperatorKey(scope, NodeIdGenerator.getGenerator().
getNextNodeId(scope)), -1, null, isEmptySpec, (EvalFunc) f);
isEmpty.setResultType(DataType.BOOLEAN);
fePlan.add(isEmpty);
fePlan.connect(relationProjectForIsEmpty, isEmpty);
// lhs of bincond (const bag with null fields)
ConstantExpression ce = new ConstantExpression(new OperatorKey(scope,
NodeIdGenerator.getGenerator().getNextNodeId(scope)));
// the following should give a tuple with the
// required number of nulls
Tuple t = TupleFactory.getInstance().newTuple(inputSchema.size());
for(int i = 0; i < inputSchema.size(); i++) {
t.set(i, null);
}
// wrap the all-null tuple in a one-element bag and use it as the constant's value
List<Tuple> bagContents = new ArrayList<Tuple>(1);
bagContents.add(t);
DataBag bg = new NonSpillableDataBag(bagContents);
ce.setValue(bg);
ce.setResultType(DataType.BAG);
//this operator doesn't have any predecessors
fePlan.add(ce);
//rhs of bincond is the original project
// let's set up the bincond now
POBinCond bincond = new POBinCond(new OperatorKey(scope,
NodeIdGenerator.getGenerator().getNextNodeId(scope)));
bincond.setCond(isEmpty);
bincond.setLhs(ce);
bincond.setRhs(relationProject);
bincond.setResultType(DataType.BAG);
fePlan.add(bincond);
// the bincond takes the condition, the constant and the original projection as inputs
fePlan.connect(isEmpty, bincond);
fePlan.connect(ce, bincond);
fePlan.connect(relationProject, bincond);
} catch (Exception e) {
// any failure during setup is reported as a PlanException with the cause attached
throw new PlanException("Error setting up outerjoin", e);
}
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:67,代码来源:CompilerUtils.java
示例17: end
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Copies the current contents of {@code buffer} into a new list, wraps
 * that list in a {@code NonSpillableDataBag}, and adds the bag to
 * {@code parent}.
 */
@Override
public void end() {
    // Copy first so the bag is built on its own list, not on buffer.
    final ArrayList<Tuple> snapshot = new ArrayList<Tuple>(buffer);
    parent.add(new NonSpillableDataBag(snapshot));
}
开发者ID:apache,项目名称:parquet-mr,代码行数:5,代码来源:TupleConverter.java
示例18: bag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Builds a bag from the given tuples.
 *
 * <p>The tuples are copied into a new {@code ArrayList} before the bag is
 * created. {@code Arrays.asList} alone returns a fixed-size view backed by
 * the argument array, so a bag built directly on it would share storage
 * with the caller's array. NOTE(review): this assumes
 * {@code NonSpillableDataBag} keeps the list it is given and adds to it --
 * confirm against its source.
 *
 * @param tuples the tuples to place in the bag; may be empty
 * @return a bag containing the provided objects
 */
public static DataBag bag(Tuple... tuples) {
    // Fully qualified so this block does not depend on an ArrayList import.
    return new NonSpillableDataBag(new java.util.ArrayList<Tuple>(Arrays.asList(tuples)));
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:8,代码来源:Storage.java
注:本文中的org.apache.pig.data.NonSpillableDataBag类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论