本文整理汇总了Java中org.apache.pig.data.DefaultBagFactory类的典型用法代码示例。如果您正苦于以下问题:Java DefaultBagFactory类的具体用法?Java DefaultBagFactory怎么用?Java DefaultBagFactory使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
DefaultBagFactory类属于org.apache.pig.data包,在下文中一共展示了DefaultBagFactory类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: testUniformDistribution
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Fills 100 bags with 100 random integers each (drawn with {@code rnd.nextInt(100)}) and
 * checks the size of the tuple returned by {@code Histogram.exec(bag, 0, 99, k)} for
 * k = 1, 2 and 10. For k = 1 the single field is also expected to equal {@code 100L}.
 *
 * @throws IOException declared by the test; presumably propagated from Histogram.exec
 */
@Test
public void testUniformDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 100; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor. The argument is
            // still boxed explicitly rather than passed as a raw int, so the same newTuple
            // overload is selected as before (NOTE(review): a bare int may bind to a
            // size-taking newTuple(int) overload -- confirm before simplifying further).
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(rnd.nextInt(100))));
        }

        // One requested bucket: a single field holding the count of all 100 values.
        Tuple result = Histogram.exec(bag, 0, 99, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 99, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 99, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
开发者ID:lucidworks,项目名称:pig-solr,代码行数:23,代码来源:TestHistogram.java
示例2: testNormalDistribution
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Fills 1000 bags with 100 integers each, drawn as {@code 50 + nextGaussian() * 50} and
 * clamped to the range [0, 100], and checks the size of the tuple returned by
 * {@code Histogram.exec(bag, 0, 100, k)} for k = 1, 2 and 10. For k = 1 the single field is
 * also expected to equal {@code 100L}.
 *
 * @throws IOException declared by the test; presumably propagated from Histogram.exec
 */
@Test
public void testNormalDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 1000; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Clamp the Gaussian sample into [0, 100] so it always falls inside the histogram range.
            int x = Math.max(Math.min((int) (50 + rnd.nextGaussian() * 50), 100), 0);
            // Integer.valueOf replaces the deprecated Integer(int) constructor; the value stays
            // explicitly boxed so the same newTuple overload is chosen as before
            // (NOTE(review): a bare int may bind to a size-taking newTuple(int) -- confirm).
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(x)));
        }

        // One requested bucket: a single field holding the count of all 100 values.
        Tuple result = Histogram.exec(bag, 0, 100, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 100, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 100, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
开发者ID:lucidworks,项目名称:pig-solr,代码行数:24,代码来源:TestHistogram.java
示例3: testSimple
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of two (String, int) tuples and runs Extract on (bag, 0) and (bag, 1),
 * expecting the i-th field of each result to equal the i-th string / the i-th int.
 */
@Test
public void testSimple() throws IOException {
    Extract extractor = new Extract();
    TupleFactory factory = TupleFactory.getInstance();
    DataBag input = DefaultBagFactory.getInstance().newDefaultBag();

    int[] counts = { 6, 22 };
    String[] terms = { "term1", "term2" };
    final int pairCount = 2;

    // One (term, count) tuple per index goes into the bag.
    for (int idx = 0; idx < pairCount; idx++) {
        input.add(factory.newTuple(Arrays.asList(terms[idx], counts[idx])));
    }

    // Column 0 of every tuple: the terms.
    Tuple byTerm = extractor.exec(factory.newTuple(Arrays.asList(input, 0)));
    for (int idx = 0; idx < pairCount; idx++) {
        assertEquals(byTerm.get(idx), terms[idx]);
    }

    // Column 1 of every tuple: the counts.
    Tuple byCount = extractor.exec(factory.newTuple(Arrays.asList(input, 1)));
    for (int idx = 0; idx < pairCount; idx++) {
        assertEquals(byCount.get(idx), counts[idx]);
    }
}
开发者ID:lucidworks,项目名称:pig-solr,代码行数:27,代码来源:ExtractTest.java
示例4: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Produces a bag with one single-field tuple per n-gram generated from the query string
 * stored in field 0 of {@code input}.
 *
 * @param input tuple whose first field is cast to String and split into words
 * @return the bag of n-grams, or null when the input is null/empty or any exception is
 *         raised while processing (the exception message is printed to stderr)
 */
public DataBag exec(Tuple input) throws IOException {
    boolean nothingToDo = (input == null) || (input.size() == 0);
    if (nothingToDo) {
        return null;
    }

    try {
        DataBag result = DefaultBagFactory.getInstance().newDefaultBag();

        String text = (String) input.get(0);
        String[] tokens = TutorialUtil.splitToWords(text);

        // makeNGram fills the set passed to it; the set removes duplicate n-grams.
        Set<String> grams = new HashSet<String>();
        TutorialUtil.makeNGram(tokens, grams, _ngramSizeLimit);

        for (String gram : grams) {
            Tuple row = TupleFactory.getInstance().newTuple(1);
            row.set(0, gram);
            result.add(row);
        }
        return result;
    } catch (Exception e) {
        // Best effort: report the problem and yield null rather than fail.
        System.err.println("NGramGenerator: failed to process input; error - " + e.getMessage());
        return null;
    }
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:21,代码来源:NGramGenerator.java
示例5: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Tokenizes the string in field 0 of {@code input} on the delimiter characters
 * space, double quote, comma, parentheses and asterisk, and returns a bag holding one
 * tuple per token.
 *
 * @param input tuple whose first field is cast to String
 * @return the bag of tokens, or null when the input is null/empty or reading field 0
 *         throws (the exception message is printed to stderr)
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    String text;
    try {
        text = (String) input.get(0);
    } catch (Exception e) {
        System.err.println ("Failed to process input; error - " + e.getMessage());
        return null;
    }

    DataBag tokens = DefaultBagFactory.getInstance().newDefaultBag();
    // Third argument false: the delimiters themselves are not returned as tokens.
    StringTokenizer tokenizer = new StringTokenizer(text, " \",()*", false);
    while (tokenizer.hasMoreTokens()) {
        String token = tokenizer.nextToken();
        tokens.add(DefaultTupleFactory.getInstance().newTuple(token));
    }
    return tokens;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:24,代码来源:TestEvalFunc.java
示例6: testSerialize__bag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple containing the bag {("A","B"), (1,2)} with
 * {@code ps.serializeToBytes} and compares the bytes with the expected delimited form.
 */
@Test
public void testSerialize__bag() throws IOException {
    // First inner tuple: two strings.
    Tuple letters = tf.newTuple(2);
    letters.set(0, "A");
    letters.set(1, "B");

    // Second inner tuple: two integers.
    Tuple numbers = tf.newTuple(2);
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    Tuple outer = tf.newTuple(1);
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag(contents);
    outer.set(0, bag);

    byte[] expectedOutput = "|{_|(_CA|,_CB|)_|,_|(_I1|,_I2|)_|}_|_\n".getBytes();
    // assertEquals is called with two byte arrays and its result feeds assertTrue, so it is
    // presumably a boolean-returning comparison helper from the enclosing test class.
    byte[] actualOutput = ps.serializeToBytes(outer);
    Assert.assertTrue(assertEquals(expectedOutput, actualOutput));
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:18,代码来源:TestPigStreamingUDF.java
示例7: testSerialize__bag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple containing the bag {("A","B"), (1,2)} with
 * {@code ps.serialize} and expects the bytes of "{(A,B),(1,2)}" followed by a newline.
 */
@Test
public void testSerialize__bag() throws IOException {
    // First inner tuple: two strings.
    Tuple letters = tf.newTuple(2);
    letters.set(0, "A");
    letters.set(1, "B");

    // Second inner tuple: two integers.
    Tuple numbers = tf.newTuple(2);
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    Tuple outer = tf.newTuple(1);
    outer.set(0, DefaultBagFactory.getInstance().newDefaultBag(contents));

    byte[] expectedOutput = "{(A,B),(1,2)}\n".getBytes();
    Assert.assertArrayEquals(expectedOutput, ps.serialize(outer));
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:19,代码来源:TestPigStreaming.java
示例8: numberArrayToDataBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps each given number in its own tuple and collects the tuples in a new default bag.
 *
 * @param values numbers to add, in order
 * @return a bag with one tuple per element of {@code values}
 */
private static DataBag numberArrayToDataBag(Number... values) {
    DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
    for (int idx = 0; idx < values.length; idx++) {
        Number current = values[idx];
        result.add(TupleFactory.getInstance().newTuple(current));
    }
    return result;
}
开发者ID:lucidworks,项目名称:pig-solr,代码行数:8,代码来源:TestHistogram.java
示例9: testLongBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of eleven (String, int) tuples and runs Extract on (bag, 0) and (bag, 1),
 * expecting the i-th field of each result to equal the i-th string / the i-th int.
 */
@Test
public void testLongBag() throws IOException {
    Extract extractor = new Extract();
    TupleFactory factory = TupleFactory.getInstance();
    DataBag input = DefaultBagFactory.getInstance().newDefaultBag();

    int[] counts = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    String[] terms = {
        "term0", "term1", "term2", "term3", "term4", "term5",
        "term6", "term7", "term8", "term9", "term10"
    };
    final int pairCount = 11;

    // One (term, count) tuple per index goes into the bag.
    for (int idx = 0; idx < pairCount; idx++) {
        input.add(factory.newTuple(Arrays.asList(terms[idx], counts[idx])));
    }

    // Column 0 of every tuple: the terms.
    Tuple byTerm = extractor.exec(factory.newTuple(Arrays.asList(input, 0)));
    for (int idx = 0; idx < pairCount; idx++) {
        assertEquals(byTerm.get(idx), terms[idx]);
    }

    // Column 1 of every tuple: the counts.
    Tuple byCount = extractor.exec(factory.newTuple(Arrays.asList(input, 1)));
    for (int idx = 0; idx < pairCount; idx++) {
        assertEquals(byCount.get(idx), counts[idx]);
    }
}
开发者ID:lucidworks,项目名称:pig-solr,代码行数:28,代码来源:ExtractTest.java
示例10: consumeBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Reads a bag from the stream: skips ahead to the opening '{', then repeatedly reads one
 * tuple with consumeTuple and skips ahead to the next ',' or '}' until '}' is seen.
 *
 * @param in stream positioned at or before the bag's opening brace
 * @param fieldSchema schema of the bag; its inner schema must contain exactly one field
 * @return a bag of the non-null tuples returned by consumeTuple
 * @throws IOException if the schema is null, the stream ends before the bag is closed,
 *         or the bag schema does not contain exactly one field
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    ResourceFieldSchema[] innerFields = fieldSchema.getSchema().getFields();

    // Discard everything up to and including the opening brace.
    int ch;
    do {
        ch = in.read();
        if (ch == -1) {
            throw new IOException("Unexpect end of bag");
        }
    } while (ch != '{');

    // The field-count check deliberately happens after the opening brace has been consumed.
    if (innerFields.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    ResourceFieldSchema tupleSchema = innerFields[0];

    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    do {
        Tuple element = consumeTuple(in, tupleSchema);
        if (element != null) {
            bag.add(element);
        }
        // Skip to the separator (',') or the terminator ('}') that follows the tuple.
        do {
            ch = in.read();
            if (ch == -1) {
                throw new IOException("Unexpect end of bag");
            }
        } while (ch != '}' && ch != ',');
    } while (ch != '}');

    return bag;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:31,代码来源:Utf8StorageConverter.java
示例11: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps the input tuple in a new bag.
 *
 * <p>When {@code fieldType} is {@code DataType.MAP}, the bag instead receives a one-field
 * tuple holding the result of {@code createMap(input)}.
 *
 * @param input the tuple to wrap; null or empty input yields an empty bag
 * @return a bag with at most one tuple
 * @throws RuntimeException wrapping any exception raised while building the bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

        if ( input == null || input.size()== 0) {
            return bag; //an empty bag
        }
        if ( this.fieldType == DataType.MAP ) {
            Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
            t.set(0, createMap(input));
            bag.add( t );
        } else {
            bag.add(input);
        }
        return bag;
    } catch (Exception e) {
        // Pass the caught exception along as the cause so the root failure and its
        // stack trace are not lost; type and message of the thrown exception are unchanged.
        throw new RuntimeException( "Error while computing size in " + this.getClass().getSimpleName(), e);
    }
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:28,代码来源:TOBAG.java
示例12: testGetNextTuple
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Drains {@code sp} with getNextTuple until it reports STATUS_EOP, collecting each result
 * (converted by castToDBA) into a bag, and checks that bag against {@code expBag}.
 */
@Test
public void testGetNextTuple() throws ExecException, IOException {
    // The unused local "Tuple t" of the previous version has been removed.
    DataBag outBag = DefaultBagFactory.getInstance().newDefaultBag();
    Result res = sp.getNextTuple();
    while (res.returnStatus != POStatus.STATUS_EOP) {
        outBag.add(castToDBA((Tuple) res.result));
        res = sp.getNextTuple();
    }
    assertTrue(TestHelper.compareBags(expBag, outBag));
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:10,代码来源:TestUnion.java
示例13: genFloatDataBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Generates a bag of {@code row} tuples, each with {@code column} float fields set to
 * {@code r.nextFloat() * 1000}.
 *
 * @param r source of random floats
 * @param column number of fields appended to each tuple
 * @param row number of tuples added to the bag
 * @return the populated bag
 */
public static DataBag genFloatDataBag(Random r, int column, int row) {
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    for (int rowIdx = 0; rowIdx < row; rowIdx++) {
        Tuple current = TupleFactory.getInstance().newTuple();
        for (int colIdx = 0; colIdx < column; colIdx++) {
            float scaled = r.nextFloat() * 1000;
            current.append(scaled);
        }
        bag.add(current);
    }
    return bag;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:12,代码来源:GenRandomData.java
示例14: projectBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Projects a single column out of a bag: for every tuple in {@code db2}, field {@code i}
 * is copied into a new one-field tuple, and those tuples are returned in a new bag.
 *
 * @param db2 bag to read from
 * @param i index of the field to keep
 * @return a bag of single-field tuples
 * @throws ExecException declared by the method; presumably raised by Tuple.get
 */
public static DataBag projectBag(DataBag db2, int i) throws ExecException {
    DataBag projected = DefaultBagFactory.getInstance().newDefaultBag();
    for (Tuple source : db2) {
        Object field = source.get(i);
        Tuple single = new DefaultTuple();
        single.append(field);
        projected.add(single);
    }
    return projected;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:11,代码来源:TestHelper.java
示例15: areFilesSame
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Loads both files through POLoad operators, trims every tuple with trimTuple, and reports
 * whether the two files yield the same number of tuples and bags that compareBags accepts.
 *
 * @param expLocal file holding the expected data
 * @param actHadoop file holding the actual data
 * @param pc context handed to both load operators
 * @return false when the tuple counts differ, otherwise the result of compareBags
 */
public static boolean areFilesSame(FileSpec expLocal, FileSpec actHadoop, PigContext pc) throws ExecException, IOException{
    Random r = new Random();

    POLoad ldExp = new POLoad(new OperatorKey("", r.nextLong()));
    ldExp.setPc(pc);
    ldExp.setLFile(expLocal);

    POLoad ldAct = new POLoad(new OperatorKey("", r.nextLong()));
    ldAct.setPc(pc);
    ldAct.setLFile(actHadoop);

    // The unused local "Tuple t" of the previous version has been removed.

    // Both counters start at -1; only their equality matters, so the offset cancels out.
    int numActTuples = -1;
    DataBag bagAct = DefaultBagFactory.getInstance().newDefaultBag();
    Result resAct = ldAct.getNextTuple();
    while (resAct.returnStatus != POStatus.STATUS_EOP) {
        ++numActTuples;
        bagAct.add(trimTuple((Tuple) resAct.result));
        resAct = ldAct.getNextTuple();
    }

    int numExpTuples = -1;
    DataBag bagExp = DefaultBagFactory.getInstance().newDefaultBag();
    Result resExp = ldExp.getNextTuple();
    while (resExp.returnStatus != POStatus.STATUS_EOP) {
        ++numExpTuples;
        bagExp.add(trimTuple((Tuple) resExp.result));
        resExp = ldExp.getNextTuple();
    }

    if (numActTuples != numExpTuples) {
        return false;
    }
    return compareBags(bagExp, bagAct);
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:34,代码来源:TestHelper.java
示例16: getBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of ten tuples. Each tuple starts with a value chosen according to
 * {@code type} (nothing is appended for types not listed below), followed by the
 * constants 1 and 0.
 *
 * @param type one of the DataType byte constants
 * @return the populated bag
 */
private DataBag getBag(byte type) {
    DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = TupleFactory.getInstance().newTuple();
        boolean evenRow = (i % 2 == 0);

        if (type == DataType.BOOLEAN) {
            row.append(r.nextBoolean());
        } else if (type == DataType.INTEGER) {
            row.append(r.nextInt(2));
        } else if (type == DataType.LONG) {
            row.append(r.nextLong() % 2L);
        } else if (type == DataType.FLOAT) {
            // Floating-point values alternate deterministically instead of using the RNG.
            row.append(evenRow ? 1.0f : 0.0f);
        } else if (type == DataType.DOUBLE) {
            row.append(evenRow ? 1.0 : 0.0);
        } else if (type == DataType.DATETIME) {
            row.append(new DateTime(r.nextLong() % 2L));
        }

        row.append(1);
        row.append(0);
        result.add(row);
    }
    return result;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:31,代码来源:TestPOBinCond.java
示例17: getBagWithNulls
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of ten tuples like getBag, except that the leading value is replaced by
 * null whenever {@code r.nextInt(4) % 3 == 0} (i.e. when the draw is 0 or 3). Each tuple
 * ends with the constants 1 and 0.
 *
 * @param type one of the DataType byte constants
 * @return the populated bag
 */
private DataBag getBagWithNulls(byte type) {
    DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
    for (int i = 0; i < 10; i++) {
        Tuple row = TupleFactory.getInstance().newTuple();
        boolean evenRow = (i % 2 == 0);

        if (r.nextInt(4) % 3 != 0) {
            if (type == DataType.BOOLEAN) {
                row.append(r.nextBoolean());
            } else if (type == DataType.INTEGER) {
                row.append(r.nextInt(2));
            } else if (type == DataType.LONG) {
                row.append(r.nextLong() % 2L);
            } else if (type == DataType.FLOAT) {
                // Floating-point values alternate deterministically instead of using the RNG.
                row.append(evenRow ? 1.0f : 0.0f);
            } else if (type == DataType.DOUBLE) {
                row.append(evenRow ? 1.0 : 0.0);
            } else if (type == DataType.DATETIME) {
                row.append(new DateTime(r.nextLong() % 2L));
            }
        } else {
            row.append(null);
        }

        row.append(1);
        row.append(0);
        result.add(row);
    }
    return result;
}
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:35,代码来源:TestPOBinCond.java
示例18: testDeserialize__bag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Deserializes the delimited bytes of a bag of two (chararray, int) tuples with a
 * PigStreamingUDF built from the matching bag schema, and expects a tuple wrapping the
 * bag {("A",1), ("B",2)}.
 */
@Test
public void testDeserialize__bag() throws IOException {
    byte[] input = "|{_|(_A|,_1|)_|,_|(_B|,_2|)_|}_|_".getBytes();

    // Schema of the tuples inside the bag: (chararray, int).
    FieldSchema textField = new FieldSchema("", DataType.CHARARRAY);
    FieldSchema numberField = new FieldSchema("", DataType.INTEGER);
    List<FieldSchema> tupleFields = new ArrayList<FieldSchema>();
    tupleFields.add(textField);
    tupleFields.add(numberField);
    Schema tupleSchema = new Schema(tupleFields);
    FieldSchema tupleFieldSchema = new FieldSchema("", tupleSchema, DataType.TUPLE);

    // Schema of the bag itself: a single tuple field.
    List<FieldSchema> bagFields = new ArrayList<FieldSchema>();
    bagFields.add(tupleFieldSchema);
    Schema bagSchema = new Schema(bagFields);
    FieldSchema bagFieldSchema = new FieldSchema("", bagSchema, DataType.BAG);

    PigStreamingUDF sp = new PigStreamingUDF(bagFieldSchema);

    Tuple firstExpected = tf.newTuple(2);
    firstExpected.set(0, "A");
    firstExpected.set(1, 1);

    Tuple secondExpected = tf.newTuple(2);
    secondExpected.set(0, "B");
    secondExpected.set(1, 2);

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    expectedTuples.add(firstExpected);
    expectedTuples.add(secondExpected);
    DataBag expectedOutput = DefaultBagFactory.getInstance().newDefaultBag(expectedTuples);

    Object out = sp.deserialize(input, 0, input.length);

    Assert.assertEquals(tf.newTuple(expectedOutput), out);
}
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:36,代码来源:TestPigStreamingUDF.java
示例19: testGetNextTuple
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Drains {@code sp} with getNext until it reports STATUS_EOP, collecting each result
 * (converted by castToDBA) into a bag, and checks that bag against {@code expBag}.
 */
@Test
public void testGetNextTuple() throws ExecException, IOException {
    // Passed to every getNext call; it is never reassigned in this method.
    Tuple t = null;
    DataBag collected = DefaultBagFactory.getInstance().newDefaultBag();

    Result current = sp.getNext(t);
    while (current.returnStatus != POStatus.STATUS_EOP) {
        collected.add(castToDBA((Tuple) current.result));
        current = sp.getNext(t);
    }

    boolean bagsMatch = TestHelper.compareBags(expBag, collected);
    assertEquals(true, bagsMatch);
}
开发者ID:PonIC,项目名称:PonIC,代码行数:10,代码来源:TestUnion.java
示例20: areFilesSame
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Loads both files through POLoad operators, trims every tuple with trimTuple, and reports
 * whether the two files yield the same number of tuples and bags that compareBags accepts.
 *
 * @param expLocal file holding the expected data
 * @param actHadoop file holding the actual data
 * @param pc context handed to both load operators
 * @return false when the tuple counts differ, otherwise the result of compareBags
 */
public static boolean areFilesSame(FileSpec expLocal, FileSpec actHadoop, PigContext pc) throws ExecException, IOException{
    Random keySource = new Random();

    POLoad expectedLoader = new POLoad(new OperatorKey("", keySource.nextLong()));
    expectedLoader.setPc(pc);
    expectedLoader.setLFile(expLocal);

    POLoad actualLoader = new POLoad(new OperatorKey("", keySource.nextLong()));
    actualLoader.setPc(pc);
    actualLoader.setLFile(actHadoop);

    // Passed to every getNext call; it is never reassigned in this method.
    Tuple t = null;

    // Both counters start at -1; only their equality matters, so the offset cancels out.
    int actualCount = -1;
    DataBag actualBag = DefaultBagFactory.getInstance().newDefaultBag();
    Result actualResult = actualLoader.getNext(t);
    while (actualResult.returnStatus != POStatus.STATUS_EOP) {
        ++actualCount;
        actualBag.add(trimTuple((Tuple) actualResult.result));
        actualResult = actualLoader.getNext(t);
    }

    int expectedCount = -1;
    DataBag expectedBag = DefaultBagFactory.getInstance().newDefaultBag();
    Result expectedResult = expectedLoader.getNext(t);
    while (expectedResult.returnStatus != POStatus.STATUS_EOP) {
        ++expectedCount;
        expectedBag.add(trimTuple((Tuple) expectedResult.result));
        expectedResult = expectedLoader.getNext(t);
    }

    if (actualCount == expectedCount) {
        return compareBags(expectedBag, actualBag);
    }
    return false;
}
开发者ID:PonIC,项目名称:PonIC,代码行数:34,代码来源:TestHelper.java
注:本文中的org.apache.pig.data.DefaultBagFactory类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论