本文整理汇总了Java中cascading.scheme.hadoop.TextLine类的典型用法代码示例。如果您正苦于以下问题:Java TextLine类的具体用法?Java TextLine怎么用?Java TextLine使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TextLine类属于cascading.scheme.hadoop包,在下文中一共展示了TextLine类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: testPaths
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
* Tests the content of an output path against the given expected path.
*/
@SuppressWarnings("unchecked")
/**
 * Tests the content of an output path against the given expected path.
 *
 * @param actual   path whose TextLine contents are under test
 * @param expected path holding the expected TextLine contents
 * @throws Exception if either tap cannot be opened or read
 */
@SuppressWarnings("unchecked")
private void testPaths(String actual, String expected) throws Exception {
    Tap outputTest = new Hfs(new TextLine(), actual);
    Tap expectedTest = new Hfs(new TextLine(), expected);
    FlowProcess outputProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    FlowProcess expectedProcess = new HadoopFlowProcess(new JobConf(new Configuration()));
    List<String> outputList = new ArrayList<>();
    List<String> expectedList = new ArrayList<>();
    // try-with-resources: the iterators hold open record readers and must be
    // closed even when an assertion below (or a read) fails.
    try (TupleEntryIterator outputIterator = outputTest.openForRead(outputProcess);
         TupleEntryIterator expectedIterator = expectedTest.openForRead(expectedProcess)) {
        while (outputIterator.hasNext()) {
            // TextLine tuples are (offset, line); field 1 is the line text
            outputList.add(outputIterator.next().getTuple().getString(1));
        }
        while (expectedIterator.hasNext()) {
            expectedList.add(expectedIterator.next().getTuple().getString(1));
        }
    }
    // assertEquals reports both lists on failure, unlike assertTrue(equals)
    assertEquals(expectedList, outputList);
}
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:29,代码来源:CsvSchemeTest.java
示例2: main
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Example entry point: reads events from a local Splunk instance and writes
 * them as plain text lines to {@code PATH_TO_OUTPUT} on HDFS.
 */
public static void main(String [] args) {
    Properties splunkLogin = new Properties();
    splunkLogin.put(SplunkConf.SPLUNK_USERNAME, "admin");
    splunkLogin.put(SplunkConf.SPLUNK_PASSWORD, "changeIt");
    splunkLogin.put(SplunkConf.SPLUNK_HOST, "localhost");
    splunkLogin.put(SplunkConf.SPLUNK_PORT, "9050");
    // Source: all events matched by a default SplunkDataQuery
    SplunkTap source = new SplunkTap(splunkLogin, new SplunkScheme(new SplunkDataQuery()));
    // Sink: plain text lines, replacing any previous output
    Hfs sink = new Hfs(new TextLine(), PATH_TO_OUTPUT, SinkMode.REPLACE);
    Flow flow = new HadoopFlowConnector().connect(source, sink, new Pipe("test"));
    flow.complete();
}
开发者ID:yolodata,项目名称:tbana,代码行数:22,代码来源:SplunkSchemeExample.java
示例3: runCascadingJob
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Runs a Cascading flow copying a Shuttl CSV archive to plain text lines.
 *
 * @param inputPath  HDFS path of the Shuttl CSV archive to read
 * @param outputPath HDFS path the text output is (re)written to
 * @return the completed flow
 * @throws IOException on tap or flow I/O failure
 */
public Flow runCascadingJob( Path inputPath, Path outputPath) throws IOException
{
    Hfs source = new Hfs(new ShuttlCsv(new SplunkDataQuery()), inputPath.toString());
    Hfs sink = new Hfs(new TextLine(), outputPath.toString(), SinkMode.REPLACE);
    Flow flow = new HadoopFlowConnector(new Properties()).connect(source, sink, new Pipe("test"));
    flow.complete();
    return flow;
}
开发者ID:yolodata,项目名称:tbana,代码行数:18,代码来源:ShuttlCsvTest.java
示例4: runSplunkScheme
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Runs a Splunk-to-HDFS flow and validates the copied records.
 *
 * @param path      subdirectory (under {@code outputPath}/quoted) for the sink
 * @param inputData unused by this scheme run; kept for signature parity
 * @throws IOException on tap or flow I/O failure
 */
public void runSplunkScheme(String path, String inputData) throws IOException
{
    Properties splunkLogin = TestConfigurations.getSplunkLoginAsProperties();
    SplunkTap source = new SplunkTap(splunkLogin, new SplunkScheme(TestConfigurations.getSplunkSearch()));
    Hfs sink = new Hfs(new TextLine(), outputPath + "/quoted/" + path, SinkMode.REPLACE);
    Flow flow = new HadoopFlowConnector().connect(source, sink, new Pipe("test"));
    flow.complete();
    validateLength( flow, 10, 2 );
    TupleEntryIterator rows = flow.openSource();
    // TODO: Header information not used in SplunkScheme yet
    // verifyHeader(rows.getFields());
    verifyContent(rows);
}
开发者ID:yolodata,项目名称:tbana,代码行数:24,代码来源:SplunkSchemeTest.java
示例5: getHeaderRecord
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
* Reads the header record from the source file.
*/
@SuppressWarnings("unchecked")
/**
 * Reads the header record from the source file.
 * <p>
 * Only the first line of the tap is read; it is parsed with header skipping
 * disabled so the header row itself is returned rather than skipped.
 *
 * @param flowProcess current flow process, used for configuration and reading
 * @param tap         source tap whose first line holds the CSV header
 * @return the parsed header record
 * @throws TapException if reading or parsing the first line fails
 */
@SuppressWarnings("unchecked")
private CSVRecord getHeaderRecord(FlowProcess<JobConf> flowProcess, Tap tap) {
    Tap textLine = new Hfs(new TextLine(new Fields("line")), tap.getFullIdentifier(flowProcess.getConfigCopy()));
    try (TupleEntryIterator iterator = textLine.openForRead(flowProcess)) {
        String line = iterator.next().getTuple().getString(0);
        // CSVFormat is immutable: withSkipHeaderRecord(false) returns a NEW format
        // used only for this parse, so this.format is never modified. (The original
        // code additionally called format.withSkipHeaderRecord(oldValue) afterwards
        // and discarded the result — a no-op, removed here.)
        return CSVParser.parse(line, format.withSkipHeaderRecord(false)).iterator().next();
    } catch (IOException e) {
        throw new TapException(e);
    }
}
开发者ID:datascienceinc,项目名称:cascading.csv,代码行数:18,代码来源:CsvScheme.java
示例6: getTextFile
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
@Override
public Tap getTextFile(Fields sourceFields, Fields sinkFields, String filename, SinkMode mode) {
if( sourceFields == null ) {
return new Hfs(new TextLine(), filename, mode);
}
return new Hfs( new TextLine( sourceFields, sinkFields ), filename, mode );
}
开发者ID:dataArtisans,项目名称:cascading-flink,代码行数:9,代码来源:FlinkTestPlatform.java
示例7: OpenCsvScheme
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Creates a scheme that parses CSV lines with OpenCSV.
 *
 * @param fields      fields used for both source and sink
 * @param hasHeader   whether the first record is a header row
 * @param separator   field separator character
 * @param quote       quote character
 * @param escape      escape character
 * @param strict      strict-parsing flag stored for later use
 * @param charsetName charset name; the field defaults to "UTF8" when null
 */
public OpenCsvScheme(final Fields fields, final boolean hasHeader, final char separator, final char quote, final char escape,
final boolean strict, final String charsetName) {
super(TextLine.Compress.DEFAULT);
// NOTE(review): the superclass receives the raw (possibly null) charsetName,
// while the field below falls back to "UTF8" — confirm the superclass treats
// null as its own default, otherwise the two may disagree.
setCharsetName(charsetName);
setSinkFields(fields);
setSourceFields(fields);
this.hasHeader = hasHeader;
this.separator = separator;
this.quote = quote;
this.escape = escape;
this.strict = strict;
this.charsetName = charsetName != null ? charsetName : "UTF8";
}
开发者ID:tresata,项目名称:cascading-opencsv,代码行数:15,代码来源:OpenCsvScheme.java
示例8: main
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Example entry point: copies a Shuttl CSV archive from HDFS to a
 * plain-text output location.
 */
public static void main(String [] args) {
    Hfs source = new Hfs(new ShuttlCsv(), PATH_TO_SHUTTL_ARCHIVE);
    Hfs sink = new Hfs(new TextLine(), PATH_TO_OUTPUT);
    Flow flow = new HadoopFlowConnector(new Properties()).connect(source, sink, new Pipe("test"));
    flow.complete();
}
开发者ID:yolodata,项目名称:tbana,代码行数:15,代码来源:ShuttlCsvExample.java
示例9: runCSVLine
import cascading.scheme.hadoop.TextLine; //导入依赖的package包/类
/**
 * Runs a CSVLine-to-TextLine copy flow and verifies the parsed rows.
 *
 * @param path      subdirectory (under {@code outputPath}/quoted) for the sink
 * @param inputData HDFS path of the CSV input file
 * @throws IOException on tap or flow I/O failure
 */
public void runCSVLine( String path, String inputData) throws IOException
{
    Hfs source = new Hfs(new CSVLine(), inputData);
    Hfs sink = new Hfs(new TextLine(), outputPath + "/quoted/" + path, SinkMode.REPLACE);
    Flow flow = new HadoopFlowConnector(new Properties()).connect(source, sink, new Pipe("test"));
    flow.complete();
    // The file contains 4 rows, however there are only 3 CSV rows (inc the header row)
    validateLength( flow, 4, 2 );
    TupleEntryIterator rows = flow.openSource();
    assertEquals(expectedRow("header1", "header2"), rows.next().getTuple().getObject(1));
    assertEquals(expectedRow("Column1", "Column 2 using\ntwo rows"), rows.next().getTuple().getObject(1));
    assertEquals(expectedRow("c1", "c2"), rows.next().getTuple().getObject(1));
}

/** Builds the writable cell list expected for a single parsed CSV row. */
private static ArrayListTextWritable expectedRow(String... cells) {
    ArrayListTextWritable row = new ArrayListTextWritable();
    for (String cell : cells) {
        row.add(new Text(cell));
    }
    return row;
}
开发者ID:yolodata,项目名称:tbana,代码行数:37,代码来源:CSVLineTest.java
注:本文中的cascading.scheme.hadoop.TextLine类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论