This article collects typical usage examples of the Java class org.apache.lucene.analysis.CannedTokenStream. If you are wondering what CannedTokenStream is for, or how to use it in practice, the curated class examples below should help.
The CannedTokenStream class belongs to the org.apache.lucene.analysis package. Twenty code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
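Before the examples, here is a minimal standalone sketch (not taken from any of the projects below) of the basic pattern: wrap a few pre-built Token objects in a CannedTokenStream and consume it through the usual reset/incrementToken/end/close contract. Note that CannedTokenStream ships with Lucene's test framework, so it is intended for tests; the class name, token texts, and offsets here are purely illustrative.
import java.io.IOException;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class CannedTokenStreamSketch {
  public static void main(String[] args) throws IOException {
    // Pre-built tokens with explicit start/end offsets; CannedTokenStream simply replays them.
    TokenStream ts = new CannedTokenStream(new Token("hello", 0, 5), new Token("world", 6, 11));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    // Standard TokenStream consumption contract: reset, incrementToken loop, end, close.
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term + " [" + offset.startOffset() + "-" + offset.endOffset() + "]");
    }
    ts.end();
    ts.close();
  }
}
The examples that follow use the same idea to feed hand-crafted tokens, position increments, and offsets into IndexWriter, MemoryIndex, token filters, and other consumers.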
Example 1: testCountPositions
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testCountPositions() throws IOException {
  // We're looking to make sure that we:
  Token t1 = new Token(); // Don't count tokens without an increment
  t1.setPositionIncrement(0);
  Token t2 = new Token();
  t2.setPositionIncrement(1); // Count normal tokens with one increment
  Token t3 = new Token();
  t3.setPositionIncrement(2); // Count funny tokens with more than one increment
  int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
  Token[] tokens = new Token[] {t1, t2, t3};
  Collections.shuffle(Arrays.asList(tokens), random());
  final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
  // TODO: we have no CannedAnalyzer?
  Analyzer analyzer = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      return new TokenStreamComponents(new MockTokenizer(), tokenStream);
    }
  };
  assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
}
Author: justor, Project: elasticsearch_my, Lines: 22, Source: TokenCountFieldMapperTests.java
Example 2: testBogusTermVectors
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testBogusTermVectors() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", "", ft);
  field.setTokenStream(new CannedTokenStream(
      new Token("bar", 5, 10), new Token("bar", 1, 4)
  ));
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close(); // checkindex
}
Author: europeana, Project: search, Lines: 17, Source: TestCheckIndex.java
Example 3: testIllegalPositions
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testIllegalPositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(Integer.MAX_VALUE);
  Token t2 = new Token("bar", 4, 7);
  t2.setPositionIncrement(200);
  TokenStream overflowingTokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  Field field = new TextField("foo", overflowingTokenStream);
  doc.add(field);
  try {
    iw.addDocument(doc);
    fail();
  } catch (IllegalArgumentException expected) {
    // expected exception
  }
  iw.close();
  dir.close();
}
Author: europeana, Project: search, Lines: 23, Source: TestIndexWriterExceptions.java
Example 4: testLegalbutVeryLargePositions
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testLegalbutVeryLargePositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
  }
  TokenStream overflowingTokenStream = new CannedTokenStream(
      new Token[] { t1 }
  );
  Field field = new TextField("foo", overflowingTokenStream);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Author: europeana, Project: search, Lines: 19, Source: TestIndexWriterExceptions.java
Example 5: testLegalbutVeryLargeOffsets
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Author: europeana, Project: search, Lines: 25, Source: TestPostingsOffsets.java
Example 6: checkTokens
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
private void checkTokens(Token[] field1, Token[] field2) throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
  boolean success = false;
  try {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(field1), ft));
    doc.add(new Field("body", new CannedTokenStream(field2), ft));
    riw.addDocument(doc);
    riw.close();
    success = true;
  } finally {
    if (success) {
      IOUtils.close(dir);
    } else {
      IOUtils.closeWhileHandlingException(riw, dir);
    }
  }
}
Author: europeana, Project: search, Lines: 27, Source: TestPostingsOffsets.java
Example 7: testTextFieldString
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testTextFieldString() throws Exception {
  Field fields[] = new Field[] {
      new TextField("foo", "bar", Field.Store.NO),
      new TextField("foo", "bar", Field.Store.YES)
  };
  for (Field field : fields) {
    field.setBoost(5f);
    trySetByteValue(field);
    trySetBytesValue(field);
    trySetBytesRefValue(field);
    trySetDoubleValue(field);
    trySetIntValue(field);
    trySetFloatValue(field);
    trySetLongValue(field);
    trySetReaderValue(field);
    trySetShortValue(field);
    field.setStringValue("baz");
    field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
    assertEquals("baz", field.stringValue());
    assertEquals(5f, field.boost(), 0f);
  }
}
Author: europeana, Project: search, Lines: 25, Source: TestField.java
Example 8: testTextFieldReader
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testTextFieldReader() throws Exception {
  Field field = new TextField("foo", new StringReader("bar"));
  field.setBoost(5f);
  trySetByteValue(field);
  trySetBytesValue(field);
  trySetBytesRefValue(field);
  trySetDoubleValue(field);
  trySetIntValue(field);
  trySetFloatValue(field);
  trySetLongValue(field);
  field.setReaderValue(new StringReader("foobar"));
  trySetShortValue(field);
  trySetStringValue(field);
  field.setTokenStream(new CannedTokenStream(new Token("foo", 0, 3)));
  assertNotNull(field.readerValue());
  assertEquals(5f, field.boost(), 0f);
}
Author: europeana, Project: search, Lines: 20, Source: TestField.java
Example 9: testBogusTermVectors
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testBogusTermVectors() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", "", ft);
  field.setTokenStream(new CannedTokenStream(
      new Token("bar", 5, 10), new Token("bar", 1, 4)
  ));
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close(); // checkindex
}
Author: pkarmstr, Project: NYBC, Lines: 17, Source: TestCheckIndex.java
Example 10: testLegalbutVeryLargeOffsets
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Author: pkarmstr, Project: NYBC, Lines: 25, Source: TestPostingsOffsets.java
Example 11: checkTokens
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
private void checkTokens(Token[] tokens) throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
  boolean success = false;
  try {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the checkindex cross-check
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(tokens), ft));
    riw.addDocument(doc);
    success = true;
  } finally {
    if (success) {
      IOUtils.close(riw, dir);
    } else {
      IOUtils.closeWhileHandlingException(riw, dir);
    }
  }
}
Author: pkarmstr, Project: NYBC, Lines: 25, Source: TestPostingsOffsets.java
Example 12: testIllegalPositions
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testIllegalPositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(Integer.MAX_VALUE);
  Token t2 = new Token("bar", 4, 7);
  t2.setPositionIncrement(200);
  TokenStream overflowingTokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  Field field = new TextField("foo", overflowingTokenStream);
  doc.add(field);
  try {
    iw.addDocument(doc);
    fail();
  } catch (IllegalArgumentException expected) {
    // expected exception
  }
  iw.close();
  dir.close();
}
Author: jimaguere, Project: Maskana-Gestor-de-Conocimiento, Lines: 23, Source: TestIndexWriterExceptions.java
Example 13: testLegalbutVeryLargePositions
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testLegalbutVeryLargePositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, 3);
  t1.setPositionIncrement(Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
  }
  TokenStream overflowingTokenStream = new CannedTokenStream(
      new Token[] { t1 }
  );
  Field field = new TextField("foo", overflowingTokenStream);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
Author: jimaguere, Project: Maskana-Gestor-de-Conocimiento, Lines: 19, Source: TestIndexWriterExceptions.java
Example 14: testBasic
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testBasic() throws IOException {
  Index index = new Index("test", "_na_");
  String name = "ngr";
  Settings indexSettings = newAnalysisSettingsBuilder().build();
  IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
  Settings settings = newAnalysisSettingsBuilder().build();
  // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input:
  TokenStream in = new CannedTokenStream(0, 12, new Token[] {
      token("wtf", 1, 5, 0, 3),
      token("what", 0, 1, 0, 3),
      token("wow", 0, 3, 0, 3),
      token("the", 1, 1, 0, 3),
      token("fudge", 1, 3, 0, 3),
      token("that's", 1, 1, 0, 3),
      token("funny", 1, 1, 0, 3),
      token("happened", 1, 1, 4, 12)
  });
  TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in);
  // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
  assertTokenStreamContents(tokens,
      new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
      new int[] {0, 0, 0, 0, 0, 0, 0, 4},
      new int[] {3, 3, 3, 3, 3, 3, 3, 12},
      new int[] {1, 0, 0, 1, 0, 1, 0, 1},
      new int[] {3, 1, 1, 1, 1, 1, 1, 1},
      12);
}
Author: justor, Project: elasticsearch_my, Lines: 32, Source: FlattenGraphTokenFilterFactoryTests.java
Example 15: createFilter
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
private URLTokenFilter createFilter(final String url, final URLPart part, final boolean urlDecode, final boolean allowMalformed) {
  int length = 0;
  if (url != null) {
    length = url.length();
  }
  return new URLTokenFilter(new CannedTokenStream(new Token(url, 0, length)), part, urlDecode, allowMalformed);
}
Author: jlinn, Project: elasticsearch-analysis-url, Lines: 8, Source: URLTokenFilterTest.java
Example 16: testEmptyString
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testEmptyString() throws IOException {
  MemoryIndex memory = new MemoryIndex();
  memory.addField("foo", new CannedTokenStream(new Token("", 0, 5)));
  IndexSearcher searcher = memory.createSearcher();
  TopDocs docs = searcher.search(new TermQuery(new Term("foo", "")), 10);
  assertEquals(1, docs.totalHits);
}
Author: europeana, Project: search, Lines: 8, Source: MemoryIndexTest.java
Example 17: testAnyFromTokenStream
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testAnyFromTokenStream() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "here comes the sun", Field.Store.NO));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newTextField("field", "here comes the moon", Field.Store.NO));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newTextField("field", "here comes sun", Field.Store.NO));
  w.addDocument(doc);
  // Should not match:
  doc = new Document();
  doc.add(newTextField("field", "here comes the other sun", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  IndexSearcher s = newSearcher(r);
  TokenStream ts = new CannedTokenStream(new Token[] {
      token("comes", 1, 1),
      token("comes", 0, 2),
      token("*", 1, 1),
      token("sun", 1, 1),
      token("moon", 0, 1)
  });
  TermAutomatonQuery q = new TokenStreamToTermAutomatonQuery().toQuery("field", ts);
  // System.out.println("DOT: " + q.toDot());
  assertEquals(3, s.search(q, 1).totalHits);
  w.close();
  r.close();
  dir.close();
}
Author: europeana, Project: search, Lines: 40, Source: TestTermAutomatonQuery.java
Example 18: shingleFilterTest
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
protected void shingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
                                 int[] positionIncrements, String[] types,
                                 boolean outputUnigrams)
    throws IOException {
  ShingleFilter filter = new ShingleFilter(new CannedTokenStream(tokensToShingle), maxSize);
  filter.setOutputUnigrams(outputUnigrams);
  shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
}
Author: europeana, Project: search, Lines: 10, Source: ShingleFilterTest.java
Example 19: testTrailingHole1
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testTrailingHole1() throws IOException {
  // Analyzing "wizard of", where of is removed as a
  // stopword leaving a trailing hole:
  Token[] inputTokens = new Token[] {createToken("wizard", 0, 6)};
  ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);
  assertTokenStreamContents(filter,
      new String[] {"wizard", "wizard _"},
      new int[] {0, 0},
      new int[] {6, 9},
      new int[] {1, 0},
      9);
}
Author: europeana, Project: search, Lines: 14, Source: ShingleFilterTest.java
Example 20: testTrailingHole2
import org.apache.lucene.analysis.CannedTokenStream; // import the required package/class
public void testTrailingHole2() throws IOException {
  // Analyzing "purple wizard of", where of is removed as a
  // stopword leaving a trailing hole:
  Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
                                     createToken("wizard", 7, 13)};
  ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 16, inputTokens), 2, 2);
  assertTokenStreamContents(filter,
      new String[] {"purple", "purple wizard", "wizard", "wizard _"},
      new int[] {0, 0, 7, 7},
      new int[] {6, 13, 13, 16},
      new int[] {1, 0, 1, 0},
      16);
}
Author: europeana, Project: search, Lines: 15, Source: ShingleFilterTest.java
Note: The org.apache.lucene.analysis.CannedTokenStream examples in this article were collected from source-code and documentation hosting platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not republish without permission.