This article collects typical usage examples of the Java class org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory. If you are wondering what WordDelimiterFilterFactory does and how to use it, the curated class examples below may help.
WordDelimiterFilterFactory belongs to the org.apache.lucene.analysis.miscellaneous package. Seven code examples of the class are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Java code examples.
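Before the collected examples, here is a minimal self-contained sketch of using the factory directly (the luceneMatchVersion value, tokenizer choice, sample input, and class name are illustrative assumptions; Lucene 4.10-style APIs are assumed):
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class WordDelimiterDemo {
    public static void main(String[] args) throws Exception {
        Map<String, String> params = new HashMap<>();
        params.put("luceneMatchVersion", "4.10.4"); // assumption: adjust to your Lucene version
        params.put("generateWordParts", "1");
        params.put("splitOnCaseChange", "1");
        // The constructor consumes the map; unknown keys raise an IllegalArgumentException.
        WordDelimiterFilterFactory factory = new WordDelimiterFilterFactory(params);

        TokenStream ts = factory.create(
                new WhitespaceTokenizer(new StringReader("Wi-Fi PowerShot500")));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term); // expected: Wi, Fi, Power, Shot, 500
        }
        ts.end();
        ts.close();
    }
}
The factory simply wraps any TokenStream via create(...), which is the pattern every example below relies on.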
Example 1: getSearchMapping
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
@Factory
public SearchMapping getSearchMapping() {
    SearchMapping mapping = new SearchMapping();
    mapping.analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
            .tokenizerParam("pattern", "(.*)")
            .tokenizerParam("group", "1")
            .filter(LowerCaseFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(EdgeNGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "50")
            .analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
            .filter(StandardFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(PhoneticFilterFactory.class)
            .param("encoder", "DoubleMetaphone")
            .filter(SnowballPorterFilterFactory.class)
            .param("language", "English")
            .analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
            .filter(WordDelimiterFilterFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(NGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "20")
            .analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
            .analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);
    return mapping;
}
Developer: jamesagnew, Project: hapi-fhir, Lines: 33, Source: LuceneSearchMappingFactory.java
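For context, an entity would then opt into one of these definitions by name. The class and field below are hypothetical illustrations, not hapi-fhir code:
// Hypothetical entity; only the analyzer reference is the point here.
@Entity
@Indexed
public class Concept {

    @Field(name = "myTextEdgeNGram",
            analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer"))
    private String display;
}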
Example 2: IAViewTextGenAnalyser
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
/**
 * Creates a new analyser.
 */
public IAViewTextGenAnalyser(SynonymFilterFactory synonymFilterFactory,
        WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
Developer: nationalarchives, Project: taxonomy, Lines: 11, Source: IAViewTextGenAnalyser.java
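The constructor above only stores the injected factories; a createComponents implementation along the following lines would typically chain them into the token stream. This is a hypothetical sketch (class name, tokenizer choice, and filter order are assumptions; Lucene 4.10-style APIs are assumed), not the taxonomy project's actual code. The same pattern applies to the analogous constructors in Examples 3 and 4 below.
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;

// Hypothetical sketch of how the stored factories could be chained.
public final class SketchAnalyser extends Analyzer {

    private final SynonymFilterFactory synonymFilterFactory;
    private final WordDelimiterFilterFactory wordDelimiterFilterFactory;

    public SketchAnalyser(SynonymFilterFactory synonymFilterFactory,
            WordDelimiterFilterFactory wordDelimiterFilterFactory) {
        this.synonymFilterFactory = synonymFilterFactory;
        this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(reader);   // assumed tokenizer choice
        TokenStream result = wordDelimiterFilterFactory.create(source);
        result = synonymFilterFactory.create(result);         // synonyms applied last
        return new TokenStreamComponents(source, result);
    }
}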
Example 3: IAViewTextCasNoPuncAnalyser
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
/**
 * Creates a new analyser.
 */
public IAViewTextCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
        WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
Developer: nationalarchives, Project: taxonomy, Lines: 11, Source: IAViewTextCasNoPuncAnalyser.java
Example 4: IAViewTextNoCasNoPuncAnalyser
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
/**
 * Creates a new analyser.
 */
public IAViewTextNoCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
        WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
Developer: nationalarchives, Project: taxonomy, Lines: 11, Source: IAViewTextNoCasNoPuncAnalyser.java
Example 5: registerWithPrefix
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class);
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
            .param("generateWordParts", "1")
            .param("generateNumberParts", "1")
            .param("catenateWords", "0")
            .param("catenateNumbers", "0")
            .param("catenateAll", "0")
            .param("splitOnCaseChange", "0")
            .param("splitOnNumerics", "0")
            .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
            .param("generateWordParts", "1")
            .param("generateNumberParts", "1")
            .param("catenateWords", "0")
            .param("catenateNumbers", "0")
            .param("catenateAll", "0")
            .param("splitOnCaseChange", "0")
            .param("splitOnNumerics", "0")
            .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);
    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(PatternReplaceFilterFactory.class)
            .param("pattern", "('-&\\.,\\(\\))")
            .param("replacement", " ")
            .param("replace", "all")
            .tokenFilter(PatternReplaceFilterFactory.class)
            .param("pattern", "([^0-9\\p{L} ])")
            .param("replacement", "")
            .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);
}
Developer: openwide-java, Project: owsi-core-parent, Lines: 49, Source: CoreLuceneAnalyzersDefinitionProvider.java
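A sketch of how such a provider might be hooked up (the subclass name, the register(...) entry point, and the empty prefix are assumptions based on Hibernate Search 5.x's LuceneAnalyzerDefinitionProvider contract):
// Hypothetical subclass registering the definitions without a prefix.
public class MyLuceneAnalyzersDefinitionProvider extends CoreLuceneAnalyzersDefinitionProvider {
    @Override
    public void register(LuceneAnalyzerDefinitionRegistryBuilder builder) {
        registerWithPrefix("", builder); // empty prefix: analyzers keep their base names
    }
}
In Hibernate Search 5.x such a provider is typically declared through the Lucene analysis-definition-provider configuration property, so the named analyzers become available to @Analyzer(definition = ...) references.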
Example 6: testCustomTypes
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
@Test
public void testCustomTypes() throws Exception {
    String testText = "I borrowed $5,400.00 at 25% interest-rate";
    ResourceLoader loader = new SolrResourceLoader("solr/collection1");
    Map<String,String> args = new HashMap<>();
    args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
    args.put("generateWordParts", "1");
    args.put("generateNumberParts", "1");
    args.put("catenateWords", "1");
    args.put("catenateNumbers", "1");
    args.put("catenateAll", "0");
    args.put("splitOnCaseChange", "1");

    /* default behavior */
    WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args);
    factoryDefault.inform(loader);
    TokenStream ts = factoryDefault.create(
            new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });
    ts = factoryDefault.create(
            new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "foo", "foobar", "bar" });

    /* custom behavior */
    args = new HashMap<>();
    // use a custom type mapping
    args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
    args.put("generateWordParts", "1");
    args.put("generateNumberParts", "1");
    args.put("catenateWords", "1");
    args.put("catenateNumbers", "1");
    args.put("catenateAll", "0");
    args.put("splitOnCaseChange", "1");
    args.put("types", "wdftypes.txt");
    WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args);
    factoryCustom.inform(loader);
    ts = factoryCustom.create(
            new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });
    /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
    ts = factoryCustom.create(
            new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "foo\u200Dbar" });
}
Developer: europeana, Project: search, Lines: 54, Source: TestWordDelimiterFilterFactory.java
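The custom-behavior assertions only make sense together with the contents of wdftypes.txt. A type-mapping file consistent with them (reconstructed here from the assertions; the exact test resource may differ) would look like:
# Custom character-type mappings for WordDelimiterFilterFactory.
# Map currency, percent and number punctuation to DIGIT so that
# "$5,400.00" and "25%" survive as single tokens.
$ => DIGIT
% => DIGIT
. => DIGIT
\u002C => DIGIT
# Treat the zero-width joiner (a char > 0x7F) as ALPHANUM so "foo\u200Dbar" is not split.
\u200D => ALPHANUM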
Example 7: testCustomTypes
This variant targets an older Lucene 4.x API: the factory is created with the no-argument constructor and configured afterwards through init(Map), instead of receiving the argument map in the constructor as in Example 6.
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; // import the required package/class
@Test
public void testCustomTypes() throws Exception {
    String testText = "I borrowed $5,400.00 at 25% interest-rate";
    WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory();
    ResourceLoader loader = new SolrResourceLoader("solr/collection1");
    Map<String,String> args = new HashMap<String,String>();
    args.put("generateWordParts", "1");
    args.put("generateNumberParts", "1");
    args.put("catenateWords", "1");
    args.put("catenateNumbers", "1");
    args.put("catenateAll", "0");
    args.put("splitOnCaseChange", "1");

    /* default behavior */
    factoryDefault.init(args);
    factoryDefault.inform(loader);
    TokenStream ts = factoryDefault.create(
            new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" });
    ts = factoryDefault.create(
            new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "foo", "bar", "foobar" });

    /* custom behavior */
    WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory();
    // use a custom type mapping
    args.put("types", "wdftypes.txt");
    factoryCustom.init(args);
    factoryCustom.inform(loader);
    ts = factoryCustom.create(
            new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" });
    /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
    ts = factoryCustom.create(
            new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
    BaseTokenStreamTestCase.assertTokenStreamContents(ts,
            new String[] { "foo\u200Dbar" });
}
Developer: pkarmstr, Project: NYBC, Lines: 47, Source: TestWordDelimiterFilterFactory.java
Note: the org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory examples in this article are collected from GitHub, MSDocs and other source-code and documentation hosting platforms, and the snippets are drawn from open-source projects contributed by their developers. Copyright of the source code remains with the original authors; consult the corresponding project's license before using or distributing it, and do not republish without permission.