本文整理汇总了Java中org.apache.lucene.util.automaton.LevenshteinAutomata类的典型用法代码示例。如果您正苦于以下问题:Java LevenshteinAutomata类的具体用法?Java LevenshteinAutomata怎么用?Java LevenshteinAutomata使用的例子?那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
LevenshteinAutomata类属于org.apache.lucene.util.automaton包,在下文中一共展示了LevenshteinAutomata类的16个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: FuzzyQuery
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
* Create a new FuzzyQuery that will match terms with an edit distance
* of at most <code>maxEdits</code> to <code>term</code>.
* If a <code>prefixLength</code> > 0 is specified, a common prefix
* of that length is also required.
*
* @param term the term to search for
* @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength length of common (non-fuzzy) prefix
* @param maxExpansions the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
* then the maxClauseCount will be used instead.
* @param transpositions true if transpositions should be treated as a primitive
* edit operation. If this is false, comparisons will implement the classic
* Levenshtein algorithm.
*/
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
super(term.field());
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
if (prefixLength < 0) {
throw new IllegalArgumentException("prefixLength cannot be negative.");
}
if (maxExpansions <= 0) {
throw new IllegalArgumentException("maxExpansions must be positive.");
}
this.term = term;
this.maxEdits = maxEdits;
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:37,代码来源:FuzzyQuery.java
示例2: initAutomata
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Extends the cached list of Levenshtein DFAs, when possible, so that it also holds the
 * automata for every distance from its current size up to {@code maxDistance}.
 *
 * <p>Nothing is built when the cache already has more than {@code maxDistance} entries or
 * when {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest edit distance an automaton is wanted for
 * @return the list returned by {@code dfaAtt.automata()}, possibly with new entries appended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // Only the code points after the fixed prefix are handed to the Levenshtein builder;
  // the prefix itself is supplied separately when each automaton is produced.
  final String fuzzyPart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(fuzzyPart, transpositions);
  final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

  int distance = cached.size();
  while (distance <= maxDistance) {
    final Automaton automaton = builder.toAutomaton(distance, prefix);
    cached.add(new CompiledAutomaton(automaton, true, false));
    distance++;
  }
  return cached;
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:19,代码来源:FuzzyTermsEnum.java
示例3: testFuzzy
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Tests parsing of the fuzzy operator {@code ~}.
 *
 * <p>Expectations encoded below: {@code foobar~2} yields a {@link FuzzyQuery} with two
 * edits; {@code foobar~}, {@code foobar~a} and {@code foobar~1a} yield a plain
 * {@link TermQuery}; and a requested distance one above
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} yields a fuzzy query using exactly
 * that maximum.
 */
public void testFuzzy() throws Exception {
  Query regular = new TermQuery(new Term("field", "foobar"));
  Query expected = new FuzzyQuery(new Term("field", "foobar"), 2);

  assertEquals(expected, parse("foobar~2"));
  assertEquals(regular, parse("foobar~"));
  assertEquals(regular, parse("foobar~a"));
  assertEquals(regular, parse("foobar~1a"));

  BooleanQuery bool = new BooleanQuery();
  FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  bool.add(fuzzy, Occur.MUST);
  bool.add(new TermQuery(new Term("field", "bar")), Occur.MUST);

  // The sum is parenthesized so that it is evaluated arithmetically. Without the
  // parentheses, string concatenation appends the digit "1" to the constant's digits
  // instead of requesting "maximum + 1" edits.
  assertEquals(bool, parse("foo~" + (LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1) + " bar"));
}
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TestSimpleQueryParser.java
示例4: FuzzyQuery
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
* Create a new FuzzyQuery that will match terms with an edit distance
* of at most <code>maxEdits</code> to <code>term</code>.
* If a <code>prefixLength</code> > 0 is specified, a common prefix
* of that length is also required.
*
* @param term the term to search for
* @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength length of common (non-fuzzy) prefix
* @param maxExpansions the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
* then the maxClauseCount will be used instead.
* @param transpositions true if transpositions should be treated as a primitive
* edit operation. If this is false, comparisons will implement the classic
* Levenshtein algorithm.
*/
public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) {
super(term.field());
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
if (prefixLength < 0) {
throw new IllegalArgumentException("prefixLength cannot be negative.");
}
if (maxExpansions < 0) {
throw new IllegalArgumentException("maxExpansions cannot be negative.");
}
this.term = term;
this.maxEdits = maxEdits;
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:37,代码来源:FuzzyQuery.java
示例5: initAutomata
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Extends the cached list of Levenshtein DFAs, when possible, so that it also holds the
 * automata for every distance from its current size up to {@code maxDistance}.
 *
 * <p>Nothing is built when the cache already has more than {@code maxDistance} entries or
 * when {@code maxDistance} exceeds {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest edit distance an automaton is wanted for
 * @return the list returned by {@code dfaAtt.automata()}, possibly with new entries appended
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The Levenshtein builder only sees the code points that follow the fixed prefix.
  final String fuzzyPart =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(fuzzyPart, transpositions);

  for (int distance = cached.size(); distance <= maxDistance; distance++) {
    Automaton automaton = builder.toAutomaton(distance);
    if (realPrefixLength > 0) {
      // Put the constant prefix in front of the fuzzy automaton. A fresh prefix automaton
      // is created for every distance, exactly as many times as the loop runs.
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      automaton = BasicOperations.concatenate(BasicAutomata.makeString(prefixText), automaton);
    }
    cached.add(new CompiledAutomaton(automaton, true, false));
  }
  return cached;
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:FuzzyTermsEnum.java
示例6: build
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Creates a {@link PhraseSuggestionContext.DirectCandidateGenerator} and copies this
 * builder's settings onto it.
 *
 * <p>Pre- and post-filter analyzers are looked up by name through
 * {@code mapperService.getIndexAnalyzers()}. Most remaining settings go through
 * {@code transferIfNotNull} (presumably applied only when the value is non-null; confirm
 * against that helper).
 *
 * @param mapperService source of the index analyzers used to resolve the filter names
 * @return the configured candidate generator
 * @throws IllegalArgumentException if a named pre/post filter analyzer cannot be found, or if
 *         the generator's max edits is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException declared by the overridden method
 */
@Override
public PhraseSuggestionContext.DirectCandidateGenerator build(MapperService mapperService) throws IOException {
  final PhraseSuggestionContext.DirectCandidateGenerator candidates =
      new PhraseSuggestionContext.DirectCandidateGenerator();
  candidates.setField(this.field);
  transferIfNotNull(this.size, candidates::size);

  // Resolve the optional analyzers; an unknown name is reported as an error.
  if (this.preFilter != null) {
    candidates.preFilter(mapperService.getIndexAnalyzers().get(this.preFilter));
    if (candidates.preFilter() == null) {
      throw new IllegalArgumentException("Analyzer [" + this.preFilter + "] doesn't exists");
    }
  }
  if (this.postFilter != null) {
    candidates.postFilter(mapperService.getIndexAnalyzers().get(this.postFilter));
    if (candidates.postFilter() == null) {
      throw new IllegalArgumentException("Analyzer [" + this.postFilter + "] doesn't exists");
    }
  }

  transferIfNotNull(this.accuracy, candidates::accuracy);

  // These three settings are held in a form that has to be resolved before being applied.
  if (this.suggestMode != null) {
    candidates.suggestMode(resolveSuggestMode(this.suggestMode));
  }
  if (this.sort != null) {
    candidates.sort(SortBy.resolve(this.sort));
  }
  if (this.stringDistance != null) {
    candidates.stringDistance(resolveDistance(this.stringDistance));
  }

  // Validate the value the generator ends up with, whether or not this builder set it.
  transferIfNotNull(this.maxEdits, candidates::maxEdits);
  final boolean editsAcceptable = candidates.maxEdits() >= 1
      && candidates.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (!editsAcceptable) {
    throw new IllegalArgumentException("Illegal max_edits value " + candidates.maxEdits());
  }

  transferIfNotNull(this.maxInspections, candidates::maxInspections);
  transferIfNotNull(this.maxTermFreq, candidates::maxTermFreq);
  transferIfNotNull(this.prefixLength, candidates::prefixLength);
  transferIfNotNull(this.minWordLength, candidates::minWordLength);
  transferIfNotNull(this.minDocFreq, candidates::minDocFreq);
  return candidates;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:39,代码来源:DirectCandidateGeneratorBuilder.java
示例7: floatToEdits
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Converts a deprecated "minimumSimilarity" value into a raw edit distance.
 *
 * <p>A value of 1 or more is taken as an edit count: it is capped at
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} and truncated to an int. A value
 * of exactly 0 gives 0 edits. Any other value gives
 * {@code (1 - minimumSimilarity) * termLen}, truncated and capped at the same maximum.
 *
 * @param minimumSimilarity scaled similarity
 * @param termLen length (in unicode codepoints) of the term
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final int supportedMax = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (minimumSimilarity >= 1f) {
    return (int) Math.min(minimumSimilarity, supportedMax);
  }
  if (minimumSimilarity == 0.0f) {
    // Zero stands for an exact match, not for an unlimited number of edits.
    return 0;
  }
  final int scaledEdits = (int) ((1D - minimumSimilarity) * termLen);
  return Math.min(scaledEdits, supportedMax);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:21,代码来源:FuzzyQuery.java
示例8: parseDirectSpellcheckerSettings
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Applies one direct-spellchecker setting, identified by {@code fieldName}, to
 * {@code suggestion}, reading the value from {@code parser}.
 *
 * @param parser parser from which the setting's value is read
 * @param fieldName name of the setting being parsed
 * @param suggestion settings object that receives the value
 * @param parseFieldMatcher matcher used to compare {@code fieldName} with the known fields
 * @return {@code true} if {@code fieldName} was recognised and applied, {@code false} otherwise
 * @throws IllegalArgumentException if the max-edits value is below 1 or above
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException if reading from the parser fails
 */
public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName,
    DirectSpellcheckerSettings suggestion, ParseFieldMatcher parseFieldMatcher) throws IOException {
  if ("accuracy".equals(fieldName)) {
    suggestion.accuracy(parser.floatValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.SUGGEST_MODE)) {
    suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text()));
    return true;
  }
  if ("sort".equals(fieldName)) {
    suggestion.sort(SuggestUtils.resolveSort(parser.text()));
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.STRING_DISTANCE)) {
    suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text()));
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_EDITS)) {
    // The value is stored first and then validated through the getter.
    suggestion.maxEdits(parser.intValue());
    final boolean editsAcceptable = suggestion.maxEdits() >= 1
        && suggestion.maxEdits() <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
    if (!editsAcceptable) {
      throw new IllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
    }
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_INSPECTIONS)) {
    suggestion.maxInspections(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MAX_TERM_FREQ)) {
    suggestion.maxTermFreq(parser.floatValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.PREFIX_LENGTH)) {
    suggestion.prefixLength(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MIN_WORD_LENGTH)) {
    suggestion.minQueryLength(parser.intValue());
    return true;
  }
  if (parseFieldMatcher.match(fieldName, Fields.MIN_DOC_FREQ)) {
    suggestion.minDocFreq(parser.floatValue());
    return true;
  }
  // Not a direct-spellchecker setting.
  return false;
}
开发者ID:baidu,项目名称:Elasticsearch,代码行数:31,代码来源:SuggestUtils.java
示例9: floatToEdits
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Translates a deprecated fractional "minimumSimilarity" into a raw edit distance.
 *
 * <p>Values of 1 or more are interpreted as an edit count, capped at
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} and truncated to an int. Exactly 0
 * maps to 0 edits. Every other value maps to {@code (1 - minimumSimilarity) * termLen},
 * truncated and capped at the same maximum.
 *
 * @param minimumSimilarity
 *            scaled similarity
 * @param termLen
 *            length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(final float minimumSimilarity,
    final int termLen) {
  final int supportedMax = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  final int edits;
  if (minimumSimilarity >= 1f) {
    edits = (int) Math.min(minimumSimilarity, supportedMax);
  } else if (minimumSimilarity == 0.0f) {
    // Zero stands for an exact match, not for an unlimited number of edits.
    edits = 0;
  } else {
    final int scaledEdits = (int) ((1D - minimumSimilarity) * termLen);
    edits = Math.min(scaledEdits, supportedMax);
  }
  return edits;
}
开发者ID:quhfus,项目名称:DoSeR-Disambiguation,代码行数:25,代码来源:LearnToRankFuzzyQuery.java
示例10: LearnToRankFuzzyQuery
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
* Create a new FuzzyQuery that will match terms with an edit distance of at
* most <code>maxEdits</code> to <code>term</code>. If a
* <code>prefixLength</code> > 0 is specified, a common prefix of that
* length is also required.
*
* @param term
* the term to search for
* @param maxEdits
* must be >= 0 and <=
* {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength
* length of common (non-fuzzy) prefix
* @param maxExpansions
* the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the
* query is rewritten, then the maxClauseCount will be used
* instead.
* @param transpositions
* true if transpositions should be treated as a primitive edit
* operation. If this is false, comparisons will implement the
* classic Levenshtein algorithm.
*/
public LearnToRankFuzzyQuery(final Term term, final int maxEdits,
final int prefixLength, final int maxExpansions,
final boolean transpositions, final Similarity sim) {
super(term.field());
if ((maxEdits < 0)
|| (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
throw new IllegalArgumentException(
"maxEdits must be between 0 and "
+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
if (prefixLength < 0) {
throw new IllegalArgumentException(
"prefixLength cannot be negative.");
}
if (maxExpansions < 0) {
throw new IllegalArgumentException(
"maxExpansions cannot be negative.");
}
this.term = term;
this.maxEdits = maxEdits;
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
maxExpansions, sim));
// setRewriteMethod(new
// LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
// maxExpansions));
}
开发者ID:quhfus,项目名称:DoSeR-Disambiguation,代码行数:55,代码来源:LearnToRankFuzzyQuery.java
示例11: LearnToRankFuzzyQuery
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
* Create a new FuzzyQuery that will match terms with an edit distance of at
* most <code>maxEdits</code> to <code>term</code>. If a
* <code>prefixLength</code> > 0 is specified, a common prefix of that
* length is also required.
*
* @param term
* the term to search for
* @param maxEdits
* must be >= 0 and <=
* {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength
* length of common (non-fuzzy) prefix
* @param maxExpansions
* the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the
* query is rewritten, then the maxClauseCount will be used
* instead.
* @param transpositions
* true if transpositions should be treated as a primitive edit
* operation. If this is false, comparisons will implement the
* classic Levenshtein algorithm.
*/
public LearnToRankFuzzyQuery(Term term, int maxEdits, int prefixLength,
int maxExpansions, boolean transpositions, Similarity sim) {
super(term.field());
if (maxEdits < 0
|| maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException(
"maxEdits must be between 0 and "
+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
if (prefixLength < 0) {
throw new IllegalArgumentException(
"prefixLength cannot be negative.");
}
if (maxExpansions < 0) {
throw new IllegalArgumentException(
"maxExpansions cannot be negative.");
}
this.term = term;
this.maxEdits = maxEdits;
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
LearnToRankFuzzyQuery.sim = sim;
setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(maxExpansions));
// setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
// maxExpansions));
}
开发者ID:quhfus,项目名称:DoSeR,代码行数:53,代码来源:LearnToRankFuzzyQuery.java
示例12: floatToEdits
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Maps a deprecated "minimumSimilarity" fraction onto a raw edit distance.
 *
 * <p>A value of 1 or more counts as a number of edits and is capped at
 * {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} before being truncated to an int.
 * A value of exactly 0 produces 0. Anything else produces
 * {@code (1 - minimumSimilarity) * termLen}, truncated and capped at the same maximum.
 *
 * @param minimumSimilarity
 *            scaled similarity
 * @param termLen
 *            length (in unicode codepoints) of the term.
 * @return equivalent number of maxEdits
 * @deprecated pass integer edit distances instead.
 */
@Deprecated
public static int floatToEdits(float minimumSimilarity, int termLen) {
  final int cap = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

  // Already an edit count: clamp it, then drop any fraction.
  if (minimumSimilarity >= 1f) {
    final float clamped = Math.min(minimumSimilarity, cap);
    return (int) clamped;
  }
  // Zero stands for an exact match, not for an unlimited number of edits.
  if (minimumSimilarity == 0.0f) {
    return 0;
  }
  // A genuine fraction: scale by the term length, then clamp.
  final double dissimilarity = 1D - minimumSimilarity;
  return Math.min((int) (dissimilarity * termLen), cap);
}
开发者ID:quhfus,项目名称:DoSeR,代码行数:24,代码来源:LearnToRankFuzzyQuery.java
示例13: XFuzzySuggester
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Creates an {@code XFuzzySuggester} instance.
 *
 * <p>Arguments not listed individually below ({@code queryPrefix}, {@code fst},
 * {@code hasPayloads}, {@code maxAnalyzedPathsForOneInput} and {@code holeCharacter}) are
 * forwarded unchanged to the superclass constructor. The literal {@code true} passed to
 * the superclass is not explained by this code; NOTE(review): confirm its meaning against
 * the superclass signature.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form. Set this to -1 for
 *        no limit.
 * @param maxEdits must be at least 0 and at most
 *        {@link org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @param transpositions {@code true} if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware stored on this instance; presumably selects code-point rather than
 *        byte based operation (confirm against the class documentation)
 * @param sepLabel separation label
 * @param payloadSep payload separator byte
 * @param endByte end byte marker byte
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or if
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer,
    int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
    int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength,
    boolean unicodeAware, FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads,
    int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
  super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
      true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException(
        "maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The two messages below used to read "must not be >= 0", the opposite of the
  // condition that triggers them; they now state the real requirement.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }
  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:50,代码来源:XFuzzySuggester.java
示例14: FuzzyTermsEnum
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Sets up an enumeration over the terms of {@code terms} that share a prefix of length
 * {@code prefixLength} with {@code term} and are similar enough to it according to
 * {@code minSimilarity}.
 *
 * <p>A {@code minSimilarity} of 1 or more is treated as a number of edits and must be a
 * whole number; a smaller value is treated as a similarity from which the maximum number
 * of edits is derived. The constructor ends by calling {@code bottomChanged(null, true)}.
 *
 * @param terms Delivers terms.
 * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
 *        that contains information about competitive boosts during rewrite. It is also used
 *        to cache DFAs between segment transitions.
 * @param term Pattern term.
 * @param minSimilarity Minimum required similarity for terms. Pass an integer value
 *        representing edit distance. Passing a fraction is deprecated.
 * @param prefixLength Length of required common prefix; must not be negative. A value longer
 *        than the term is reduced to the term's length in code points.
 * @param transpositions whether transpositions are enabled; when {@code true}, edit
 *        distances above {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} are rejected
 * @throws IllegalArgumentException if {@code minSimilarity} is negative or is a non-integral
 *         value of 1 or more, or if {@code prefixLength} is negative
 * @throws UnsupportedOperationException if {@code transpositions} is set and the resulting
 *         maximum number of edits exceeds
 *         {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @throws IOException if there is a low-level IO error
 */
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
    final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
  if (minSimilarity >= 1.0f && minSimilarity != (int) minSimilarity) {
    throw new IllegalArgumentException("fractional edit distances are not allowed");
  }
  if (minSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }
  this.terms = terms;
  this.term = term;

  // Decode the UTF-16 text into one int per code point so later comparisons are cheap.
  final String text = term.text();
  this.termText = new int[text.codePointCount(0, text.length())];
  int charIndex = 0;
  int codePointIndex = 0;
  while (charIndex < text.length()) {
    final int codePoint = text.codePointAt(charIndex);
    termText[codePointIndex++] = codePoint;
    charIndex += Character.charCount(codePoint);
  }
  this.termLength = termText.length;
  this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);

  // A prefix longer than the word is pointless (it just means the entire word must
  // match), so cap it at the word length.
  this.realPrefixLength = Math.min(prefixLength, termLength);

  if (minSimilarity >= 1f) {
    // Values of 1 or more are edit counts; similarity scoring is not used.
    this.minSimilarity = 0;
    this.maxEdits = (int) minSimilarity;
    this.raw = true;
  } else {
    // A fraction: derive the largest edit count compatible with this similarity.
    this.minSimilarity = minSimilarity;
    this.maxEdits = initialMaxDistance(this.minSimilarity, termLength);
    this.raw = false;
  }

  if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new UnsupportedOperationException("with transpositions enabled, distances > "
        + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
  }
  this.transpositions = transpositions;
  this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

  this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
  bottomTerm = maxBoostAtt.getCompetitiveTerm();
  bottomChanged(null, true);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:64,代码来源:FuzzyTermsEnum.java
示例15: FuzzySuggester
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Creates a {@link FuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form. Set this to -1 for
 *        no limit.
 * @param preservePositionIncrements Whether position holes should appear in the automaton
 * @param maxEdits must be at least 0 and at most
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @param transpositions {@code true} if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @param unicodeAware operate on Unicode code points instead of bytes.
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or if
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
    int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
    boolean preservePositionIncrements, int maxEdits, boolean transpositions,
    int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The two messages below used to read "must not be >= 0", the opposite of the
  // condition that triggers them; they now state the real requirement.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }
  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
  this.unicodeAware = unicodeAware;
}
开发者ID:europeana,项目名称:search,代码行数:46,代码来源:FuzzySuggester.java
示例16: FuzzySuggester
import org.apache.lucene.util.automaton.LevenshteinAutomata; //导入依赖的package包/类
/**
 * Creates a {@link FuzzySuggester} instance.
 *
 * @param indexAnalyzer Analyzer that will be used for
 *        analyzing suggestions while building the index.
 * @param queryAnalyzer Analyzer that will be used for
 *        analyzing query text during lookup
 * @param options see {@link #EXACT_FIRST}, {@link #PRESERVE_SEP}
 * @param maxSurfaceFormsPerAnalyzedForm Maximum number of
 *        surface forms to keep for a single analyzed form.
 *        When there are too many surface forms we discard the
 *        lowest weighted ones.
 * @param maxGraphExpansions Maximum number of graph paths
 *        to expand from the analyzed form. Set this to -1 for
 *        no limit.
 * @param maxEdits must be at least 0 and at most
 *        {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}
 * @param transpositions {@code true} if transpositions should be treated as a primitive
 *        edit operation. If this is false, comparisons will implement the classic
 *        Levenshtein algorithm.
 * @param nonFuzzyPrefix length of common (non-fuzzy) prefix; must not be negative
 *        (see default {@link #DEFAULT_NON_FUZZY_PREFIX})
 * @param minFuzzyLength minimum length of lookup key before any edits are allowed; must not
 *        be negative (see default {@link #DEFAULT_MIN_FUZZY_LENGTH})
 * @throws IllegalArgumentException if {@code maxEdits} is out of range, or if
 *         {@code nonFuzzyPrefix} or {@code minFuzzyLength} is negative
 */
public FuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer,
    int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
    int maxEdits, boolean transpositions, int nonFuzzyPrefix,
    int minFuzzyLength) {
  super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions);
  if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
  }
  // The two messages below used to read "must not be >= 0", the opposite of the
  // condition that triggers them; they now state the real requirement.
  if (nonFuzzyPrefix < 0) {
    throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
  }
  if (minFuzzyLength < 0) {
    throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
  }
  this.maxEdits = maxEdits;
  this.transpositions = transpositions;
  this.nonFuzzyPrefix = nonFuzzyPrefix;
  this.minFuzzyLength = minFuzzyLength;
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:43,代码来源:FuzzySuggester.java
注:本文中的org.apache.lucene.util.automaton.LevenshteinAutomata类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论