本文整理汇总了Java中org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute类的典型用法代码示例。如果您正苦于以下问题:Java PositionLengthAttribute类的具体用法?Java PositionLengthAttribute怎么用?Java PositionLengthAttribute使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
PositionLengthAttribute类属于org.apache.lucene.analysis.tokenattributes包,在下文中一共展示了PositionLengthAttribute类的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Demo entry point: tokenizes the fixed text {@code "dark sea green sea green"} on
 * whitespace, runs it through a {@code SynonymFilter} whose rules all map to
 * {@code "color"}, and prints each emitted term with its absolute position and its
 * position length.
 *
 * @param args ignored
 * @throws Exception if building the synonym map or consuming the token stream fails
 */
@SuppressWarnings("resource") // the tokenizer is wrapped by the filter, which is closed below
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();

    // try-with-resources closes the stream even if reset()/incrementToken()/end() throws;
    // the previous version only closed it on the success path.
    try (TokenStream ts = new SynonymFilter(tok, synMap, true)) {
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
        final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

        ts.reset();
        // Start one before the first position so that adding the first increment
        // yields the first token's absolute position.
        int pos = -1;
        while (ts.incrementToken()) {
            pos += posIncrAtt.getPositionIncrement();
            System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
        }
        ts.end();
    }
}
开发者ID:shaie,项目名称:lucenelab,代码行数:27,代码来源:SynonymFilterExample.java
示例2: simpleAnalyze
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Analyzes every text of {@code request} with {@code analyzer} and returns the produced
 * tokens in emission order.
 *
 * <p>Positions and offsets are cumulative across the texts: after each text the running
 * position is advanced by the stream's final position increment plus the analyzer's
 * position increment gap, and the running offset by the stream's final end offset plus
 * the analyzer's offset gap.
 *
 * @param request  supplies the texts to analyze
 * @param analyzer analyzer used to create one token stream per text
 * @param field    field name passed to the analyzer
 * @return one {@code AnalyzeToken} per emitted token
 * @throws ElasticsearchException wrapping any {@link IOException} raised while analyzing
 */
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    final List<AnalyzeResponse.AnalyzeToken> collected = new ArrayList<>();
    int position = -1;
    int offsetBase = 0;
    for (String input : request.text()) {
        try (TokenStream ts = analyzer.tokenStream(field, input)) {
            ts.reset();
            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
            final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
            final TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
            final PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);

            while (ts.incrementToken()) {
                final int step = posIncAtt.getPositionIncrement();
                // Only strictly positive increments move the running position.
                if (step > 0) {
                    position += step;
                }
                final String termText = termAtt.toString();
                final int start = offsetBase + offsetAtt.startOffset();
                final int end = offsetBase + offsetAtt.endOffset();
                collected.add(new AnalyzeResponse.AnalyzeToken(termText, position, start, end,
                        posLenAtt.getPositionLength(), typeAtt.type(), null));
            }
            ts.end();

            // Values read after end() are the stream's final offset / position increment.
            offsetBase += offsetAtt.endOffset();
            position += posIncAtt.getPositionIncrement() + analyzer.getPositionIncrementGap(field);
            offsetBase += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return collected;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:35,代码来源:TransportAnalyzeAction.java
示例3: analyze
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Drains {@code stream} and appends one {@code AnalyzeToken} per emitted token to the
 * enclosing object's {@code tokens}, updating its running {@code lastPosition} and
 * {@code lastOffset}.
 *
 * <p>After the stream is exhausted, {@code lastPosition} is advanced by the stream's final
 * position increment plus the analyzer's position increment gap, and {@code lastOffset} by
 * the stream's final end offset plus the analyzer's offset gap. The stream is always
 * closed before returning.
 *
 * @param stream            token stream to consume; closed by this method
 * @param analyzer          source of the position increment gap and offset gap
 * @param field             field name used to look up the gaps
 * @param includeAttributes passed through to {@code extractExtendedAttributes}
 * @throws ElasticsearchException wrapping any {@link IOException} raised while analyzing
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
        final OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        final TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
        final PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            final int step = posIncAtt.getPositionIncrement();
            // Only strictly positive increments move the running position.
            if (step > 0) {
                lastPosition += step;
            }
            final String termText = termAtt.toString();
            final int start = lastOffset + offsetAtt.startOffset();
            final int end = lastOffset + offsetAtt.endOffset();
            tokens.add(new AnalyzeResponse.AnalyzeToken(termText, lastPosition, start, end,
                    posLenAtt.getPositionLength(), typeAtt.type(),
                    extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();

        // Values read after end() are the stream's final offset / position increment.
        lastOffset += offsetAtt.endOffset();
        lastPosition += posIncAtt.getPositionIncrement() + analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:32,代码来源:TransportAnalyzeAction.java
示例4: Lucene43NGramTokenFilter
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Builds a Lucene43NGramTokenFilter that generates n-grams with sizes in
 * {@code [minGram, maxGram]}.
 *
 * <p>The input is first wrapped in a {@code CodepointCountFilter} with bounds
 * {@code minGram} and {@link Integer#MAX_VALUE}. The position-increment and
 * position-length attributes are stand-ins that ignore every write and always report
 * {@code 0}.
 * NOTE(review): presumably this reproduces pre-4.4 behaviour, as the class name
 * suggests; confirm.
 *
 * @param input   {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
 * @param minGram the smallest n-gram to generate; must be at least 1 and at most {@code maxGram}
 * @param maxGram the largest n-gram to generate
 * @throws IllegalArgumentException if {@code minGram < 1} or {@code minGram > maxGram}
 */
public Lucene43NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
    super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));

    // Reject invalid bounds before touching any state of this instance.
    if (minGram <= 0) {
        throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (maxGram < minGram) {
        throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }

    this.charUtils = CharacterUtils.getJava4Instance();
    this.minGram = minGram;
    this.maxGram = maxGram;

    // Inert attribute: setter is a no-op, getter is constant.
    this.posIncAtt = new PositionIncrementAttribute() {
        @Override
        public int getPositionIncrement() {
            return 0;
        }

        @Override
        public void setPositionIncrement(int ignoredIncrement) {
            // intentionally empty
        }
    };

    // Inert attribute: setter is a no-op, getter is constant.
    this.posLenAtt = new PositionLengthAttribute() {
        @Override
        public int getPositionLength() {
            return 0;
        }

        @Override
        public void setPositionLength(int ignoredLength) {
            // intentionally empty
        }
    };
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:36,代码来源:Lucene43NGramTokenFilter.java
示例5: setAttributes
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Obtains every attribute this tokenizer works with via {@code addAttribute} and caches
 * the returned instances in the corresponding fields.
 *
 * <p>The calls are kept in this order on purpose: attributes are requested one after
 * another, so reordering them could change the order in which they are registered.
 */
private void setAttributes() {
// Standard Lucene token attributes.
charTermAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
posLenAtt = addAttribute(PositionLengthAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
// Attributes that are not part of Lucene's tokenattributes package
// (presumably declared by this project; verify against the imports).
posAtt = addAttribute(PartOfSpeechAttribute.class);
semanticClassAtt = addAttribute(SemanticClassAttribute.class);
}
开发者ID:jaepil,项目名称:mecab-ko-lucene-analyzer,代码行数:10,代码来源:MeCabKoTokenizer.java
示例6: positionLength
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Creates a matcher that accepts a {@code TokenStream} whose
 * {@code PositionLengthAttribute} currently reports {@code expectedLength}.
 *
 * @param expectedLength the position length the stream's current token must have
 * @return a matcher described as {@code positionLength=<expectedLength>}
 */
protected Matcher<TokenStream> positionLength(final int expectedLength) {
    final TypeSafeMatcher<TokenStream> lengthMatcher = new TypeSafeMatcher<TokenStream>() {
        @Override
        protected boolean matchesSafely(TokenStream stream) {
            // Read the attribute's current value straight from the stream.
            final int actualLength = stream.addAttribute(PositionLengthAttribute.class).getPositionLength();
            return expectedLength == actualLength;
        }

        @Override
        public void describeTo(Description description) {
            description.appendText("positionLength=").appendValue(expectedLength);
        }
    };
    return lengthMatcher;
}
开发者ID:shopping24,项目名称:solr-analyzers,代码行数:15,代码来源:AbstractTokenFilterTest.java
示例7: TokenStreamToDot
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Captures the stream, the writer and the stream's term, position-increment and
 * position-length attributes for later use.
 *
 * <p>If inputText is non-null, and the TokenStream has offsets, the surface form is
 * included in each arc's label. The offset attribute is therefore only taken from the
 * stream when the stream already has one; otherwise it is left {@code null}.
 *
 * @param inputText original text, may be {@code null}
 * @param in        token stream to read from
 * @param out       destination writer
 */
public TokenStreamToDot(String inputText, TokenStream in, PrintWriter out) {
    this.inputText = inputText;
    this.in = in;
    this.out = out;

    this.termAtt = in.addAttribute(CharTermAttribute.class);
    this.posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
    this.posLengthAtt = in.addAttribute(PositionLengthAttribute.class);

    // Do not force an OffsetAttribute onto a stream that does not already carry one.
    this.offsetAtt = in.hasAttribute(OffsetAttribute.class)
            ? in.addAttribute(OffsetAttribute.class)
            : null;
}
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TokenStreamToDot.java
示例8: printResultOfTokenStream
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Consumes {@code ts} completely and prints one table row per token with the columns
 * {@code term}, {@code type}, {@code startOffset}, {@code endOffset}, {@code posInc},
 * {@code posLen} and {@code pos}.
 *
 * <p>Rows are keyed {@code "1."}, {@code "2."}, ... in emission order. {@code pos} is the
 * running sum of position increments starting from 0.
 *
 * <p>This method acts as the stream's consumer: it calls {@code reset()}, drains the
 * stream, then calls {@code end()} and {@code close()}. The stream is closed even if
 * consumption fails, so callers must not expect to keep reading from it afterwards.
 *
 * @param out destination for the rendered table
 * @param ts  token stream to consume; closed by this method
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void printResultOfTokenStream(PrintStream out, TokenStream ts) throws IOException {
    // Insertion-ordered rows and columns so the table prints in emission order.
    Table<String, String, String> contentTable = Tables.newCustomTable(new LinkedHashMap<String, Map<String, String>>(),
            new Supplier<Map<String, String>>() {
                @Override
                public Map<String, String> get() {
                    return Maps.newLinkedHashMap();
                }
            });
    try {
        // addAttribute returns the existing instance or registers a default one, so a
        // stream lacking one of these attributes is printed with default values instead
        // of failing on the lookup.
        CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class);
        OffsetAttribute offAttr = ts.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncAttr = ts.addAttribute(PositionIncrementAttribute.class);
        PositionLengthAttribute posLenAttr = ts.addAttribute(PositionLengthAttribute.class);

        ts.reset();
        int lineNo = 1;
        int pos = 0;
        while (ts.incrementToken()) {
            String lineId = lineNo + ".";
            contentTable.put(lineId, "term", termAttr.toString());
            contentTable.put(lineId, "type", typeAttr.type());
            contentTable.put(lineId, "startOffset", offAttr.startOffset() + "");
            contentTable.put(lineId, "endOffset", offAttr.endOffset() + "");
            contentTable.put(lineId, "posInc", posIncAttr.getPositionIncrement() + "");
            contentTable.put(lineId, "posLen", posLenAttr.getPositionLength() + "");
            pos += posIncAttr.getPositionIncrement();
            contentTable.put(lineId, "pos", pos + "");
            lineNo++;
        }
        // Complete the consumer workflow; previously end()/close() were never called.
        ts.end();
    } finally {
        ts.close();
    }
    printTable(out, contentTable);
}
开发者ID:thihy,项目名称:cc-analysis,代码行数:32,代码来源:AnalysisTestHelper.java
示例9: testNGrams
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Checks that an {@code NGramTokenizer} over {@code s} emits exactly the expected grams,
 * in order, each with position increment 1, position length 1 and the expected offsets.
 *
 * <p>The expected grams are enumerated independently of the tokenizer: for every start
 * code point and every size from {@code minGram} to {@code maxGram} that fits into the
 * input, a gram is expected unless {@code edgesOnly} is set and the preceding code point
 * is a token char, or any code point inside the gram is not a token char.
 *
 * @param minGram       smallest gram size, in code points
 * @param maxGram       largest gram size, in code points
 * @param s             input text
 * @param nonTokenChars characters that are treated as not being token chars
 * @param edgesOnly     whether only grams starting at a token edge are expected
 * @throws IOException if the tokenizer fails
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
    final int[] codePoints = toCodePoints(s);

    // offsets[i] is the char index at which code point i starts; the last entry is
    // the total number of chars covered by all code points.
    final int[] offsets = new int[codePoints.length + 1];
    int charIndex = 0;
    for (int i = 0; i < codePoints.length; i++) {
        charIndex += Character.charCount(codePoints[i]);
        offsets[i + 1] = charIndex;
    }

    final TokenStream grams = new NGramTokenizer(Version.LATEST, new StringReader(s), minGram, maxGram, edgesOnly) {
        @Override
        protected boolean isTokenChar(int chr) {
            return nonTokenChars.indexOf(chr) < 0;
        }
    };
    final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
    grams.reset();

    for (int start = 0; start < codePoints.length; ++start) {
        final int lastEnd = Math.min(start + maxGram, codePoints.length);
        for (int end = start + minGram; end <= lastEnd; ++end) {
            if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
                // not on an edge
                continue;
            }
            // Stop scanning at the first code point that is not a token char.
            boolean allTokenChars = true;
            for (int j = start; allTokenChars && j < end; ++j) {
                allTokenChars = isTokenChar(nonTokenChars, codePoints[j]);
            }
            if (!allTokenChars) {
                continue;
            }

            assertTrue(grams.incrementToken());
            assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
            assertEquals(1, posIncAtt.getPositionIncrement());
            assertEquals(1, posLenAtt.getPositionLength());
            assertEquals(offsets[start], offsetAtt.startOffset());
            assertEquals(offsets[end], offsetAtt.endOffset());
        }
    }

    // No further grams, and the final offsets sit at the end of the input.
    assertFalse(grams.incrementToken());
    grams.end();
    assertEquals(s.length(), offsetAtt.startOffset());
    assertEquals(s.length(), offsetAtt.endOffset());
}
开发者ID:europeana,项目名称:search,代码行数:44,代码来源:NGramTokenizerTest.java
示例10: getGraph
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Renders the token graph produced for {@code input} as a Graphviz {@code digraph}.
 *
 * <p>The input is split on whitespace and passed through a {@code SynonymGraphFilter}
 * built from the enclosing class's {@code builder}. Each token becomes an arc: the
 * PositionIncrementAttribute tells how many positions (nodes) ahead the arc starts,
 * while the PositionLengthAttribute (new as of 3.6.0) tells how many positions (nodes)
 * past its start the arc ends. A node is declared whenever the position increment is
 * non-zero.
 *
 * @param input text to tokenize
 * @return the graph description, starting with {@code "digraph Automaton {"}
 * @throws IOException if building the synonym map or consuming the token stream fails
 */
private static String getGraph(String input) throws IOException {
    final Tokenizer inputStream = new WhitespaceTokenizer();
    inputStream.setReader(new StringReader(input));

    final StringBuilder b = new StringBuilder();
    b.append("digraph Automaton {\n");
    b.append(" initial [shape=plaintext,label=\"\"]\n");
    b.append(" initial -> 0\n");

    // try-with-resources closes the stream even if consuming it throws; the previous
    // version only closed it on the success path.
    try (TokenStream tokenStream = new SynonymGraphFilter(inputStream, builder.build(), false)) {
        final PositionIncrementAttribute posIncAtt =
                tokenStream.addAttribute(PositionIncrementAttribute.class);
        final PositionLengthAttribute posLenAtt = tokenStream.addAttribute(PositionLengthAttribute.class);
        final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();

        int srcNode = -1;
        while (tokenStream.incrementToken()) {
            final int posInc = posIncAtt.getPositionIncrement();
            if (posInc != 0) {
                // A new source position: advance and declare its node.
                srcNode += posInc;
                b.append(" ").append(srcNode);
                b.append(" [shape=circle,label=\"").append(srcNode).append("\"]\n");
            }
            final int destNode = srcNode + posLenAtt.getPositionLength();
            b.append(" ").append(srcNode).append(" -> ").append(destNode);
            b.append(" [label=\"").append(termAtt).append("\"").append("]\n");
        }
        tokenStream.end();
    }

    b.append('}');
    return b.toString();
}
开发者ID:MysterionRise,项目名称:information-retrieval-adventure,代码行数:49,代码来源:EntradaSalida.java
示例11: testNGrams
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; //导入依赖的package包/类
/**
 * Checks that an {@code NGramTokenizer} over {@code s} emits exactly the expected grams,
 * in order, each with position increment 1, position length 1 and the expected offsets.
 *
 * <p>The expected grams are enumerated independently of the tokenizer: for every start
 * code point and every size from {@code minGram} to {@code maxGram} that fits into the
 * input, a gram is expected unless {@code edgesOnly} is set and the preceding code point
 * is a token char, or any code point inside the gram is not a token char.
 *
 * @param minGram       smallest gram size, in code points
 * @param maxGram       largest gram size, in code points
 * @param s             input text
 * @param nonTokenChars characters that are treated as not being token chars
 * @param edgesOnly     whether only grams starting at a token edge are expected
 * @throws IOException if the tokenizer fails
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
    final int[] codePoints = toCodePoints(s);

    // offsets[i] is the char index at which code point i starts; the last entry is
    // the total number of chars covered by all code points.
    final int[] offsets = new int[codePoints.length + 1];
    int charIndex = 0;
    for (int i = 0; i < codePoints.length; i++) {
        charIndex += Character.charCount(codePoints[i]);
        offsets[i + 1] = charIndex;
    }

    final TokenStream grams = new NGramTokenizer(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly) {
        @Override
        protected boolean isTokenChar(int chr) {
            return nonTokenChars.indexOf(chr) < 0;
        }
    };
    final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
    grams.reset();

    for (int start = 0; start < codePoints.length; ++start) {
        final int lastEnd = Math.min(start + maxGram, codePoints.length);
        for (int end = start + minGram; end <= lastEnd; ++end) {
            if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
                // not on an edge
                continue;
            }
            // Stop scanning at the first code point that is not a token char.
            boolean allTokenChars = true;
            for (int j = start; allTokenChars && j < end; ++j) {
                allTokenChars = isTokenChar(nonTokenChars, codePoints[j]);
            }
            if (!allTokenChars) {
                continue;
            }

            assertTrue(grams.incrementToken());
            assertArrayEquals(Arrays.copyOfRange(codePoints, start, end), toCodePoints(termAtt));
            assertEquals(1, posIncAtt.getPositionIncrement());
            assertEquals(1, posLenAtt.getPositionLength());
            assertEquals(offsets[start], offsetAtt.startOffset());
            assertEquals(offsets[end], offsetAtt.endOffset());
        }
    }

    // No further grams, and the final offsets sit at the end of the input.
    assertFalse(grams.incrementToken());
    grams.end();
    assertEquals(s.length(), offsetAtt.startOffset());
    assertEquals(s.length(), offsetAtt.endOffset());
}
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:44,代码来源:NGramTokenizerTest.java
注:本文中的org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论