本文整理汇总了Java中org.apache.lucene.util.IntsRefBuilder类的典型用法代码示例。如果您正苦于以下问题:Java IntsRefBuilder类的具体用法?Java IntsRefBuilder怎么用?Java IntsRefBuilder使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
IntsRefBuilder类属于org.apache.lucene.util包,在下文中一共展示了IntsRefBuilder类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: build
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Builds the NormalizeCharMap; call this once you
* are done calling {@link #add}. */
public NormalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new NormalizeCharMap(map);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:23,代码来源:NormalizeCharMap.java
示例2: parseConversions
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
Map<String,String> mappings = new TreeMap<>();
for (int i = 0; i < num; i++) {
String line = reader.readLine();
String parts[] = line.split("\\s+");
if (parts.length != 3) {
throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
}
if (mappings.put(parts[1], parts[2]) != null) {
throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
}
}
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
IntsRefBuilder scratchInts = new IntsRefBuilder();
for (Map.Entry<String,String> entry : mappings.entrySet()) {
Util.toUTF16(entry.getKey(), scratchInts);
builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
}
return builder.finish();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:25,代码来源:Dictionary.java
示例3: build
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/**
* Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
* @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
* @throws IOException if an {@link IOException} occurs;
*/
public StemmerOverrideMap build() throws IOException {
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<>(
FST.INPUT_TYPE.BYTE4, outputs);
final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
IntsRefBuilder intsSpare = new IntsRefBuilder();
final int size = hash.size();
BytesRef spare = new BytesRef();
for (int i = 0; i < size; i++) {
int id = sort[i];
BytesRef bytesRef = hash.get(id, spare);
intsSpare.copyUTF8Bytes(bytesRef);
builder.add(intsSpare.get(), new BytesRef(outputValues.get(id)));
}
return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:22,代码来源:StemmerOverrideFilter.java
示例4: addStartPaths
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Adds all leaving arcs, including 'finished' arc, if
* the node is final, from this node into the queue. */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {
// De-dup NO_OUTPUT since it must be a singleton:
if (startOutput.equals(fst.outputs.getNoOutput())) {
startOutput = fst.outputs.getNoOutput();
}
FSTPath<T> path = new FSTPath<>(startOutput, node, input);
fst.readFirstTargetArc(node, path.arc, bytesReader);
//System.out.println("add start paths");
// Bootstrap: find the min starting arc
while (true) {
if (allowEmptyString || path.arc.label != FST.END_LABEL) {
addIfCompetitive(path);
}
if (path.arc.isLast()) {
break;
}
fst.readNextArc(path.arc, bytesReader);
}
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:26,代码来源:Util.java
示例5: writeFST
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:MemoryDocValuesConsumer.java
示例6: buildAutomaton
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/**
* Builds the final automaton from a list of entries.
*/
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
// Build the automaton.
final Outputs<Object> outputs = NoOutputs.getSingleton();
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, false,
PackedInts.DEFAULT, true, 15);
BytesRefBuilder scratch = new BytesRefBuilder();
BytesRef entry;
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
int count = 0;
BytesRefIterator iter = sorter.iterator();
while((entry = iter.next()) != null) {
count++;
if (scratch.get().compareTo(entry) != 0) {
builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
scratch.copyBytes(entry);
}
}
return count == 0 ? null : builder.finish();
}
开发者ID:europeana,项目名称:search,代码行数:28,代码来源:FSTCompletionBuilder.java
示例7: testDuplicateFSAString
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
public void testDuplicateFSAString() throws Exception {
String str = "foobar";
final Outputs<Object> outputs = NoOutputs.getSingleton();
final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder ints = new IntsRefBuilder();
for(int i=0; i<10; i++) {
b.add(Util.toIntsRef(new BytesRef(str), ints), outputs.getNoOutput());
}
FST<Object> fst = b.finish();
// count the input paths
int count = 0;
final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<>(fst);
while(fstEnum.next()!=null) {
count++;
}
assertEquals(1, count);
assertNotNull(Util.get(fst, new BytesRef(str)));
assertNull(Util.get(fst, new BytesRef("foobaz")));
}
开发者ID:europeana,项目名称:search,代码行数:22,代码来源:TestFSTs.java
示例8: testInternalFinalState
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final boolean willRewrite = random().nextBoolean();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
StringWriter w = new StringWriter();
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
Util.toDot(fst, w, false, false);
w.close();
//System.out.println(w.toString());
// check for accept state at label t
assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
// check for accept state at label n
assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
}
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:TestFSTs.java
示例9: testLargeOutputsOnArrayArcs
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
public void testLargeOutputsOnArrayArcs() throws Exception {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final byte[] bytes = new byte[300];
final IntsRefBuilder input = new IntsRefBuilder();
input.append(0);
final BytesRef output = new BytesRef(bytes);
for(int arc=0;arc<6;arc++) {
input.setIntAt(0, arc);
output.bytes[0] = (byte) arc;
builder.add(input.get(), BytesRef.deepCopyOf(output));
}
final FST<BytesRef> fst = builder.finish();
for(int arc=0;arc<6;arc++) {
input.setIntAt(0, arc);
final BytesRef result = Util.get(fst, input.get());
assertNotNull(result);
assertEquals(300, result.length);
assertEquals(result.bytes[result.offset], arc);
for(int byteIDX=1;byteIDX<result.length;byteIDX++) {
assertEquals(0, result.bytes[result.offset+byteIDX]);
}
}
}
开发者ID:europeana,项目名称:search,代码行数:27,代码来源:TestFSTs.java
示例10: split
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/**
* Splits the input sequence of characters into separate words if this sequence is
* potentially a compound word.
*
* @param word The word to be split.
* @return Returns <code>null</code> if this word is not recognized at all. Returns a
* character sequence with '.'-delimited compound chunks (if ambiguous
* interpretations are possible, they are separated by a ',' character). The
* returned buffer will change with each call to <code>split</code> so copy the
* content if needed.
*/
public CharSequence split(CharSequence word) {
try {
StringBuilder builder = new StringBuilder();
builder.append(word);
builder.reverse();
for (int i = builder.length(); --i > 0; ) {
builder.setCharAt(i, Character.toLowerCase(builder.charAt(i)));
}
IntsRefBuilder utf32Builder = new IntsRefBuilder();
IntsRef utf32 = fromUTF16ToUTF32(builder, utf32Builder).get();
builder.setLength(0);
Deque<Chunk> chunks = new ArrayDeque<>();
matchWord(utf32, utf32.offset, builder, chunks);
return builder.length() == 0 ? null : builder;
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:30,代码来源:FstDecompounder.java
示例11: append
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
while((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
}
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:11,代码来源:BlockTreeTermsWriter.java
示例12: affixFST
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String,List<Integer>> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
List<Integer> entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
for (Integer c : entries) {
output.ints[output.length++] = c;
}
builder.add(scratch.get(), output);
}
return builder.finish();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:16,代码来源:Dictionary.java
示例13: shortestPaths
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Starting from node, find the top N min cost
* completions to a final node. */
public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN,
boolean allowEmptyString) throws IOException {
// All paths are kept, so we can pass topN for
// maxQueueDepth and the pruning is admissible:
TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator);
// since this search is initialized with a single start node
// it is okay to start with an empty input path here
searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder());
return searcher.search();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:15,代码来源:Util.java
示例14: toUTF16
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Just maps each UTF16 unit (char) to the ints in an
* IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) {
final int charLimit = s.length();
scratch.setLength(charLimit);
scratch.grow(charLimit);
for (int idx = 0; idx < charLimit; idx++) {
scratch.setIntAt(idx, (int) s.charAt(idx));
}
return scratch.get();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:12,代码来源:Util.java
示例15: toUTF32
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Decodes the Unicode codepoints from the provided
* CharSequence and places them in the provided scratch
* IntsRef, which must not be null, returning it. */
public static IntsRef toUTF32(CharSequence s, IntsRefBuilder scratch) {
int charIdx = 0;
int intIdx = 0;
final int charLimit = s.length();
while(charIdx < charLimit) {
scratch.grow(intIdx+1);
final int utf32 = Character.codePointAt(s, charIdx);
scratch.setIntAt(intIdx, utf32);
charIdx += Character.charCount(utf32);
intIdx++;
}
scratch.setLength(intIdx);
return scratch.get();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:18,代码来源:Util.java
示例16: toIntsRef
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Just takes unsigned byte values from the BytesRef and
* converts into an IntsRef. */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
scratch.clear();
for(int i=0;i<input.length;i++) {
scratch.append(input.bytes[i+input.offset] & 0xFF);
}
return scratch.get();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:10,代码来源:Util.java
示例17: append
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<Output> indexEnt;
while ((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
Output output = indexEnt.output;
long blockTermCount = output.endOrd - output.startOrd + 1;
Output newOutput = FST_OUTPUTS.newOutput(output.bytes, termOrdOffset+output.startOrd, output.endOrd-termOrdOffset);
//System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
}
}
开发者ID:europeana,项目名称:search,代码行数:15,代码来源:OrdsBlockTreeTermsWriter.java
示例18: loadTermsIndex
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
private void loadTermsIndex() throws IOException {
if (fst == null) {
IndexInput clone = in.clone();
clone.seek(indexStart);
fst = new FST<>(clone, fstOutputs);
clone.close();
/*
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
Util.toDot(fst, w, false, false);
System.out.println("FST INDEX: SAVED to " + dotFileName);
w.close();
*/
if (indexDivisor > 1) {
// subsample
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
int count = indexDivisor;
while((result = fstEnum.next()) != null) {
if (count == indexDivisor) {
builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
count = 0;
}
count++;
}
fst = builder.finish();
}
}
}
开发者ID:europeana,项目名称:search,代码行数:35,代码来源:VariableGapTermsIndexReader.java
示例19: Path
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
/** Sole constructor. */
public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) {
this.state = state;
this.fstNode = fstNode;
this.output = output;
this.input = input;
}
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:FSTUtil.java
示例20: testListOfOutputs
import org.apache.lucene.util.IntsRefBuilder; //导入依赖的package包/类
public void testListOfOutputs() throws Exception {
PositiveIntOutputs _outputs = PositiveIntOutputs.getSingleton();
ListOfOutputs<Long> outputs = new ListOfOutputs<>(_outputs);
final Builder<Object> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
// Add the same input more than once and the outputs
// are merged:
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 1L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 3L);
builder.add(Util.toIntsRef(new BytesRef("a"), scratch), 0L);
builder.add(Util.toIntsRef(new BytesRef("b"), scratch), 17L);
final FST<Object> fst = builder.finish();
Object output = Util.get(fst, new BytesRef("a"));
assertNotNull(output);
List<Long> outputList = outputs.asList(output);
assertEquals(3, outputList.size());
assertEquals(1L, outputList.get(0).longValue());
assertEquals(3L, outputList.get(1).longValue());
assertEquals(0L, outputList.get(2).longValue());
output = Util.get(fst, new BytesRef("b"));
assertNotNull(output);
outputList = outputs.asList(output);
assertEquals(1, outputList.size());
assertEquals(17L, outputList.get(0).longValue());
}
开发者ID:europeana,项目名称:search,代码行数:29,代码来源:TestFSTsMisc.java
注:本文中的org.apache.lucene.util.IntsRefBuilder类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论