This article collects typical usage examples of the Java class org.apache.lucene.index.TermFreqVector. If you are wondering what the TermFreqVector class is for, how to use it, or where to find working examples, the curated code samples below should help.
The TermFreqVector class belongs to the org.apache.lucene.index package. A total of 17 code examples of the class are shown below, sorted by popularity by default.
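Before the examples, here is a minimal sketch of the typical access pattern. It assumes Lucene 3.x (TermFreqVector and IndexReader.getTermFreqVector were removed in Lucene 4.0 in favor of the Terms/TermsEnum API) and a hypothetical index path; the field must have been indexed with term vectors enabled (TermVector.YES or richer), otherwise the call returns null.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.FSDirectory;

public class TermFreqVectorDemo {
    public static void main(String[] args) throws IOException {
        // Hypothetical index location; point this at your own index.
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/tmp/my-index")));
        try {
            // Returns null unless the field was indexed with term vectors enabled.
            TermFreqVector tfv = reader.getTermFreqVector(0, "contents");
            if (tfv != null) {
                String[] terms = tfv.getTerms();        // distinct terms of this field in document 0
                int[] freqs = tfv.getTermFrequencies(); // parallel array of within-document frequencies
                for (int i = 0; i < terms.length; i++) {
                    System.out.println(terms[i] + " -> " + freqs[i]);
                }
            }
        } finally {
            reader.close();
        }
    }
}

All of the examples that follow build on this same pair of parallel arrays returned by getTerms() and getTermFrequencies().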
Example 1: computeMultivaluedTFV

import org.apache.lucene.index.TermFreqVector; // import the required package/class

private static Map<String, FacetCounter> computeMultivaluedTFV(ReaderAbstract reader, String fieldName,
        DocIdInterface docIdInterface) throws IOException, SearchLibException {
    final Map<String, FacetCounter> termMap = new LinkedHashMap<>();
    if (docIdInterface.getSize() == 0)
        return termMap;
    for (int docId : docIdInterface.getIds()) {
        final TermFreqVector tfv = reader.getTermFreqVector(docId, fieldName);
        if (tfv == null)
            continue;
        final String[] terms = tfv.getTerms();
        final int[] freqs = tfv.getTermFrequencies();
        if (terms == null || freqs == null)
            continue;
        int i = 0;
        for (String term : terms) {
            if (freqs[i++] > 0) {
                final FacetCounter facetItem = termMap.get(term);
                if (facetItem == null)
                    termMap.put(term, new FacetCounter(1));
                else
                    facetItem.increment();
            }
        }
    }
    return termMap;
}

Developer: jaeksoft | Project: opensearchserver | Lines: 27 | Source: Facet.java
Example 2: getTermsVectorFields

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public Set<FieldValue> getTermsVectorFields(int docId, Set<String> fieldNameSet) throws IOException {
    Set<FieldValue> fieldValueList = new HashSet<FieldValue>();
    for (String fieldName : fieldNameSet) {
        TermFreqVector termFreqVector = indexReader.getTermFreqVector(docId, fieldName);
        if (termFreqVector == null)
            continue;
        String[] terms = termFreqVector.getTerms();
        if (terms == null)
            continue;
        FieldValueItem[] fieldValueItem = new FieldValueItem[terms.length];
        int i = 0;
        for (String term : terms)
            fieldValueItem[i++] = new FieldValueItem(FieldValueOriginEnum.TERM_VECTOR, term);
        fieldValueList.add(new FieldValue(fieldName, fieldValueItem));
    }
    return fieldValueList;
}

Developer: jaeksoft | Project: opensearchserver | Lines: 18 | Source: ReaderLocal.java
Example 3: populate

import org.apache.lucene.index.TermFreqVector; // import the required package/class

@Override
public void populate(List<IndexDocumentResult> indexDocuments) throws IOException, SearchLibException {
    SchemaFieldList schemaFieldList = request.getConfig().getSchema().getFieldList();
    for (int docId : docArray) {
        IndexDocumentResult indexDocument = new IndexDocumentResult(schemaFieldList.size());
        Map<String, FieldValue> storedFieldMap = reader.getDocumentStoredField(docId);
        for (SchemaField schemaField : schemaFieldList) {
            String fieldName = schemaField.getName();
            List<IndexTerm> indexTermList = null;
            if (schemaField.checkIndexed(Indexed.YES)) {
                if (schemaField.getTermVector() == TermVector.NO) {
                    indexTermList = IndexTerm.toList(reader, fieldName, docId);
                } else {
                    TermFreqVector termFreqVector = reader.getTermFreqVector(docId, fieldName);
                    indexTermList = IndexTerm.toList(termFreqVector);
                }
            }
            IndexField indexField = new IndexField(fieldName, storedFieldMap.get(fieldName), indexTermList);
            indexDocument.add(indexField);
        }
        indexDocuments.add(indexDocument);
    }
}

Developer: jaeksoft | Project: opensearchserver | Lines: 24 | Source: ResultDocuments.java
Example 4: buildCategoryVectors

import org.apache.lucene.index.TermFreqVector; // import the required package/class

private void buildCategoryVectors() throws IOException {
    // Lucene 3.x API: isDeleted and getTermFreqVector are only available on IndexReader up to 3.x
    IndexReader reader = IndexReader.open(TestUtil.getBookIndexDirectory());
    int maxDoc = reader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i);
            String category = doc.get("category");
            Map vectorMap = (Map) categoryMap.get(category);
            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");
            addTermFreqToMap(vectorMap, termFreqVector);
        }
    }
}

Developer: xuzhikethinker | Project: t4f-data | Lines: 24 | Source: CategorizerTest.java
Example 5: addTermFreqToMap

import org.apache.lucene.index.TermFreqVector; // import the required package/class

private void addTermFreqToMap(Map vectorMap, TermFreqVector termFreqVector) {
    String[] terms = termFreqVector.getTerms();
    int[] freqs = termFreqVector.getTermFrequencies();
    for (int i = 0; i < terms.length; i++) {
        String term = terms[i];
        if (vectorMap.containsKey(term)) {
            Integer value = (Integer) vectorMap.get(term);
            vectorMap.put(term, new Integer(value.intValue() + freqs[i]));
        } else {
            vectorMap.put(term, new Integer(freqs[i]));
        }
    }
}

Developer: xuzhikethinker | Project: t4f-data | Lines: 18 | Source: CategorizerTest.java
Example 6: putTermFreqVectors

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public void putTermFreqVectors(final int[] docIds, final String field,
        final Collection<TermFreqVector> termFreqVectors) throws IOException {
    if (termFreqVectors == null || docIds == null || docIds.length == 0)
        return;
    for (int docId : docIds)
        termFreqVectors.add(indexReader.getTermFreqVector(docId, field));
}

Developer: jaeksoft | Project: opensearchserver | Lines: 8 | Source: ReaderLocal.java
Example 7: putTermVectors

import org.apache.lucene.index.TermFreqVector; // import the required package/class

@Override
public void putTermVectors(int[] docIds, String field, Collection<String[]> termVectors) throws IOException {
    if (docIds == null || docIds.length == 0 || field == null || termVectors == null)
        return;
    List<TermFreqVector> termFreqVectors = new ArrayList<TermFreqVector>(docIds.length);
    putTermFreqVectors(docIds, field, termFreqVectors);
    for (TermFreqVector termFreqVector : termFreqVectors)
        termVectors.add(termFreqVector.getTerms());
}

Developer: jaeksoft | Project: opensearchserver | Lines: 10 | Source: ReaderLocal.java
Example 8: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; // import the required package/class

@Override
final public TermFreqVector getTermFreqVector(final int docId, final String field)
        throws IOException, SearchLibException {
    checkOnline(true);
    ReaderLocal reader = acquire();
    try {
        return reader.getTermFreqVector(docId, field);
    } finally {
        release(reader);
    }
}

Developer: jaeksoft | Project: opensearchserver | Lines: 12 | Source: IndexSingle.java
Example 9: getTermPositionVector

import org.apache.lucene.index.TermFreqVector; // import the required package/class

private static final TermPositionVector getTermPositionVector(final String[] terms,
        final ReaderInterface readerInterface, final int docId, final String field,
        List<FieldValueItem> values, CompiledAnalyzer analyzer, Timer timer)
        throws IOException, SearchLibException, ParseException, SyntaxError {
    TermFreqVector termFreqVector = readerInterface.getTermFreqVector(docId, field);
    if (termFreqVector != null)
        if (termFreqVector instanceof TermPositionVector)
            return (TermPositionVector) termFreqVector;
    if (analyzer == null)
        return null;
    SnippetTermPositionVector stpv = new SnippetTermPositionVector(field, terms);
    int positionOffset = 0;
    int characterOffset = 0;
    List<TokenTerm> tokenTerms = new ArrayList<TokenTerm>();
    for (FieldValueItem fieldValueItem : values) {
        if (fieldValueItem.value == null)
            continue;
        analyzer.populate(fieldValueItem.value, tokenTerms);
        positionOffset = stpv.addCollection(tokenTerms, characterOffset, positionOffset);
        characterOffset += fieldValueItem.value.length() + 1;
        tokenTerms.clear();
    }
    stpv.compile();
    return stpv;
}

Developer: jaeksoft | Project: opensearchserver | Lines: 30 | Source: SnippetVectors.java
Example 10: toList

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public final static List<IndexTerm> toList(TermFreqVector termVector) {
    if (termVector == null)
        return null;
    String[] terms = termVector.getTerms();
    if (terms == null)
        return null;
    int[] frequencies = termVector.getTermFrequencies();
    List<IndexTerm> indexTerms = new ArrayList<IndexTerm>(terms.length);
    if (termVector instanceof TermPositionVector)
        toListPosition((TermPositionVector) termVector, terms, frequencies, indexTerms);
    else
        toListFreq(termVector, terms, frequencies, indexTerms);
    return indexTerms;
}

Developer: jaeksoft | Project: opensearchserver | Lines: 15 | Source: IndexDocumentResult.java
Example 11: toListFreq

import org.apache.lucene.index.TermFreqVector; // import the required package/class

private final static void toListFreq(TermFreqVector termVector, String[] terms, int[] frequencies,
        List<IndexTerm> indexTerms) {
    int i = 0;
    for (String term : terms) {
        IndexTerm indexTerm = new IndexTerm(term, frequencies[i], null, null);
        indexTerms.add(indexTerm);
        i++;
    }
}

Developer: jaeksoft | Project: opensearchserver | Lines: 10 | Source: IndexDocumentResult.java
Example 12: getCosineSimilarityMatrix

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public static DocVector[] getCosineSimilarityMatrix(List<String> fileSentences) throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    FileReader fr = new FileReader(new File("lib/stoplists/en.txt"));
    // Set<String> stopWords = new HashSet<String>(FileUtils.readLines(new File("stop-words.txt")));
    Analyzer analyzer = new StopAnalyzer(Version.LUCENE_36, fr);
    // Index the full text of each sentence as one document
    // IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
    IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_36, analyzer));
    for (String s : fileSentences) {
        Document doc1 = new Document();
        StringReader d1reader = new StringReader(s);
        doc1.add(new Field("contents", d1reader, TermVector.YES));
        writer.addDocument(doc1);
    }
    // writer.commit();
    writer.close();
    DocVector[] docs = new DocVector[fileSentences.size()];
    // Build a term vector for each document
    IndexReader RAMreader = IndexReader.open(ramDir);
    Map<String, Integer> terms = new HashMap<String, Integer>();
    TermEnum termEnum = RAMreader.terms(new Term("contents"));
    int pos = 0;
    while (termEnum.next()) {
        Term term = termEnum.term();
        if (!"contents".equals(term.field()))
            break;
        terms.put(term.text(), pos++);
    }
    for (int i = 0; i < fileSentences.size(); i++) {
        TermFreqVector[] tfvs = RAMreader.getTermFreqVectors(i);
        docs[i] = new DocVector(terms);
        if (tfvs == null)
            continue;
        for (TermFreqVector tfv : tfvs) {
            String[] termTexts = tfv.getTerms();
            int[] termFreqs = tfv.getTermFrequencies();
            for (int j = 0; j < termTexts.length; j++) {
                double idfValue = getIDF(RAMreader, termTexts[j]);
                double tfIdfValue = termFreqs[j] * idfValue;
                docs[i].setEntry(termTexts[j], tfIdfValue);
            }
        }
        docs[i].normalize();
    }
    RAMreader.close();
    ramDir.close();
    // System.out.println("Similarity:" + calcCosineSimilarity(docs[5], docs[19]));
    return docs;
}

Developer: siddBanPsu | Project: WikiKreator | Lines: 71 | Source: CosineDocumentSimilarity.java
Example 13: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    return wrapped.getTermFreqVector(docNumber, field);
}

Developer: CDLUC3 | Project: dash-xtf | Lines: 6 | Source: LimIndexReader.java
Example 14: getTermFreqVectors

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
    return wrapped.getTermFreqVectors(docNumber);
}

Developer: CDLUC3 | Project: dash-xtf | Lines: 6 | Source: LimIndexReader.java
Example 15: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; // import the required package/class

@Override
public TermFreqVector getTermFreqVector(final int docId, final String field) throws IOException {
    return indexReader.getTermFreqVector(docId, field);
}

Developer: jaeksoft | Project: opensearchserver | Lines: 5 | Source: ReaderLocal.java
Example 16: run

import org.apache.lucene.index.TermFreqVector; // import the required package/class

public double run(String doc1, String doc2) throws IOException {
    // index the two strings
    s[0] = doc1;
    s[1] = doc2;
    Directory index = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    IndexWriter writer = new IndexWriter(index, config);
    for (String si : s) {
        Document doc = new Document();
        doc.add(new Field("content", si, Field.Store.YES, Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
        writer.addDocument(doc);
    }
    writer.close();
    // read the index
    IndexReader reader = IndexReader.open(index);
    // collect the term -> position mapping used by DocVector
    Map<String, Integer> terms = new HashMap<String, Integer>();
    TermEnum termEnum = reader.terms(new Term("content"));
    int pos = 0;
    while (termEnum.next()) {
        Term term = termEnum.term();
        if (!"content".equals(term.field()))
            break;
        terms.put(term.text(), pos++);
    }
    // build a term-frequency vector per document, then apply cosine similarity
    DocVector[] docs = new DocVector[s.length];
    for (int i = 0; i < s.length; i++) {
        TermFreqVector[] tfvs = reader.getTermFreqVectors(i);
        docs[i] = new DocVector(terms);
        for (TermFreqVector tfv : tfvs) {
            String[] termTexts = tfv.getTerms();
            int[] termFreqs = tfv.getTermFrequencies();
            for (int j = 0; j < termTexts.length; j++) {
                docs[i].setEntry(termTexts[j], termFreqs[j]);
            }
        }
        docs[i].normalize();
    }
    // now get similarity between doc[0] and doc[1]
    double cosim01 = getCosineSimilarity(docs[0], docs[1]);
    reader.close();
    return cosim01;
}

Developer: amark-india | Project: eventspotter | Lines: 78 | Source: CosineSimilarity.java
Example 17: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; // import the required package/class

TermFreqVector getTermFreqVector(final int docId, final String field) throws IOException, SearchLibException;

Developer: jaeksoft | Project: opensearchserver | Lines: 2 | Source: ReaderInterface.java
Note: The org.apache.lucene.index.TermFreqVector examples in this article were collected from source-code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their respective authors, and copyright remains with the original owners; refer to each project's license before using or redistributing the code. Do not reproduce this article without permission.