• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Java TermFreqVector类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.apache.lucene.index.TermFreqVector的典型用法代码示例。如果您正苦于以下问题:Java TermFreqVector类的具体用法?Java TermFreqVector怎么用?Java TermFreqVector使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



TermFreqVector类属于org.apache.lucene.index包,在下文中一共展示了TermFreqVector类的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: computeMultivaluedTFV

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Builds a per-term counter map from the term vectors of the given documents.
 * <p>
 * For every document id exposed by {@code docIdInterface}, the term vector of
 * {@code fieldName} is read; each term whose frequency is strictly positive
 * either registers a new {@code FacetCounter(1)} (first sighting) or calls
 * {@code increment()} on the counter already stored for that term.
 *
 * @param reader         source of the term vectors
 * @param fieldName      field whose term vector is read
 * @param docIdInterface provider of the document ids to scan
 * @return an insertion-ordered map from term to counter; empty when
 *         {@code docIdInterface.getSize()} is zero
 * @throws IOException        propagated from the reader
 * @throws SearchLibException propagated from the reader
 */
private static Map<String, FacetCounter> computeMultivaluedTFV(ReaderAbstract reader, String fieldName,
		DocIdInterface docIdInterface) throws IOException, SearchLibException {
	final Map<String, FacetCounter> countersByTerm = new LinkedHashMap<>();
	if (docIdInterface.getSize() != 0) {
		for (int docId : docIdInterface.getIds()) {
			final TermFreqVector vector = reader.getTermFreqVector(docId, fieldName);
			if (vector == null)
				continue;
			final String[] vectorTerms = vector.getTerms();
			final int[] vectorFreqs = vector.getTermFrequencies();
			if (vectorTerms == null || vectorFreqs == null)
				continue;
			for (int idx = 0; idx < vectorTerms.length; idx++) {
				// Terms with a non-positive frequency are not counted.
				if (vectorFreqs[idx] <= 0)
					continue;
				final String currentTerm = vectorTerms[idx];
				final FacetCounter existing = countersByTerm.get(currentTerm);
				if (existing != null)
					existing.increment();
				else
					countersByTerm.put(currentTerm, new FacetCounter(1));
			}
		}
	}
	return countersByTerm;
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:27,代码来源:Facet.java


示例2: getTermsVectorFields

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Collects, for one document, the term-vector terms of each requested field.
 * <p>
 * Fields for which the index reader returns no term vector, or whose vector
 * exposes no terms array, are left out of the result.
 *
 * @param docId        the document whose term vectors are read
 * @param fieldNameSet names of the fields to inspect
 * @return a set holding one {@code FieldValue} per field that had terms; each
 *         term is wrapped in a {@code FieldValueItem} tagged
 *         {@code FieldValueOriginEnum.TERM_VECTOR}
 * @throws IOException propagated from the index reader
 */
public Set<FieldValue> getTermsVectorFields(int docId, Set<String> fieldNameSet) throws IOException {
	Set<FieldValue> result = new HashSet<FieldValue>();
	for (String fieldName : fieldNameSet) {
		TermFreqVector vector = indexReader.getTermFreqVector(docId, fieldName);
		if (vector == null)
			continue;
		String[] vectorTerms = vector.getTerms();
		if (vectorTerms == null)
			continue;
		FieldValueItem[] items = new FieldValueItem[vectorTerms.length];
		for (int idx = 0; idx < vectorTerms.length; idx++)
			items[idx] = new FieldValueItem(FieldValueOriginEnum.TERM_VECTOR, vectorTerms[idx]);
		result.add(new FieldValue(fieldName, items));
	}
	return result;
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:18,代码来源:ReaderLocal.java


示例3: populate

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Appends one {@code IndexDocumentResult} per document of {@code docArray}
 * to the supplied list.
 * <p>
 * For every schema field an {@code IndexField} is created from the field
 * name, the stored value looked up by that name, and the list of index
 * terms. The term list stays {@code null} unless the field passes
 * {@code checkIndexed(Indexed.YES)}; it is then built from the reader
 * directly when the field's term vector setting is {@code TermVector.NO},
 * and from the document's term frequency vector otherwise.
 *
 * @param indexDocuments the list receiving the results
 * @throws IOException        propagated from the reader
 * @throws SearchLibException propagated from the reader
 */
@Override
public void populate(List<IndexDocumentResult> indexDocuments) throws IOException, SearchLibException {
	SchemaFieldList fields = request.getConfig().getSchema().getFieldList();
	for (int docId : docArray) {
		IndexDocumentResult documentResult = new IndexDocumentResult(fields.size());
		Map<String, FieldValue> storedValues = reader.getDocumentStoredField(docId);
		for (SchemaField field : fields) {
			String name = field.getName();
			List<IndexTerm> terms = null;
			if (field.checkIndexed(Indexed.YES)) {
				if (field.getTermVector() != TermVector.NO) {
					terms = IndexTerm.toList(reader.getTermFreqVector(docId, name));
				} else {
					terms = IndexTerm.toList(reader, name, docId);
				}
			}
			documentResult.add(new IndexField(name, storedValues.get(name), terms));
		}
		indexDocuments.add(documentResult);
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:24,代码来源:ResultDocuments.java


示例4: buildCategoryVectors

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Accumulates, per category, the "subject" term frequencies of every
 * non-deleted document in the book index into {@code categoryMap}.
 * <p>
 * A category's vector map is created on first sight of the category, even if
 * the document carries no "subject" term vector. The index reader is always
 * closed, including when an exception interrupts the scan.
 *
 * @throws IOException if the index cannot be opened, read or closed
 */
private void buildCategoryVectors() throws IOException {
  IndexReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory());
  try {
    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
      if (reader.isDeleted(i)) {
        continue;
      }

      Document doc = reader.document(i);
      String category = doc.get("category");

      Map vectorMap = (Map) categoryMap.get(category);
      if (vectorMap == null) {
        vectorMap = new TreeMap();
        categoryMap.put(category, vectorMap);
      }

      TermFreqVector termFreqVector =
          reader.getTermFreqVector(i, "subject");

      // getTermFreqVector returns null when the document stores no term
      // vector for the field; such documents contribute nothing.
      if (termFreqVector != null) {
        addTermFreqToMap(vectorMap, termFreqVector);
      }
    }
  } finally {
    // Release the index files even when reading fails part-way through.
    reader.close();
  }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:24,代码来源:CategorizerTest.java


示例5: addTermFreqToMap

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Adds the frequencies of a term vector to the running totals in
 * {@code vectorMap} (term text mapped to an {@code Integer} total).
 * <p>
 * A {@code null} vector, or a vector exposing no terms or frequencies, is
 * ignored so that documents without a stored term vector do not abort the
 * caller's scan.
 *
 * @param vectorMap      mutable map of term to accumulated frequency
 * @param termFreqVector the vector to merge in; may be {@code null}
 */
private void addTermFreqToMap(Map vectorMap,
                              TermFreqVector termFreqVector) {
  if (termFreqVector == null) {
    return;
  }
  String[] terms = termFreqVector.getTerms();
  int[] freqs = termFreqVector.getTermFrequencies();
  if (terms == null || freqs == null) {
    return;
  }

  for (int i = 0; i < terms.length; i++) {
    String term = terms[i];

    // Single lookup: an absent entry counts as a total of zero.
    Integer previous = (Integer) vectorMap.get(term);
    int total = (previous == null) ? freqs[i] : previous.intValue() + freqs[i];
    vectorMap.put(term, Integer.valueOf(total));
  }
}
 
开发者ID:xuzhikethinker,项目名称:t4f-data,代码行数:18,代码来源:CategorizerTest.java


示例6: putTermFreqVectors

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Appends to {@code termFreqVectors} the term frequency vector of
 * {@code field} for each document id, in the order of {@code docIds}.
 * <p>
 * Whatever the index reader returns for a document is added as-is, so the
 * collection receives exactly one entry per document id. Nothing happens
 * when the collection or the id array is {@code null}, or the array is empty.
 *
 * @param docIds          ids of the documents to read
 * @param field           name of the field whose vector is read
 * @param termFreqVectors collection receiving the vectors
 * @throws IOException propagated from the index reader
 */
public void putTermFreqVectors(final int[] docIds, final String field,
		final Collection<TermFreqVector> termFreqVectors) throws IOException {
	final boolean hasWork = termFreqVectors != null && docIds != null && docIds.length != 0;
	if (!hasWork)
		return;
	for (int idx = 0; idx < docIds.length; idx++) {
		final TermFreqVector vector = indexReader.getTermFreqVector(docIds[idx], field);
		termFreqVectors.add(vector);
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:8,代码来源:ReaderLocal.java


示例7: putTermVectors

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Appends to {@code termVectors} the terms array of {@code field} for each
 * document id, in the order of {@code docIds}.
 * <p>
 * A document without a term vector for the field contributes a {@code null}
 * entry instead of raising a {@link NullPointerException}, so entries stay
 * positionally aligned with {@code docIds}. The target collection must
 * therefore accept {@code null} elements.
 * <p>
 * Nothing happens when any argument is {@code null} or {@code docIds} is
 * empty.
 *
 * @param docIds      ids of the documents to read
 * @param field       name of the field whose vector is read
 * @param termVectors collection receiving one terms array (or {@code null})
 *                    per document id
 * @throws IOException propagated from the index reader
 */
@Override
public void putTermVectors(int[] docIds, String field, Collection<String[]> termVectors) throws IOException {
	if (docIds == null || docIds.length == 0 || field == null || termVectors == null)
		return;
	List<TermFreqVector> termFreqVectors = new ArrayList<TermFreqVector>(docIds.length);
	putTermFreqVectors(docIds, field, termFreqVectors);
	for (TermFreqVector termFreqVector : termFreqVectors) {
		// The reader yields null when no term vector is stored for the field.
		termVectors.add(termFreqVector == null ? null : termFreqVector.getTerms());
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:10,代码来源:ReaderLocal.java


示例8: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Fetches a term frequency vector through a temporarily acquired
 * {@code ReaderLocal}.
 * <p>
 * {@code checkOnline(true)} is invoked first; the acquired reader is handed
 * back through {@code release} whether or not the lookup succeeds.
 *
 * @param docId the document to look up
 * @param field the field whose vector is requested
 * @return whatever the acquired reader returns for that document and field
 * @throws IOException        propagated from the underlying calls
 * @throws SearchLibException propagated from the underlying calls
 */
@Override
public final TermFreqVector getTermFreqVector(final int docId, final String field)
		throws IOException, SearchLibException {
	checkOnline(true);
	final ReaderLocal localReader = acquire();
	try {
		final TermFreqVector vector = localReader.getTermFreqVector(docId, field);
		return vector;
	} finally {
		release(localReader);
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:12,代码来源:IndexSingle.java


示例9: getTermPositionVector

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Obtains a term position vector for a document field, building one from
 * the field values when the index does not provide it.
 * <p>
 * If the vector returned by {@code readerInterface} is already a
 * {@code TermPositionVector} it is returned directly. Otherwise, when an
 * analyzer is available, each non-null value is analyzed into token terms
 * and fed to a {@code SnippetTermPositionVector}; the character offset
 * advances by the value's length plus one after each value. The vector is
 * compiled before being returned.
 *
 * @param terms           terms handed to the {@code SnippetTermPositionVector}
 * @param readerInterface reader queried for an existing vector
 * @param docId           the document to look up
 * @param field           the field to look up
 * @param values          field values analyzed when no position vector exists
 * @param analyzer        analyzer producing the token terms; when {@code null}
 *                        and no position vector exists, {@code null} is returned
 * @param timer           not used by this method
 * @return a term position vector, or {@code null} as described above
 */
private static final TermPositionVector getTermPositionVector(
		final String[] terms, final ReaderInterface readerInterface,
		final int docId, final String field, List<FieldValueItem> values,
		CompiledAnalyzer analyzer, Timer timer) throws IOException,
		SearchLibException, ParseException, SyntaxError {
	TermFreqVector indexedVector = readerInterface.getTermFreqVector(docId, field);
	// instanceof is false for null, so a missing vector falls through.
	if (indexedVector instanceof TermPositionVector)
		return (TermPositionVector) indexedVector;
	if (analyzer == null)
		return null;
	SnippetTermPositionVector builtVector = new SnippetTermPositionVector(field, terms);
	List<TokenTerm> tokens = new ArrayList<TokenTerm>();
	int nextPosition = 0;
	int nextCharOffset = 0;
	for (FieldValueItem item : values) {
		if (item.value != null) {
			analyzer.populate(item.value, tokens);
			nextPosition = builtVector.addCollection(tokens, nextCharOffset, nextPosition);
			nextCharOffset += item.value.length() + 1;
			tokens.clear();
		}
	}
	builtVector.compile();
	return builtVector;
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:30,代码来源:SnippetVectors.java


示例10: toList

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Converts a term vector into a list of {@code IndexTerm}.
 * <p>
 * Vectors that are {@code TermPositionVector} instances are converted by
 * {@code toListPosition}; all others by {@code toListFreq}.
 *
 * @param termVector the vector to convert; may be {@code null}
 * @return the populated list, or {@code null} when the vector is
 *         {@code null} or exposes no terms array
 */
public final static List<IndexTerm> toList(TermFreqVector termVector) {
	if (termVector == null) {
		return null;
	}
	final String[] vectorTerms = termVector.getTerms();
	if (vectorTerms == null) {
		return null;
	}
	final int[] vectorFreqs = termVector.getTermFrequencies();
	final List<IndexTerm> result = new ArrayList<IndexTerm>(vectorTerms.length);
	if (!(termVector instanceof TermPositionVector)) {
		toListFreq(termVector, vectorTerms, vectorFreqs, result);
	} else {
		toListPosition((TermPositionVector) termVector, vectorTerms, vectorFreqs, result);
	}
	return result;
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:15,代码来源:IndexDocumentResult.java


示例11: toListFreq

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Appends one {@code IndexTerm} per term to {@code indexTerms}, pairing each
 * term with the frequency at the same index and passing {@code null} for
 * the two remaining constructor arguments.
 *
 * @param termVector  not read by this method
 * @param terms       the term texts
 * @param frequencies the frequencies, indexed like {@code terms}
 * @param indexTerms  the list receiving the new entries
 */
private final static void toListFreq(TermFreqVector termVector, String[] terms, int[] frequencies,
		List<IndexTerm> indexTerms) {
	for (int idx = 0; idx < terms.length; idx++) {
		indexTerms.add(new IndexTerm(terms[idx], frequencies[idx], null, null));
	}
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:10,代码来源:IndexDocumentResult.java


示例12: getCosineSimilarityMatrix

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Indexes each sentence as its own document in an in-memory Lucene index and
 * returns one normalized {@code DocVector} per sentence.
 * <p>
 * Each vector entry is the term's frequency in the sentence multiplied by
 * the value of {@code getIDF} for that term. Stop words are loaded from
 * {@code lib/stoplists/en.txt}. Sentences for which the index holds no term
 * vector keep a freshly constructed, non-normalized {@code DocVector}.
 * <p>
 * Despite the method name, the result is the array of document vectors, not
 * a matrix of pairwise similarities.
 *
 * @param fileSentences the sentences to vectorize, one document each
 * @return an array with one {@code DocVector} per input sentence, in order
 * @throws IOException if the stop list cannot be read or the index fails
 */
public static DocVector[] getCosineSimilarityMatrix(List<String> fileSentences) throws IOException{
	RAMDirectory ramDir = new RAMDirectory();
	try {
		// The analyzer loads the stop words eagerly in its constructor, so the
		// file can be closed right away (closing an already-closed reader is a no-op).
		Analyzer analyzer;
		FileReader fr = new FileReader(new File("lib/stoplists/en.txt"));
		try {
			analyzer = new StopAnalyzer(Version.LUCENE_36, fr);
		} finally {
			fr.close();
		}

		// Index every sentence as a separate document with term vectors enabled.
		IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_36, analyzer));
		try {
			for (String s : fileSentences) {
				Document doc1 = new Document();
				doc1.add(new Field("contents", new StringReader(s), TermVector.YES));
				writer.addDocument(doc1);
			}
		} finally {
			writer.close();
		}

		DocVector[] docs = new DocVector[fileSentences.size()];
		IndexReader RAMreader = IndexReader.open(ramDir);
		try {
			// Assign a stable position to every term of the "contents" field.
			Map<String,Integer> terms = new HashMap<String,Integer>();
			TermEnum termEnum = RAMreader.terms(new Term("contents"));
			try {
				int pos = 0;
				// terms(Term) is already positioned on the first matching term, so
				// the current term must be read BEFORE the first call to next();
				// a plain while (next()) loop would silently drop the first term.
				do {
					Term term = termEnum.term();
					if (term == null || !"contents".equals(term.field()))
						break;
					terms.put(term.text(), pos++);
				} while (termEnum.next());
			} finally {
				termEnum.close();
			}

			// Build one weighted, normalized vector per document.
			for (int i = 0; i < fileSentences.size(); i++) {
				TermFreqVector[] tfvs = RAMreader.getTermFreqVectors(i);
				docs[i] = new DocVector(terms);
				if (tfvs == null)
					continue;
				for (TermFreqVector tfv : tfvs) {
					String[] termTexts = tfv.getTerms();
					int[] termFreqs = tfv.getTermFrequencies();
					for (int j = 0; j < termTexts.length; j++) {
						double idfValue = getIDF(RAMreader, termTexts[j]);
						double tfIdfValue = termFreqs[j] * idfValue;
						docs[i].setEntry(termTexts[j], tfIdfValue);
					}
				}
				docs[i].normalize();
			}
		} finally {
			RAMreader.close();
		}
		return docs;
	} finally {
		ramDir.close();
	}
}
 
开发者ID:siddBanPsu,项目名称:WikiKreator,代码行数:71,代码来源:CosineDocumentSimilarity.java


示例13: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Forwards the lookup to the wrapped reader.
 *
 * @param docNumber the document to look up
 * @param field     the field whose vector is requested
 * @return whatever {@code wrapped} returns for that document and field
 * @throws IOException propagated from the wrapped reader
 */
public TermFreqVector getTermFreqVector(int docNumber, String field)
  throws IOException
{
  final TermFreqVector delegated = wrapped.getTermFreqVector(docNumber, field);
  return delegated;
}
 
开发者ID:CDLUC3,项目名称:dash-xtf,代码行数:6,代码来源:LimIndexReader.java


示例14: getTermFreqVectors

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Forwards the lookup to the wrapped reader.
 *
 * @param docNumber the document to look up
 * @return whatever {@code wrapped} returns for that document
 * @throws IOException propagated from the wrapped reader
 */
public TermFreqVector[] getTermFreqVectors(int docNumber)
  throws IOException
{
  final TermFreqVector[] delegated = wrapped.getTermFreqVectors(docNumber);
  return delegated;
}
 
开发者ID:CDLUC3,项目名称:dash-xtf,代码行数:6,代码来源:LimIndexReader.java


示例15: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Forwards the lookup to the underlying {@code indexReader}.
 *
 * @param docId the document to look up
 * @param field the field whose vector is requested
 * @return whatever {@code indexReader} returns for that document and field
 * @throws IOException propagated from the index reader
 */
@Override
public TermFreqVector getTermFreqVector(final int docId, final String field) throws IOException {
	final TermFreqVector vector = indexReader.getTermFreqVector(docId, field);
	return vector;
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:5,代码来源:ReaderLocal.java


示例16: run

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Computes the similarity between two texts from their term-frequency vectors.
 * <p>
 * Both texts are stored in the instance array {@code s}, indexed into an
 * in-memory Lucene index with term vectors enabled, turned into normalized
 * {@code DocVector}s of raw term frequencies, and compared through
 * {@code getCosineSimilarity}.
 * <p>
 * A text for which the index holds no term vector (for example an empty
 * string) keeps a freshly constructed, non-normalized {@code DocVector}
 * instead of triggering a {@link NullPointerException}.
 *
 * @param doc1 the first text
 * @param doc2 the second text
 * @return the value of {@code getCosineSimilarity} for the two vectors
 * @throws IOException if the in-memory index cannot be written or read
 */
public double run(String doc1, String doc2) throws IOException
{
    // index strings
    s[0] = doc1;
    s[1] = doc2;
    Directory index = new RAMDirectory();
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter writer = new IndexWriter(index, config);
        try {
            for (String si : s) {
                Document doc = new Document();
                doc.add(new Field("content", si, Field.Store.YES, Field.Index.ANALYZED,
                        TermVector.WITH_POSITIONS_OFFSETS));
                writer.addDocument(doc);
            }
        } finally {
            writer.close();
        }

        // read the index
        IndexReader reader = IndexReader.open(index);
        try {
            // assign a stable position to every term of the "content" field
            Map<String,Integer> terms = new HashMap<String,Integer>();
            TermEnum termEnum = reader.terms(new Term("content"));
            try {
                int pos = 0;
                // terms(Term) is already positioned on the first matching term,
                // so the current term must be read BEFORE the first next();
                // a plain while (next()) loop would silently drop the first term.
                do {
                    Term term = termEnum.term();
                    if (term == null || !"content".equals(term.field())) {
                        break;
                    }
                    terms.put(term.text(), pos++);
                } while (termEnum.next());
            } finally {
                termEnum.close();
            }

            // build one normalized term-frequency vector per document
            DocVector[] docs = new DocVector[s.length];
            for (int i = 0; i < s.length; i++) {
                TermFreqVector[] tfvs = reader.getTermFreqVectors(i);
                docs[i] = new DocVector(terms);
                if (tfvs == null) {
                    // no term vector stored for this document
                    continue;
                }
                for (TermFreqVector tfv : tfvs) {
                    String[] termTexts = tfv.getTerms();
                    int[] termFreqs = tfv.getTermFrequencies();
                    for (int j = 0; j < termTexts.length; j++) {
                        docs[i].setEntry(termTexts[j], termFreqs[j]);
                    }
                }
                docs[i].normalize();
            }

            // similarity between doc[0] and doc[1]
            return getCosineSimilarity(docs[0], docs[1]);
        } finally {
            reader.close();
        }
    } finally {
        index.close();
    }
}
 
开发者ID:amark-india,项目名称:eventspotter,代码行数:78,代码来源:CosineSimilarity.java


示例17: getTermFreqVector

import org.apache.lucene.index.TermFreqVector; //导入依赖的package包/类
/**
 * Looks up a term frequency vector by document id and field name.
 *
 * @param docId the id of the document to look up
 * @param field the name of the field to look up
 * @return the vector supplied by the implementation; NOTE(review):
 *         implementations presumably return {@code null} when no term
 *         vector is stored for the field - confirm before dereferencing
 * @throws IOException        declared for implementations to raise
 * @throws SearchLibException declared for implementations to raise
 */
TermFreqVector getTermFreqVector(final int docId, final String field) throws IOException, SearchLibException; 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:2,代码来源:ReaderInterface.java



注:本文中的org.apache.lucene.index.TermFreqVector类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java RefreshListenerAdapter类代码示例发布时间:2022-05-23
下一篇:
Java StepPerformanceSnapShot类代码示例发布时间:2022-05-23
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap