本文整理汇总了Java中com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary类的典型用法代码示例。如果您正苦于以下问题:Java CoreStopWordDictionary类的具体用法?Java CoreStopWordDictionary怎么用?Java CoreStopWordDictionary使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CoreStopWordDictionary类属于com.hankcs.hanlp.dictionary.stopword包,在下文中一共展示了CoreStopWordDictionary类的16个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: convertSentenceListToDocument
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Converts a list of sentences into a "document": each sentence is segmented
 * into terms, and only notional words (terms not rejected by the core stop
 * word dictionary) are kept.
 *
 * @param sentenceList sentences to segment
 * @return one word list per input sentence, in the same order
 */
private static List<List<String>> convertSentenceListToDocument(List<String> sentenceList)
{
    List<List<String>> document = new ArrayList<List<String>>(sentenceList.size());
    for (String sentence : sentenceList)
    {
        List<String> words = new LinkedList<String>();
        for (Term term : StandardTokenizer.segment(sentence.toCharArray()))
        {
            // drop stop words; keep everything the dictionary says to include
            if (CoreStopWordDictionary.shouldInclude(term))
            {
                words.add(term.word);
            }
        }
        document.add(words);
    }
    return document;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:25,代码来源:TextRankSentence.java
示例2: seg2sentence
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Segments text sentence by sentence, then removes stop words from every
 * sentence in place.
 *
 * @param text text to segment
 * @return list of sentences, each a list of notional terms
 */
public static List<List<Term>> seg2sentence(String text)
{
    List<List<Term>> sentenceList = SEGMENT.seg2sentence(text);
    for (List<Term> sentence : sentenceList)
    {
        // iterator-based removal is required while traversing the list
        for (ListIterator<Term> it = sentence.listIterator(); it.hasNext(); )
        {
            if (!CoreStopWordDictionary.shouldInclude(it.next()))
            {
                it.remove();
            }
        }
    }
    return sentenceList;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:24,代码来源:NotionalTokenizer.java
示例3: testSegmentCorpus
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
// Segments every file of the Sogou text-classification corpus, drops stop
// words, and writes each result to D:\Doc\语料库\segmented\ as
// "<category>_<filename>". NOTE(review): hard-coded Windows paths — this
// test only runs on the original author's machine.
public void testSegmentCorpus() throws Exception
{
    File corpusRoot = new File("D:\\Doc\\语料库\\搜狗文本分类语料库精简版");
    for (File category : corpusRoot.listFiles())
    {
        if (!category.isDirectory())
        {
            continue;
        }
        for (File doc : category.listFiles())
        {
            System.out.println(doc.getAbsolutePath());
            StringBuilder output = new StringBuilder();
            for (Term term : HanLP.segment(IOUtil.readTxt(doc.getAbsolutePath())))
            {
                // keep notional words only, space-separated
                if (CoreStopWordDictionary.shouldInclude(term))
                {
                    output.append(term.word).append(" ");
                }
            }
            IOUtil.saveTxt("D:\\Doc\\语料库\\segmented\\" + category.getName() + "_" + doc.getName(), output.toString());
        }
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:25,代码来源:TestLDA.java
示例4: StandardSegment
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Standard segmentation of a piece of text.
 *
 * @param content        text to segment
 * @param filterStopWord whether to strip stop words from the result
 * @return the term list (filtered in place when requested)
 */
public static List<Term> StandardSegment(String content, boolean filterStopWord) {
    List<Term> terms = StandardTokenizer.segment(content);
    if (filterStopWord) {
        // removes stop words from the list in place
        CoreStopWordDictionary.apply(terms);
    }
    return terms;
}
开发者ID:jsksxs360,项目名称:AHANLP,代码行数:13,代码来源:Segment.java
示例5: segment
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Segments text and filters out stop words.
 *
 * @param text text to segment
 * @return term list containing only notional words
 */
public static List<Term> segment(char[] text)
{
    List<Term> terms = SEGMENT.seg(text);
    // iterator-based removal so the list can shrink while we traverse it
    for (ListIterator<Term> it = terms.listIterator(); it.hasNext(); )
    {
        if (!CoreStopWordDictionary.shouldInclude(it.next()))
        {
            it.remove();
        }
    }
    return terms;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:21,代码来源:NotionalTokenizer.java
示例6: main
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
public static void main(String[] args)
{
    String text = "小区居民有的反对喂养流浪猫,而有的居民却赞成喂养这些小宝贝";
    // The stop word dictionary can be modified at runtime
    CoreStopWordDictionary.add("居民");
    System.out.println(NotionalTokenizer.segment(text));
    CoreStopWordDictionary.remove("居民");
    System.out.println(NotionalTokenizer.segment(text));
    // Stop word filtering can be applied to the output of any tokenizer
    List<Term> termList = BasicTokenizer.segment(text);
    System.out.println(termList);
    CoreStopWordDictionary.apply(termList);
    System.out.println(termList);
    // A custom filtering policy can also be installed globally
    CoreStopWordDictionary.FILTER = new Filter()
    {
        @Override
        public boolean shouldInclude(Term term)
        {
            // Keep only terms of nature "nz" that are not in the stop word
            // dictionary; every other term is filtered out.
            switch (term.nature)
            {
                case nz:
                    return !CoreStopWordDictionary.contains(term.word);
            }
            return false;
        }
    };
    System.out.println(NotionalTokenizer.segment(text));
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:30,代码来源:DemoStopWord.java
示例7: getTopSentenceList
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * One-call interface: extracts the top-ranked key sentences of a document.
 *
 * @param document target document
 * @param size     number of key sentences wanted
 * @return the key sentences, highest-ranked first
 */
public static List<String> getTopSentenceList(String document, int size)
{
    List<String> sentenceList = spiltSentence(document);
    // Build the word-list representation of each sentence, stop words removed.
    List<List<String>> docs = new ArrayList<List<String>>();
    for (String sentence : sentenceList)
    {
        List<String> wordList = new LinkedList<String>();
        for (Term term : StandardTokenizer.segment(sentence.toCharArray()))
        {
            if (CoreStopWordDictionary.shouldInclude(term))
            {
                wordList.add(term.word);
            }
        }
        docs.add(wordList);
    }
    // Rank sentences with TextRank, then map indices back to the originals.
    int[] topSentence = new TextRankSentence(docs).getTopSentence(size);
    List<String> resultList = new LinkedList<String>();
    for (int index : topSentence)
    {
        resultList.add(sentenceList.get(index));
    }
    return resultList;
}
开发者ID:ml-distribution,项目名称:HanLP,代码行数:34,代码来源:TextRankSentence.java
示例8: main
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
public static void main(String[] args)
{
    String text = "小区居民有的反对喂养流浪猫,而有的居民却赞成喂养这些小宝贝";
    // The stop word dictionary can be modified at runtime
    CoreStopWordDictionary.add("居民");
    System.out.println(NotionalTokenizer.segment(text));
    CoreStopWordDictionary.remove("居民");
    System.out.println(NotionalTokenizer.segment(text));
    // Stop word filtering can be applied to the output of any tokenizer
    List<Term> termList = BasicTokenizer.segment(text);
    System.out.println(termList);
    CoreStopWordDictionary.apply(termList);
    System.out.println(termList);
}
开发者ID:ml-distribution,项目名称:HanLP,代码行数:15,代码来源:DemoStopWord.java
示例9: testContains
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
// Looks up a multi-character stop phrase with HanLP debug logging enabled,
// and prints whether the core dictionary contains it.
public void testContains() throws Exception
{
    HanLP.Config.enableDebug();
    boolean isStopWord = CoreStopWordDictionary.contains("这就是说");
    System.out.println(isStopWord);
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:6,代码来源:TestStopWordDictionary.java
示例10: testContainsSomeWords
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
// Asserts that "可以" is present in the core stop word dictionary.
public void testContainsSomeWords() throws Exception
{
    // assertTrue states the intent directly; assertEquals(true, ...) is an
    // anti-idiom and yields a worse failure message.
    assertTrue(CoreStopWordDictionary.contains("可以"));
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:5,代码来源:TestStopWordDictionary.java
示例11: getSummary
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * One-call interface: produces a summary of a document.
 *
 * @param document   target document
 * @param max_length maximum length of the summary
 * @return the summary text; an empty string when the document fails
 *         validation or the assembled summary is shorter than 15 characters
 */
public static String getSummary(String document, int max_length)
{
    if (!validate_document(document, max_length)) {
        return "";
    }
    List<String> sentenceList = spiltSentence(document);
    int sentence_count = sentenceList.size();
    // Segment every sentence into its notional-word list.
    List<List<String>> docs = new ArrayList<List<String>>();
    for (String sentence : sentenceList)
    {
        List<Term> termList = StandardTokenizer.segment(sentence.toCharArray());
        List<String> wordList = new LinkedList<String>();
        for (Term term : termList)
        {
            if (CoreStopWordDictionary.shouldInclude(term))
            {
                wordList.add(term.word);
            }
        }
        docs.add(wordList);
    }
    // Rank all sentences with TextRank, restore document order, then trim
    // the selection to the requested length.
    TextRankSentence textRank = new TextRankSentence(docs);
    int[] topSentence = textRank.getTopSentence(sentence_count);
    List<String> resultList = new LinkedList<String>();
    for (int i : topSentence)
    {
        resultList.add(sentenceList.get(i));
    }
    resultList = permutation(resultList, sentenceList);
    resultList = pick_sentences(resultList, max_length);
    // FIX: use StringBuilder instead of repeated String concatenation in a
    // loop (the original `summary += temp` is accidentally O(n^2)).
    StringBuilder summary = new StringBuilder();
    for (String sentence : resultList)
    {
        summary.append(sentence);
    }
    // Discard degenerate summaries shorter than 15 characters.
    return summary.length() < 15 ? "" : summary.toString();
}
开发者ID:furaoing,项目名称:HanLP-1.2.4-Taikor,代码行数:53,代码来源:TextRankSentence.java
示例12: NLPSegment
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * NLP segmentation: performs full named-entity recognition and
 * part-of-speech tagging.
 *
 * @param content        text to segment
 * @param filterStopWord whether to strip stop words from the result
 * @return the term list (filtered in place when requested)
 */
public static List<Term> NLPSegment(String content, boolean filterStopWord) {
    List<Term> terms = NLPTokenizer.segment(content);
    if (filterStopWord) {
        // removes stop words from the list in place
        CoreStopWordDictionary.apply(terms);
    }
    return terms;
}
开发者ID:jsksxs360,项目名称:AHANLP,代码行数:14,代码来源:Segment.java
示例13: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Judges whether a term should be kept for computation (i.e. is not a stop word).
 *
 * @param term the term to be judged
 * @return {@code false} if the term is a stop word, {@code true} otherwise
 */
public static boolean shouldInclude(Term term)
{
    return CoreStopWordDictionary.shouldInclude(term);
}
开发者ID:WuLC,项目名称:KeywordExtraction,代码行数:10,代码来源:TFIDF.java
示例14: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Judges whether a term should be kept for computation (i.e. is not a stop word).
 *
 * @param term the term to be judged
 * @return {@code false} if the term is a stop word, {@code true} otherwise
 */
public static boolean shouldInclude(Term term)
{
    return CoreStopWordDictionary.shouldInclude(term);
}
开发者ID:WuLC,项目名称:KeywordExtraction,代码行数:10,代码来源:TextRank.java
示例15: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Whether this term should be included in the computation; per the original
 * author, terms whose part of speech is a noun, verb, adverb, or adjective
 * qualify (delegates to the core stop word dictionary's policy).
 *
 * @param term the term to be judged
 * @return whether it should be included
 */
public boolean shouldInclude(Term term)
{
    return CoreStopWordDictionary.shouldInclude(term);
}
开发者ID:hankcs,项目名称:TextRank,代码行数:10,代码来源:TextRankKeyword.java
示例16: shouldInclude
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary; //导入依赖的package包/类
/**
 * Whether this term should be included in the computation; per the original
 * author, terms whose part of speech is a noun, verb, adverb, or adjective
 * qualify (delegates to the core stop word dictionary's policy).
 *
 * @param term the term to be judged
 * @return whether it should be included
 */
public static boolean shouldInclude(Term term)
{
    return CoreStopWordDictionary.shouldInclude(term);
}
开发者ID:hankcs,项目名称:TextRank,代码行数:10,代码来源:TextRankSummary.java
注:本文中的com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论