本文整理汇总了Java中com.hankcs.hanlp.corpus.tag.Nature类的典型用法代码示例。如果您正苦于以下问题:Java Nature类的具体用法?Java Nature怎么用?Java Nature使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Nature类属于com.hankcs.hanlp.corpus.tag包,在下文中一共展示了Nature类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: roleTag
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Prepares every sentence for tagging: each word is passed through
 * {@code Precompiler.compile}, then the sentence is wrapped with a begin
 * sentinel at the front and an end sentinel at the back.
 *
 * @param sentenceList sentences to process; each inner list is cast to
 *                     {@link LinkedList}, so any other list implementation
 *                     fails with a {@link ClassCastException}
 */
@Override
protected void roleTag(List<List<IWord>> sentenceList)
{
    logger.info("开始标注");
    int progress = 0;
    for (List<IWord> sentence : sentenceList)
    {
        ++progress;
        logger.info(progress + " / " + sentenceList.size());
        // Compile every word into its equivalent string form.
        for (IWord current : sentence)
        {
            Precompiler.compile(current);
        }
        LinkedList<IWord> linked = (LinkedList<IWord>) sentence;
        Word beginMarker = new Word(Predefine.TAG_BIGIN, Nature.begin.toString());
        linked.addFirst(beginMarker);
        Word endMarker = new Word(Predefine.TAG_END, Nature.end.toString());
        linked.addLast(endMarker);
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:18,代码来源:NatureDictionaryMaker.java
示例2: loadDictionary
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Loads the entries read from a {@link BufferedReader} into a dictionary map.
 * <p>
 * Every line is split on single whitespace characters. The first field is the
 * word; the remaining fields are consumed as (nature name, frequency) pairs.
 * A nature name that is not a {@link Nature} constant or a frequency that is
 * not an integer propagates the unchecked exception thrown by
 * {@code Enum.valueOf} / {@code Integer.parseInt}.
 * <p>
 * The reader is closed before this method returns, whether loading succeeded
 * or failed.
 *
 * @param br      source reader; always closed by this method
 * @param storage map that receives one attribute per line, keyed by the word
 * @throws IOException if reading from or closing the reader fails
 */
public static void loadDictionary(BufferedReader br, TreeMap<String, CoreDictionary.Attribute> storage) throws IOException
{
    try
    {
        String line;
        while ((line = br.readLine()) != null)
        {
            String[] param = line.split("\\s");
            // Field 0 is the word itself; the rest come in nature/frequency pairs.
            int natureCount = (param.length - 1) / 2;
            CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            for (int i = 0; i < natureCount; ++i)
            {
                attribute.nature[i] = Enum.valueOf(Nature.class, param[1 + 2 * i]);
                attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]);
                attribute.totalFrequency += attribute.frequency[i];
            }
            storage.put(param[0], attribute);
        }
    }
    finally
    {
        // Release the reader even when a line fails to parse or a read fails.
        br.close();
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:25,代码来源:IOUtil.java
示例3: toVertexList
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Converts a list of terms into a list of vertices.
 * <p>
 * Terms found in {@code CoreDictionary} take the first nature of their
 * dictionary attribute. Terms not found get a fresh attribute instead:
 * {@code Nature.x} when the word is blank after trimming, {@code Nature.nz}
 * otherwise (the term's own nature is left untouched in that case).
 *
 * @param termList    terms to convert
 * @param appendStart whether to put {@code Vertex.B} in front of the result
 * @return one vertex per term, optionally preceded by {@code Vertex.B}
 */
private static List<Vertex> toVertexList(List<Term> termList, boolean appendStart)
{
    ArrayList<Vertex> result = new ArrayList<Vertex>(termList.size() + 1);
    if (appendStart)
    {
        result.add(Vertex.B);
    }
    for (Term term : termList)
    {
        CoreDictionary.Attribute attr = CoreDictionary.get(term.word);
        if (attr != null)
        {
            term.nature = attr.nature[0];
        }
        else if (term.word.trim().length() == 0)
        {
            attr = new CoreDictionary.Attribute(Nature.x);
        }
        else
        {
            attr = new CoreDictionary.Attribute(Nature.nz);
        }
        result.add(new Vertex(term.word, attr));
    }
    return result;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:20,代码来源:CRFSegment.java
示例4: confirmNature
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Locks this attribute's nature to {@code nature}; per the original note, this
 * overload is slower than the single-argument one.
 * <p>
 * For {@code m} the word is replaced by {@code Predefine.TAG_NUMBER}, for
 * {@code t} by {@code Predefine.TAG_TIME}; any other nature only logs a warning.
 *
 * @param nature     the nature to lock to
 * @param updateWord documented as "whether to update the precompiled string";
 *                   NOTE(review): the body never reads this flag
 * @return the result of {@code confirmNature(nature)}; documented as true when
 *         the locked nature is in the nature list, false otherwise
 */
public boolean confirmNature(Nature nature, boolean updateWord) {
    boolean handled = true;
    switch (nature) {
        case m:
            word = Predefine.TAG_NUMBER;
            break;
        case t:
            word = Predefine.TAG_TIME;
            break;
        default:
            handled = false;
            break;
    }
    if (!handled) {
        logger.warning("没有与" + nature + "对应的case");
    }
    return confirmNature(nature);
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:26,代码来源:Vertex.java
示例5: segment
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Segments text while keeping every match of {@code WEB_URL} as one term.
 * <p>
 * The text between matches is handed to {@code SEGMENT.seg}; each match becomes
 * a single term tagged {@code Nature.xu}.
 *
 * @param text the text to segment
 * @return the resulting terms in text order
 */
public static List<Term> segment(String text)
{
    List<Term> result = new LinkedList<Term>();
    Matcher urlMatcher = WEB_URL.matcher(text);
    int cursor = 0;
    while (urlMatcher.find())
    {
        // Ordinary text preceding the match, then the match itself as one term.
        String preceding = text.substring(cursor, urlMatcher.start());
        result.addAll(SEGMENT.seg(preceding));
        result.add(new Term(urlMatcher.group(), Nature.xu));
        cursor = urlMatcher.end();
    }
    if (cursor < text.length())
    {
        String tail = text.substring(cursor);
        result.addAll(SEGMENT.seg(tail));
    }
    return result;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:23,代码来源:URLTokenizer.java
示例6: main
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Demo: segments a sample text with {@code URLTokenizer}, prints the full term
 * list, then prints the word of every term tagged {@code Nature.xu}.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    String text =
        "HanLP的项目地址是https://github.com/hankcs/HanLP," +
        "发布地址是https://github.com/hankcs/HanLP/releases," +
        "我有时候会在www.hankcs.com上面发布一些消息," +
        "我的微博是http://weibo.com/hankcs/,会同步推送hankcs.com的新闻。" +
        "听说.中国域名开放申请了,但我并没有申请hankcs.中国,因为穷……";
    List<Term> terms = URLTokenizer.segment(text);
    System.out.println(terms);
    for (Term candidate : terms)
    {
        if (candidate.nature != Nature.xu)
        {
            continue;
        }
        System.out.println(candidate.word);
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:17,代码来源:DemoURLRecognition.java
示例7: testRemoveNotNS
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * The place-name dictionary at data/dictionary/custom/全国地名大全.txt contains
 * many person names; this rewrites the file in place through a filter meant to
 * remove them.
 * <p>
 * The filter accepts an entry whose last character is in
 * {@code Predefine.POSTFIX_SINGLE}. Otherwise the entry is segmented with the
 * custom dictionary disabled, and it is printed and rejected when the result is
 * a single term tagged {@code Nature.nr}.
 * NOTE(review): presumably a {@code false} return from {@code onSave} drops the
 * entry from the saved file; confirm against {@code DictionaryMaker.Filter}.
 *
 * @throws Exception
 */
public void testRemoveNotNS() throws Exception
{
    final String path = "data/dictionary/custom/全国地名大全.txt";
    final Set<Character> suffixSet = new TreeSet<Character>();
    char[] suffixChars = Predefine.POSTFIX_SINGLE.toCharArray();
    for (int i = 0; i < suffixChars.length; ++i)
    {
        suffixSet.add(suffixChars[i]);
    }
    DictionaryMaker.load(path).saveTxtTo(path, new DictionaryMaker.Filter()
    {
        Segment segment = HanLP.newSegment().enableCustomDictionary(false);

        @Override
        public boolean onSave(Item item)
        {
            char lastChar = item.key.charAt(item.key.length() - 1);
            if (suffixSet.contains(lastChar))
            {
                return true;
            }
            List<Term> terms = segment.seg(item.key);
            boolean singlePersonName = terms.size() == 1 && terms.get(0).nature == Nature.nr;
            if (!singlePersonName)
            {
                return true;
            }
            System.out.println(item);
            return false;
        }
    });
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:30,代码来源:TestCustomDictionary.java
示例8: testBatch
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Walks every file under {@code FOLDER}, segments its content into sentences
 * with a fresh {@code DijkstraSegment}, and prints each sentence for which
 * {@code SentencesUtil.hasNature(sentence, Nature.nr)} is true.
 * A progress line is printed for every file.
 *
 * @throws Exception propagated from the file and segmentation helpers
 */
public void testBatch() throws Exception
{
    List<File> fileList = FolderWalker.open(FOLDER);
    int processed = 0;
    for (File file : fileList)
    {
        ++processed;
        System.out.println(processed + " / " + fileList.size() + " " + file.getName() + " ");
        String content = IOUtil.readTxt(file.getAbsolutePath());
        DijkstraSegment segment = new DijkstraSegment();
        for (List<Term> sentence : segment.seg2sentence(content))
        {
            if (!SentencesUtil.hasNature(sentence, Nature.nr))
            {
                continue;
            }
            System.out.println(sentence);
        }
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:21,代码来源:TestPersonRecognition.java
示例9: create
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Builds an attribute from a space-separated string of alternating nature
 * names and frequencies, e.g. {@code "nz 1 v 2"}.
 * <p>
 * Any exception raised while parsing (including a {@code null} argument) is
 * logged as a warning and reported by returning {@code null}.
 *
 * @param natureWithFrequency alternating nature names and integer frequencies
 * @return the parsed attribute, or {@code null} when parsing failed
 */
public static Attribute create(String natureWithFrequency)
{
    try
    {
        String[] tokens = natureWithFrequency.split(" ");
        int pairCount = tokens.length / 2;
        Attribute result = new Attribute(pairCount);
        // cursor points at the nature name of the current pair; the frequency follows it.
        for (int pair = 0, cursor = 0; pair < pairCount; ++pair, cursor += 2)
        {
            result.nature[pair] = Enum.valueOf(Nature.class, tokens[cursor]);
            result.frequency[pair] = Integer.parseInt(tokens[cursor + 1]);
            result.totalFrequency += result.frequency[pair];
        }
        return result;
    }
    catch (Exception e)
    {
        logger.warning("使用字符串" + natureWithFrequency + "创建词条属性失败!" + TextUtility.exceptionToString(e));
        return null;
    }
}
开发者ID:ml-distribution,项目名称:HanLP,代码行数:22,代码来源:CoreDictionary.java
示例10: roleTag
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Assigns a person-name role item to every vertex of a segmentation result.
 * <p>
 * A vertex tagged {@code Nature.nr} with total frequency at most 1000 and a
 * two-character real word gets the fixed item {@code (NR.X, NR.G)}. Every other
 * vertex is looked up in {@code PersonDictionary.dictionary}; on a miss it gets
 * {@code NR.A} with the total frequency of {@code NR.A} from the transform
 * matrix dictionary.
 *
 * @param pWordSegResult vertices of the coarse segmentation
 * @return one role item per vertex, in the same order
 */
public static List<EnumItem<NR>> roleTag(List<Vertex> pWordSegResult)
{
    List<EnumItem<NR>> tags = new LinkedList<EnumItem<NR>>();
    for (Vertex vertex : pWordSegResult)
    {
        // Some two-character names can actually be part of a longer three-character name.
        boolean rarePersonName = Nature.nr == vertex.getNature()
            && vertex.getAttribute().totalFrequency <= 1000;
        if (rarePersonName && vertex.realWord.length() == 2)
        {
            tags.add(new EnumItem<NR>(NR.X, NR.G));
        }
        else
        {
            EnumItem<NR> item = PersonDictionary.dictionary.get(vertex.realWord);
            if (item == null)
            {
                int defaultFrequency = PersonDictionary.transformMatrixDictionary.getTotalFrequency(NR.A);
                item = new EnumItem<NR>(NR.A, defaultFrequency);
            }
            tags.add(item);
        }
    }
    return tags;
}
开发者ID:ml-distribution,项目名称:HanLP,代码行数:24,代码来源:PersonRecognition.java
示例11: confirmNature
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Locks this attribute's nature to {@code nature}; per the original note, this
 * overload is slower than the single-argument one.
 * <p>
 * The word is swapped for {@code Predefine.TAG_NUMBER} when the nature is
 * {@code m} and for {@code Predefine.TAG_TIME} when it is {@code t}; for any
 * other nature a warning is logged and the word is left alone.
 *
 * @param nature     the nature to lock to
 * @param updateWord documented as "whether to update the precompiled string";
 *                   NOTE(review): the body never reads this flag
 * @return the result of {@code confirmNature(nature)}; documented as true when
 *         the locked nature is in the nature list, false otherwise
 */
public boolean confirmNature(Nature nature, boolean updateWord)
{
    boolean recognised = true;
    switch (nature)
    {
        case m:
            word = Predefine.TAG_NUMBER;
            break;
        case t:
            word = Predefine.TAG_TIME;
            break;
        default:
            recognised = false;
            break;
    }
    if (!recognised)
    {
        logger.warning("没有与" + nature + "对应的case");
    }
    return confirmNature(nature);
}
开发者ID:ml-distribution,项目名称:HanLP,代码行数:26,代码来源:Vertex.java
示例12: incrementToken
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Advances to the next term from {@code segment} that is not a key of
 * {@code filter}, and publishes it through the token attributes.
 * <p>
 * When Porter stemming is enabled, terms tagged {@code Nature.nx} are stemmed
 * before the filter check. The position increment of an emitted token is always
 * 1, because the counter is only bumped for the accepted term.
 *
 * @return {@code true} if a token was produced, {@code false} once
 *         {@code segment.next()} returns {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
final public boolean incrementToken() throws IOException {
    clearAttributes();
    int position = 0;
    Term term;
    for (;;) {
        term = segment.next();
        if (term == null) {
            return false;
        }
        if (enablePorterStemming && term.nature == Nature.nx) {
            term.word = stemmer.stem(term.word);
        }
        boolean filteredOut = filter != null && filter.containsKey(term.word);
        if (!filteredOut) {
            ++position;
            break;
        }
    }
    positionAttr.setPositionIncrement(position);
    termAtt.setEmpty().append(term.word);
    offsetAtt.setOffset(term.offset, term.offset + term.word.length());
    typeAtt.setType(term.nature == null ? "null" : term.nature.toString());
    return true;
}
开发者ID:hualongdata,项目名称:hanlp-ext,代码行数:35,代码来源:HanLPTokenizer.java
示例13: getWordsInPath
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Collects the words on the dependency path that starts at {@code word} and
 * follows {@code HEAD} links towards {@code CoNLLWord.ROOT}.
 *
 * @param word      starting word
 * @param maxReturn maximum number of words to collect; values below 1 yield an
 *                  empty list
 * @return terms built from each visited word's {@code LEMMA} and
 *         {@code POSTAG}, starting with {@code word}; the root is not included
 */
public static List<Term> getWordsInPath(CoNLLWord word, int maxReturn) {
    List<Term> path = new ArrayList<Term>();
    CoNLLWord current = word;
    for (int remaining = maxReturn; current != CoNLLWord.ROOT && remaining >= 1; --remaining) {
        path.add(new Term(current.LEMMA, Nature.fromString(current.POSTAG)));
        current = current.HEAD;
    }
    return path;
}
开发者ID:jsksxs360,项目名称:AHANLP,代码行数:17,代码来源:DependencyParser.java
示例14: loadDat
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Loads the double-array trie from its binary cache on disk.
 * <p>
 * The file read is {@code path + Predefine.BIN_EXT}. It starts with the number
 * of attributes; each attribute is stored as its total frequency, its nature
 * count, and then one (nature index, frequency) pair per nature. Nature indices
 * are resolved against {@code Nature.values()}.
 *
 * @param path dictionary path without the binary extension
 * @return {@code true} when the trie loaded and no bytes were left over;
 *         {@code false} when the file is missing, the trie fails to load,
 *         trailing bytes remain, or any exception occurs (logged as a warning)
 */
static boolean loadDat(String path)
{
    try
    {
        ByteArray bytes = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (bytes == null)
        {
            return false;
        }
        int entryCount = bytes.nextInt();
        CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[entryCount];
        final Nature[] natureByIndex = Nature.values();
        for (int entry = 0; entry < entryCount; ++entry)
        {
            // First comes the total frequency, then the number of natures.
            int total = bytes.nextInt();
            int natureCount = bytes.nextInt();
            CoreDictionary.Attribute current = new CoreDictionary.Attribute(natureCount);
            current.totalFrequency = total;
            for (int slot = 0; slot < natureCount; ++slot)
            {
                current.nature[slot] = natureByIndex[bytes.nextInt()];
                current.frequency[slot] = bytes.nextInt();
            }
            attributes[entry] = current;
        }
        return trie.load(bytes, attributes) && !bytes.hasMore();
    }
    catch (Exception e)
    {
        logger.warning("读取失败,问题发生在" + e);
        return false;
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:38,代码来源:CoreDictionary.java
示例15: Attribute
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Creates an attribute holding exactly one nature.
 *
 * @param nature    the single nature of the entry
 * @param frequency frequency of that nature; also stored as the total frequency
 */
public Attribute(Nature nature, int frequency)
{
    this(1);
    this.totalFrequency = frequency;
    this.frequency[0] = frequency;
    this.nature[0] = nature;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:8,代码来源:CoreDictionary.java
示例16: create
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Reads one attribute from a byte stream.
 * <p>
 * Consumes, in order: the total frequency, the nature count, and then one
 * (nature index, frequency) pair per nature.
 *
 * @param byteArray        stream positioned at the start of an attribute
 * @param natureIndexArray table used to turn a stored index into a nature
 * @return the attribute that was read
 */
public static Attribute create(ByteArray byteArray, Nature[] natureIndexArray)
{
    final int total = byteArray.nextInt();
    final int natureCount = byteArray.nextInt();
    Attribute result = new Attribute(natureCount);
    result.totalFrequency = total;
    int slot = 0;
    while (slot < natureCount)
    {
        result.nature[slot] = natureIndexArray[byteArray.nextInt()];
        result.frequency[slot] = byteArray.nextInt();
        ++slot;
    }
    return result;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:21,代码来源:CoreDictionary.java
示例17: getNatureFrequency
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Returns the frequency recorded for a nature given by its name.
 *
 * @param nature name of the nature; {@code null} or a name that is not a
 *               {@link Nature} constant yields 0
 * @return the value of {@link #getNatureFrequency(Nature)} for the resolved
 *         nature, or 0 when the name cannot be resolved
 * @deprecated prefer the overload that takes a {@link Nature} argument
 */
@Deprecated
public int getNatureFrequency(String nature)
{
    if (nature == null)
    {
        // Enum.valueOf throws NullPointerException for a null name, which the
        // catch below does not cover; treat it like any other unknown name.
        return 0;
    }
    try
    {
        Nature pos = Enum.valueOf(Nature.class, nature);
        return getNatureFrequency(pos);
    }
    catch (IllegalArgumentException e)
    {
        // Unknown nature name: report zero occurrences.
        return 0;
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:20,代码来源:CoreDictionary.java
示例18: hasNatureStartsWith
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Tells whether any nature of this attribute starts with the given prefix.
 *
 * @param prefix nature prefix; for example {@code u} is meant to match
 *               {@code ude}, {@code uzhe} and so on
 * @return {@code true} if at least one nature starts with {@code prefix}
 */
public boolean hasNatureStartsWith(String prefix)
{
    for (int i = 0; i < nature.length; ++i)
    {
        if (nature[i].startsWith(prefix))
        {
            return true;
        }
    }
    return false;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:14,代码来源:CoreDictionary.java
示例19: insert
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Inserts a new word into the custom dictionary (overwrite mode).<br>
 * Per the original note, dynamic additions and removals are not persisted to
 * the dictionary file.
 * <p>
 * The word is first offered to {@code dat.set}; if that returns false it is put
 * into {@code trie}, which is created on first use.
 *
 * @param word                the new word, e.g. "裸婚"
 * @param natureWithFrequency natures and their frequencies, e.g. "nz 1 v 2";
 *                            {@code null} means "nz 1"
 * @return {@code false} when {@code word} is {@code null} or the attribute
 *         could not be created from {@code natureWithFrequency};
 *         {@code true} otherwise
 */
public static boolean insert(String word, String natureWithFrequency)
{
    if (word == null)
    {
        return false;
    }
    if (HanLP.Config.Normalization)
    {
        word = CharTable.convert(word);
    }
    CoreDictionary.Attribute att;
    if (natureWithFrequency == null)
    {
        att = new CoreDictionary.Attribute(Nature.nz, 1);
    }
    else
    {
        att = CoreDictionary.Attribute.create(natureWithFrequency);
    }
    if (att == null)
    {
        return false;
    }
    if (!dat.set(word, att))
    {
        // dat.set reported failure: fall back to the trie.
        if (trie == null)
        {
            trie = new BinTrie<CoreDictionary.Attribute>();
        }
        trie.put(word, att);
    }
    return true;
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:20,代码来源:CustomDictionary.java
示例20: loadDat
import com.hankcs.hanlp.corpus.tag.Nature; //导入依赖的package包/类
/**
 * Loads the double-array trie from its binary cache on disk.
 * <p>
 * The file read is {@code path + Predefine.BIN_EXT}. A negative leading integer
 * announces a header of user-defined nature names, each registered through
 * {@code Nature.create}, followed by the real attribute count. Each attribute
 * is stored as its total frequency, its nature count, and then one
 * (nature index, frequency) pair per nature; indices are resolved against
 * {@code Nature.values()} taken after the header has been processed.
 *
 * @param path dictionary path without the binary extension
 * @return {@code true} when {@code dat.load} succeeded; {@code false} when the
 *         file is missing, loading fails, or any exception occurs (logged as a
 *         warning)
 */
static boolean loadDat(String path)
{
    try
    {
        ByteArray bytes = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (bytes == null)
        {
            return false;
        }
        int size = bytes.nextInt();
        if (size < 0)
        {
            // Compatibility measure: a negative size means the file header
            // stores -size user-defined natures.
            while (++size <= 0)
            {
                Nature.create(bytes.nextString());
            }
            size = bytes.nextInt();
        }
        CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[size];
        final Nature[] natureByIndex = Nature.values();
        for (int entry = 0; entry < size; ++entry)
        {
            // First comes the total frequency, then the number of natures.
            int total = bytes.nextInt();
            int natureCount = bytes.nextInt();
            CoreDictionary.Attribute current = new CoreDictionary.Attribute(natureCount);
            current.totalFrequency = total;
            for (int slot = 0; slot < natureCount; ++slot)
            {
                current.nature[slot] = natureByIndex[bytes.nextInt()];
                current.frequency[slot] = bytes.nextInt();
            }
            attributes[entry] = current;
        }
        return dat.load(bytes, attributes);
    }
    catch (Exception e)
    {
        logger.warning("读取失败,问题发生在" + TextUtility.exceptionToString(e));
        return false;
    }
}
开发者ID:priester,项目名称:hanlpStudy,代码行数:46,代码来源:CustomDictionary.java
注:本文中的com.hankcs.hanlp.corpus.tag.Nature类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论