本文整理汇总了Java中edu.stanford.nlp.trees.Treebank类的典型用法代码示例。如果您正苦于以下问题：Java Treebank类的具体用法？Java Treebank怎么用？Java Treebank使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Treebank类属于edu.stanford.nlp.trees包，在下文中一共展示了Treebank类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getSegmentedWordLengthDistribution
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Builds a distribution over the lengths of the words produced by
 * re-segmenting every sentence of a treebank.
 *
 * <p>For each tree, the words of its yield are concatenated (without
 * separators) into one string, that string is handed to
 * {@code segmentWords}, and the string length of every returned word is
 * counted.
 *
 * @param tb treebank whose trees supply the text to re-segment
 * @return the distribution derived from the counted word lengths
 */
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) {
  ClassicCounter<Integer> lengthCounts = new ClassicCounter<Integer>();
  for (Object treeObj : tb) {
    Tree goldTree = (Tree) treeObj;

    // Glue the gold words back together into an unsegmented character string.
    StringBuilder unsegmented = new StringBuilder();
    for (Object wordObj : goldTree.yield()) {
      Word goldWord = (Word) wordObj;
      unsegmented.append(goldWord);
    }

    // Count the length of each word the segmenter proposes for that string.
    Sentence segmented = segmentWords(unsegmented.toString());
    for (Object segmentedWord : segmented) {
      int length = segmentedWord.toString().length();
      lengthCounts.incrementCount(Integer.valueOf(length));
    }
  }
  return Distribution.getDistribution(lengthCounts);
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:19,代码来源:ChineseMarkovWordSegmenter.java
示例2: getSegmentedWordLengthDistribution
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Builds a distribution over the lengths of the words produced by
 * re-segmenting every sentence of a treebank.
 *
 * <p>For each tree, the words of its yield are concatenated (without
 * separators) into one string, that string is handed to {@code segment},
 * and the length of {@code word()} of every returned token is counted.
 *
 * @param tb treebank whose trees supply the text to re-segment
 * @return the distribution derived from the counted word lengths
 */
private Distribution<Integer> getSegmentedWordLengthDistribution(Treebank tb) {
  ClassicCounter<Integer> lengthCounts = new ClassicCounter<Integer>();
  for (Object treeObj : tb) {
    Tree goldTree = (Tree) treeObj;

    // Glue the gold words back together into an unsegmented character string.
    StringBuilder unsegmented = new StringBuilder();
    for (Object wordObj : goldTree.yield()) {
      Word goldWord = (Word) wordObj;
      unsegmented.append(goldWord);
    }

    // Count the length of each word the segmenter proposes for that string.
    List<HasWord> segmented = segment(unsegmented.toString());
    for (HasWord token : segmented) {
      int length = token.word().length();
      lengthCounts.incrementCount(Integer.valueOf(length));
    }
  }
  return Distribution.getDistribution(lengthCounts);
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:19,代码来源:ChineseMarkovWordSegmenter.java
示例3: simplify
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Processes a tree through a work queue, trying the parenthesis and
 * coordination simplifications on every queued tree.
 *
 * <p>Each polled tree is offered to the four-argument {@code simplify}
 * overload once per simplification type, stopping at the first type that
 * reports success. A tree for which no type succeeds is added to the result,
 * unless it is the original input tree itself.
 *
 * <p>NOTE(review): the overload also receives the queue and the result
 * treebank; presumably it enqueues derived trees for further processing,
 * but that is not visible here.
 *
 * @param tree the tree to simplify
 * @return the treebank collecting the trees that could not be simplified further
 */
public Treebank simplify(Tree tree) {
    Treebank totalSimplified = new MemoryTreebank();
    Queue<Tree> pending = new LinkedList<Tree>();
    pending.offer(tree);

    while (!pending.isEmpty()) {
        Tree current = pending.poll();
        int[] types = { Simplifier.Parenthesis, Simplifier.Coordination };

        // Try each type in order; stop as soon as one of them applies.
        boolean applied = false;
        for (int i = 0; i < types.length && !applied; i++) {
            applied = simplify(current, pending, totalSimplified, types[i]);
        }

        // Keep only trees that are fully simplified and are not the untouched input.
        if (!applied && current != tree) {
            totalSimplified.add(current);
        }
    }
    return totalSimplified;
}
开发者ID:leebird,项目名称:legonlp,代码行数:31,代码来源:GenerateParCooSimplification.java
示例4: simplify
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Processes a tree through a work queue, trying five simplification types
 * (parenthesis, coordination, relative, apposition, others) on every queued
 * tree.
 *
 * <p>Each polled tree is offered to the four-argument {@code simplify}
 * overload once per simplification type, in the order listed, stopping at the
 * first type that reports success. A tree for which no type succeeds is added
 * to the result, unless it is the original input tree itself.
 *
 * <p>NOTE(review): the overload also receives the queue and the result
 * treebank; presumably it enqueues derived trees for further processing,
 * but that is not visible here.
 *
 * @param tree the tree to simplify
 * @return the treebank collecting the trees that could not be simplified further
 */
public Treebank simplify(Tree tree) {
    Treebank totalSimplified = new MemoryTreebank();
    Queue<Tree> pending = new LinkedList<Tree>();
    pending.offer(tree);

    while (!pending.isEmpty()) {
        Tree current = pending.poll();
        int[] types = {
            Simplifier.Parenthesis,
            Simplifier.Coordination,
            Simplifier.Relative,
            Simplifier.Apposition,
            Simplifier.Others
        };

        // Try each type in order; stop as soon as one of them applies.
        boolean applied = false;
        for (int i = 0; i < types.length && !applied; i++) {
            applied = simplify(current, pending, totalSimplified, types[i]);
        }

        // Nothing to record if a simplification applied or this is the untouched input.
        if (applied || current == tree) {
            continue;
        }
        totalSimplified.add(current);
    }
    return totalSimplified;
}
开发者ID:leebird,项目名称:legonlp,代码行数:33,代码来源:GenerateSimplification.java
示例5: getCandidates
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Collects candidate entities for the given entity from the noun-like nodes
 * of a treebank.
 *
 * <p>Every non-leaf node whose label matches {@code NP|NNP|NNPS|NN|NNS} is
 * turned into a candidate {@code Entity}. A candidate is kept only if it ends
 * before {@code entity} starts ({@code entity.from() > candidate.to()}). When
 * {@code intraSentence} is set, {@code entity} must additionally lie strictly
 * inside the offsets spanned by the first and last leaf of the tree the
 * candidate was found in.
 *
 * @param entity        the entity for which candidates are sought
 * @param treebank      the parsed trees to search; leaf labels are expected to
 *                      be {@code OffsetLabel}s (they are cast unconditionally)
 * @param intraSentence whether candidates are restricted to the tree whose
 *                      offsets enclose {@code entity}
 * @return the accepted candidates, in the order they were matched
 */
protected List<Entity> getCandidates(Entity entity, Treebank treebank, boolean intraSentence)
{
    List<Entity> entityList = new ArrayList<Entity>();

    // The pattern is constant, so compile it once instead of once per tree.
    TregexPattern np = TregexPattern.compile("NP|NNP|NNPS|NN|NNS");

    for (Tree tree : treebank)
    {
        // Character offsets covered by this tree, taken from its outermost leaves.
        List<Tree> leaves = tree.getLeaves();
        OffsetLabel first = (OffsetLabel) leaves.get(0).label();
        OffsetLabel last = (OffsetLabel) leaves.get(leaves.size() - 1).label();
        int start = first.beginPosition();
        int end = last.endPosition();

        TregexMatcher m = np.matcher(tree);
        while (m.find())
        {
            Tree npTree = m.getMatch();
            List<Token> tokens = Utils.getTokens(tree, npTree);
            if (npTree.isLeaf())
            {
                continue;
            }

            Entity candidate = new Entity("", npTree.nodeString(), tokens);

            // Only candidates that end before the entity begins are considered.
            if (entity.from() > candidate.to()
                    && (!intraSentence || ((entity.from() > start) && (entity.to() < end))))
            {
                entityList.add(candidate);
            }
        }
    }
    return entityList;
}
开发者ID:leebird,项目名称:legonlp,代码行数:36,代码来源:ResoluteAnaphora.java
示例6: main
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Loads the treebank found at the path given as the first command-line
 * argument, using a memory treebank obtained from
 * {@code NegraPennTreebankParserParams}, and prints every tree with
 * {@code pennPrint()}.
 *
 * @param args command-line arguments; {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  TreebankLangParserParams negraParams = new NegraPennTreebankParserParams();
  Treebank negraTreebank = negraParams.memoryTreebank();
  negraTreebank.loadPath(args[0]);

  for (Tree loadedTree : negraTreebank) {
    loadedTree.pennPrint();
  }
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:9,代码来源:NegraPennTreebankParserParams.java
示例7: getTuningSet
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Converts a development treebank into the list of lexicon events used for
 * tuning.
 *
 * <p>Every non-leaf subtree of every tree is passed to
 * {@code tlpp.transformTree(subtree, root)}; the return value is not used.
 * The trees are then collected and handed to
 * {@code treebankToLexiconEvents}.
 *
 * @param devTreebank the development trees
 * @param lexicon     the lexicon passed through to {@code treebankToLexiconEvents}
 * @param tlpp        language parameters supplying the tree transformation
 * @return the events produced by {@code treebankToLexiconEvents}
 */
private static List<FactoredLexiconEvent> getTuningSet(Treebank devTreebank,
    FactoredLexicon lexicon, TreebankLangParserParams tlpp) {
  List<Tree> collected = new ArrayList<Tree>(3000);
  for (Tree root : devTreebank) {
    for (Tree node : root) {
      if (node.isLeaf()) {
        continue;
      }
      tlpp.transformTree(node, root);
    }
    collected.add(root);
  }
  return treebankToLexiconEvents(collected, lexicon);
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:15,代码来源:FactoredLexicon.java
示例8: main
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Simple test driver: loads the treebank at the path given as the first
 * command-line argument and, for each tree, percolates heads with a
 * {@code NoPunctuationHeadFinder}, prints the tree in Penn format and then
 * prints an empty line to standard output.
 *
 * @param args command-line arguments; {@code args[0]} is the treebank path
 */
public static void main(String[] args) {
  Treebank diskTrees = new DiskTreebank();
  CategoryWordTag.suppressTerminalDetails = true;
  diskTrees.loadPath(args[0]);

  final HeadFinder headFinder = new NoPunctuationHeadFinder();
  TreeVisitor headPrinter = new TreeVisitor() {
    public void visitTree(Tree parseTree) {
      parseTree.percolateHeads(headFinder);
      parseTree.pennPrint();
      System.out.println();
    }
  };
  diskTrees.apply(headPrinter);
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:15,代码来源:NoPunctuationHeadFinder.java
示例9: processFile
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Simplifies the trees of one input file and writes the distinct results to
 * {@code Env.DIR_SIMP + filename + ".ptb.simp"}, one tree per line.
 *
 * <p>The file named {@code PMC-3062687-12-Methods} is skipped after the
 * superclass call (the reason is not documented in this method). If
 * {@code index} is not {@code -1}, only the tree at that index of the loaded
 * treebank is processed.
 *
 * <p>If the output file cannot be opened, the stack trace is printed and the
 * JVM exits with status 1.
 *
 * @param dir      directory passed through to {@code readResource}
 * @param filename base name of the file to process; also used for the output name
 */
@Override
public final void processFile(String dir, String filename) {
    super.processFile(dir, filename);
    if (filename.equals("PMC-3062687-12-Methods")) {
        return;
    }
    readResource(dir, filename);

    // Optionally restrict processing to a single tree.
    if (index != -1) {
        MemoryTreebank newTreebank = new MemoryTreebank();
        newTreebank.add(treebank.get(index));
        treebank = newTreebank;
    }

    // general
    Treebank simpTreebank = new MemoryTreebank();
    for (Tree t : treebank) {
        simpTreebank.addAll(simplify(t));
    }

    // output
    try {
        PrintStream out = new PrintStream(new FileOutputStream(Env.DIR_SIMP
                + filename
                + ".ptb.simp"));
        try {
            Set<String> noDuplicates = new HashSet<String>();
            for (Tree tree : simpTreebank) {
                String line = tree.toString();
                // Set.add returns false for an already-seen line, so each
                // distinct tree string is written exactly once.
                if (noDuplicates.add(line)) {
                    out.println(line);
                }
            }
        } finally {
            // Release the file handle even if writing fails part-way through.
            out.close();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
开发者ID:leebird,项目名称:legonlp,代码行数:43,代码来源:GenerateSimplification.java
示例10: recover
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Reads the parsed treebank for the current file and replaces entity
 * placeholders in its leaves with the original entity text.
 *
 * <p>For every leaf, each match of {@code Env.ENTITY_REPLACE} is looked up in
 * {@code map} to obtain its {@code "start"} and {@code "end"} offsets. The
 * first entity in {@code entityList} with exactly those offsets supplies the
 * replacement text, after which matching restarts on the updated word. The
 * leaf value is then overwritten and each tree is appended to the result,
 * followed by a newline.
 *
 * <p>If anything fails while resolving a placeholder (for example, it has no
 * entry in {@code map}), the placeholder is printed to standard output and
 * the JVM exits with status 1.
 *
 * @param text       not used by this method
 * @param entityList entities whose text replaces the placeholders
 * @param map        placeholder string to a map holding its {@code "start"}
 *                   and {@code "end"} offsets
 * @return the string form of all trees, one per line
 */
public String recover(String text, List<Entity> entityList, HashMap<String,HashMap<String,Integer>> map)
{
    PtbReader ptbReader = new PtbReader(Env.DIR_PARSE + filename + ".ptb");
    Treebank treebank = ptbReader.readTreebank();

    // Accumulate in a builder; repeated String += is quadratic in the output size.
    StringBuilder res = new StringBuilder();

    // The placeholder pattern never changes, so compile it once rather than per leaf.
    Pattern r = Pattern.compile(Env.ENTITY_REPLACE);

    for (Tree t : treebank)
    {
        for (Tree l : t.getLeaves())
        {
            String word = Utils.adaptValue(l.label().toString());
            Matcher m = r.matcher(word);
            while (m.find())
            {
                String needle = m.group();
                try {
                    // A missing entry yields null here and fails on unboxing,
                    // which is handled by the catch below.
                    HashMap<String,Integer> offsets = map.get(needle);
                    int entStart = offsets.get("start");
                    int entEnd = offsets.get("end");
                    for (Entity entity : entityList)
                    {
                        if (entity.from() == entStart && entEnd == entity.to())
                        {
                            word = word.replace(needle, entity.getText());
                            // The word changed, so restart matching on the new text.
                            m = r.matcher(word);
                            break;
                        }
                    }
                } catch (Exception e) {
                    System.out.println(needle);
                    System.exit(1);
                }
            }
            l.setValue(word);
        }
        res.append(t.toString()).append("\n");
    }
    return res.toString();
}
开发者ID:leebird,项目名称:legonlp,代码行数:50,代码来源:RecoverEntity.java
示例11: main
import edu.stanford.nlp.trees.Treebank; //导入依赖的package包/类
/**
 * Evaluates a guess treebank against a gold treebank with the leaf-ancestor
 * metric and prints the result.
 *
 * <p>Both treebanks are loaded from {@code guessFile} and {@code goldFile},
 * summarised, and then walked in parallel. A tree pair is skipped (and
 * counted as skipped) when the gold yield is longer than
 * {@code MAX_GOLD_YIELD} or when the two yields differ in length. All other
 * pairs are passed through the collinizer and handed to the metric.
 *
 * <p>Execute with no arguments for usage.
 *
 * @param args command-line arguments, checked by {@code validateCommandLine}
 */
public static void main(String[] args) {
  if(!validateCommandLine(args)) {
    System.err.println(usage);
    System.exit(-1);
  }

  final TreebankLangParserParams tlpp = Languages.getLanguageParams(LANGUAGE);
  final PrintWriter pwOut = tlpp.pw();

  final Treebank guessTreebank = tlpp.diskTreebank();
  guessTreebank.loadPath(guessFile);
  pwOut.println("GUESS TREEBANK:");
  pwOut.println(guessTreebank.textualSummary());

  final Treebank goldTreebank = tlpp.diskTreebank();
  goldTreebank.loadPath(goldFile);
  pwOut.println("GOLD TREEBANK:");
  pwOut.println(goldTreebank.textualSummary());

  final LeafAncestorEval metric = new LeafAncestorEval("LeafAncestor");

  final TreeTransformer tc = tlpp.collinizer();

  //The evalb ref implementation assigns status for each tree pair as follows:
  //
  //   0 - Ok (yields match)
  //   1 - length mismatch
  //   2 - null parse e.g. (()).
  //
  //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
  final Iterator<Tree> goldItr = goldTreebank.iterator();
  final Iterator<Tree> guessItr = guessTreebank.iterator();
  int goldLineId = 0;
  int guessLineId = 0;
  int skippedGuessTrees = 0;
  while( guessItr.hasNext() && goldItr.hasNext() ) {
    Tree guessTree = guessItr.next();
    List<Label> guessYield = guessTree.yield();
    guessLineId++;

    Tree goldTree = goldItr.next();
    List<Label> goldYield = goldTree.yield();
    goldLineId++;

    // Check that we should evaluate this tree
    if(goldYield.size() > MAX_GOLD_YIELD) {
      skippedGuessTrees++;
      continue;
    }

    // Only trees with equal yields can be evaluated
    if(goldYield.size() != guessYield.size()) {
      pwOut.printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.size(), guessYield.size(), goldLineId, guessLineId);
      skippedGuessTrees++;
      continue;
    }

    final Tree evalGuess = tc.transformTree(guessTree);
    final Tree evalGold = tc.transformTree(goldTree);

    metric.evaluate(evalGuess, evalGold, ((VERBOSE) ? pwOut : null));
  }

  // One iterator still having trees means the two files differ in length.
  if(guessItr.hasNext() || goldItr.hasNext()) {
    // The full stop belongs before the line break, not at the start of the next line.
    System.err.printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
  }

  pwOut.println("================================================================================");
  // %n keeps the line ending consistent with the println/printf calls above.
  if(skippedGuessTrees != 0) pwOut.printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
  metric.display(true, pwOut);

  pwOut.close();
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:77,代码来源:LeafAncestorEval.java
注：本文中的edu.stanford.nlp.trees.Treebank类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论