本文整理汇总了Java中edu.stanford.nlp.io.EncodingPrintWriter类的典型用法代码示例。如果您正苦于以下问题:Java EncodingPrintWriter类的具体用法?Java EncodingPrintWriter怎么用?Java EncodingPrintWriter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
EncodingPrintWriter类属于edu.stanford.nlp.io包,在下文中一共展示了EncodingPrintWriter类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: main
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/** Mainly for testing. Usage:
 * <code>ChineseUtils ascii spaceChar word*</code>
 * <p>
 * ascii and spaceChar are integers: 0 = leave, 1 = ascii, 2 = fullwidth.
 * The words listed are then normalized and sent to stdout.
 * If no words are given, the program reads from and normalizes stdin.
 * Input is assumed to be in UTF-8.
 *
 * @param args Command line arguments as above
 * @throws IOException If any problems accessing command-line files
 */
public static void main(String[] args) throws IOException {
  if (args.length < 3) {
    System.err.println("usage: ChineseUtils ascii space midDot word*");
    System.err.println(" First 3 args are int flags; a filter or maps args as words; assumes UTF-8");
    return;
  }
  int asciiFlag = Integer.parseInt(args[0]);
  int spaceFlag = Integer.parseInt(args[1]);
  int midDotFlag = Integer.parseInt(args[2]);
  if (args.length == 3) {
    // No words on the command line: act as a stdin -> stdout filter.
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    for (String line; (line = reader.readLine()) != null; ) {
      EncodingPrintWriter.out.println(normalize(line, asciiFlag, spaceFlag, midDotFlag));
    }
  } else {
    // Normalize each word listed on the command line.
    for (int k = 3; k < args.length; k++) {
      EncodingPrintWriter.out.println(normalize(args[k], asciiFlag, spaceFlag, midDotFlag));
    }
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:34,代码来源:ChineseUtils.java
示例2: main
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Command-line entry point: translates between Arabic script and Buckwalter
 * transliteration. With words on the command line, each is translated and
 * echoed; with none, it runs as a stdin -> stdout filter.
 *
 * @param args First arg must be -a2b or -b2a; remaining args are words
 * @throws IOException If reading stdin fails in filter mode
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1 ||
      ! (args[0].equals("-a2b") || args[0].equals("-b2a"))) {
    System.err.println("usage: java Buckwalter [-a2b|-b2a] words+ OR, as a filter, just [-a2b|-b2a]");
    return;
  }
  Properties props = StringUtils.argsToProperties(args);
  Buckwalter translator = new Buckwalter(args[0].equals("-a2b"));
  boolean unicodeValues = props.containsKey("outputUnicodeValues");
  if (unicodeValues) {
    translator.outputUnicodeValues = true;
  }
  // With the extra flag present, words start at args[2]; otherwise args[1].
  int firstWord = unicodeValues ? 2 : 1;
  if (firstWord < args.length) {
    for (int k = firstWord; k < args.length; k++) {
      EncodingPrintWriter.out.println(args[k] + " -> " + translator.apply(args[k]), "utf-8");
    }
  } else {
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, "utf-8"));
    for (String line; (line = reader.readLine()) != null; ) {
      EncodingPrintWriter.out.println(translator.apply(line), "utf-8");
    }
  }
  if (DEBUG) {
    if (translator.unmappable.keySet().isEmpty()) {
      EncodingPrintWriter.err.println("All characters successfully converted!", "utf-8");
    } else {
      EncodingPrintWriter.err.println("Characters that could not be converted [passed through!]:", "utf-8");
      EncodingPrintWriter.err.println(translator.unmappable.toString(), "utf-8");
    }
  }
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:33,代码来源:Buckwalter.java
示例3: printDebug
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/** If DEBUG is on, reports to stderr which characters (if any) could not be converted. */
public void printDebug() {
  if ( ! DEBUG) {
    return;
  }
  if (b.unmappable.keySet().isEmpty()) {
    EncodingPrintWriter.err.println("All characters successfully converted!", "utf-8");
  } else {
    EncodingPrintWriter.err.println("Characters that could not be converted [passed through!]:", "utf-8");
    EncodingPrintWriter.err.println(b.unmappable.toString(), "utf-8");
  }
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:11,代码来源:Buckwalter.java
示例4: makeObjects
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Build the set of dependencies for evaluation. This set excludes
 * all dependencies for which the argument is a punctuation tag.
 *
 * @param tree The (head-percolated) tree to extract word-word dependencies from
 * @return The set of non-punctuation unnamed dependencies in the tree
 */
@Override
protected Set<?> makeObjects(Tree tree) {
  Set<Dependency<Label, Label, Object>> deps = new HashSet<Dependency<Label, Label, Object>>();
  for (Tree node : tree.subTreeList()) {
    if (DEBUG) EncodingPrintWriter.err.println("Considering " + node.label());
    // Leaves and unary nodes contribute no head-argument pairs.
    if (node.isLeaf() || node.children().length < 2) {
      continue;
    }
    // NOTE(review): node labels are assumed to carry the head word of the
    // constituent — confirm trees are head-annotated before evaluation.
    String head = ((HasWord) node.label()).word();
    boolean seenHead = false;
    // Every child with a different head is an argument, as are ones with
    // the same head after the first one found.
    for (Tree child : node.children()) {
      String arg = ((HasWord) child.label()).word();
      if (DEBUG) EncodingPrintWriter.err.println("Considering " + head + " --> " + arg);
      if (head.equals(arg) && !seenHead) {
        seenHead = true;
        if (DEBUG) EncodingPrintWriter.err.println(" ... is head");
      } else if (!punctFilter.accept(arg)) {
        deps.add(new UnnamedDependency(head, arg));
        if (DEBUG) EncodingPrintWriter.err.println(" ... added");
      } else if (DEBUG) {
        // (fixed: this branch previously re-tested DEBUG redundantly)
        EncodingPrintWriter.err.println(" ... is punct dep");
      }
    }
  }
  if (DEBUG) {
    EncodingPrintWriter.err.println("Deps: " + deps);
  }
  return deps;
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:40,代码来源:DependencyEval.java
示例5: accept
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/** Doesn't accept nodes that only cover an empty. */
public boolean accept(Tree t) {
  Tree[] kids = t.children();
  Label label = t.label();
  // Only reject preterminal nodes whose label starts with "-NONE-".
  if (label == null || label.value() == null) {
    return true;
  }
  // There appears to be a mistake in CTB3 where the label "-NONE-1" is used
  // once; presumably it should be "-NONE-" and be spliced out here.
  if ( ! label.value().matches("-NONE-.*")) {
    return true;
  }
  if (t.isLeaf() || kids.length != 1 || ! kids[0].isLeaf()) {
    return true;
  }
  // Delete empty/trace nodes (ones marked '-NONE-'); warn on the errant variants.
  if ( ! label.value().equals("-NONE-")) {
    EncodingPrintWriter.err.println("Deleting errant node " + label.value() + " as if -NONE-: " + t, ChineseTreebankLanguagePack.ENCODING);
  }
  return false;
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:16,代码来源:CTBErrorCorrectingTreeNormalizer.java
示例6: main
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * The main() method tokenizes a file in the specified Encoding
 * and prints it to standard output in the specified Encoding.
 * Its arguments are (Infile, Encoding).
 *
 * @param args args[0] is the input file, args[1] its character encoding
 * @throws IOException If the input file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
  // Fail with a usage message rather than ArrayIndexOutOfBoundsException.
  if (args.length < 2) {
    System.err.println("usage: CHTBTokenizer infile encoding");
    return;
  }
  String encoding = args[1];
  Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), encoding));
  try {
    Tokenizer<String> st = new CHTBTokenizer(in);
    while (st.hasNext()) {
      String s = st.next();
      EncodingPrintWriter.out.println(s, encoding);
    }
  } finally {
    in.close();  // was leaked on exceptions and normal exit
  }
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:20,代码来源:CHTBTokenizer.java
示例7: WordToSentenceProcessor
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Flexibly set the set of acceptable sentence boundary tokens,
 * the set of tokens commonly following sentence boundaries, and also
 * the set of tokens that are sentences boundaries that should be
 * discarded.
 * This is private because it is a dangerous constructor. It's not clear what the semantics
 * should be if there are both boundary token sets, and patterns to match.
 */
private WordToSentenceProcessor(Set<String> boundaryTokens, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
  // Straight field assignment; no defensive copies are made of the sets.
  this.sentenceBoundaryTokens = boundaryTokens;
  this.sentenceBoundaryFollowers = boundaryFollowers;
  this.sentenceBoundaryToDiscard = boundaryToDiscard;
  this.sentenceRegionBeginPattern = regionBeginPattern;
  this.sentenceRegionEndPattern = regionEndPattern;
  if (DEBUG) {
    EncodingPrintWriter.err.println("WordToSentenceProcessor: boundaryTokens=" + boundaryTokens, "UTF-8");
    EncodingPrintWriter.err.println(" boundaryFollowers=" + boundaryFollowers, "UTF-8");
    EncodingPrintWriter.err.println(" boundaryToDiscard=" + boundaryToDiscard, "UTF-8");
  }
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:21,代码来源:WordToSentenceProcessor.java
示例8: WordToSentenceProcessor
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Flexibly set a pattern that matches acceptable sentence boundaries,
 * the set of tokens commonly following sentence boundaries, and also
 * the set of tokens that are sentence boundaries that should be discarded.
 * This is private because it is a dangerous constructor. It's not clear what the semantics
 * should be if there are both boundary token sets, and patterns to match.
 */
private WordToSentenceProcessor(String boundaryTokenRegex, Set<String> boundaryFollowers, Set<String> boundaryToDiscard, Pattern regionBeginPattern, Pattern regionEndPattern) {
  // Compile the boundary regex once, up front.
  Pattern compiledBoundary = Pattern.compile(boundaryTokenRegex);
  this.sentenceBoundaryTokenPattern = compiledBoundary;
  this.sentenceBoundaryFollowers = boundaryFollowers;
  setSentenceBoundaryToDiscard(boundaryToDiscard);
  this.sentenceRegionBeginPattern = regionBeginPattern;
  this.sentenceRegionEndPattern = regionEndPattern;
  if (DEBUG) {
    EncodingPrintWriter.err.println("WordToSentenceProcessor: boundaryTokens=" + boundaryTokenRegex, "UTF-8");
    EncodingPrintWriter.err.println(" boundaryFollowers=" + boundaryFollowers, "UTF-8");
    EncodingPrintWriter.err.println(" boundaryToDiscard=" + boundaryToDiscard, "UTF-8");
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:20,代码来源:WordToSentenceProcessor.java
示例9: train
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Trains this UWM on a single tagged-word observation.
 * (The previous doc comment said "Collection of trees", but this method
 * takes one TaggedWord at a time.)
 *
 * @param tw The word/tag pair observed
 * @param loc Position of the word in its sentence, used for signature extraction
 * @param weight Count weight to add for this observation
 */
public void train(TaggedWord tw, int loc, double weight) {
  IntTaggedWord iTW =
      new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
  IntTaggedWord iT = new IntTaggedWord(nullWord, iTW.tag);
  IntTaggedWord iW = new IntTaggedWord(iTW.word, nullTag);
  seenCounter.incrementCount(iW, weight);
  IntTaggedWord i = NULL_ITW;
  if (treesRead > indexToStartUnkCounting) {
    // Start doing this once some way through trees;
    // treesRead is 1-based counting.
    if (seenCounter.getCount(iW) < 1.5) {
      // Count < 1.5 means seen at most once (this occurrence included):
      // treat as an entirely unknown word.
      int s = model.getSignatureIndex(iTW.word, loc,
          wordIndex.get(iTW.word));
      if (DOCUMENT_UNKNOWNS) {
        String wStr = wordIndex.get(iTW.word);
        String tStr = tagIndex.get(iTW.tag);
        String sStr = wordIndex.get(s);
        EncodingPrintWriter.err.println("Unknown word/tag/sig:\t" +
            wStr + '\t' + tStr + '\t' +
            sStr, "UTF-8");
      }
      // Accumulate signature/tag statistics for the unseen-word model.
      IntTaggedWord iTS = new IntTaggedWord(s, iTW.tag);
      IntTaggedWord iS = new IntTaggedWord(s, nullTag);
      unSeenCounter.incrementCount(iTS, weight);
      unSeenCounter.incrementCount(iT, weight);
      unSeenCounter.incrementCount(iS, weight);
      unSeenCounter.incrementCount(i, weight);
    }
  }
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:43,代码来源:EnglishUnknownWordModelTrainer.java
示例10: run
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
 * Runs this session by reading a string, tagging it, and writing
 * back the result. The input should be a single line (no embedded
 * newlines), which represents a whole sentence or document.
 */
@Override
public void run() {
  if (DEBUG) { System.err.println("Created new session"); }
  try {
    String input = in.readLine();
    if (DEBUG) {
      EncodingPrintWriter.err.println("Receiving: \"" + input + '\"', charset);
    }
    if (input != null) {  // was the non-idiomatic "! (input == null)"
      String output = tagger.apply(input);
      if (DEBUG) {
        EncodingPrintWriter.err.println("Sending: \"" + output + '\"', charset);
      }
      out.print(output);
      out.flush();
    }
    close();
  } catch (IOException e) {
    System.err.println("MaxentTaggerServer:Session: couldn't read input or error running POS tagger");
    e.printStackTrace(System.err);
  } catch (NullPointerException npe) {
    // NOTE(review): relying on an NPE to detect a closed connection is
    // exception-as-control-flow; kept for compatibility with existing behavior.
    System.err.println("MaxentTaggerServer:Session: connection closed by peer");
    npe.printStackTrace(System.err);
  }
}
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:32,代码来源:MaxentTaggerServer.java
示例11: train
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/**
* Trains this lexicon on the Collection of trees.
*/
public void train(TaggedWord tw, int loc, double weight) {
IntTaggedWord iTW =
new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
IntTaggedWord iT = new IntTaggedWord(nullWord, iTW.tag);
IntTaggedWord iW = new IntTaggedWord(iTW.word, nullTag);
seenCounter.incrementCount(iW, weight);
IntTaggedWord i = NULL_ITW;
if (treesRead > indexToStartUnkCounting) {
// start doing this once some way through trees;
// treesRead is 1 based counting
if (seenCounter.getCount(iW) < 2) {
// it's an entirely unknown word
int s = model.getSignatureIndex(iTW.word, loc,
wordIndex.get(iTW.word));
if (DOCUMENT_UNKNOWNS) {
String wStr = wordIndex.get(iTW.word);
String tStr = tagIndex.get(iTW.tag);
String sStr = wordIndex.get(s);
EncodingPrintWriter.err.println("Unknown word/tag/sig:\t" +
wStr + '\t' + tStr + '\t' +
sStr, "UTF-8");
}
IntTaggedWord iTS = new IntTaggedWord(s, iTW.tag);
IntTaggedWord iS = new IntTaggedWord(s, nullTag);
unSeenCounter.incrementCount(iTS, weight);
unSeenCounter.incrementCount(iT, weight);
unSeenCounter.incrementCount(iS, weight);
unSeenCounter.incrementCount(i, weight);
} // else {
}
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:36,代码来源:ArabicUnknownWordModelTrainer.java
示例12: printlnErr
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/** Prints {@code s} to stderr using the Chinese Treebank encoding. */
private static void printlnErr(String s) {
EncodingPrintWriter.err.println(s, ChineseTreebankLanguagePack.ENCODING);
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:4,代码来源:ChineseTreebankParserParams.java
示例13: writeTagsAndErrors
import edu.stanford.nlp.io.EncodingPrintWriter; //导入依赖的package包/类
/** Write the tagging and note any errors (if pf != null) and accumulate
 * global statistics.
 *
 * @param finalTags Chosen tags for sentence
 * @param pf File to write tagged output to (can be null, then no output;
 * at present it is non-null iff the debug property is set)
 * @param verboseResults If true, log each mistagged word to stderr and echo
 * the full tagged sentence to stdout
 */
protected void writeTagsAndErrors(String[] finalTags, PrintFile pf, boolean verboseResults) {
StringWriter sw = new StringWriter(200);
// Build "word<tagSeparator>tag " for each token; mirror the same text to pf.
for (int i = 0; i < correctTags.length; i++) {
sw.write(toNice(sent.get(i)));
sw.write(tagSeparator);
sw.write(finalTags[i]);
sw.write(' ');
if (pf != null) {
pf.print(toNice(sent.get(i)));
pf.print(tagSeparator);
pf.print(finalTags[i]);
}
// Tally right/wrong against the gold tags (mutates shared counters).
if ((correctTags[i]).equals(finalTags[i])) {
numRight++;
} else {
numWrong++;
// In the file output, a wrong guess is followed by "|<gold tag>".
if (pf != null) pf.print('|' + correctTags[i]);
if (verboseResults) {
EncodingPrintWriter.err.println((maxentTagger.dict.isUnknown(sent.get(i)) ? "Unk" : "") + "Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);
}
// Errors on words unseen in training get an extra '*' marker in the file.
if (maxentTagger.dict.isUnknown(sent.get(i))) {
numWrongUnknown++;
if (pf != null) pf.print("*");
}// if
}// else
if (pf != null) pf.print(' ');
}// for
if (pf != null) pf.println();
if (verboseResults) {
// Echo the tagged sentence to stdout; fall back to the platform default
// writer if the configured encoding is unsupported.
PrintWriter pw;
try {
pw = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);
} catch (UnsupportedEncodingException uee) {
pw = new PrintWriter(new OutputStreamWriter(System.out), true);
}
pw.println(sw);
}
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:48,代码来源:TestSentence.java
注:本文中的edu.stanford.nlp.io.EncodingPrintWriter类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论