本文整理汇总了Java中edu.stanford.nlp.stats.Counters类的典型用法代码示例。如果您正苦于以下问题:Java Counters类的具体用法?Java Counters怎么用?Java Counters使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Counters类属于edu.stanford.nlp.stats包,在下文中一共展示了Counters类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: existsTokenMatch
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Returns true as soon as any Freebase description shares a token, lemma, or
 * stem match with the example; false if no description matches at all.
 *
 * @param exampleTokens tokens of the example utterance
 * @param exampleLemmas lemmas of the example utterance
 * @param fbDescs Freebase descriptions to compare against
 * @return whether at least one token/stem-level match exists
 */
public boolean existsTokenMatch(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs) {
  // Stem every example token once, up front.
  List<String> exampleStems = new ArrayList<>();
  for (String token : exampleTokens) {
    exampleStems.add(stemmer.stem(token));
  }
  Counter<String> tokenFeatures = new ClassicCounter<>();
  Counter<String> stemFeatures = new ClassicCounter<>();
  for (String fbDescription : fbDescs) {
    List<String> descTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription);
    List<String> descStems = new ArrayList<>();
    for (String descToken : descTokens) {
      descStems.add(stemmer.stem(descToken));
    }
    // Keep the per-feature maximum seen across descriptions so far.
    Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, descTokens, true));
    Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, descTokens, true));
    Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, descStems, false));
    // Any accumulated feature at all means a match was found.
    if (tokenFeatures.size() > 0 || stemFeatures.size() > 0) {
      return true;
    }
  }
  return false;
}
开发者ID:cgraywang,项目名称:TextHIN,代码行数:23,代码来源:TextToTextMatcher.java
示例2: extractTokenMatchFeatures
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Extracts token/lemma/stem match features between the example and all
 * Freebase descriptions, then adds them to the given feature vector.
 * No-op unless the "tokenMatch" feature domain is enabled.
 */
private void extractTokenMatchFeatures(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs, FeatureVector vector) {
  if (!FeatureExtractor.containsDomain("tokenMatch")) return;
  // Stem every example token once, up front.
  List<String> exampleStems = new ArrayList<>();
  for (String token : exampleTokens) {
    exampleStems.add(stemmer.stem(token));
  }
  Counter<String> tokenFeatures = new ClassicCounter<>();
  Counter<String> stemFeatures = new ClassicCounter<>();
  for (String fbDescription : fbDescs) {
    List<String> descTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription);
    List<String> descStems = new ArrayList<>();
    for (String descToken : descTokens) {
      descStems.add(stemmer.stem(descToken));
    }
    // Keep the per-feature maximum across all descriptions.
    Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, descTokens, true));
    Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, descTokens, true));
    Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, descStems, false));
  }
  if (opts.verbose >= 3) {
    LogInfo.logs("Binary formula desc: %s, token match: %s, stem match: %s", fbDescs, tokenFeatures, stemFeatures);
  }
  addFeaturesToVector(tokenFeatures, "binary_token", vector);
  addFeaturesToVector(stemFeatures, "binary_stem", vector);
}
开发者ID:cgraywang,项目名称:TextHIN,代码行数:27,代码来源:TextToTextMatcher.java
示例3: classify
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Score the given input, returning both the classification decision and the
 * probability of that decision.
 * Note that this method will not return a relation which does not type check.
 *
 * @param input The input to classify.
 * @return A pair with the relation we classified into, along with its confidence.
 */
public Pair<String, Double> classify(KBPInput input) {
  RVFDatum<String, String> datum = new RVFDatum<>(features(input));
  Counter<String> scores = classifier.scoresOf(datum);
  // Convert raw classifier scores into a probability distribution.
  Counters.expInPlace(scores);
  Counters.normalize(scores);
  String best = Counters.argmax(scores);
  // Walk down the ranked list until a label type checks.
  // NO_RELATION is always an option somewhere in there, so safe to keep going...
  while (!NO_RELATION.equals(best)) {
    if (!RelationType.fromString(best).isPresent()) {
      break;  // labels with no known RelationType are accepted as-is
    }
    RelationType rel = RelationType.fromString(best).get();
    if (rel.validNamedEntityLabels.contains(input.objectType)
        && rel.entityType == input.subjectType) {
      break;  // type checks: done
    }
    scores.remove(best);
    Counters.normalize(scores);
    best = Counters.argmax(scores);
  }
  return Pair.makePair(best, scores.getCount(best));
}
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:27,代码来源:KBPStatisticalExtractor.java
示例4: classify
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Score the given input, returning both the classification decision and the
 * probability of that decision.
 * Note that this method will not return a relation which does not type check.
 *
 * @param input The input to classify.
 * @return A pair with the relation we classified into, along with its confidence.
 */
public Pair<String,Double> classify(KBPInput input) {
  RVFDatum<String, String> datum = new RVFDatum<>(features(input));
  Counter<String> scores = classifier.scoresOf(datum);
  // Convert raw classifier scores into a probability distribution.
  Counters.expInPlace(scores);
  Counters.normalize(scores);
  String best = Counters.argmax(scores);
  // While it doesn't type check, continue going down the list.
  // NO_RELATION is always an option somewhere in there, so safe to keep going...
  // BUGFIX: guard Optional.get() with isPresent() (as the sibling classify()
  // does); previously an unrecognized label threw NoSuchElementException.
  while (!NO_RELATION.equals(best) &&
         (edu.stanford.nlp.ie.KBPRelationExtractor.RelationType.fromString(best).isPresent()
          && (!edu.stanford.nlp.ie.KBPRelationExtractor.RelationType.fromString(best).get().validNamedEntityLabels.contains(input.objectType) ||
              RelationType.fromString(best).get().entityType != input.subjectType))) {
    scores.remove(best);
    Counters.normalize(scores);
    best = Counters.argmax(scores);
  }
  return Pair.makePair(best, scores.getCount(best));
}
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:27,代码来源:KBPStatisticalExtractor.java
示例5: featurizeEecFineMaxNer
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * For each tuple in the connected component, fires a feature when the argmax
 * fine-grained NER tags of both arguments agree with the event type's expected
 * argument types (or, for multi-token types, when the expected type has any
 * nonzero mass in the tuple's NER distribution).
 *
 * @param cc the connected component whose tuple factors receive the feature
 */
public static void featurizeEecFineMaxNer(ConnectedComponent cc) {
  for (int i = 0; i < cc.tuples.size(); i++) {
    Tuple t = cc.tuples.get(i);
    Factor tf = cc.tupleFactors.get(i);
    Counter<String> t1s = t.getArg1FineGrainedNer();
    Counter<String> t2s = t.getArg2FineGrainedNer();
    String fner1max = Counters.argmax(t1s);
    String fner2max = Counters.argmax(t2s);
    if ((fner1max.equals(cc.eventtype.arg1type)
        || cc.eventtype.arg1typelen > 1 && t1s.getCount(cc.eventtype.arg1type) > 0)
        && (fner2max.equals(cc.eventtype.arg2type)
        || cc.eventtype.arg2typelen > 1 && t2s.getCount(cc.eventtype.arg2type) > 0)) {
      // BUGFIX: the scraped page replaced this literal with an obfuscated
      // "[email protected]" email placeholder; restored to the "fnermax@" feature
      // prefix (cf. the commented-out tf.add("fnermax") below).
      // TODO(review): confirm exact literal against the upstream repository.
      tf.add("fnermax@" + cc.eventtype.arg1type + "_" + cc.eventtype.arg2type);
      // tf.add("fnermax");
    }
  }
}
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:18,代码来源:NewsSpikeExtractor.java
示例6: featurizeEecFineMaxNer
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * For each tuple in the connected component, fires a feature when the argmax
 * fine-grained NER tags of both arguments agree with the event type's expected
 * argument types (or, for multi-token types, when the expected type has any
 * nonzero mass in the tuple's NER distribution).
 *
 * @param cc the connected component whose tuple factors receive the feature
 */
public static void featurizeEecFineMaxNer(ConnectedComponent cc) {
  for (int i = 0; i < cc.tuples.size(); i++) {
    Tuple t = cc.tuples.get(i);
    Factor tf = cc.tupleFactors.get(i);
    Counter<String> t1s = t.getArg1FineGrainedNer();
    Counter<String> t2s = t.getArg2FineGrainedNer();
    String fner1max = Counters.argmax(t1s);
    String fner2max = Counters.argmax(t2s);
    if ((fner1max.equals(cc.eventtype.arg1type) ||
        cc.eventtype.arg1typelen > 1
        && t1s.getCount(cc.eventtype.arg1type) > 0)
        &&
        (fner2max.equals(cc.eventtype.arg2type) ||
        cc.eventtype.arg2typelen > 1
        && t2s.getCount(cc.eventtype.arg2type) > 0)) {
      // BUGFIX: the scraped page replaced this literal with an obfuscated
      // "[email protected]" email placeholder; restored to the "fnermax@" feature
      // prefix (cf. the commented-out tf.add("fnermax") below).
      // TODO(review): confirm exact literal against the upstream repository.
      tf.add("fnermax@" + cc.eventtype.arg1type + "_"
          + cc.eventtype.arg2type);
      // tf.add("fnermax");
    }
  }
}
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:22,代码来源:NewsSpikeGenerate.java
示例7: trainRVF
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Trains a linear classifier over real-valued-feature datums.
 * Also stores the trained classifier in {@code this.classifier}.
 *
 * @param list_feature2values per-instance feature-to-value maps
 * @param list_labels gold label for each instance (parallel to the feature list)
 * @return the trained classifier
 */
public LinearClassifier<String, String> trainRVF(List<HashMap<String, Double>> list_feature2values,
    List<String> list_labels) {
  // Build one RVFDatum per (features, label) pair.
  // (Was a raw RVFDatum; parameterized to avoid unchecked warnings.)
  List<Datum<String, String>> trainingData = new ArrayList<>();
  for (int i = 0; i < list_feature2values.size(); i++) {
    trainingData.add(new RVFDatum<>(Counters.fromMap(list_feature2values.get(i)), list_labels.get(i)));
  }
  // Build a classifier factory with the original hyperparameters.
  LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
  factory.setSigma(3);
  factory.setEpsilon(15);
  factory.useQuasiNewton();
  factory.setVerbose(true);
  LinearClassifier<String, String> classifier = factory.trainClassifier(trainingData);
  this.classifier = classifier;
  return classifier;
}
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:27,代码来源:StanfordRegression.java
示例8: toString
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Renders the schema: header, document name, per-entity NER types (one line
 * per entity, tab-separated), relations, and the top-200 token counts.
 * Uses StringBuilder (was StringBuffer): the buffer is method-local, so the
 * synchronized StringBuffer bought nothing.
 *
 * @return a human-readable multi-line dump of this schema
 */
public String toString() {
  StringBuilder str = new StringBuilder("--SCHEMA--\n");
  str.append(docname).append('\n');
  if( entityNER != null )
    for( Map.Entry<Integer, Set<TextEntity.TYPE>> entry : entityNER.entrySet() ) {
      str.append(entry.getKey());
      for( TextEntity.TYPE type : entry.getValue() )
        str.append('\t').append(type);
      str.append('\n');
    }
  for( Relation rel : relations )
    str.append(rel).append('\n');
  str.append(Counters.toString(tokenCounts, 200));
  str.append('\n');
  return str.toString();
}
开发者ID:nchambers,项目名称:schemas,代码行数:20,代码来源:Schema.java
示例9: updateCountsWith
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Folds a partial update's delta counts into the global counters and applies
 * its word-to-class reassignments.
 *
 * @param result the partial state computed by a worker
 * @return the number of words whose class assignment actually changed
 */
private int updateCountsWith(PartialStateUpdate result) {
  // Fold the per-class deltas into the global counts.
  Counters.addInPlace(classCount, result.deltaClassCount);
  for (Integer classId : result.deltaClassHistoryCount.firstKeySet()) {
    Counter<NgramHistory> target = this.classHistoryCount.getCounter(classId);
    Counter<NgramHistory> delta = result.deltaClassHistoryCount.getCounter(classId);
    Counters.addInPlace(target, delta);
  }
  // Apply the proposed assignments, counting how many actually changed.
  int numChanged = 0;
  for (Map.Entry<IString, Integer> assignment : result.wordToClass.entrySet()) {
    IString word = assignment.getKey();
    int previousClass = wordToClass.get(word);
    int proposedClass = assignment.getValue();
    if (previousClass != proposedClass) {
      numChanged++;
      wordToClass.put(word, proposedClass);
    }
  }
  return numChanged;
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:23,代码来源:MakeWordClasses.java
示例10: featureWhiteList
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Builds a feature whitelist: counts, for each feature, in how many segments'
 * n-best lists it appears, and keeps those seen in at least minSegmentCount.
 *
 * @param nbest the n-best lists over all segments
 * @param minSegmentCount minimum number of segments a feature must appear in
 * @return the set of sufficiently frequent feature names
 */
public static Set<String> featureWhiteList(FlatNBestList nbest, int minSegmentCount) {
  Counter<String> segmentCountsPerFeature = new ClassicCounter<String>();
  for (List<ScoredFeaturizedTranslation<IString, String>> nbestlist : nbest.nbestLists()) {
    // Collect each feature at most once per segment.
    Set<String> featuresInSegment = new HashSet<String>();
    for (ScoredFeaturizedTranslation<IString, String> translation : nbestlist) {
      for (FeatureValue<String> feature : translation.features) {
        featuresInSegment.add(feature.name);
      }
    }
    for (String name : featuresInSegment) {
      segmentCountsPerFeature.incrementCount(name);
    }
  }
  // keysAbove is inclusive of the threshold, hence minSegmentCount - 1.
  return Counters.keysAbove(segmentCountsPerFeature, minSegmentCount - 1);
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:17,代码来源:OptimizerUtils.java
示例11: updatefeatureWhiteList
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Update an existing feature whitelist according to nbestlists. Then return the features that appear
 * more than minSegmentCount times.
 *
 * @param featureWhitelist running per-feature segment counts, updated in place
 * @param nbestlists n-best translation lists, one per segment
 * @param minSegmentCount minimum number of segments a feature must appear in
 * @return features that appear more than minSegmentCount times
 */
public static Set<String> updatefeatureWhiteList(
    Counter<String> featureWhitelist,
    List<List<RichTranslation<IString, String>>> nbestlists,
    int minSegmentCount) {
  for (List<RichTranslation<IString, String>> nbestlist : nbestlists) {
    // Count each feature at most once per segment; Set.add returns false
    // for features already seen in this segment.
    Set<String> seenInSegment = new HashSet<String>(1000);
    for (RichTranslation<IString, String> translation : nbestlist) {
      for (FeatureValue<String> feature : translation.features) {
        if (seenInSegment.add(feature.name)) {
          featureWhitelist.incrementCount(feature.name);
        }
      }
    }
  }
  // keysAbove is inclusive of the threshold, hence minSegmentCount - 1.
  return Counters.keysAbove(featureWhitelist, minSegmentCount - 1);
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:27,代码来源:OptimizerUtils.java
示例12: updateBest
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Atomically updates the globally best weight vector / objective value.
 * An update is accepted when the objective improves (or ties, if tie-breaking
 * is enabled) and the weights are non-zero, or unconditionally when forced.
 *
 * @param newWts candidate weight vector
 * @param obj candidate objective value (lower is better; printed negated)
 * @param force accept the candidate regardless of objective or norm
 * @return true if the global best was replaced
 */
public static boolean updateBest(Counter<String> newWts, double obj, boolean force) {
  // Reject all-zero weight vectors unless the update is forced.
  boolean nonZero = Counters.L2Norm(newWts) > 0.0;
  synchronized (MERT.class) {
    boolean improved = bestObj > obj;
    boolean tied = bestObj == obj && breakTiesWithLastBest;
    if (improved) {
      System.err.printf("\n<<<IMPROVED BEST: %f -> %f with {{{%s}}}.>>>\n",
          -bestObj, -obj, Counters.toString(newWts, 100));
    } else if (tied) {
      System.err.printf("\n<<<SAME BEST: %f with {{{%s}}}.>>>\n", -bestObj,
          Counters.toString(newWts, 100));
    }
    if (force) {
      System.err.printf("\n<<<FORCED BEST UPDATE: %f -> %f>>>\n", -bestObj,
          -obj);
    }
    if (((improved || tied) && nonZero) || force) {
      bestWts = newWts;
      bestObj = obj;
      return true;
    }
    return false;
  }
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:26,代码来源:MERT.java
示例13: getMostFrequentTokens
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Reads the whole stream, counts whitespace-separated tokens, and returns
 * the k most frequent ones. The reader is consumed but not closed (the
 * caller owns it).
 *
 * @param reader source of whitespace-tokenized text
 * @param k number of top tokens to keep
 * @return the k most frequent tokens
 * @throws IOException if reading fails
 */
private static Set<String> getMostFrequentTokens(LineNumberReader reader, int k) throws IOException {
  Counter<String> tokenCounts = new ClassicCounter<String>();
  String line;
  while ((line = reader.readLine()) != null) {
    // Fixed C-style array declaration (String tokens[]) to idiomatic form.
    String[] tokens = line.split("\\s+");
    for (String t : tokens) {
      tokenCounts.incrementCount(t);
    }
  }
  // retainTop prunes the counter in place down to the k largest counts.
  Counters.retainTop(tokenCounts, k);
  // Removed the pointless `tokenCounts = null` — locals are GC'd on return.
  return new HashSet<>(tokenCounts.keySet());
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:19,代码来源:DependencyBnBPreorderer.java
示例14: toDistributionString
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Similar to histogram but exact values of the weights
 * to see whether there are many equal weights.
 *
 * @return A human readable string about the classifier distribution.
 */
public String toDistributionString(int threshold) {
  // Tally how often each exact weight value occurs across the whole matrix.
  Counter<Double> weightCounts = new ClassicCounter<Double>();
  for (double[] row : weights) {
    for (double w : row) {
      weightCounts.incrementCount(w);
    }
  }
  StringBuilder sb = new StringBuilder();
  sb.append("Total number of weights: ").append(totalSize());
  sb.append("Counts of weights\n");
  Set<Double> frequent = Counters.keysAbove(weightCounts, threshold);
  sb.append(frequent.size()).append(" keys occur more than ").append(threshold).append(" times ");
  return sb.toString();
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:22,代码来源:LinearClassifier.java
示例15: makeProbabilitiesElement
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Builds a &lt;probabilities&gt; XML element for the object's label
 * distribution: one &lt;probability&gt; child (label + value) per type,
 * highest magnitude first. Empty element when no probabilities are set.
 *
 * @param object the extraction object whose type probabilities to serialize
 * @param curNS XML namespace for the created elements
 * @return the populated (possibly empty) probabilities element
 */
private static Element makeProbabilitiesElement(ExtractionObject object, String curNS) {
  Element probs = new Element("probabilities", curNS);
  if (object.getTypeProbabilities() == null) {
    return probs;
  }
  for (Pair<String, Double> labelAndValue :
      Counters.toDescendingMagnitudeSortedListWithCounts(object.getTypeProbabilities())) {
    Element label = new Element("label", curNS);
    label.appendChild(labelAndValue.first);
    Element value = new Element("value", curNS);
    value.appendChild(labelAndValue.second.toString());
    Element prob = new Element("probability", curNS);
    prob.appendChild(label);
    prob.appendChild(value);
    probs.appendChild(prob);
  }
  return probs;
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:18,代码来源:XMLOutputter.java
示例16: toDistributionString
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Similar to histogram but exact values of the weights
 * to see whether there are many equal weights.
 *
 * @param threshold only weight values occurring more than this many times are counted
 * @return A human readable string about the classifier distribution.
 */
public String toDistributionString(int threshold) {
  // Fixed misspelled parameter name ("treshold"); indexed loops replaced
  // with enhanced-for since the indices were never used.
  Counter<Double> weightCounts = new ClassicCounter<Double>();
  StringBuilder s = new StringBuilder();
  s.append("Total number of weights: ").append(totalSize());
  for (double[] featureWeights : weights) {
    for (double weight : featureWeights) {
      weightCounts.incrementCount(weight);
    }
  }
  s.append("Counts of weights\n");
  Set<Double> keys = Counters.keysAbove(weightCounts, threshold);
  s.append(keys.size()).append(" keys occur more than ").append(threshold).append(" times ");
  return s.toString();
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:22,代码来源:LinearClassifier.java
示例17: printableObject
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Returns true if it's worth saving/printing this object
 * This happens in two cases:
 * 1. The type of the object is not nilLabel
 * 2. The type of the object is nilLabel but the second ranked label is within the given beam (0 -- 100) of the first choice
 * @param beam
 * @param nilLabel
 */
public boolean printableObject(double beam, String nilLabel) {
  List<Pair<String, Double>> ranked = Counters.toDescendingMagnitudeSortedListWithCounts(typeProbabilities);
  if (ranked.isEmpty()) {
    return false;
  }
  // Case 1: first choice is not nil.
  if (!ranked.get(0).first.equals(nilLabel)) {
    return true;
  }
  // Case 2: first choice is nil, but the runner-up is within the beam.
  return ranked.size() > 1 && beam > 0
      && 100.0 * (ranked.get(0).second - ranked.get(1).second) < beam;
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:25,代码来源:ExtractionObject.java
示例18: scoreOf
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Scores a single instance given its feature-to-value map.
 *
 * @param ftrValues feature name to real value for this instance
 * @return a map from each label to its classifier score
 */
public Map<String, Double> scoreOf(HashMap<String, Double> ftrValues) {
  // Parameterized RVFDatum (was raw, causing unchecked warnings).
  // The label argument is unused when only scoring, hence "".
  Datum<String, String> d = new RVFDatum<String, String>(Counters.fromMap(ftrValues), "");
  Counter<String> c = classifier.scoresOf(d);
  HashMap<String, Double> label2score = new HashMap<String, Double>();
  for (String label : c.keySet()) {
    label2score.put(label, c.getCount(label));
  }
  return label2score;
}
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:10,代码来源:StanfordRegression.java
示例19: counter2str
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Renders a counter as "[k=v, ...]" sorted by count; "{}" when null.
 *
 * @param c the counter to render, may be null
 * @return a human-readable string form of the counter
 */
public static String counter2str(Counter<String> c) {
  return (c == null)
      ? "{}"
      : Counters.toSortedString(c, Integer.MAX_VALUE, "%s=%f", ", ", "[%s]");
}
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:8,代码来源:Util.java
示例20: findBestDocumentsInFile
import edu.stanford.nlp.stats.Counters; //导入依赖的package包/类
/**
 * Traverses the lines in the given file, each line is an IntCounter representing
 * a single document from Gigaword. Compare each line to all files in the global
 * MUC input, see if it is close to a MUC document.
 * Note: a document name is added once per MUC vector it matches, so the
 * returned list may contain duplicates (original behavior, preserved).
 * @param path Path to a file of IntCounters.
 * @return A list of document names that we matched.
 */
private List<String> findBestDocumentsInFile(String path) {
  List<String> matches = new ArrayList<String>();
  // BUGFIX: try-with-resources — the reader was previously never closed,
  // leaking a file handle on every call.
  try (BufferedReader in = path.endsWith(".gz")
      ? new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(path))))
      : new BufferedReader(new FileReader(path))) {
    String line;
    while( (line = in.readLine()) != null ) {
      // e.g. nyt_eng_199411 kidnap 40 release 23 ...
      int tab = line.indexOf('\t');
      IntCounter<String> gigadoc = ParsesToCounts.fromString(line.substring(tab + 1));
      Counter<String> gigavec = multiplyByIDF(gigadoc);
      for( Counter<String> mucdoc : _mucVectors ) {
        double cosine = Counters.cosine(gigavec, mucdoc);
        if( cosine > _minCosine ) {
          String docname = line.substring(0, tab);
          System.out.println("Matched " + docname);
          matches.add(docname);
        }
      }
    }
  } catch( Exception ex ) {
    System.err.println("Error opening " + path);
    ex.printStackTrace();
  }
  return matches;
}
开发者ID:nchambers,项目名称:schemas,代码行数:40,代码来源:IRDocSim.java
注:本文中的edu.stanford.nlp.stats.Counters类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论