This article collects typical usage examples of the Java class edu.stanford.nlp.ling.Datum. If you are wondering what the Datum class is for, or how to use it in your own code, the curated examples below may help.
The Datum class lives in the edu.stanford.nlp.ling package. The sections below present 19 code examples of the Datum class, ordered by popularity.
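Before diving into the examples, here is a minimal standalone sketch (not taken from any of the projects below) showing the two Datum flavours that recur throughout this page: BasicDatum for boolean-valued features and RVFDatum for real-valued features. The class name DatumSketch and the feature names are illustrative only.
import java.util.Arrays;

import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.stats.ClassicCounter;

public class DatumSketch {
  public static void main(String[] args) {
    // Boolean-valued features: a feature is either present or absent.
    Datum<String, String> basic =
        new BasicDatum<String, String>(Arrays.asList("fever", "cough"), "flu");
    // Real-valued features: each feature carries a weight.
    ClassicCounter<String> weights = new ClassicCounter<String>();
    weights.incrementCount("fever", 3.5);
    weights.incrementCount("cough", 1.1);
    RVFDatum<String, String> rvf = new RVFDatum<String, String>(weights, "flu");
    System.out.println(basic.label() + " " + basic.asFeatures());
    System.out.println(rvf.label() + " " + rvf.asFeaturesCounter());
  }
}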
Example 1: trainRVF
import edu.stanford.nlp.ling.Datum; // import the required package/class
public LinearClassifier<String, String> trainRVF(List<HashMap<String, Double>> list_feature2values,
    List<String> list_labels) {
  // Wrap each feature map and its label in a real-valued RVFDatum.
  List<Datum<String, String>> trainingData = new ArrayList<Datum<String, String>>();
  for (int i = 0; i < list_feature2values.size(); i++) {
    HashMap<String, Double> feature2values = list_feature2values.get(i);
    String label = list_labels.get(i);
    Datum<String, String> d = new RVFDatum<String, String>(Counters.fromMap(feature2values), label);
    trainingData.add(d);
  }
  // Build a classifier factory
  LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
  factory.setSigma(3);
  factory.setEpsilon(15);
  factory.useQuasiNewton();
  factory.setVerbose(true);
  LinearClassifier<String, String> classifier = factory.trainClassifier(trainingData);
  // {
  //   ArrayList<String> temp = new ArrayList<String>();
  //   temp.add("NS=" + GREEN);
  //   System.out.println(classifier.scoreOf(new BasicDatum<String,
  //       String>(temp, BROKEN), BROKEN));
  // }
  this.classifier = classifier;
  return classifier;
}
Developer: zhangcongle | Project: NewsSpikeRe | Lines: 27 | Source: StanfordRegression.java
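A possible way to invoke the method above; this is an untested sketch that assumes the surrounding StanfordRegression class (named in the source reference) can simply be instantiated, and the feature names, values, and labels are invented.
public static void demoTrainRVF() {
  List<HashMap<String, Double>> featureMaps = new ArrayList<HashMap<String, Double>>();
  List<String> labels = new ArrayList<String>();

  HashMap<String, Double> f1 = new HashMap<String, Double>();
  f1.put("length", 12.0);
  f1.put("numCaps", 2.0);
  featureMaps.add(f1);
  labels.add("POSITIVE");

  HashMap<String, Double> f2 = new HashMap<String, Double>();
  f2.put("length", 3.0);
  f2.put("numCaps", 0.0);
  featureMaps.add(f2);
  labels.add("NEGATIVE");

  StanfordRegression regression = new StanfordRegression(); // hypothetical no-arg constructor
  LinearClassifier<String, String> model = regression.trainRVF(featureMaps, labels);
  // Score a new feature map against the trained model.
  System.out.println(model.classOf(new RVFDatum<String, String>(Counters.fromMap(f1), "")));
}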
Example 2: score
import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> double score(ProbabilisticClassifier<L,F> classifier, GeneralDataset<L,F> data) {
  ArrayList<Pair<Double, Integer>> dataScores = new ArrayList<Pair<Double, Integer>>();
  for (int i = 0; i < data.size(); i++) {
    Datum<L,F> d = data.getRVFDatum(i);
    Counter<L> scores = classifier.logProbabilityOf(d);
    int labelD = d.label().equals(posLabel) ? 1 : 0;
    dataScores.add(new Pair<Double, Integer>(Math.exp(scores.getCount(posLabel)), labelD));
  }
  PRCurve prc = new PRCurve(dataScores);
  confWeightedAccuracy = prc.cwa();
  accuracy = prc.accuracy();
  optAccuracy = prc.optimalAccuracy();
  optConfWeightedAccuracy = prc.optimalCwa();
  logLikelihood = prc.logLikelihood();
  accrecall = prc.cwaArray();
  optaccrecall = prc.optimalCwaArray();
  return accuracy;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 23 | Source: AccuracyStats.java
Example 3: trainClassifier
import edu.stanford.nlp.ling.Datum; // import the required package/class
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) {
  Minimizer<DiffFunction> minimizer = getMinimizer();
  Index<F> featureIndex = Generics.newIndex();
  Index<L> labelIndex = Generics.newIndex();
  for (Datum<L, F> d : dataIterable) {
    labelIndex.add(d.label());
    featureIndex.addAll(d.asFeatures()); // If there are duplicates, it doesn't add them again.
  }
  System.err.println(String.format("Training linear classifier with %d features and %d labels", featureIndex.size(), labelIndex.size()));
  LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(dataIterable, logPrior, featureIndex, labelIndex);
  objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(objective, TOL, initial);
  LinearClassifier<L, F> classifier = new LinearClassifier<L, F>(objective.to2D(weights), featureIndex, labelIndex);
  return classifier;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 20 | Source: LinearClassifierFactory.java
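A minimal sketch of calling the factory method above with boolean-valued data; the features and labels are invented, and only APIs that appear elsewhere on this page (BasicDatum, LinearClassifierFactory, classOf) are used.
public static void demoTrainClassifier() {
  List<Datum<String, String>> data = new ArrayList<Datum<String, String>>();
  data.add(new BasicDatum<String, String>(Arrays.asList("fever", "cough"), "flu"));
  data.add(new BasicDatum<String, String>(Arrays.asList("sneeze", "cough"), "cold"));
  data.add(new BasicDatum<String, String>(Arrays.asList("fever", "ache"), "flu"));

  LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
  Classifier<String, String> classifier = factory.trainClassifier(data);

  // The label of a test datum is ignored at classification time.
  Datum<String, String> test = new BasicDatum<String, String>(Arrays.asList("cough", "fever"), "");
  System.out.println(classifier.classOf(test)); // prints the predicted label
}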
Example 4: updateDerivative
import edu.stanford.nlp.ling.Datum; // import the required package/class
private void updateDerivative(Datum<L,F> datum, double[] probs, Counter<Triple<Integer,Integer,Integer>> feature2classPairDerivatives) {
  for (F feature : datum.asFeatures()) {
    int fID = labeledDataset.featureIndex.indexOf(feature);
    if (fID >= 0) {
      for (int c = 0; c < numClasses; c++) {
        for (int cPrime = 0; cPrime < numClasses; cPrime++) {
          if (cPrime == c) {
            feature2classPairDerivatives.incrementCount(new Triple<Integer,Integer,Integer>(fID,c,cPrime), - probs[c]*(1-probs[c])*valueOfFeature(feature,datum));
          } else {
            feature2classPairDerivatives.incrementCount(new Triple<Integer,Integer,Integer>(fID,c,cPrime), probs[c]*probs[cPrime]*valueOfFeature(feature,datum));
          }
        }
      }
    }
  }
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 17 | Source: GeneralizedExpectationObjectiveFunction.java
Example 5: mapDataset
import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Maps a dataset's labels into a new label space.
 *
 * @param dataset the dataset to convert
 * @return a new GeneralDataset whose features and ids map exactly to those of this GeneralDataset,
 *         but whose labels are converted via {@code labelMapping}, with unmapped labels replaced by {@code defaultLabel}
 */
public <L2> GeneralDataset<L2,F> mapDataset(GeneralDataset<L,F> dataset, Index<L2> newLabelIndex, Map<L,L2> labelMapping, L2 defaultLabel) {
  GeneralDataset<L2,F> newDataset;
  if (dataset instanceof RVFDataset)
    newDataset = new RVFDataset<L2,F>(this.featureIndex, newLabelIndex);
  else
    newDataset = new Dataset<L2,F>(this.featureIndex, newLabelIndex);
  this.featureIndex.lock();
  this.labelIndex.lock();
  //System.out.println("inside mapDataset: dataset size:"+dataset.size());
  for (int i = 0; i < dataset.size(); i++) {
    //System.out.println("inside mapDataset: adding datum number"+i);
    Datum<L,F> d = dataset.getDatum(i);
    Datum<L2,F> d2 = mapDatum(d, labelMapping, defaultLabel);
    newDataset.add(d2);
  }
  //System.out.println("old Dataset stats: numData:"+dataset.size()+" numfeatures:"+dataset.featureIndex().size()+" numlabels:"+dataset.labelIndex.size());
  //System.out.println("new Dataset stats: numData:"+newDataset.size()+" numfeatures:"+newDataset.featureIndex().size()+" numlabels:"+newDataset.labelIndex.size());
  //System.out.println("this dataset stats: numData:"+size()+" numfeatures:"+featureIndex().size()+" numlabels:"+labelIndex.size());
  this.featureIndex.unlock();
  this.labelIndex.unlock();
  return newDataset;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 29 | Source: GeneralDataset.java
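A hypothetical use of mapDataset, collapsing fine-grained labels into coarser ones; the datasets, labels, and features are invented, and this assumes unmapped labels are replaced by the defaultLabel argument.
public static void demoMapDataset() {
  Dataset<String, String> data = new Dataset<String, String>();
  data.add(new BasicDatum<String, String>(Arrays.asList("fever", "cough"), "flu"));
  data.add(new BasicDatum<String, String>(Arrays.asList("sneeze"), "cold"));
  data.add(new BasicDatum<String, String>(Arrays.asList("rash"), "allergy"));

  Index<String> coarseLabels = new HashIndex<String>();
  coarseLabels.add("INFECTION");
  coarseLabels.add("OTHER");
  Map<String, String> mapping = new HashMap<String, String>();
  mapping.put("flu", "INFECTION");
  mapping.put("cold", "INFECTION");

  // "allergy" has no mapping, so it should end up as the default label "OTHER".
  GeneralDataset<String, String> coarse = data.mapDataset(data, coarseLabels, mapping, "OTHER");
  System.out.println(coarse.size() + " datums, " + coarse.labelIndex().size() + " labels");
}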
Example 6: scoresOf
import edu.stanford.nlp.ling.Datum; // import the required package/class
/** Construct a counter with keys the labels of the classifier and
 *  values the score (unnormalized log probability) of each class.
 */
@Override
public Counter<L> scoresOf(Datum<L, F> example) {
  if (example instanceof RVFDatum<?, ?>) return scoresOfRVFDatum((RVFDatum<L,F>) example);
  Collection<F> feats = example.asFeatures();
  int[] features = new int[feats.size()];
  int i = 0;
  for (F f : feats) {
    int index = featureIndex.indexOf(f);
    if (index >= 0) {
      features[i++] = index;
    } else {
      //System.err.println("FEATURE LESS THAN ZERO: " + f);
    }
  }
  int[] activeFeatures = new int[i];
  System.arraycopy(features, 0, activeFeatures, 0, i);
  Counter<L> scores = new ClassicCounter<L>();
  for (L lab : labels()) {
    scores.setCount(lab, scoreOf(activeFeatures, lab));
  }
  return scores;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 26 | Source: LinearClassifier.java
Example 7: svmLightLineToDatum
import edu.stanford.nlp.ling.Datum; // import the required package/class
public static Datum<String, String> svmLightLineToDatum(String l) {
  line1++;
  l = l.replaceAll("#.*", ""); // remove any trailing comments
  String[] line = l.split("\\s+");
  Collection<String> features = new ArrayList<String>();
  for (int i = 1; i < line.length; i++) {
    String[] f = line[i].split(":");
    if (f.length != 2) {
      System.err.println("Dataset error: line " + line1);
    }
    int val = (int) Double.parseDouble(f[1]);
    for (int j = 0; j < val; j++) {
      features.add(f[0]);
    }
  }
  features.add(String.valueOf(Integer.MAX_VALUE)); // a constant feature for a class
  Datum<String, String> d = new BasicDatum<String, String>(features, line[0]);
  return d;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 20 | Source: Dataset.java
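For reference, an input line in SVMlight format as parsed above looks like the sketch below: the first token is the class label, each remaining token is featureId:count, and anything after # is stripped as a comment. The call assumes the method is reachable as the public static member of Dataset shown in the listing; the sample line is made up.
public static void demoSvmLight() {
  // label "1"; feature "101" repeated twice, feature "7" once
  String line = "1 101:2 7:1 # an optional trailing comment";
  Datum<String, String> d = Dataset.svmLightLineToDatum(line);
  System.out.println(d.label() + " -> " + d.asFeatures());
}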
Example 8: main
import edu.stanford.nlp.ling.Datum; // import the required package/class
public static void main(String[] args) throws Exception {
  ColumnDataClassifier columnDataClassifier = new ColumnDataClassifier("examples/cheese2007.prop");
  Classifier<String,String> classifier =
      columnDataClassifier.makeClassifier(columnDataClassifier.readTrainingExamples("examples/cheeseDisease.train"));
  for (String line : ObjectBank.getLineIterator("examples/cheeseDisease.test", "utf-8")) {
    Datum<String,String> d = columnDataClassifier.makeDatumFromLine(line);
    System.out.println(line + " ==> " + classifier.classOf(d));
  }
}
Developer: PacktPublishing | Project: Java-Data-Science-Cookbook | Lines: 10 | Source: StanfordClassifier.java
Example 9: trainMultinomialClassifier
import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Train a multinomial classifier off of the provided dataset.
 *
 * @param dataset The dataset to train the classifier off of.
 * @param featureThreshold Discard features occurring fewer than this many times.
 * @param sigma The regularization parameter passed to the classifier factory.
 * @return A classifier.
 */
public static Classifier<String, String> trainMultinomialClassifier(
    GeneralDataset<String, String> dataset,
    int featureThreshold,
    double sigma) {
  // Set up the dataset and factory
  log.info("Applying feature threshold (" + featureThreshold + ")...");
  dataset.applyFeatureCountThreshold(featureThreshold);
  log.info("Randomizing dataset...");
  dataset.randomize(42L);
  log.info("Creating factory...");
  LinearClassifierFactory<String, String> factory = initFactory(sigma);
  // Train the final classifier
  log.info("BEGIN training");
  LinearClassifier<String, String> classifier = factory.trainClassifier(dataset);
  log.info("END training");
  // Debug
  Accuracy trainAccuracy = new Accuracy();
  for (Datum<String, String> datum : dataset) {
    String guess = classifier.classOf(datum);
    trainAccuracy.predict(Collections.singleton(guess), Collections.singleton(datum.label()));
  }
  log.info("Training accuracy:");
  log.info(trainAccuracy.toString());
  log.info("");
  // Return the classifier
  return classifier;
}
Developer: intel-analytics | Project: InformationExtraction | Lines: 37 | Source: KBPStatisticalExtractor.java
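A sketch of how the method above might be called, assuming it is reachable as the public static member of KBPStatisticalExtractor shown in the listing; the relation labels and features are invented, and featureThreshold is set to 0 so the toy features survive the count filter.
public static void demoTrainMultinomial() {
  RVFDataset<String, String> dataset = new RVFDataset<String, String>();
  ClassicCounter<String> born = new ClassicCounter<String>();
  born.incrementCount("word_between=born", 1.0);
  dataset.add(new RVFDatum<String, String>(born, "per:city_of_birth"));
  ClassicCounter<String> works = new ClassicCounter<String>();
  works.incrementCount("word_between=works", 1.0);
  dataset.add(new RVFDatum<String, String>(works, "no_relation"));

  // sigma controls the strength of the regularizer passed to the factory.
  Classifier<String, String> classifier =
      KBPStatisticalExtractor.trainMultinomialClassifier(dataset, 0, 1.0);
  System.out.println(classifier.classOf(dataset.getDatum(0)));
}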
Example 10: trainBasic
import edu.stanford.nlp.ling.Datum; // import the required package/class
public LinearClassifier<String, String> trainBasic(
    List<List<String>> list_features, List<String> list_labels) {
  // Wrap each feature list and its label in a boolean-valued BasicDatum.
  List<Datum<String, String>> trainingData = new ArrayList<Datum<String, String>>();
  for (int i = 0; i < list_features.size(); i++) {
    List<String> features = list_features.get(i);
    String label = list_labels.get(i);
    Datum<String, String> d = new BasicDatum<String, String>(features, label);
    trainingData.add(d);
  }
  // Build a classifier factory
  LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
  // factory.setTol(tol);
  // factory.setSigma(1);
  // factory.setEpsilon(0.01);
  // factory.useQuasiNewton();
  factory.setVerbose(true);
  LinearClassifier<String, String> classifier = factory.trainClassifier(trainingData);
  // {
  //   ArrayList<String> temp = new ArrayList<String>();
  //   temp.add("NS=" + GREEN);
  //   System.out.println(classifier.scoreOf(new BasicDatum<String,
  //       String>(temp, BROKEN), BROKEN));
  // }
  this.classifier = classifier;
  return classifier;
}
Developer: zhangcongle | Project: NewsSpikeRe | Lines: 28 | Source: StanfordRegression.java
Example 11: scoreOf
import edu.stanford.nlp.ling.Datum; // import the required package/class
public Map<String, Double> scoreOf(List<String> features) {
  Datum<String, String> d = new BasicDatum<String, String>(features, "");
  HashMap<String, Double> label2score = new HashMap<String, Double>();
  Counter<String> c = classifier.scoresOf(d);
  for (String label : c.keySet()) {
    label2score.put(label, c.getCount(label));
  }
  return label2score;
}
Developer: zhangcongle | Project: NewsSpikeRe | Lines: 10 | Source: StanfordRegression.java
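A hypothetical round trip through the two StanfordRegression helpers shown on this page: train a boolean-feature model with trainBasic (Example 10 above), then read per-label scores back with scoreOf. The no-arg constructor and the data are assumptions.
public static void demoScoreOf() {
  StanfordRegression regression = new StanfordRegression(); // hypothetical no-arg constructor
  List<List<String>> features = new ArrayList<List<String>>();
  List<String> labels = new ArrayList<String>();
  features.add(Arrays.asList("fever", "cough"));
  labels.add("flu");
  features.add(Arrays.asList("sneeze"));
  labels.add("cold");
  regression.trainBasic(features, labels);

  Map<String, Double> scores = regression.scoreOf(Arrays.asList("cough", "fever"));
  for (Map.Entry<String, Double> e : scores.entrySet()) {
    System.out.println(e.getKey() + " -> " + e.getValue());
  }
}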
Example 12: initMC
import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> void initMC(ProbabilisticClassifier<L,F> classifier, GeneralDataset<L,F> data) {
  //if (!(gData instanceof Dataset)) {
  //  throw new UnsupportedOperationException("Can only handle Datasets, not "+gData.getClass().getName());
  //}
  //
  //Dataset data = (Dataset)gData;
  PriorityQueue<Pair<Integer, Pair<Double, Boolean>>> q = new BinaryHeapPriorityQueue<Pair<Integer, Pair<Double, Boolean>>>();
  total = 0;
  correct = 0;
  logLikelihood = 0.0;
  for (int i = 0; i < data.size(); i++) {
    Datum<L,F> d = data.getRVFDatum(i);
    Counter<L> scores = classifier.logProbabilityOf(d);
    L guess = Counters.argmax(scores);
    L correctLab = d.label();
    double guessScore = scores.getCount(guess);
    double correctScore = scores.getCount(correctLab);
    int guessInd = data.labelIndex().indexOf(guess);
    int correctInd = data.labelIndex().indexOf(correctLab);
    total++;
    if (guessInd == correctInd) {
      correct++;
    }
    logLikelihood += correctScore;
    q.add(new Pair<Integer, Pair<Double, Boolean>>(Integer.valueOf(i), new Pair<Double, Boolean>(new Double(guessScore), Boolean.valueOf(guessInd == correctInd))), -guessScore);
  }
  accuracy = (double) correct / (double) total;
  List<Pair<Integer, Pair<Double, Boolean>>> sorted = q.toSortedList();
  scores = new double[sorted.size()];
  isCorrect = new boolean[sorted.size()];
  for (int i = 0; i < sorted.size(); i++) {
    Pair<Double, Boolean> next = sorted.get(i).second();
    scores[i] = next.first().doubleValue();
    isCorrect[i] = next.second().booleanValue();
  }
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 41 | Source: MultiClassAccuracyStats.java
Example 13: score
import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> double score(Classifier<L,F> classifier, GeneralDataset<L,F> data) {
  setLabelIndex(data.labelIndex);
  clearCounts();
  int[] labelsArr = data.getLabelsArray();
  for (int i = 0; i < data.size(); i++) {
    Datum<L, F> d = data.getRVFDatum(i);
    L guess = classifier.classOf(d);
    addGuess(guess, labelIndex.get(labelsArr[i]));
  }
  finalizeCounts();
  return getFMeasure();
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 14 | Source: MultiClassPrecisionRecallExtendedStats.java
Example 14: loglikelihood
import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Returns the log conditional likelihood of the given dataset.
 *
 * @return The log conditional likelihood of the given dataset.
 */
public double loglikelihood(List<IN> lineInfos) {
  double cll = 0.0;
  for (int i = 0; i < lineInfos.size(); i++) {
    Datum<String, String> d = makeDatum(lineInfos, i, featureFactory);
    Counter<String> c = classifier.logProbabilityOf(d);
    double total = Double.NEGATIVE_INFINITY;
    for (String s : c.keySet()) {
      total = SloppyMath.logAdd(total, c.getCount(s));
    }
    cll -= c.getCount(d.label()) - total;
  }
  // quadratic prior
  // HN: TODO: add other priors
  if (classifier instanceof LinearClassifier) {
    double sigmaSq = flags.sigma * flags.sigma;
    LinearClassifier<String, String> lc = (LinearClassifier<String, String>) classifier;
    for (String feature : lc.features()) {
      for (String classLabel : classIndex) {
        double w = lc.weight(feature, classLabel);
        cll += w * w / 2.0 / sigmaSq;
      }
    }
  }
  return cll;
}
Developer: benblamey | Project: stanford-nlp | Lines: 34 | Source: CMMClassifier.java
Example 15: add
import edu.stanford.nlp.ling.Datum; // import the required package/class
@Override
public void add(Datum<L, F> d) {
  if (d instanceof RVFDatum<?, ?>) {
    addLabel(d.label());
    addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter());
    size++;
  } else {
    addLabel(d.label());
    addFeatures(Counters.asCounter(d.asFeatures()));
    size++;
  }
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 13 | Source: RVFDataset.java
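A quick illustration of the branch above: the same RVFDataset accepts both real-valued and boolean datums, converting the latter with Counters.asCounter so that each boolean feature gets a value of 1.0. The feature names are invented.
public static void demoMixedAdd() {
  RVFDataset<String, String> data = new RVFDataset<String, String>();
  ClassicCounter<String> weighted = new ClassicCounter<String>();
  weighted.incrementCount("fever", 2.5);
  data.add(new RVFDatum<String, String>(weighted, "flu"));                  // RVFDatum branch
  data.add(new BasicDatum<String, String>(Arrays.asList("cough"), "cold")); // BasicDatum branch
  data.summaryStatistics();
  System.out.println("dataset size: " + data.size());
}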
Example 16: main
import edu.stanford.nlp.ling.Datum; // import the required package/class
public static void main(String[] args) {
  RVFDataset<String, String> data = new RVFDataset<String, String>();
  ClassicCounter<String> c1 = new ClassicCounter<String>();
  c1.incrementCount("fever", 3.5);
  c1.incrementCount("cough", 1.1);
  c1.incrementCount("congestion", 4.2);
  ClassicCounter<String> c2 = new ClassicCounter<String>();
  c2.incrementCount("fever", 1.5);
  c2.incrementCount("cough", 2.1);
  c2.incrementCount("nausea", 3.2);
  ClassicCounter<String> c3 = new ClassicCounter<String>();
  c3.incrementCount("cough", 2.5);
  c3.incrementCount("congestion", 3.2);
  data.add(new RVFDatum<String, String>(c1, "cold"));
  data.add(new RVFDatum<String, String>(c2, "flu"));
  data.add(new RVFDatum<String, String>(c3, "cold"));
  data.summaryStatistics();
  LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
  factory.useQuasiNewton();
  LinearClassifier<String, String> c = factory.trainClassifier(data);
  ClassicCounter<String> c4 = new ClassicCounter<String>();
  c4.incrementCount("cough", 2.3);
  c4.incrementCount("fever", 1.3);
  RVFDatum<String, String> datum = new RVFDatum<String, String>(c4);
  c.justificationOf((Datum<String, String>) datum);
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 35 | Source: RVFDataset.java
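As a possible follow-up to the listing above (appended at the end of its main method), the trained classifier can also report a hard decision and a probability distribution for the held-out datum; classOf and probabilityOf are the same LinearClassifier methods used elsewhere on this page.
// continuing at the end of main() above
System.out.println("predicted: " + c.classOf(datum));
Counter<String> probs = c.probabilityOf(datum);
System.out.println("P(cold) = " + probs.getCount("cold") + ", P(flu) = " + probs.getCount("flu"));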
Example 17: trainSemiSupGE
import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Trains the linear classifier using Generalized Expectation criteria as described in
 * <tt>Generalized Expectation Criteria for Semi Supervised Learning of Conditional Random Fields</tt>, Mann and McCallum, ACL 2008.
 * The original algorithm is proposed for CRFs, but it has been adapted here to LinearClassifier (which is a simpler special case of a CRF).
 * IMPORTANT: the labeled features that are passed as an argument are assumed to be binary valued, although
 * other features are allowed to be real valued.
 */
public LinearClassifier<L,F> trainSemiSupGE(GeneralDataset<L, F> labeledDataset, List<? extends Datum<L, F>> unlabeledDataList, List<F> GEFeatures, double convexComboCoeff) {
  Minimizer<DiffFunction> minimizer = minimizerCreator.create();
  LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(labeledDataset, new LogPrior(LogPrior.LogPriorType.NULL));
  GeneralizedExpectationObjectiveFunction<L,F> geObjective = new GeneralizedExpectationObjectiveFunction<L,F>(labeledDataset, unlabeledDataList, GEFeatures);
  SemiSupervisedLogConditionalObjectiveFunction semiSupObjective = new SemiSupervisedLogConditionalObjectiveFunction(objective, geObjective, null, convexComboCoeff);
  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(semiSupObjective, TOL, initial);
  return new LinearClassifier<L, F>(objective.to2D(weights), labeledDataset.featureIndex(), labeledDataset.labelIndex());
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 17 | Source: LinearClassifierFactory.java
Example 18: getModelProbs
import edu.stanford.nlp.ling.Datum; // import the required package/class
private double[] getModelProbs(Datum<L,F> datum) {
  double[] condDist = new double[labeledDataset.numClasses()];
  Counter<L> probCounter = classifier.probabilityOf(datum);
  for (L label : probCounter.keySet()) {
    int labelID = labeledDataset.labelIndex.indexOf(label);
    condDist[labelID] = probCounter.getCount(label);
  }
  return condDist;
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 10 | Source: GeneralizedExpectationObjectiveFunction.java
Example 19: GeneralizedExpectationObjectiveFunction
import edu.stanford.nlp.ling.Datum; // import the required package/class
public GeneralizedExpectationObjectiveFunction(GeneralDataset<L, F> labeledDataset, List<? extends Datum<L,F>> unlabeledDataList, List<F> geFeatures) {
  System.out.println("Number of labeled examples:" + labeledDataset.size + "\nNumber of unlabeled examples:" + unlabeledDataList.size());
  System.out.println("Number of GE features:" + geFeatures.size());
  this.numFeatures = labeledDataset.numFeatures();
  this.numClasses = labeledDataset.numClasses();
  this.labeledDataset = labeledDataset;
  this.unlabeledDataList = unlabeledDataList;
  this.geFeatures = geFeatures;
  this.classifier = new LinearClassifier<L,F>(null, labeledDataset.featureIndex, labeledDataset.labelIndex);
  computeEmpiricalStatistics(geFeatures);
  // empirical distributions don't change with iterations, so compute them only once.
  // model distributions will have to be recomputed every iteration though.
}
Developer: paulirwin | Project: Stanford.NER.Net | Lines: 14 | Source: GeneralizedExpectationObjectiveFunction.java
Note: The edu.stanford.nlp.ling.Datum examples in this article were compiled from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and redistribution or use should follow the license of the corresponding project. Do not repost without permission.