本文整理汇总了Java中edu.stanford.nlp.util.HashIndex类的典型用法代码示例。如果您正苦于以下问题:Java HashIndex类的具体用法?Java HashIndex怎么用?Java HashIndex使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
HashIndex类属于edu.stanford.nlp.util包,在下文中一共展示了HashIndex类的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: recalculateTemporaryBetas
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Rebuilds the temporary beta data structures from scratch: a fresh
 * temporary word/tag index and lexicon are created, then every training
 * tree is folded in (weighted by its tree count) via the per-tree
 * overload of this method.
 */
public void recalculateTemporaryBetas(boolean splitStates, Map<String, double[]> totalStateMass,
                                      TwoDimensionalMap<String, String, double[][]> tempUnaryBetas,
                                      ThreeDimensionalMap<String, String, String, double[][][]> tempBinaryBetas) {
  tempWordIndex = new HashIndex<String>();
  tempTagIndex = new HashIndex<String>();
  tempLex = op.tlpParams.lex(op, tempWordIndex, tempTagIndex);
  tempLex.initializeTraining(trainSize);
  for (Tree tree : trees) {
    double treeWeight = treeWeights.getCount(tree);
    if (DEBUG()) {
      System.out.println("Incrementing trees read: " + treeWeight);
    }
    tempLex.incrementTreesRead(treeWeight);
    recalculateTemporaryBetas(tree, splitStates, totalStateMass, tempUnaryBetas, tempBinaryBetas);
  }
  tempLex.finishTraining();
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:24,代码来源:SplittingGrammarExtractor.java
示例2: tagProject
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Projects a tag index onto its smoothing bin in {@code smoothTPIndex}
 * (lazily initialized from {@code tagIndex}).  Negative tag values are
 * returned unchanged.
 */
private short tagProject(short tag) {
  if (smoothTPIndex == null) {
    smoothTPIndex = new HashIndex<String>(tagIndex);
  }
  if (tag < 0) {
    // Negative tags are passed through without projection.
    return tag;
  }
  String original = smoothTPIndex.get(tag);
  String projected = TP_PREFIX + smoothTP.project(original);
  return (short) smoothTPIndex.indexOf(projected, true);
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:13,代码来源:MLEDependencyGrammar.java
示例3: initTagBins
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Fills in {@code tagBin}, which collapses each tag index into a bin
 * index.  With no {@code tagProjection} every tag gets its own bin;
 * otherwise tags sharing a projection share a bin.  Also sets
 * {@code numTagBins}.
 */
protected void initTagBins() {
  Index<String> tagBinIndex = new HashIndex<String>();
  if (DEBUG) {
    System.err.println();
    System.err.println("There are " + tagIndex.size() + " tags.");
  }
  tagBin = new int[tagIndex.size()];
  for (int i = 0; i < tagBin.length; i++) {
    String tagName = tagIndex.get(i);
    // Identity mapping when no projection is configured.
    String binName = (tagProjection == null) ? tagName : tagProjection.project(tagName);
    tagBin[i] = tagBinIndex.indexOf(binName, true);
    if (DEBUG) {
      System.err.println("initTagBins: Mapped " + tagName + " (" + i +
          ") to " + binName + " (" + tagBin[i] + ")");
    }
  }
  numTagBins = tagBinIndex.size();
  if (DEBUG) {
    System.err.println("initTagBins: tags " + tagBin.length + " bins " +
        numTagBins);
    System.err.println("tagBins: " + tagBinIndex);
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:29,代码来源:AbstractDependencyGrammar.java
示例4: addGuess
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Tallies a single (guess, gold label) pair into the precision/recall
 * counters.  A null gold label only bumps {@code noLabel}.  When
 * {@code addUnknownLabels} is set, both labels are registered in the
 * (lazily created) label index.
 */
protected void addGuess(L guess, L label, boolean addUnknownLabels)
{
  if (label == null) {
    noLabel++;
    return;
  }
  if (addUnknownLabels) {
    if (labelIndex == null) {
      labelIndex = new HashIndex<L>();
    }
    labelIndex.add(guess);
    labelIndex.add(label);
  }
  tokensCount++;
  // Only non-background guesses count toward "found guessed" (precision denominator).
  if (!guess.equals(negLabel)) {
    foundGuessed.incrementCount(guess);
  }
  // Only non-background gold labels count toward "found correct" (recall denominator).
  if (!label.equals(negLabel)) {
    foundCorrect.incrementCount(label);
  }
  if (label.equals(guess)) {
    correctGuesses.incrementCount(label);
    tokensCorrect++;
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:28,代码来源:MultiClassPrecisionRecallExtendedStats.java
示例5: initialize
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
@Override
protected void initialize(int numDatums) {
  // Reset the dataset to hold up to numDatums entries, including the
  // per-datum real values and source/id bookkeeping specific to RVFDataset.
  size = 0;
  labels = new int[numDatums];
  data = new int[numDatums][];
  values = new double[numDatums][];
  labelIndex = new HashIndex<L>();
  featureIndex = new HashIndex<F>();
  sourcesAndIds = new ArrayList<Pair<String, String>>(numDatums);
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:11,代码来源:RVFDataset.java
示例6: selectFeaturesFromSet
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Removes all features from the dataset that are not in featureSet.
 * Surviving features are renumbered compactly and every datum's
 * feature/value arrays are rewritten under the new numbering.
 *
 * @param featureSet the features to keep
 */
public void selectFeaturesFromSet(Set<F> featureSet) {
  HashIndex<F> keptIndex = new HashIndex<F>();
  // remap[old id] -> new id, or -1 when the feature is discarded.
  int[] remap = new int[featureIndex.size()];
  Arrays.fill(remap, -1);
  for (F feature : featureSet) {
    int from = featureIndex.indexOf(feature);
    if (from >= 0) { // only features actually present in this dataset
      remap[from] = keptIndex.indexOf(feature, true);
    }
  }
  featureIndex = keptIndex;
  for (int i = 0; i < size; i++) {
    List<Integer> keptFeatures = new ArrayList<Integer>(data[i].length);
    List<Double> keptValues = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      int mapped = remap[data[i][j]];
      if (mapped >= 0) {
        keptFeatures.add(mapped);
        keptValues.add(values[i][j]);
      }
    }
    int n = keptFeatures.size();
    data[i] = new int[n];
    values[i] = new double[n];
    for (int j = 0; j < n; j++) {
      data[i][j] = keptFeatures.get(j);
      values[i][j] = keptValues.get(j);
    }
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:35,代码来源:RVFDataset.java
示例7: applyFeatureCountThreshold
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Applies a feature count threshold to the RVFDataset. All features that
 * occur fewer than <i>k</i> times are expunged.
 */
public void applyFeatureCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> survivors = new HashIndex<F>();
  // remap[old id] -> new id, or -1 when the feature falls below the threshold.
  int[] remap = new int[featureIndex.size()];
  for (int oldId = 0; oldId < remap.length; oldId++) {
    if (counts[oldId] >= k) {
      remap[oldId] = survivors.size();
      survivors.add(featureIndex.get(oldId));
    } else {
      remap[oldId] = -1;
    }
  }
  featureIndex = survivors;
  // Rewrite each datum, keeping only surviving features and their values.
  for (int i = 0; i < size; i++) {
    List<Integer> keptFeatures = new ArrayList<Integer>(data[i].length);
    List<Double> keptValues = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      int newId = remap[data[i][j]];
      if (newId >= 0) {
        keptFeatures.add(newId);
        keptValues.add(values[i][j]);
      }
    }
    int n = keptFeatures.size();
    data[i] = new int[n];
    values[i] = new double[n];
    for (int j = 0; j < n; j++) {
      data[i][j] = keptFeatures.get(j);
      values[i][j] = keptValues.get(j);
    }
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:42,代码来源:RVFDataset.java
示例8: applyFeatureMaxCountThreshold
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Applies a feature max count threshold to the RVFDataset. All features that
 * occur greater than <i>k</i> times are expunged.
 */
public void applyFeatureMaxCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> survivors = new HashIndex<F>();
  // remap[old id] -> new id, or -1 when the feature exceeds the cap.
  int[] remap = new int[featureIndex.size()];
  for (int oldId = 0; oldId < remap.length; oldId++) {
    if (counts[oldId] <= k) {
      remap[oldId] = survivors.size();
      survivors.add(featureIndex.get(oldId));
    } else {
      remap[oldId] = -1;
    }
  }
  featureIndex = survivors;
  // Rewrite each datum, keeping only surviving features and their values.
  for (int i = 0; i < size; i++) {
    List<Integer> keptFeatures = new ArrayList<Integer>(data[i].length);
    List<Double> keptValues = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      int newId = remap[data[i][j]];
      if (newId >= 0) {
        keptFeatures.add(newId);
        keptValues.add(values[i][j]);
      }
    }
    int n = keptFeatures.size();
    data[i] = new int[n];
    values[i] = new double[n];
    for (int j = 0; j < n; j++) {
      data[i][j] = keptFeatures.get(j);
      values[i][j] = keptValues.get(j);
    }
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:42,代码来源:RVFDataset.java
示例9: applyFeatureCountThreshold
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Applies a feature count threshold to the Dataset. All features that
 * occur fewer than <i>k</i> times are expunged.
 *
 * <p>Surviving features are renumbered compactly in their original order
 * and each datum's feature array is rewritten under the new numbering.
 *
 * @param k minimum number of occurrences a feature needs to be retained
 */
public void applyFeatureCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  Index<F> newFeatureIndex = new HashIndex<F>();
  // featMap[old id] = new id, or -1 when the feature is dropped.
  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    if (counts[i] >= k) {
      featMap[i] = newFeatureIndex.size();
      newFeatureIndex.add(featureIndex.get(i));
    } else {
      featMap[i] = -1;
    }
  }
  featureIndex = newFeatureIndex;
  for (int i = 0; i < size; i++) {
    // First pass: count survivors so the new array is sized exactly,
    // avoiding the boxed List<Integer> intermediate and double copy.
    int kept = 0;
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        kept++;
      }
    }
    // Second pass: copy the remapped feature ids.
    int[] newData = new int[kept];
    int pos = 0;
    for (int j = 0; j < data[i].length; j++) {
      int mapped = featMap[data[i][j]];
      if (mapped >= 0) {
        newData[pos++] = mapped;
      }
    }
    data[i] = newData;
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:38,代码来源:GeneralDataset.java
示例10: applyFeatureMaxCountThreshold
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Applies a max feature count threshold to the Dataset. All features that
 * occur greater than <i>k</i> times are expunged.
 *
 * <p>Surviving features are renumbered compactly in their original order
 * and each datum's feature array is rewritten under the new numbering.
 *
 * @param k maximum number of occurrences a feature may have and be retained
 */
public void applyFeatureMaxCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> newFeatureIndex = new HashIndex<F>();
  // featMap[old id] = new id, or -1 when the feature is dropped.
  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    if (counts[i] <= k) {
      featMap[i] = newFeatureIndex.size();
      newFeatureIndex.add(featureIndex.get(i));
    } else {
      featMap[i] = -1;
    }
  }
  featureIndex = newFeatureIndex;
  for (int i = 0; i < size; i++) {
    // First pass: count survivors so the new array is sized exactly,
    // avoiding the boxed List<Integer> intermediate and double copy.
    int kept = 0;
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        kept++;
      }
    }
    // Second pass: copy the remapped feature ids.
    int[] newData = new int[kept];
    int pos = 0;
    for (int j = 0; j < data[i].length; j++) {
      int mapped = featMap[data[i][j]];
      if (mapped >= 0) {
        newData[pos++] = mapped;
      }
    }
    data[i] = newData;
  }
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:38,代码来源:GeneralDataset.java
示例11: initialize
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
@Override
protected final void initialize(int numDatums) {
  // Reset the dataset so it can hold up to numDatums entries.
  size = 0;
  labels = new int[numDatums];
  data = new int[numDatums][];
  labelIndex = new HashIndex<L>();
  featureIndex = new HashIndex<F>();
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:9,代码来源:Dataset.java
示例12: selectFeatures
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Generic method to select features based on the feature scores vector provided as an argument.
 * Keeps the numFeatures highest-scoring features and rewrites every datum
 * in terms of the reduced feature index.
 *
 * @param numFeatures number of features to be selected.
 * @param scores a vector of size total number of features in the data.
 */
public void selectFeatures(int numFeatures, double[] scores) {
  // Pair every feature with its score and rank them best-first.
  List<ScoredObject<F>> ranked = new ArrayList<ScoredObject<F>>();
  for (int i = 0; i < scores.length; i++) {
    ranked.add(new ScoredObject<F>(featureIndex.get(i), scores[i]));
  }
  Collections.sort(ranked, ScoredComparator.DESCENDING_COMPARATOR);
  Index<F> keptIndex = new HashIndex<F>();
  int limit = Math.min(numFeatures, ranked.size());
  for (int i = 0; i < limit; i++) {
    keptIndex.add(ranked.get(i).object());
  }
  // Rewrite each datum, dropping features that did not make the cut.
  for (int i = 0; i < size; i++) {
    int[] remapped = new int[data[i].length];
    int kept = 0;
    for (int j = 0; j < data[i].length; j++) {
      int newId = keptIndex.indexOf(featureIndex.get(data[i][j]));
      if (newId != -1) {
        remapped[kept++] = newId;
      }
    }
    int[] trimmed = new int[kept];
    System.arraycopy(remapped, 0, trimmed, 0, kept);
    data[i] = trimmed;
  }
  featureIndex = keptIndex;
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:36,代码来源:Dataset.java
示例13: getFeaturesAboveThreshhold
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Returns the features whose weight spread (max minus min over all labels)
 * in the underlying linear classifier exceeds {@code thresh}.
 *
 * @throws RuntimeException if the wrapped classifier is not a LinearClassifier
 */
public Index<String> getFeaturesAboveThreshhold(Dataset<String, String> dataset, double thresh) {
  if (!(classifier instanceof LinearClassifier)) {
    throw new RuntimeException("Attempting to remove features based on weight from a non-linear classifier");
  }
  LinearClassifier<String, String> lc = (LinearClassifier<String, String>) classifier;
  Index<String> featureIndex = dataset.featureIndex;
  Index<String> labelIndex = dataset.labelIndex;
  Index<String> features = new HashIndex<String>();
  for (String f : featureIndex) {
    double smallest = Double.POSITIVE_INFINITY;
    double biggest = Double.NEGATIVE_INFINITY;
    for (String l : labelIndex) {
      double weight = lc.weight(f, l);
      smallest = Math.min(smallest, weight);
      biggest = Math.max(biggest, weight);
      if (biggest - smallest > thresh) {
        features.add(f);
        // Spread already exceeds the threshold; no need to scan more labels.
        break;
      }
    }
  }
  return features;
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:34,代码来源:CMMClassifier.java
示例14: makeAnswerArraysAndTagIndex
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/** This routine builds the <code>answerArrays</code> which give the
 * empirically legal label sequences (of length (order) at most
 * <code>flags.maxLeft</code>) and the <code>classIndex</code>,
 * which indexes known answer classes.
 *
 * <p>Both structures are lazily created and accumulated across calls:
 * existing contents are kept and the new documents are folded in.
 *
 * @param docs The training data: A List of List of CoreLabel
 */
private void makeAnswerArraysAndTagIndex(Collection<List<IN>> docs) {
if (answerArrays == null) {
answerArrays = Generics.newHashSet();
}
if (classIndex == null) {
classIndex = new HashIndex<String>();
}
for (List<IN> doc : docs) {
// When useReverse is set the document is processed right-to-left:
// reversed in place here and restored at the bottom of the loop.
if (flags.useReverse) {
Collections.reverse(doc);
}
int leng = doc.size();
// Record every answer-label subsequence of length 1..flags.maxLeft
// as an empirically legal sequence.
for (int start = 0; start < leng; start++) {
for (int diff = 1; diff <= flags.maxLeft && start + diff <= leng; diff++) {
String[] seq = new String[diff];
for (int i = start; i < start + diff; i++) {
seq[i - start] = doc.get(i).get(CoreAnnotations.AnswerAnnotation.class);
}
answerArrays.add(Arrays.asList(seq));
}
}
// Index every answer class seen in this document.
for (int i = 0; i < leng; i++) {
CoreLabel wordInfo = doc.get(i);
classIndex.add(wordInfo.get(CoreAnnotations.AnswerAnnotation.class));
}
// Undo the in-place reversal so the caller's list order is preserved.
if (flags.useReverse) {
Collections.reverse(doc);
}
}
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:41,代码来源:CMMClassifier.java
示例15: initialBetasAndLexicon
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Builds the initial (unsplit) betas and trains the starting lexicon
 * from the weighted training trees.
 */
private void initialBetasAndLexicon() {
  // Fresh indices; the lexicon is constructed over them.
  wordIndex = new HashIndex<String>();
  tagIndex = new HashIndex<String>();
  lex = op.tlpParams.lex(op, wordIndex, tagIndex);
  lex.initializeTraining(trainSize);
  for (Tree tree : trees) {
    double treeWeight = treeWeights.getCount(tree);
    lex.incrementTreesRead(treeWeight);
    // Per-tree overload accumulates betas starting at state split 0.
    initialBetasAndLexicon(tree, 0, treeWeight);
  }
  lex.finishTraining();
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:15,代码来源:SplittingGrammarExtractor.java
示例16: recalculateMergedBetas
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Recomputes betas after state merges.  For each tree the raw transitions
 * are recounted, collapsed through {@code mergeCorrespondence}, and the
 * merged transitions are folded into fresh temporary betas, which then
 * replace the current ones.
 */
public void recalculateMergedBetas(Map<String, int[]> mergeCorrespondence) {
  TwoDimensionalMap<String, String, double[][]> tempUnaryBetas = new TwoDimensionalMap<String, String, double[][]>();
  ThreeDimensionalMap<String, String, String, double[][][]> tempBinaryBetas = new ThreeDimensionalMap<String, String, String, double[][][]>();
  tempWordIndex = new HashIndex<String>();
  tempTagIndex = new HashIndex<String>();
  tempLex = op.tlpParams.lex(op, tempWordIndex, tempTagIndex);
  tempLex.initializeTraining(trainSize);
  for (Tree tree : trees) {
    double treeWeight = treeWeights.getCount(tree);
    tempLex.incrementTreesRead(treeWeight);
    // Single-state weight vector in log space.
    double[] stateWeights = { Math.log(treeWeight) };
    IdentityHashMap<Tree, double[][]> rawUnary = new IdentityHashMap<Tree, double[][]>();
    IdentityHashMap<Tree, double[][][]> rawBinary = new IdentityHashMap<Tree, double[][][]>();
    recountTree(tree, false, rawUnary, rawBinary);
    IdentityHashMap<Tree, double[][]> mergedUnary = new IdentityHashMap<Tree, double[][]>();
    IdentityHashMap<Tree, double[][][]> mergedBinary = new IdentityHashMap<Tree, double[][][]>();
    mergeTransitions(tree, rawUnary, rawBinary, mergedUnary, mergedBinary, stateWeights, mergeCorrespondence);
    recalculateTemporaryBetas(tree, stateWeights, 0, mergedUnary, mergedBinary,
                              null, tempUnaryBetas, tempBinaryBetas);
  }
  tempLex.finishTraining();
  useNewBetas(false, tempUnaryBetas, tempBinaryBetas);
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:30,代码来源:SplittingGrammarExtractor.java
示例17: buildStateIndex
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Rebuilds {@code stateIndex} from scratch, registering one entry per
 * split of each state recorded in {@code stateSplitCounts}.
 */
public void buildStateIndex() {
  stateIndex = new HashIndex<String>();
  for (String key : stateSplitCounts.keySet()) {
    int numSplits = stateSplitCounts.getIntCount(key);
    for (int split = 0; split < numSplits; ++split) {
      stateIndex.add(state(key, split));
    }
  }
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:9,代码来源:SplittingGrammarExtractor.java
示例18: createIndex
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Enumerates every legal (x, y) pair — y ranging over numY(x) for each x —
 * into a freshly built index and returns it.
 */
public Index<IntPair> createIndex() {
  Index<IntPair> index = new HashIndex<IntPair>();
  for (int x = 0; x < px.length; x++) {
    for (int y = 0, limit = numY(x); y < limit; y++) {
      index.add(new IntPair(x, y));
    }
  }
  return index;
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:11,代码来源:Experiments.java
示例19: read
import edu.stanford.nlp.util.HashIndex; //导入依赖的package包/类
/**
 * Reads the tag index (and closed-class membership) from a binary stream:
 * an int count followed by (UTF tag, boolean inClosed) pairs.  On an
 * IOException the partially read state is kept and the error is reported.
 */
protected void read(DataInputStream file) {
  try {
    int numTags = file.readInt();
    index = new HashIndex<String>();
    for (int i = 0; i < numTags; i++) {
      String tag = file.readUTF();
      boolean inClosed = file.readBoolean();
      index.add(tag);
      if (inClosed) {
        closed.add(tag);
      }
    }
  } catch (IOException e) {
    // Best-effort read: keep whatever was loaded before the failure.
    e.printStackTrace();
  }
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:16,代码来源:TTags.java
注:本文中的edu.stanford.nlp.util.HashIndex类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论