本文整理汇总了Java中net.sf.javaml.core.Dataset类的典型用法代码示例。如果您正苦于以下问题：Java Dataset类的具体用法？Java Dataset怎么用？Java Dataset使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Dataset类属于net.sf.javaml.core包,在下文中一共展示了Dataset类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: cluster
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters {@code data} around {@code numberOfClusters} medoids.
 *
 * The starting medoids are drawn at random (repeats are possible) from the
 * data set. Assignment and medoid recalculation then alternate until a pass
 * reports no change or {@code maxIterations} passes have run. The medoids of
 * the last pass are stored in {@code this.medoids}.
 *
 * @param data the data set to cluster
 * @return the cluster array handed to {@code recalculateMedoids} on every
 *         pass; its entries are still {@code null} if no pass was executed
 */
public Dataset[] cluster(Dataset data) {
    Instance[] seeds = new Instance[numberOfClusters];
    for (int c = 0; c < numberOfClusters; c++) {
        seeds[c] = data.instance(rg.nextInt(data.size()));
    }
    Dataset[] clusters = new DefaultDataset[numberOfClusters];

    int passes = 0;
    boolean changed = true;
    while (changed && passes < maxIterations) {
        passes++;
        // Assign first, then let the recalculation decide whether another pass is needed.
        changed = recalculateMedoids(assign(seeds, data), seeds, clusters, data);
    }
    System.out.print("Count: " + passes + "\n");
    this.medoids = seeds;
    return clusters;
}
开发者ID:taochen,项目名称:ssascaling,代码行数:24,代码来源:CustomKMedoids.java
示例2: assign
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to its best-matching medoid.
 *
 * The first medoid is the initial best candidate; a later medoid replaces it
 * only when {@code dm.compare} accepts its distance over the current best.
 *
 * @param medoids candidate medoids ({@code medoids[0]} is read for every instance)
 * @param data the data to assign to the medoids
 * @return for each instance in the data set (by position), the index of the
 *         selected medoid
 */
private int[] assign(Instance[] medoids, Dataset data) {
    int[] nearest = new int[data.size()];
    for (int row = 0; row < data.size(); row++) {
        Instance current = data.instance(row);
        int winner = 0;
        double winnerDistance = dm.measure(current, medoids[0]);
        for (int m = 1; m < medoids.length; m++) {
            double candidate = dm.measure(current, medoids[m]);
            if (dm.compare(candidate, winnerDistance)) {
                winner = m;
                winnerDistance = candidate;
            }
        }
        nearest[row] = winner;
    }
    return nearest;
}
开发者ID:taochen,项目名称:ssascaling,代码行数:26,代码来源:CustomKMedoids.java
示例3: average
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Selects the member of {@code set} whose summed distance (per {@code dm})
 * to all members not equal to it is smallest.
 *
 * On ties the member encountered first wins.
 *
 * @param set the instances to choose from
 * @return the selected member, or {@code null} when {@code set} is empty
 */
private Instance average(Dataset set) {
    Instance best = null;
    double bestTotal = 0.0;
    for (Instance candidate : set) {
        double total = 0.0;
        for (Instance other : set) {
            if (candidate.equals(other)) {
                continue; // members equal to the candidate are not measured
            }
            total += dm.measure(candidate, other);
        }
        boolean firstCandidate = (best == null);
        if (firstCandidate || total < bestTotal) {
            best = candidate;
            bestTotal = total;
        }
    }
    return best;
}
开发者ID:taochen,项目名称:ssascaling,代码行数:21,代码来源:CustomKMedoids.java
示例4: clustering
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters the given objectives into two groups with {@code CustomKMean}
 * (at most 1000 iterations as configured here) using {@code SpearmanDistance},
 * and prints the elapsed clustering time to standard output.
 *
 * Each objective becomes one {@code DenseInstance} built from
 * {@code getArray()}, with the objective itself passed as the second
 * constructor argument (presumably its class label; confirm against DenseInstance).
 *
 * @param objectives the objectives to cluster
 * @return the clusters produced by {@code CustomKMean.cluster}
 */
@SuppressWarnings("rawtypes")
@Override
public List[] clustering(List<Objective> objectives) {
    Dataset dataset = new DefaultDataset();
    for (Objective objective : objectives) {
        DenseInstance point = new DenseInstance(objective.getArray(), objective);
        dataset.add(point);
    }
    // Only the clustering itself is timed, not the data set construction above.
    long startedAt = System.currentTimeMillis();
    Dataset[] clusters = new CustomKMean(2, 1000, new SpearmanDistance()).cluster(dataset);
    long elapsed = System.currentTimeMillis() - startedAt;
    System.out.print("Time taken on clustering: " + elapsed + "\n");
    return clusters;
}
开发者ID:taochen,项目名称:ssascaling,代码行数:18,代码来源:JavaMLNeighborhood.java
示例5: main
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Demonstrates the k-means algorithm with default parameter settings on the
 * iris data file and prints the number of clusters found.
 *
 * @param args unused
 * @throws Exception if loading the data set or clustering fails
 */
public static void main(String[] args) throws Exception {
    /*
     * Load the data set. The arguments 4 and "," are passed straight to
     * FileHandler.loadDataset (presumably the class-label column index and
     * the field separator; confirm against FileHandler).
     */
    File irisFile = new File("devtools/data/iris.data");
    Dataset data = FileHandler.loadDataset(irisFile, 4, ",");
    /*
     * KMeans created without options; according to the original tutorial
     * comment this default configuration generates 4 clusters.
     */
    Clusterer km = new KMeans();
    /* Each returned data set represents one cluster. */
    Dataset[] clusters = km.cluster(data);
    System.out.println("Cluster count: " + clusters.length);
}
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:25,代码来源:TutorialKMeans.java
示例6: cluster
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs k-means once for every cluster count from {@code kMin} to
 * {@code kMax} and returns the clustering that the evaluation {@code ce}
 * prefers.
 *
 * The run for {@code kMin} is always performed, even when {@code kMax} is
 * smaller. A later clustering replaces the current best whenever
 * {@code ce.compareScore(bestScore, candidateScore)} returns {@code true}
 * (presumably meaning the second score is better; confirm against the
 * evaluation's contract).
 *
 * @param data the data set to cluster
 * @return the preferred clustering among all tried cluster counts
 */
public Dataset[] cluster(Dataset data) {
    Dataset[] winner = new KMeans(this.kMin, this.iterations, this.dm).cluster(data);
    double winnerScore = this.ce.score(winner);
    int k = kMin + 1;
    while (k <= kMax) {
        Dataset[] candidate = new KMeans(k, this.iterations, this.dm).cluster(data);
        double candidateScore = this.ce.score(candidate);
        if (this.ce.compareScore(winnerScore, candidateScore)) {
            winner = candidate;
            winnerScore = candidateScore;
        }
        k++;
    }
    return winner;
}
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:IterativeKMeans.java
示例7: assign
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to its best-matching medoid.
 *
 * Logs one INFO message per call and one FINE message for every distance
 * computed against the medoids after the first.
 *
 * @param medoids candidate medoids ({@code medoids[0]} is read for every instance)
 * @param data the data to assign to the medoids
 * @return for each instance in the data set (by position), the index of the
 *         medoid accepted by {@code dm.compare}
 */
private int[] assign(Instance[] medoids, Dataset data) {
    LOGGER.log(Level.INFO, "Assign all instances from the data set to the medoids.");
    int[] nearest = new int[data.size()];
    for (int row = 0; row < data.size(); row++) {
        Instance current = data.instance(row);
        // The first medoid is the initial best; later ones must beat it.
        int winner = 0;
        double winnerDistance = dm.measure(current, medoids[0]);
        for (int m = 1; m < medoids.length; m++) {
            double candidate = dm.measure(current, medoids[m]);
            LOGGER.log(Level.FINE, "Distance:{0}", candidate);
            if (dm.compare(candidate, winnerDistance)) {
                winner = m;
                winnerDistance = candidate;
            }
        }
        nearest[row] = winner;
    }
    return nearest;
}
开发者ID:eracle,项目名称:gap,代码行数:28,代码来源:KMedoids.java
示例8: recalculateMedoids
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Rebuilds every cluster from the given assignment and updates each medoid
 * to match its rebuilt cluster.
 *
 * For a non-empty cluster, the new medoid is the instance of {@code data}
 * returned first by {@code data.kNearest(1, centroid, dm)}, where the
 * centroid is {@code DatasetTools.average} of the cluster. A cluster that
 * received no instances gets a randomly drawn instance of {@code data} as
 * its medoid and always counts as a change.
 *
 * @param assignment
 *            for each position in {@code data}, the index of the cluster
 *            that instance belongs to
 * @param medoids
 *            the current cluster medoids; modified in place to fit the
 *            new assignment
 * @param output
 *            the cluster output; every entry up to {@code numberOfClusters}
 *            is replaced with a freshly built data set
 * @param data
 *            the data set the assignment refers to
 * @return {@code true} if at least one medoid differs from its previous
 *         value or a cluster was empty, {@code false} otherwise
 */
public boolean recalculateMedoids(int[] assignment, Instance[] medoids,
        Dataset[] output, Dataset data) {
    boolean anyMoved = false;
    for (int cluster = 0; cluster < numberOfClusters; cluster++) {
        Dataset members = new DefaultDataset();
        output[cluster] = members;
        for (int idx = 0; idx < assignment.length; idx++) {
            if (assignment[idx] == cluster) {
                members.add(data.instance(idx));
            }
        }
        if (members.size() == 0) {
            // Empty cluster: re-seed its medoid with a random instance.
            medoids[cluster] = data.instance(rg.nextInt(data.size()));
            anyMoved = true;
            continue;
        }
        Instance centroid = DatasetTools.average(members);
        Instance previous = medoids[cluster];
        Instance replacement = data.kNearest(1, centroid, dm).iterator().next();
        medoids[cluster] = replacement;
        if (!replacement.equals(previous)) {
            anyMoved = true;
        }
    }
    return anyMoved;
}
开发者ID:eracle,项目名称:gap,代码行数:38,代码来源:KMedoids.java
示例9: score
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Scores a clustering as the sum, over all clusters, of the cluster's total
 * pairwise measure divided by the cluster size.
 *
 * Within a cluster, {@code dm.measure} is evaluated for every ordered pair of
 * positions, including each instance paired with itself. The total is divided
 * by the number of instances, not by the number of pairs.
 *
 * Empty clusters are skipped. They contain no pairs, and dividing their zero
 * total by a size of zero would yield NaN and make the whole score NaN.
 *
 * @param datas the clusters to score
 * @return the summed per-cluster averages
 */
public double score(Dataset[] datas) {
    double sum = 0;
    for (Dataset cluster : datas) {
        int size = cluster.size();
        if (size == 0) {
            continue; // avoids 0.0 / 0 == NaN poisoning the total
        }
        double pairTotal = 0;
        for (int j = 0; j < size; j++) {
            for (int k = 0; k < size; k++) {
                pairTotal += dm.measure(cluster.instance(j), cluster.instance(k));
            }
        }
        sum += pairTotal / size;
    }
    return sum;
}
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:SumOfAveragePairwiseSimilarities.java
示例10: score
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Scores a clustering as the sum of {@code dm.measure} between every
 * instance and the centroid of its own cluster, where the centroid is
 * {@code DatasetTools.average} of that cluster.
 *
 * @param datas the clusters to score
 * @return the summed instance-to-centroid measures over all clusters
 */
public double score(Dataset[] datas) {
    double total = 0;
    for (int c = 0; c < datas.length; c++) {
        Dataset cluster = datas[c];
        Instance centroid = DatasetTools.average(cluster);
        for (int row = 0; row < cluster.size(); row++) {
            total += dm.measure(cluster.instance(row), centroid);
        }
    }
    return total;
}
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:SumOfCentroidSimilarities.java
示例11: score
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes the Bayesian information criterion (BIC) of a clustering:
 * {@code -2 * log(L) + k * ln(N)}.
 *
 * {@code N} is the total number of instances over all clusters, {@code k} is
 * the number of free parameters (fixed to 1 here), and {@code log(L)} is the
 * value returned by {@code LogLikelihoodFunction.loglikelihoodsum}.
 *
 * The penalty term uses the natural logarithm, as in the definition of the
 * criterion. The previous {@code Math.log10} scaled the penalty by
 * {@code 1 / ln(10)}. This assumes loglikelihoodsum returns a natural-log
 * likelihood; confirm against LogLikelihoodFunction.
 *
 * @param clusters the clusters to score
 * @return the BIC score
 */
public double score(Dataset[] clusters) {
    // number of free parameters K
    double k = 1;
    // sample size N
    double datasize = 0;
    for (Dataset cluster : clusters) {
        datasize += cluster.size();
    }
    LogLikelihoodFunction likelihood = new LogLikelihoodFunction();
    // log-likelihood log(L)
    double l = likelihood.loglikelihoodsum(clusters);
    // BIC = -2 ln(L) + k ln(N)
    return -2 * l + Math.log(datasize) * k;
}
开发者ID:eracle,项目名称:gap,代码行数:17,代码来源:BICScore.java
示例12: cluster
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs the same k-means configuration {@code repeats + 1} times on the data
 * and returns the clustering that the evaluation {@code ce} prefers.
 *
 * A single {@code KMeans} instance is reused for all runs. A later result
 * replaces the current best whenever
 * {@code ce.compareScore(bestScore, candidateScore)} returns {@code true}
 * (presumably meaning the second score is better; confirm against the
 * evaluation's contract).
 *
 * @param data the data set to cluster
 * @return the preferred clustering among all runs
 */
public Dataset[] cluster(Dataset data) {
    KMeans km = new KMeans(this.clusters, this.iterations, this.dm);
    Dataset[] winner = km.cluster(data);
    double winnerScore = this.ce.score(winner);
    int attempt = 0;
    while (attempt < repeats) {
        Dataset[] candidate = km.cluster(data);
        double candidateScore = this.ce.score(candidate);
        if (this.ce.compareScore(winnerScore, candidateScore)) {
            winner = candidate;
            winnerScore = candidateScore;
        }
        attempt++;
    }
    return winner;
}
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:MultiKMeans.java
示例13: transformDataset
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Converts a data set into a LibSVM {@code svm_problem}.
 *
 * Row {@code r} of the problem corresponds to instance {@code r} of the data
 * set. Its target {@code y[r]} is the data set's class index of the
 * instance's class value, and {@code x[r]} holds one {@code svm_node} per
 * key of the instance (in key-set iteration order) carrying that key as
 * index and the instance's value for it.
 *
 * @param data the data set to convert
 * @return the populated problem
 */
private static svm_problem transformDataset(Dataset data) {
    svm_problem problem = new svm_problem();
    problem.l = data.size();
    problem.y = new double[data.size()];
    problem.x = new svm_node[data.size()][];
    for (int row = 0; row < data.size(); row++) {
        Instance inst = data.instance(row);
        problem.y[row] = data.classIndex(inst.classValue());
        SortedSet<Integer> indices = inst.keySet();
        svm_node[] nodes = new svm_node[indices.size()];
        int slot = 0;
        for (int attr : indices) {
            svm_node node = new svm_node();
            node.index = attr;
            node.value = inst.value(attr);
            nodes[slot++] = node;
        }
        problem.x[row] = nodes;
    }
    return problem;
}
开发者ID:eracle,项目名称:gap,代码行数:23,代码来源:LibSVM.java
示例14: cluster
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters the data set with the Cobweb tree.
 *
 * The data is first passed through {@code filter} (which receives the
 * caller's data set itself), the clusterer state is reset, and every
 * instance is fed to {@code updateClusterer} in data set order. The clusters
 * are then collected from the tree by {@code createClusters}.
 *
 * @param data the data set to cluster
 * @return the collected clusters as an array
 */
public Dataset[] cluster(Dataset data) {
    filter.filter(data);

    // Reset the state left over from any previous run.
    m_numberOfClusters = -1;
    m_cobwebTree = null;
    m_numberSplits = 0;
    m_numberMerges = 0;

    int position = 0;
    while (position < data.size()) {
        updateClusterer(data.instance(position));
        position++;
    }
    determineNumberOfClusters();

    Vector<Dataset> collected = new Vector<Dataset>();
    createClusters(m_cobwebTree, collected);
    return collected.toArray(new Dataset[collected.size()]);
}
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:Cobweb.java
示例15: logLikelihood
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes the log-likelihood of one cluster from the count, sum and sum of
 * squares of all its attribute values.
 *
 * The number of columns is taken from the first instance, so the cluster
 * must contain at least one instance. The fields {@code count}, {@code sum}
 * and {@code sum2} are overwritten by every call.
 *
 * @param cluster the cluster to evaluate
 * @return the value of {@code logLikelihoodFunction(count, sum, sum2)}, or 0
 *         when that value is positive or negative infinity
 */
public double logLikelihood(Dataset cluster) {
    double instanceLength = cluster.instance(0).size();
    this.count = instanceLength * cluster.size();
    sum = 0;
    sum2 = 0;
    for (int row = 0; row < cluster.size(); row++) {
        for (int column = 0; column < instanceLength; column++) {
            double cell = cluster.instance(row).value(column);
            sum += cell;
            sum2 += cell * cell;
        }
    }
    double loglikelihood = logLikelihoodFunction(count, sum, sum2);
    boolean infinite = loglikelihood == Double.NEGATIVE_INFINITY
            || loglikelihood == Double.POSITIVE_INFINITY;
    return infinite ? 0 : loglikelihood;
}
开发者ID:eracle,项目名称:gap,代码行数:22,代码来源:LogLikelihoodFunction.java
示例16: buildClassifier
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Trains every classifier of the ensemble on its own sample of the data.
 *
 * For each classifier a sample of {@code data.size()} instances is drawn via
 * {@code samplingMethod} with the current {@code seed}, which is incremented
 * after every draw. The classifier is trained on the first element of the
 * returned pair.
 *
 * When {@code calculateOutOfBagErrorEstimate} is set, the second element of
 * the pair is classified by the freshly trained classifier, and
 * {@code outOfBagErrorEstimate} is updated to the fraction of matching
 * predictions accumulated so far. The fraction is computed in floating
 * point; the previous integer division could only produce 0 or 1 and threw
 * an {@code ArithmeticException} when nothing had been classified yet. A
 * {@code null} prediction counts as a mismatch.
 *
 * NOTE(review): the stored ratio is correct / total, although the field
 * name suggests an error rate. The ratio is left unchanged so that existing
 * callers are not affected; confirm the intended meaning.
 *
 * @param data the training data; also kept in {@code dataReference}
 */
public void buildClassifier(Dataset data) {
    this.dataReference = data;
    int correct = 0;
    int incorrect = 0;
    for (int i = 0; i < classifiers.length; i++) {
        Pair<Dataset, Dataset> sample = samplingMethod.sample(data, data.size(), seed++);
        classifiers[i].buildClassifier(sample.x());
        if (calculateOutOfBagErrorEstimate) {
            for (Instance inst : sample.y()) {
                Object predicted = classifiers[i].classify(inst);
                if (predicted != null && predicted.equals(inst.classValue())) {
                    correct++;
                } else {
                    incorrect++;
                }
            }
            int total = correct + incorrect;
            if (total > 0) {
                // Cast before dividing: int / int would truncate to 0 or 1.
                outOfBagErrorEstimate = correct / (double) total;
            }
        }
    }
}
开发者ID:eracle,项目名称:gap,代码行数:25,代码来源:Bagging.java
示例17: buildClassifier
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Builds the per-class mean instance after delegating to the superclass.
 *
 * For every class value, the instances carrying it are accumulated with
 * {@code Instance.add} and the result is divided by the number of such
 * instances. The results replace the contents of {@code mean}.
 *
 * The first instance seen for a class is stored as-is, not copied. Whether
 * {@code add} and {@code divide} return new instances or modify the
 * receiver is not visible here; verify before relying on the input being
 * left untouched.
 *
 * @param data the training data
 */
@Override
public void buildClassifier(Dataset data) {
    super.buildClassifier(data);
    mean = new HashMap<Object, Instance>();
    HashMap<Object, Integer> count = new HashMap<Object, Integer>();
    for (Instance inst : data) {
        Object label = inst.classValue();
        if (mean.containsKey(label)) {
            mean.put(label, mean.get(label).add(inst));
            count.put(label, count.get(label) + 1);
        } else {
            mean.put(label, inst);
            count.put(label, 1);
        }
    }
    // Replacing values of existing keys does not alter the key set being iterated.
    for (Object label : mean.keySet()) {
        Instance total = mean.get(label);
        int members = count.get(label);
        mean.put(label, total.divide(members));
    }
}
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:AbstractMeanClassifier.java
示例18: maxAttributes
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Create an instance that contains all the maximum values for the
 * attributes.
 *
 * Only attribute indices present in the key set of at least one instance
 * appear in the result.
 *
 * @param data
 *            data set to find maximum attribute values for
 * @return Instance representing the maximum values for each attribute
 */
public static Instance maxAttributes(Dataset data) {
    Instance max = new SparseInstance();
    for (Instance inst : data) {
        for (Integer index : inst.keySet()) {
            double val = inst.value(index);
            // Store the value when the index is new or the value exceeds the stored one.
            if (!max.containsKey(index) || max.get(index) < val) {
                max.put(index, val);
            }
        }
    }
    return max;
}
开发者ID:eracle,项目名称:gap,代码行数:24,代码来源:DatasetTools.java
示例19: build
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes the RELIEF attribute weights for the data set.
 *
 * The data set is normalized in place with {@code NormalizeMidrange(0.5, 1)}
 * (to [0,1] according to the original comment). Then, once per instance in
 * the data set, a random instance is drawn, {@code findNearest} is called
 * for it (presumably setting {@code nearestHit} and {@code nearestMiss};
 * confirm), and every weight is decreased by the scaled difference to the
 * nearest hit and increased by the scaled difference to the nearest miss.
 *
 * @param data the data set to compute weights for; modified by the
 *            normalization filter
 */
@Override
public void build(Dataset data) {
    weights = new double[data.noAttributes()];
    /* Normalize the data to [0,1] */
    new NormalizeMidrange(0.5, 1).filter(data);
    /* Number of iterations */
    int m = data.size();
    int round = 0;
    while (round < m) {
        Instance sample = data.instance(rg.nextInt(data.size()));
        findNearest(data, sample);
        for (int attr = 0; attr < weights.length; attr++) {
            weights[attr] = weights[attr] - diff(attr, sample, nearestHit) / m
                    + diff(attr, sample, nearestMiss) / m;
        }
        round++;
    }
}
开发者ID:eracle,项目名称:gap,代码行数:21,代码来源:RELIEF.java
示例20: build
import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Selects {@code n} attributes by greedy forward selection.
 *
 * When more attributes are requested than the data set has, all attributes
 * are selected: {@code selectedAttributes} becomes the key set of the first
 * instance (the set itself, not a copy). Otherwise {@code selectNext} is
 * called until {@code selectedAttributes} holds {@code n} entries
 * (presumably each call adds the best remaining attribute; confirm against
 * selectNext).
 *
 * @param data the data set to select attributes from
 */
@Override
public void build(Dataset data) {
    boolean wantsMoreThanAvailable = n > data.noAttributes();
    if (!wantsMoreThanAvailable) {
        /*
         * Regular procedure: iteratively add attributes until enough are
         * selected.
         */
        Instance classInstance = DatasetTools.createInstanceFromClass(data);
        selectedAttributes = new HashSet<Integer>();
        while (selectedAttributes.size() < n) {
            selectNext(data, classInstance);
        }
        return;
    }
    /* More attributes requested than exist: return all of them. */
    selectedAttributes = data.get(0).keySet();
}
开发者ID:eracle,项目名称:gap,代码行数:22,代码来源:GreedyForwardSelection.java
注：本文中的net.sf.javaml.core.Dataset类示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论