• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Java Dataset类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中net.sf.javaml.core.Dataset的典型用法代码示例。如果您正苦于以下问题:Java Dataset类的具体用法?Java Dataset怎么用?Java Dataset使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



Dataset类属于net.sf.javaml.core包,在下文中一共展示了Dataset类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Splits {@code data} into {@code numberOfClusters} groups around medoids.
 *
 * <p>The starting medoids are drawn at random from {@code data} through
 * {@code rg}. Afterwards {@code assign} and {@code recalculateMedoids} are
 * alternated until the latter reports no change or {@code maxIterations}
 * rounds have been run. The number of rounds is printed to standard output
 * and the final medoids are stored in {@code this.medoids}.
 *
 * @param data the instances to cluster
 * @return the cluster array that was handed to {@code recalculateMedoids}
 *         on every round
 */
public Dataset[] cluster(Dataset data) {
	Instance[] seeds = new Instance[numberOfClusters];
	Dataset[] clusters = new DefaultDataset[numberOfClusters];
	for (int c = 0; c < numberOfClusters; c++) {
		seeds[c] = data.instance(rg.nextInt(data.size()));
	}

	int rounds = 0;
	boolean medoidsMoved = true;
	while (medoidsMoved && rounds < maxIterations) {
		rounds++;
		int[] membership = assign(seeds, data);
		medoidsMoved = recalculateMedoids(membership, seeds, clusters, data);
	}
	System.out.print("Count: " + rounds + "\n");

	this.medoids = seeds;
	return clusters;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:24,代码来源:CustomKMedoids.java


示例2: assign

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to one of the given medoids.
 *
 * <p>For each instance the first medoid is the initial choice; a later
 * medoid replaces it whenever {@code dm.compare(candidate, best)} returns
 * {@code true} for their measured distances.
 *
 * @param medoids candidate medoids
 * @param data the data to assign to the medoids
 * @return for each position in the data set, the index of the chosen medoid
 */
private int[] assign(Instance[] medoids, Dataset data) {
	int[] chosen = new int[data.size()];
	for (int row = 0; row < data.size(); row++) {
		Instance current = data.instance(row);
		int winner = 0;
		double winnerDistance = dm.measure(current, medoids[0]);
		for (int m = 1; m < medoids.length; m++) {
			double candidate = dm.measure(current, medoids[m]);
			if (!dm.compare(candidate, winnerDistance)) {
				continue;
			}
			winnerDistance = candidate;
			winner = m;
		}
		chosen[row] = winner;
	}
	return chosen;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:26,代码来源:CustomKMedoids.java


示例3: average

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Picks the member of {@code set} whose summed distance (via {@code dm}) to
 * all other, non-equal members is smallest.
 *
 * <p>On ties the member encountered first wins.
 *
 * @param set the instances to choose from
 * @return the selected member, or {@code null} when {@code set} is empty
 */
private Instance average(Dataset set) {
	Instance best = null;
	double bestTotal = 0.0;
	for (Instance candidate : set) {
		double total = 0.0;
		for (Instance other : set) {
			if (candidate.equals(other)) {
				continue; // equal instances contribute nothing
			}
			total += dm.measure(candidate, other);
		}

		boolean firstCandidate = (best == null);
		if (firstCandidate || total < bestTotal) {
			best = candidate;
			bestTotal = total;
		}
	}
	return best;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:21,代码来源:CustomKMedoids.java


示例4: clustering

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters the given objectives and reports the elapsed time on standard
 * output.
 *
 * <p>Each objective becomes a {@code DenseInstance} built from
 * {@code obj.getArray()} with the objective itself as second constructor
 * argument. The instances are clustered by a {@code CustomKMean} created
 * with {@code (2, 1000, new SpearmanDistance())}.
 *
 * @param objectives the objectives to group
 * @return the clusters produced by {@code CustomKMean.cluster}
 */
@SuppressWarnings("rawtypes")
@Override
public List[] clustering(List<Objective> objectives) {
	Dataset points = new DefaultDataset();
	for (Objective objective : objectives) {
		points.add(new DenseInstance(objective.getArray(), objective));
	}

	// Timing starts after the data set is built, so only clustering is measured.
	long startedAt = System.currentTimeMillis();
	// NOTE(review): 2 and 1000 are presumably the cluster count and the
	// iteration cap; confirm against the CustomKMean constructor.
	CustomKMean clusterer = new CustomKMean(2, 1000, new SpearmanDistance());
	Dataset[] groups = clusterer.cluster(points);
	long elapsed = System.currentTimeMillis() - startedAt;
	System.out.print("Time taken on clustering: " + elapsed + "\n");

	return groups;
}
 
开发者ID:taochen,项目名称:ssascaling,代码行数:18,代码来源:JavaMLNeighborhood.java


示例5: main

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Demo entry point: loads the iris data file, clusters it with a
 * {@code KMeans} built without options and prints how many clusters were
 * returned.
 *
 * @param args ignored
 * @throws Exception declared for whatever the loading or clustering calls may throw
 */
public static void main(String[] args) throws Exception {
    File irisFile = new File("devtools/data/iris.data");
    // NOTE(review): 4 and "," are presumably the class-label column and the
    // field separator; confirm against FileHandler.loadDataset.
    Dataset iris = FileHandler.loadDataset(irisFile, 4, ",");

    // No options given; per the original author's note the default is 4 clusters.
    Clusterer clusterer = new KMeans();

    // The result is an array of data sets, one per cluster.
    Dataset[] found = clusterer.cluster(iris);
    System.out.println("Cluster count: " + found.length);
}
 
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:25,代码来源:TutorialKMeans.java


示例6: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs {@code KMeans} once for every cluster count from {@code kMin} to
 * {@code kMax} and keeps the best-scoring result.
 *
 * <p>The run for {@code kMin} is the initial best. A later run replaces it
 * whenever {@code ce.compareScore(bestScore, candidateScore)} returns
 * {@code true}.
 *
 * @param data the instances to cluster
 * @return the clusters of the retained run
 */
public Dataset[] cluster(Dataset data) {
    Dataset[] winner = new KMeans(this.kMin, this.iterations, this.dm).cluster(data);
    double winnerScore = this.ce.score(winner);

    int k = kMin + 1;
    while (k <= kMax) {
        KMeans attempt = new KMeans(k, this.iterations, this.dm);
        Dataset[] candidate = attempt.cluster(data);
        double candidateScore = this.ce.score(candidate);
        if (this.ce.compareScore(winnerScore, candidateScore)) {
            winner = candidate;
            winnerScore = candidateScore;
        }
        k++;
    }
    return winner;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:IterativeKMeans.java


示例7: assign

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Maps every instance of the data set to one of the given medoids, logging
 * the call at INFO level and each compared distance at FINE level.
 *
 * <p>The first medoid is the initial choice for an instance; a later medoid
 * replaces it whenever {@code dm.compare(candidate, best)} returns
 * {@code true} for their measured distances.
 *
 * @param medoids candidate medoids
 * @param data the data to assign to the medoids
 * @return for each position in the data set, the index of the chosen medoid
 */
private int[] assign(Instance[] medoids, Dataset data) {
	LOGGER.log(Level.INFO, "Assign all instances from the data set to the medoids.");
	int[] chosen = new int[data.size()];
	int row = 0;
	while (row < data.size()) {
		int winner = 0;
		double winnerDistance = dm.measure(data.instance(row), medoids[0]);
		for (int m = 1; m < medoids.length; m++) {
			double candidate = dm.measure(data.instance(row), medoids[m]);
			LOGGER.log(Level.FINE, "Distance:{0}", candidate);
			if (dm.compare(candidate, winnerDistance)) {
				winner = m;
				winnerDistance = candidate;
			}
		}
		chosen[row] = winner;
		row++;
	}
	return chosen;
}
 
开发者ID:eracle,项目名称:gap,代码行数:28,代码来源:KMedoids.java


示例8: recalculateMedoids

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Rebuilds every cluster from the assignment and moves each medoid to fit
 * its cluster.
 *
 * <p>For a non-empty cluster the new medoid is the instance of {@code data}
 * returned first by {@code data.kNearest(1, centroid, dm)}, where the
 * centroid comes from {@code DatasetTools.average}. For an empty cluster a
 * random instance of {@code data} becomes the medoid and the method reports
 * a change.
 *
 * @param assignment
 *            for each position in {@code data}, the index of the cluster it
 *            belongs to
 * @param medoids
 *            the current cluster medoids; overwritten in place
 * @param output
 *            the cluster array; every slot below {@code numberOfClusters}
 *            is replaced by a freshly built data set
 * @param data
 *            the instances being clustered
 * @return {@code true} if a cluster was empty or at least one medoid is no
 *         longer equal to its previous value
 */
public boolean recalculateMedoids(int[] assignment, Instance[] medoids,
		Dataset[] output, Dataset data) {
	boolean anyMoved = false;
	for (int c = 0; c < numberOfClusters; c++) {
		Dataset members = new DefaultDataset();
		output[c] = members;
		for (int idx = 0; idx < assignment.length; idx++) {
			if (assignment[idx] == c) {
				members.add(data.instance(idx));
			}
		}

		if (members.size() == 0) {
			// Empty cluster: restart it from a random instance.
			medoids[c] = data.instance(rg.nextInt(data.size()));
			anyMoved = true;
			continue;
		}

		Instance centroid = DatasetTools.average(members);
		Instance previous = medoids[c];
		medoids[c] = data.kNearest(1, centroid, dm).iterator().next();
		anyMoved |= !medoids[c].equals(previous);
	}
	return anyMoved;
}
 
开发者ID:eracle,项目名称:gap,代码行数:38,代码来源:KMedoids.java


示例9: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Scores a clustering as the sum, over all clusters, of the cluster's total
 * pairwise measure divided by the cluster size.
 *
 * <p>Every ordered pair of members is measured with {@code dm}, including
 * each member paired with itself. An empty cluster contributes
 * {@code 0.0 / 0}, which is NaN, so the returned score is NaN in that case.
 *
 * @param datas the clusters to score
 * @return the accumulated score
 */
public double score(Dataset[] datas) {
    double total = 0;
    for (Dataset cluster : datas) {
        double pairwise = 0;
        for (int a = 0; a < cluster.size(); a++) {
            for (int b = 0; b < cluster.size(); b++) {
                pairwise += dm.measure(cluster.instance(a), cluster.instance(b));
            }
        }
        total += pairwise / cluster.size();
    }
    return total;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:SumOfAveragePairwiseSimilarities.java


示例10: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Scores a clustering as the sum of the {@code dm} measure between every
 * instance and the centroid of its own cluster.
 *
 * <p>Centroids are computed for all clusters with
 * {@code DatasetTools.average} before any measurement takes place.
 *
 * @param datas the clusters to score
 * @return the accumulated measure over all instances
 */
public double score(Dataset[] datas) {
	Instance[] centers = new Instance[datas.length];
	int slot = 0;
	for (Dataset cluster : datas) {
		centers[slot++] = DatasetTools.average(cluster);
	}

	double total = 0;
	for (int c = 0; c < datas.length; c++) {
		Dataset cluster = datas[c];
		Instance center = centers[c];
		for (int m = 0; m < cluster.size(); m++) {
			total += dm.measure(cluster.instance(m), center);
		}
	}
	return total;
}
 
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:SumOfCentroidSimilarities.java


示例11: score

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes a BIC-style score for the clustering:
 * {@code -2 * logLikelihood + log10(N) * K}, where {@code N} is the total
 * number of instances across all clusters and {@code K} is fixed at 1.
 *
 * <p>NOTE(review): the textbook BIC uses the natural logarithm; this code
 * uses {@code Math.log10}. Confirm whether that is intentional before
 * changing it, since it alters every score.
 *
 * @param clusters the clusters to score
 * @return the computed score
 */
public double score(Dataset[] clusters) {
	// Number of free parameters K.
	double freeParameters = 1;

	// Sample size N: total instance count over all clusters.
	double sampleSize = 0;
	for (Dataset cluster : clusters) {
		sampleSize += cluster.size();
	}

	// Log-likelihood log(L), summed over the clusters.
	double logL = new LogLikelihoodFunction().loglikelihoodsum(clusters);

	return -2 * logL + Math.log10(sampleSize) * freeParameters;
}
 
开发者ID:eracle,项目名称:gap,代码行数:17,代码来源:BICScore.java


示例12: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Runs the same {@code KMeans} instance {@code repeats + 1} times on
 * {@code data} (at least once) and keeps the best-scoring result.
 *
 * <p>The first run is the initial best. A later run replaces it whenever
 * {@code ce.compareScore(bestScore, candidateScore)} returns {@code true}.
 *
 * @param data the instances to cluster
 * @return the clusters of the retained run
 */
public Dataset[] cluster(Dataset data) {
	KMeans km = new KMeans(this.clusters, this.iterations, this.dm);

	Dataset[] winner = km.cluster(data);
	double winnerScore = this.ce.score(winner);

	for (int remaining = repeats; remaining > 0; remaining--) {
		Dataset[] candidate = km.cluster(data);
		double candidateScore = this.ce.score(candidate);
		if (!this.ce.compareScore(winnerScore, candidateScore)) {
			continue;
		}
		winner = candidate;
		winnerScore = candidateScore;
	}
	return winner;
}
 
开发者ID:eracle,项目名称:gap,代码行数:19,代码来源:MultiKMeans.java


示例13: transformDataset

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Converts a data set into an {@code svm_problem}.
 *
 * <p>Row {@code r} of the problem corresponds to {@code data.instance(r)}:
 * its label is {@code data.classIndex(instance.classValue())} and its
 * feature vector holds one {@code svm_node} per key of the instance's
 * {@code keySet()}, in the iteration order of that sorted set.
 *
 * @param data the data set to convert
 * @return the populated problem, with {@code l} set to {@code data.size()}
 */
private static svm_problem transformDataset(Dataset data) {
	svm_problem problem = new svm_problem();
	problem.l = data.size();
	problem.y = new double[data.size()];
	problem.x = new svm_node[data.size()][];

	for (int row = 0; row < data.size(); row++) {
		Instance inst = data.instance(row);
		problem.y[row] = data.classIndex(inst.classValue());

		svm_node[] nodes = new svm_node[inst.keySet().size()];
		problem.x[row] = nodes;

		SortedSet<Integer> keys = inst.keySet();
		int slot = 0;
		for (int attr : keys) {
			svm_node node = new svm_node();
			nodes[slot] = node;
			node.index = attr;
			node.value = inst.value(attr);
			slot++;
		}
	}
	return problem;
}
 
开发者ID:eracle,项目名称:gap,代码行数:23,代码来源:LibSVM.java


示例14: cluster

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Clusters {@code data} by feeding its instances one at a time to
 * {@code updateClusterer}.
 *
 * <p>The data is first passed through {@code filter}, and the clusterer's
 * tree, cluster count and split/merge counters are reset. Once all
 * instances have been processed, {@code determineNumberOfClusters} is
 * called and {@code createClusters} collects the clusters from the tree.
 *
 * @param data the instances to cluster
 * @return the collected clusters as an array
 */
public Dataset[] cluster(Dataset data) {
    filter.filter(data);

    // Reset state left over from any previous run.
    m_numberOfClusters = -1;
    m_cobwebTree = null;
    m_numberSplits = 0;
    m_numberMerges = 0;

    int row = 0;
    while (row < data.size()) {
        updateClusterer(data.instance(row));
        row++;
    }
    determineNumberOfClusters();

    Vector<Dataset> collected = new Vector<Dataset>();
    createClusters(m_cobwebTree, collected);
    return collected.toArray(new Dataset[collected.size()]);
}
 
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:Cobweb.java


示例15: logLikelihood

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes the log-likelihood of one cluster from the count, sum and sum of
 * squares of all its attribute values.
 *
 * <p>The width of the cluster is taken from its first instance, so the
 * cluster must contain at least one instance. The fields {@code count},
 * {@code sum} and {@code sum2} are overwritten and then passed to
 * {@code logLikelihoodFunction}. An infinite result is replaced by 0.
 *
 * @param cluster the cluster to evaluate
 * @return the log-likelihood, or 0 when the computed value is infinite
 */
public double logLikelihood(Dataset cluster) {
	double width = cluster.instance(0).size();
	this.count = width * cluster.size();
	sum = 0;
	sum2 = 0;

	for (int row = 0; row < cluster.size(); row++) {
		for (int column = 0; column < width; column++) {
			double cell = cluster.instance(row).value(column);
			sum += cell;
			sum2 += cell * cell;
		}
	}

	double result = logLikelihoodFunction(count, sum, sum2);
	return Double.isInfinite(result) ? 0 : result;
}
 
开发者ID:eracle,项目名称:gap,代码行数:22,代码来源:LogLikelihoodFunction.java


示例16: buildClassifier

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Trains every classifier of the ensemble on its own sample of
 * {@code data} and, if enabled, updates the out-of-bag estimate.
 *
 * <p>For each classifier a sample pair is drawn with
 * {@code samplingMethod.sample(data, data.size(), seed++)}. The classifier
 * is trained on {@code sample.x()}. When
 * {@code calculateOutOfBagErrorEstimate} is set, it is also evaluated on
 * {@code sample.y()}. Hits and misses accumulate across all classifiers.
 *
 * <p>The estimate is stored as {@code hits / (hits + misses)} using
 * floating-point division. The previous integer division could only yield
 * 0 or 1, and threw {@code ArithmeticException} when no instance had been
 * evaluated. The estimate is now left untouched in that case. A
 * {@code null} prediction counts as a miss instead of causing a
 * {@code NullPointerException}.
 *
 * <p>NOTE(review): the stored ratio is the fraction of correct
 * predictions, although the field is named "error estimate". The ratio is
 * kept as in the original; confirm which one callers expect.
 *
 * @param data the training data; also stored in {@code dataReference}
 */
public void buildClassifier(Dataset data) {
	this.dataReference = data;
	int hits = 0;
	int misses = 0;
	for (int i = 0; i < classifiers.length; i++) {
		Pair<Dataset, Dataset> sample = samplingMethod.sample(data, data.size(), seed++);
		classifiers[i].buildClassifier(sample.x());
		if (calculateOutOfBagErrorEstimate) {
			for (Instance inst : sample.y()) {
				Object predClass = classifiers[i].classify(inst);
				if (predClass != null && predClass.equals(inst.classValue())) {
					hits++;
				} else {
					misses++;
				}
			}
			int evaluated = hits + misses;
			if (evaluated > 0) {
				// Cast first: int / int would truncate the ratio to 0 or 1.
				outOfBagErrorEstimate = (double) hits / evaluated;
			}
		}
	}
}
 
开发者ID:eracle,项目名称:gap,代码行数:25,代码来源:Bagging.java


示例17: buildClassifier

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Builds the per-class mean instance table after delegating to the
 * superclass.
 *
 * <p>Instances are accumulated per {@code classValue()} with
 * {@code Instance.add}, starting from the first instance seen for that
 * class. Each accumulated instance is then divided by the number of
 * instances of its class. The result replaces the {@code mean} field.
 *
 * @param data the training data
 */
@Override
public void buildClassifier(Dataset data) {
    super.buildClassifier(data);
    mean = new HashMap<Object, Instance>();
    HashMap<Object, Integer> seen = new HashMap<Object, Integer>();

    // Pass 1: running sum and occurrence count per class value.
    for (Instance inst : data) {
        if (mean.containsKey(inst.classValue())) {
            mean.put(inst.classValue(), mean.get(inst.classValue()).add(inst));
            seen.put(inst.classValue(), seen.get(inst.classValue()) + 1);
        } else {
            mean.put(inst.classValue(), inst);
            seen.put(inst.classValue(), 1);
        }
    }

    // Pass 2: turn each sum into a mean. Only values of existing keys are
    // replaced, so the key set is not structurally modified while iterating.
    for (Object label : mean.keySet()) {
        mean.put(label, mean.get(label).divide(seen.get(label)));
    }
}
 
开发者ID:eracle,项目名称:gap,代码行数:20,代码来源:AbstractMeanClassifier.java


示例18: maxAttributes

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Creates an instance that holds, for every attribute index present in the
 * data set, the largest value found for that index.
 *
 * <p>Only indices returned by an instance's {@code keySet()} are
 * considered, so an index never present in any instance is absent from the
 * result.
 *
 * @param data
 *            data set to find maximum attribute values for
 * @return a new {@code SparseInstance} with the maximum value per attribute
 */
public static Instance maxAttributes(Dataset data) {
	Instance highest = new SparseInstance();
	for (Instance row : data) {
		for (Integer attr : row.keySet()) {
			double candidate = row.value(attr);
			boolean known = highest.containsKey(attr);
			if (!known || highest.get(attr) < candidate) {
				highest.put(attr, candidate);
			}
		}
	}
	return highest;
}
 
开发者ID:eracle,项目名称:gap,代码行数:24,代码来源:DatasetTools.java


示例19: build

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Computes one weight per attribute by repeated random sampling.
 *
 * <p>The data is first passed through {@code NormalizeMidrange(0.5, 1)};
 * per the original author's comment this normalizes it to [0,1]. Then
 * {@code data.size()} rounds are run. Each round draws a random instance,
 * calls {@code findNearest} for it and, for every attribute, subtracts
 * {@code diff} against {@code nearestHit} and adds {@code diff} against
 * {@code nearestMiss}, each divided by the number of rounds.
 *
 * <p>NOTE(review): {@code findNearest} presumably sets {@code nearestHit}
 * and {@code nearestMiss}; confirm in that method.
 *
 * @param data the data set to derive the weights from
 */
@Override
public void build(Dataset data) {
    weights = new double[data.noAttributes()];

    NormalizeMidrange normalizer = new NormalizeMidrange(0.5, 1);
    normalizer.filter(data);

    // One sampling round per instance in the data set.
    final int m = data.size();

    int round = 0;
    while (round < m) {
        Instance probe = data.instance(rg.nextInt(data.size()));
        findNearest(data, probe);
        for (int attr = 0; attr < weights.length; attr++) {
            weights[attr] = weights[attr] - diff(attr, probe, nearestHit) / m + diff(attr, probe, nearestMiss) / m;
        }
        round++;
    }
}
 
开发者ID:eracle,项目名称:gap,代码行数:21,代码来源:RELIEF.java


示例20: build

import net.sf.javaml.core.Dataset; //导入依赖的package包/类
/**
 * Selects {@code n} attributes by repeatedly calling {@code selectNext}.
 *
 * <p>If {@code n} exceeds the number of attributes in the data set, the
 * key set of the first instance is used as the selection and nothing else
 * is done. Otherwise the selection starts empty and {@code selectNext} is
 * called until it contains at least {@code n} entries.
 *
 * @param data the data set to select attributes from
 */
@Override
public void build(Dataset data) {
    if (n > data.noAttributes()) {
        // More attributes requested than exist: return them all.
        selectedAttributes = data.get(0).keySet();
    } else {
        // Regular procedure: add attributes one by one until enough are selected.
        Instance classInstance = DatasetTools.createInstanceFromClass(data);
        selectedAttributes = new HashSet<Integer>();
        for (; selectedAttributes.size() < n; ) {
            selectNext(data, classInstance);
        }
    }
}
 
开发者ID:eracle,项目名称:gap,代码行数:22,代码来源:GreedyForwardSelection.java



注:本文中的net.sf.javaml.core.Dataset类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java TabixIndex类代码示例发布时间:2022-05-23
下一篇:
Java PerFieldSimilarityWrapper类代码示例发布时间:2022-05-23
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap