This article collects typical usage examples of the Java class org.apache.mahout.common.distance.EuclideanDistanceMeasure. If you have been wondering what EuclideanDistanceMeasure is for and how to use it, the curated class examples below should help.
The EuclideanDistanceMeasure class belongs to the org.apache.mahout.common.distance package. Four code examples are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code samples.
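Before diving into the examples, here is a minimal standalone sketch (my addition, not taken from any of the projects below) of what the class does: EuclideanDistanceMeasure implements Mahout's DistanceMeasure interface, and its distance(Vector, Vector) method returns the L2 distance between two vectors.

import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class EuclideanDistanceDemo {
    public static void main(String[] args) {
        // Two points in the plane
        Vector a = new DenseVector(new double[] {1.0, 2.0});
        Vector b = new DenseVector(new double[] {4.0, 6.0});
        EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
        // sqrt((4-1)^2 + (6-2)^2) = 5.0
        System.out.println(measure.distance(a, b));
    }
}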
Example 1: getDistanceClass
import org.apache.mahout.common.distance.EuclideanDistanceMeasure; // import the required package/class
private DistanceMeasure getDistanceClass(String shortName) {
    if (shortName.equalsIgnoreCase("manhattan")) {
        return new ManhattanDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("cosine")) {
        return new CosineDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("chebyshev")) {
        return new ChebyshevDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("euclidean")) {
        return new EuclideanDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("mahalanobis")) {
        return new MahalanobisDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("minkowski")) {
        return new MinkowskiDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("squaredeuclidean")) {
        return new SquaredEuclideanDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("tanimoto")) {
        return new TanimotoDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("weightedeuclidean")) {
        return new WeightedEuclideanDistanceMeasure();
    } else if (shortName.equalsIgnoreCase("weightedmanhattan")) {
        return new WeightedManhattanDistanceMeasure();
    }
    // Fall back to Manhattan distance when the name is unrecognized
    return new ManhattanDistanceMeasure();
}
Developer: bytegriffin, Project: recsys-offline, Lines: 25, Source file: Constants.java
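A design note on Example 1: the long if/else chain works, but a registry map is easier to extend. Below is a hypothetical refactoring of my own (not part of recsys-offline) that keeps the same fallback to ManhattanDistanceMeasure; only some of the measures are registered, for brevity.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;
import org.apache.mahout.common.distance.*;

public class DistanceMeasureFactory {
    // Registry keyed by lower-case short name
    private static final Map<String, Supplier<DistanceMeasure>> MEASURES = new HashMap<>();
    static {
        MEASURES.put("manhattan", ManhattanDistanceMeasure::new);
        MEASURES.put("cosine", CosineDistanceMeasure::new);
        MEASURES.put("chebyshev", ChebyshevDistanceMeasure::new);
        MEASURES.put("euclidean", EuclideanDistanceMeasure::new);
        MEASURES.put("squaredeuclidean", SquaredEuclideanDistanceMeasure::new);
        MEASURES.put("tanimoto", TanimotoDistanceMeasure::new);
        // The Mahalanobis, Minkowski and weighted variants register the same way.
    }

    public static DistanceMeasure getDistanceClass(String shortName) {
        // Unrecognized names fall back to Manhattan, as in the original
        return MEASURES.getOrDefault(shortName.toLowerCase(), ManhattanDistanceMeasure::new).get();
    }
}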
Example 2: runClustering
import org.apache.mahout.common.distance.EuclideanDistanceMeasure; // import the required package/class
private static void runClustering(Configuration conf, ConfigFile configFile)
        throws IOException, ClassNotFoundException, InterruptedException {
    FileSystem fs = FileSystem.get(conf);
    Path clusters = new Path(BASE_DIR, new Path("initial-clusters"));
    fs.delete(DICTIONARY_DIR, true);
    fs.mkdirs(DICTIONARY_DIR);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    int k = configFile.get("dictionarySize", 100);
    double convergenceDelta = configFile.get("dictionaryConvergenceDelta", 0.001);
    int maxIterations = configFile.get("dictionaryMaxIterations", 10);
    // Seed the run with randomly chosen initial clusters
    clusters = RandomSeedGenerator.buildRandom(conf, DESCRIPTORS_DIR,
            clusters, k, measure);
    log.info("Random clusters generated, running K-Means, k=" + k + " maxIter=" + maxIterations);
    log.info("KMeansDriver.run(...");
    log.info(DESCRIPTORS_DIR.toString());
    log.info(clusters.toString());
    log.info(DICTIONARY_DIR.toString());
    log.info("....)");
    KMeansDriver.run(conf, DESCRIPTORS_DIR, clusters, DICTIONARY_DIR,
            measure, convergenceDelta, maxIterations, true, 0.0,
            VM.RunSequential());
    log.info("KMeans done");
}
Developer: pgorecki, Project: visearch, Lines: 32, Source file: KMeans.java
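Example 2 writes the final cluster centers (the "dictionary") under DICTIONARY_DIR, but the project does not show how they are consumed. A sketch like the following could read them back; it assumes a Mahout version (0.7+) where centers are serialized as ClusterWritable records in a clusters-<i>-final directory, and the path below is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.iterator.ClusterWritable;

public class DumpCenters {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Illustrative path: KMeansDriver names the last iteration clusters-<i>-final
        Path centers = new Path("dictionary/clusters-10-final/part-r-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, centers, conf);
        try {
            // Key type comes from the file header; value is assumed to be ClusterWritable
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            ClusterWritable value = new ClusterWritable();
            while (reader.next(key, value)) {
                Cluster c = value.getValue();
                System.out.println("cluster " + c.getId() + " center=" + c.getCenter());
            }
        } finally {
            reader.close();
        }
    }
}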
Example 3: runClustering
import org.apache.mahout.common.distance.EuclideanDistanceMeasure; // import the required package/class
private static void runClustering(Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Path input = new Path("kmeans/toy1/in");
    Path clusters = new Path("kmeans/toy1/cl");
    Path output = new Path("kmeans/toy1/out");
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    int k = 3;
    double convergenceDelta = 0.5;
    int maxIterations = 10;
    boolean runSequential = true;
    // Recreate the output and cluster directories
    FileSystem.get(conf).delete(output, true);
    FileSystem.get(conf).mkdirs(output);
    FileSystem.get(conf).delete(clusters, true);
    FileSystem.get(conf).mkdirs(clusters);
    // Seed with random cluster centers
    log.info("Random clusters points....");
    clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, k, measure);
    log.info(clusters.toString());
    log.info("Running KMeans");
    // TODO: set the -cl flag (classification?)
    log.info(input.toString());
    log.info(clusters.toString());
    log.info(output.toString());
    KMeansDriver.run(conf, input, clusters, output, measure, convergenceDelta,
            maxIterations, false, 0.0, runSequential);
    log.info("KMeans done");
}
Developer: pgorecki, Project: visearch, Lines: 35, Source file: KMeansToy.java
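Example 3 assumes kmeans/toy1/in already contains points as a Hadoop SequenceFile of (Text, VectorWritable) records, the input format RandomSeedGenerator and KMeansDriver read. Here is a sketch of producing such a toy input (my addition, not from the visearch project):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;

public class WriteToyInput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path input = new Path("kmeans/toy1/in/points.seq");
        // Two obvious blobs plus a stray point, enough for k = 3
        double[][] points = {{1, 1}, {2, 1}, {1, 2}, {8, 8}, {9, 8}, {5, 5}};
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, input,
                Text.class, VectorWritable.class);
        try {
            for (int i = 0; i < points.length; i++) {
                // Each record: a point name and the point wrapped in a VectorWritable
                writer.append(new Text("point-" + i),
                        new VectorWritable(new DenseVector(points[i])));
            }
        } finally {
            writer.close();
        }
    }
}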
Example 4: main
import org.apache.mahout.common.distance.EuclideanDistanceMeasure; // import the required package/class
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");
    // part1---------------------------------------------------------------
    // Job job0 = Job.getInstance(conf, "siftKeywordsDimension");
    // Path output1Path = new Path(hdfsUrl + "/data/recommend/matrix1");
    // HadoopUtil.delete(conf, output1Path);
    // job0.setJarByClass(TFIDF.class);
    // job0.setMapperClass(Mapper_Part1.class);
    // job0.setReducerClass(Reduce_Part1.class);
    // job0.setMapOutputKeyClass(Text.class);
    // job0.setMapOutputValueClass(Text.class);
    // job0.setOutputKeyClass(Text.class);
    // job0.setOutputValueClass(Text.class);
    // job0.setPartitionerClass(CustomPartitioner.class);
    // FileInputFormat.addInputPath(job0, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    // FileOutputFormat.setOutputPath(job0, output1Path);
    // job0.waitForCompletion(true);
    // part2---------------------------------------------------------------
    // FileSystem fsopen = FileSystem.get(conf);
    // FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    // Scanner scan = new Scanner(in);
    // List<String> keywordList = new ArrayList<String>();
    // while (scan.hasNext()) {
    //     keywordList.add(scan.next());
    // }
    //// must be set before the job is created
    // conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));
    // Job job1 = Job.getInstance(conf, "generateMatrix");
    // Path output2Path = new Path(hdfsUrl + "/data/recommend/matrix2");
    // HadoopUtil.delete(conf, output2Path);
    // job1.setJarByClass(TFIDF.class);
    // job1.setMapperClass(Mapper_Part2.class);
    // job1.setReducerClass(Reduce_Part2.class);
    // job1.setMapOutputKeyClass(Text.class);
    // job1.setMapOutputValueClass(Text.class);
    // job1.setOutputKeyClass(Text.class);
    // job1.setOutputValueClass(NullWritable.class);
    //// job1.addCacheFile(new Path("/data/recommend/matrix1/part-r-00000").toUri());
    // FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    // FileOutputFormat.setOutputPath(job1, output2Path);
    // job1.waitForCompletion(true);
    // part3 ------------------- run the clustering and print the result -------------------
    Path output3Path = new Path(hdfsUrl + "/data/recommend/cluster2");
    HadoopUtil.delete(conf, output3Path);
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Path clusterInput = new Path(hdfsUrl + "/data/recommend/matrix2");
    Path clusterSeqInput = new Path(hdfsUrl + "/data/recommend/cluster1");
    Path clusterOutput = new Path(hdfsUrl + "/data/recommend/cluster2");
    int k = 10;
    int maxIter = 3;
    // Convert the data file into Mahout's vector representation (you have to write this step yourself)
    // InputDriver.runJob(clusterInput, clusterSeqInput, "org.apache.mahout.math.RandomAccessSparseVector");
    // Randomly pick k points as the initial cluster centers
    Path clusters = RandomSeedGenerator.buildRandom(conf, clusterSeqInput,
            new Path(clusterOutput, "clusters-0"), k, measure);
    KMeansDriver.run(conf, clusterSeqInput, clusters, clusterOutput, 0.01, maxIter, true, 0.0, false);
    // Call ClusterDumper.printClusters to print the clustering result
    ClusterDumper clusterDumper = new ClusterDumper(new Path(clusterOutput, "clusters-"
            + (maxIter - 1)), new Path(clusterOutput, "clusteredPoints"));
    clusterDumper.printClusters(null);
    clusterOutput(conf, new Path(hdfsUrl + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
    // clusterOutput2(conf0, new Path(hdfsUrl0 + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
    // matrix2Vector(conf0, new Path(hdfsUrl0 + "/data/recommend/cluster1/part-m-00000")); // not used for now
}
Developer: hejy12, Project: newsRecommender, Lines: 73, Source file: MatrixAndCluster.java
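The clusterOutput(...) helper called at the end of Example 4 is not shown. A plausible sketch (an assumption on my part, not the project's actual implementation) is a generic dump of the clusteredPoints sequence file, whose keys are cluster ids and whose values are the weighted, clustered points:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class ClusterOutputSketch {
    // Prints every (clusterId, weighted point) record in the given part file
    static void clusterOutput(Configuration conf, Path path) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // Instantiate key/value types from the file header, so this works
            // regardless of which Writable classes the Mahout version used
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}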
Note: the org.apache.mahout.common.distance.EuclideanDistanceMeasure examples in this article were compiled from source code and documentation platforms such as GitHub and MSDocs, with snippets selected from community-contributed open-source projects. Copyright remains with the original authors; distribution and use are subject to each project's license. Please do not republish without permission.