This article collects typical usage examples of the Java class org.apache.mahout.common.commandline.DefaultOptionCreator. If you are wondering what DefaultOptionCreator does and how to use it, the curated class code examples below should help.
The DefaultOptionCreator class belongs to the org.apache.mahout.common.commandline package. Twenty code examples are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
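Before the individual examples, note that nearly all of them follow the same pattern: a driver extends Mahout's AbstractJob, registers options through DefaultOptionCreator's builder methods, calls parseArguments(), and then reads the parsed values back by option name. As orientation, here is a minimal hedged sketch of that pattern; the MyDriver class is hypothetical, but every API call in it appears in the real examples below.
import java.util.List;
import java.util.Map;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;

// Hypothetical driver illustrating the common DefaultOptionCreator pattern.
public class MyDriver extends AbstractJob {
  @Override
  public int run(String[] args) throws Exception {
    addInputOption();                                         // registers --input
    addOutputOption();                                        // registers --output
    addOption(DefaultOptionCreator.overwriteOption().create());
    addOption(DefaultOptionCreator.methodOption().create());  // --method sequential|mapreduce
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
      return -1; // help was requested or the arguments did not parse
    }
    boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
        .equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
    // ... launch the job using getInputPath(), getOutputPath(), runSequential ...
    return 0;
  }

  public static void main(String[] args) throws Exception {
    ToolRunner.run(new MyDriver(), args);
  }
}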
Example 1: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* CLI to run the clustering post-processor. The input to the post-processor is the output
* path produced by the clustering step.
*/
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.methodOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (getConf() == null) {
setConf(new Configuration());
}
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
run(input, output, runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 28, Source: ClusterOutputPostProcessorDriver.java
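For context, a hedged sketch of how such a driver is typically launched, e.g. from a main method. The long option names (--input, --output, --method) are the ones Mahout's builders register; the paths are placeholders.
// Hypothetical launch; requires org.apache.hadoop.conf.Configuration and org.apache.hadoop.util.ToolRunner.
String[] driverArgs = {
    "--input", "/clustering/output",        // the clustering job's output path
    "--output", "/clustering/postprocessed",
    "--method", DefaultOptionCreator.SEQUENTIAL_METHOD
};
ToolRunner.run(new Configuration(), new ClusterOutputPostProcessorDriver(), driverArgs);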
Example 2: main
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
GroupBuilder gbuilder = new GroupBuilder();
Option inputOpt = DefaultOptionCreator.inputOption().withRequired(false).create();
Option outputOpt = DefaultOptionCreator.outputOption().withRequired(false).create();
Option vectorOpt = obuilder.withLongName("vector").withRequired(false).withArgument(
abuilder.withName("v").withMinimum(1).withMaximum(1).create()).withDescription(
"The vector implementation to use.").withShortName("v").create();
Option helpOpt = DefaultOptionCreator.helpOption();
Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(
vectorOpt).withOption(helpOpt).create();
try {
Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(args);
if (cmdLine.hasOption(helpOpt)) {
CommandLineUtil.printHelp(group);
return;
}
Path input = new Path(cmdLine.getValue(inputOpt, "testdata").toString());
Path output = new Path(cmdLine.getValue(outputOpt, "output").toString());
String vectorClassName = cmdLine.getValue(vectorOpt,
"org.apache.mahout.math.RandomAccessSparseVector").toString();
//runJob(input, output, vectorClassName);
} catch (OptionException e) {
InputDriver.log.error("Exception parsing command line: ", e);
CommandLineUtil.printHelp(group);
}
}
Developer: PacktPublishing, Project: HBase-High-Performance-Cookbook, Lines: 36, Source: InputDriver.java
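Unlike the AbstractJob-based drivers in the other examples, this snippet wires the parser up by hand, so besides DefaultOptionCreator it depends on the commons-cli2 builder classes. For completeness, these are the standard imports the snippet relies on (package locations as in commons-cli2 and Mahout's common module):
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.CommandLineUtil;
Note that the runJob(...) call is commented out in the published snippet, so input, output and vectorClassName are parsed but never used.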
Example 3: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] arg0) throws Exception {
// set up command line arguments
addOption("half-life", "b", "Minimal half-life threshold", true);
addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
addOption("epsilon", "e", "Half-life threshold coefficient", Double.toString(EPSILON_DEFAULT));
addOption("tau", "t", "Threshold for cutting affinities", Double.toString(TAU_DEFAULT));
addOption("eigenrank", "k", "Number of top eigenvectors to use", true);
addOption(DefaultOptionCreator.inputOption().create());
addOption(DefaultOptionCreator.outputOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
Map<String, List<String>> parsedArgs = parseArguments(arg0);
if (parsedArgs == null) {
return 0;
}
// read in the command line values
Path input = getInputPath();
Path output = getOutputPath();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
int dimensions = Integer.parseInt(getOption("dimensions"));
double halflife = Double.parseDouble(getOption("half-life"));
double epsilon = Double.parseDouble(getOption("epsilon"));
double tau = Double.parseDouble(getOption("tau"));
int eigenrank = Integer.parseInt(getOption("eigenrank"));
run(getConf(), input, output, eigenrank, dimensions, halflife, epsilon, tau);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 34, Source: EigencutsDriver.java
Example 4: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] arg0) throws IOException, ClassNotFoundException, InterruptedException {
// set up command line options
Configuration conf = getConf();
addInputOption();
addOutputOption();
addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
addOption("clusters", "k", "Number of clusters and top eigenvectors", true);
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.convergenceOption().create());
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
Map<String, List<String>> parsedArgs = parseArguments(arg0);
if (parsedArgs == null) {
return 0;
}
Path input = getInputPath();
Path output = getOutputPath();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(conf, output);
}
int numDims = Integer.parseInt(getOption("dimensions"));
int clusters = Integer.parseInt(getOption("clusters"));
String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
run(conf, input, output, numDims, clusters, measure, convergenceDelta, maxIterations);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 34, Source: SpectralKMeansDriver.java
Example 5: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* CLI to run Cluster Classification Driver.
*/
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator.clustersInOption()
.withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
.create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (getConf() == null) {
setConf(new Configuration());
}
Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 37, Source: ClusterClassificationDriver.java
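A caveat worth flagging: the snippet reads DefaultOptionCreator.OUTLIER_THRESHOLD but never registers the corresponding option, so hasOption(...) presumably never fires here and the threshold always stays at 0.0. A one-line sketch of the registration the full driver would need (the same builder is used in Examples 14 and 17 below):
// Hypothetical addition inside run(), before parseArguments(args):
addOption(DefaultOptionCreator.outlierThresholdOption().create());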
Example 6: getAnalyzerClassFromOption
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
protected Class<? extends Analyzer> getAnalyzerClassFromOption() throws ClassNotFoundException {
Class<? extends Analyzer> analyzerClass = DefaultAnalyzer.class;
if (hasOption(DefaultOptionCreator.ANALYZER_NAME_OPTION)) {
String className = getOption(DefaultOptionCreator.ANALYZER_NAME_OPTION);
analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
// try instantiating it, b/c there isn't any point in setting it if
// you can't instantiate it
ClassUtils.instantiateAs(analyzerClass, Analyzer.class);
}
return analyzerClass;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 12, Source: AbstractJob.java
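From the caller's side, a minimal hedged sketch; it assumes the analyzer-name option was registered earlier, and DefaultOptionCreator.analyzerOption() is an assumed builder name.
// Assumed registration inside run():
//   addOption(DefaultOptionCreator.analyzerOption().create());
Class<? extends Analyzer> analyzerClass = getAnalyzerClassFromOption();
// Instantiation succeeds here because getAnalyzerClassFromOption() already tried it once.
Analyzer analyzer = ClassUtils.instantiateAs(analyzerClass, Analyzer.class);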
Example 7: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* CLI to run ImageToText Driver.
*/
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator
.clustersInOption()
.withDescription(
"The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
.create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (getConf() == null) {
setConf(new Configuration());
}
Path clustersIn = new Path(
getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double
.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(getConf(), input, clustersIn, output, runSequential);
return 0;
}
Developer: pgorecki, Project: visearch, Lines: 40, Source: ImageToTextDriver.java
Example 8: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* CLI to run Cluster Classification Driver.
*/
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator
.clustersInOption()
.withDescription(
"The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.")
.create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (getConf() == null) {
setConf(new Configuration());
}
Path clustersIn = new Path(
getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double
.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(getConf(), input, clustersIn, output,
clusterClassificationThreshold, true, runSequential);
return 0;
}
Developer: pgorecki, Project: visearch, Lines: 41, Source: MyClusterClassificationDriver.java
Example 9: complete
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public void complete(List<Centroid> list, TridentCollector tridentCollector) {
BallKMeans clusterer = new BallKMeans(StreamingKMeansBolt.searcherFromConfiguration(_conf),
((Number) _conf.get(DefaultOptionCreator.NUM_CLUSTERS_OPTION)).intValue(), 100);
clusterer.cluster(list);
for (Centroid centroid : clusterer) {
tridentCollector.emit(new Values(centroid));
}
}
Developer: dfilimon, Project: streaming-storm, Lines: 11, Source: BallKMeansAggregator.java
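The aggregator reads k from the topology configuration under Mahout's standard option key rather than defining its own constant. A sketch of the producing side, mirroring the test configuration in Example 11 (the value 20 is a placeholder):
// Storm topology configuration (backtype.storm.Config in this project's Storm version):
Config conf = new Config();
conf.put(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 20); // read back via ((Number) ...).intValue()
The third BallKMeans constructor argument (100) appears to be the maximum number of clustering iterations.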
Example 10: searcherFromConfiguration
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* Instantiates a searcher from a given configuration.
* @param conf the configuration
* @return the instantiated searcher
* @throws RuntimeException if the distance measure class cannot be instantiated
* @throws IllegalStateException if an unknown searcher class was requested
*/
public static UpdatableSearcher searcherFromConfiguration(Map conf) {
DistanceMeasure distanceMeasure;
String distanceMeasureClass = (String) conf.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
try {
distanceMeasure = (DistanceMeasure)Class.forName(distanceMeasureClass).newInstance();
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate distanceMeasure", e);
}
Integer numProjections = ((Number) conf.get(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION)).intValue();
Integer searchSize = ((Number) conf.get(StreamingKMeansDriver.SEARCH_SIZE_OPTION)).intValue();
String searcherClass = (String) conf.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION);
if (searcherClass.equals(BruteSearch.class.getName())) {
return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
new Class[]{DistanceMeasure.class}, new Object[]{distanceMeasure});
} else if (searcherClass.equals(FastProjectionSearch.class.getName()) ||
searcherClass.equals(ProjectionSearch.class.getName())) {
return ClassUtils.instantiateAs(searcherClass, UpdatableSearcher.class,
new Class[]{DistanceMeasure.class, int.class, int.class},
new Object[]{distanceMeasure, numProjections, searchSize});
} else if (searcherClass.equals(LocalitySensitiveHashSearch.class.getName())) {
return ClassUtils.instantiateAs(searcherClass, LocalitySensitiveHashSearch.class,
new Class[]{DistanceMeasure.class, int.class},
new Object[]{distanceMeasure, searchSize});
} else {
throw new IllegalStateException("Unknown class instantiation requested");
}
}
Developer: dfilimon, Project: streaming-storm, Lines: 38, Source: StreamingKMeansBolt.java
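A usage sketch with the same keys the method reads; the values mirror the test setup in Example 11 below.
Map<String, Object> conf = new java.util.HashMap<String, Object>();
conf.put(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, SquaredEuclideanDistanceMeasure.class.getName());
conf.put(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, 3);
conf.put(StreamingKMeansDriver.SEARCH_SIZE_OPTION, 2);
conf.put(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, FastProjectionSearch.class.getName());
UpdatableSearcher searcher = StreamingKMeansBolt.searcherFromConfiguration(conf);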
Example 11: setupConfig
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Before
public void setupConfig() {
_conf = new Config();
_conf.setNumWorkers(2);
_conf.registerSerialization(Path.class, FieldSerializer.class);
_conf.registerSerialization(SequenceFile.Writer.class, FieldSerializer.class);
_conf.registerSerialization(DenseVector.class, VectorSerializer.class);
_conf.registerSerialization(Centroid.class, CentroidSerializer.class);
_conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 5);
_conf.put(HypercubeSpout.NUM_POINTS, 2000);
_conf.put(HypercubeSpout.NUM_DIMENSION, 500);
_conf.put(HypercubeSpout.NUM_CLUSTERS, NUM_CLUSTERS);
_conf.put(HypercubeSpout.RADIUS, 0.0001);
_conf.put(HypercubeSpout.UNIFORM_FRACTION, 0.0);
_conf.put(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, SquaredEuclideanDistanceMeasure.class.getName());
_conf.put(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, 3);
_conf.put(StreamingKMeansDriver.SEARCH_SIZE_OPTION, 2);
_conf.put(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, FastProjectionSearch.class.getName());
_conf.put(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, ESTIMATED_NUM_MAP_CLUSTERS);
_conf.put(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, 1e-7);
_conf.put(HypercubeSpout.OUTPUT_PATH, INPUT_PATH);
_conf.put(LocalSequenceFileWriterBolt.OUTPUT_PATH, OUTPUT_PATH);
}
Developer: dfilimon, Project: streaming-storm, Lines: 29, Source: StreamingKMeansTopologyTest.java
Example 12: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
/**
* The sampling rate that is used for computing the reconstruction error
*/
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.methodOption().create());
addOption(ROWSOPTION, "rows", "Number of rows");
addOption(COLSOPTION, "cols", "Number of cols");
addOption(PRINCIPALSOPTION, "pcs", "Number of principal components");
addOption(SPLITFACTOROPTION, "sf", "Split each block to increase parallelism");
addOption(ERRSAMPLE, "errSampleRate",
"Sampling rate for computing the error (0-1]");
addOption(MAXITER, "maxIter",
"Maximum number of iterations before terminating, the default is 3");
addOption(NORMALIZEOPTION, "normalize",
"Choose whether you want the input matrix to be normalized or not, 1 means normalize, 0 means don't normalize");
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
final int nRows = Integer.parseInt(getOption(ROWSOPTION));
final int nCols = Integer.parseInt(getOption(COLSOPTION));
final int nPCs = Integer.parseInt(getOption(PRINCIPALSOPTION));
final int splitFactor;
final int normalize;
final int maxIterations;
final float errSampleRate;
if (hasOption(SPLITFACTOROPTION)) {
  splitFactor = Integer.parseInt(getOption(SPLITFACTOROPTION, "1"));
} else {
  splitFactor = 1;
}
if (hasOption(ERRSAMPLE)) {
  errSampleRate = Float.parseFloat(getOption(ERRSAMPLE));
} else {
  int length = String.valueOf(nRows).length();
  if (length <= 4) {
    errSampleRate = 1;
  } else {
    errSampleRate = (float) (1 / Math.pow(10, length - 4));
  }
  log.warn("error sampling rate set to: errRate=" + errSampleRate);
}
if (hasOption(MAXITER)) {
  maxIterations = Integer.parseInt(getOption(MAXITER));
} else {
  maxIterations = 3;
}
if (hasOption(NORMALIZEOPTION)) {
  normalize = Integer.parseInt(getOption(NORMALIZEOPTION));
} else {
  normalize = 0;
}
Configuration conf = getConf();
if (conf == null) {
throw new IOException("No Hadoop configuration present");
}
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
run(conf, input, output, nRows, nCols, nPCs, splitFactor, errSampleRate, maxIterations, normalize, runSequential);
return 0;
}
Developer: SiddharthMalhotra, Project: sPCA, Lines: 66, Source: SPCADriver.java
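The fallback error-sampling rate deserves a worked example: the rate stays at 1 for matrices with up to 9,999 rows and then shrinks tenfold per additional digit.
// Worked example of the default errSampleRate computed above:
int nRows = 1000000;                                     // 7 digits
int length = String.valueOf(nRows).length();             // 7
float errSampleRate = (float) (1 / Math.pow(10, length - 4));
// errSampleRate == 0.001f, i.e. 0.1% of rows are sampled for the reconstruction error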
Example 13: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.numClustersOption().withRequired(true).create());
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(ALPHA_OPTION, "a0", "The alpha0 value for the DirichletDistribution. Defaults to 1.0", "1.0");
addOption(MODEL_DISTRIBUTION_CLASS_OPTION, "md",
"The ModelDistribution class name. Defaults to GaussianClusterDistribution",
GaussianClusterDistribution.class.getName());
addOption(MODEL_PROTOTYPE_CLASS_OPTION, "mp",
"The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector",
RandomAccessSparseVector.class.getName());
addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create());
addOption(DefaultOptionCreator.emitMostLikelyOption().create());
addOption(DefaultOptionCreator.thresholdOption().create());
addOption(DefaultOptionCreator.methodOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
String modelFactory = getOption(MODEL_DISTRIBUTION_CLASS_OPTION);
String modelPrototype = getOption(MODEL_PROTOTYPE_CLASS_OPTION);
String distanceMeasure = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION));
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION));
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
int prototypeSize = readPrototypeSize(input);
DistributionDescription description = new DistributionDescription(modelFactory, modelPrototype, distanceMeasure,
prototypeSize);
run(getConf(), input, output, description, numModels, maxIterations, alpha0, runClustering, emitMostLikely,
threshold, runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 50, Source: DirichletDriver.java
Example 14: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.t1Option().create());
addOption(DefaultOptionCreator.t2Option().create());
addOption(DefaultOptionCreator.t3Option().create());
addOption(DefaultOptionCreator.t4Option().create());
addOption(DefaultOptionCreator.clusterFilterOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator.outlierThresholdOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
Configuration conf = getConf();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(conf, output);
}
String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
double t3 = t1;
if (hasOption(DefaultOptionCreator.T3_OPTION)) {
t3 = Double.parseDouble(getOption(DefaultOptionCreator.T3_OPTION));
}
double t4 = t2;
if (hasOption(DefaultOptionCreator.T4_OPTION)) {
t4 = Double.parseDouble(getOption(DefaultOptionCreator.T4_OPTION));
}
int clusterFilter = 0;
if (hasOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION)) {
clusterFilter = Integer
.parseInt(getOption(DefaultOptionCreator.CLUSTER_FILTER_OPTION));
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(conf, input, output, measure, t1, t2, t3, t4, clusterFilter,
runClustering, clusterClassificationThreshold, runSequential );
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 55, Source: CanopyDriver.java
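A hedged launch sketch for the canopy driver; note from the code above that t3/t4 silently fall back to t1/t2 when omitted. The long option names are the ones the builders register; paths, the measure and the thresholds are placeholders.
// Hypothetical launch (e.g. via ToolRunner from a main method); values are placeholders.
String[] canopyArgs = {
    "--input", "/vectors",
    "--output", "/canopies",
    "--distanceMeasure", ManhattanDistanceMeasure.class.getName(),
    "--t1", "3.0",    // t3 falls back to this value
    "--t2", "1.5",    // t4 falls back to this value
    "--overwrite",
    "--clustering",
    "--method", DefaultOptionCreator.SEQUENTIAL_METHOD
};
ToolRunner.run(new Configuration(), new CanopyDriver(), canopyArgs);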
Example 15: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.clustersInOption()
.withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
+ "If k is also specified, then a random set of vectors will be selected"
+ " and written out to this path first")
.create());
addOption(DefaultOptionCreator.numClustersOption()
.withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
+ " as the Centroid and written to the clusters input path.").create());
addOption(DefaultOptionCreator.convergenceOption().create());
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true);
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(DefaultOptionCreator.emitMostLikelyOption().create());
addOption(DefaultOptionCreator.thresholdOption().create());
addOption(DefaultOptionCreator.methodOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
Path output = getOutputPath();
String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
if (measureClass == null) {
measureClass = SquaredEuclideanDistanceMeasure.class.getName();
}
double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
float fuzziness = Float.parseFloat(getOption(M_OPTION));
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION));
double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION));
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
clusters = RandomSeedGenerator.buildRandom(getConf(),
input,
clusters,
Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)),
measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
run(getConf(),
input,
clusters,
output,
measure,
convergenceDelta,
maxIterations,
fuzziness,
runClustering,
emitMostLikely,
threshold,
runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 70, Source: FuzzyKMeansDriver.java
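A hedged launch sketch; the fuzziness coefficient m must be greater than 1 (values near 1 approach hard k-means, larger values soften the memberships). Paths and numbers are placeholders.
// Hypothetical launch; a random seed file is written first because --numClusters is given.
String[] fkmArgs = {
    "--input", "/vectors",
    "--output", "/fkm-output",
    "--clusters", "/fkm-seeds",
    "--numClusters", "5",
    "--m", "2.0",                  // must be > 1
    "--convergenceDelta", "0.01",
    "--maxIter", "10",
    "--overwrite"
};
ToolRunner.run(new Configuration(), new FuzzyKMeansDriver(), fkmArgs);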
Example 16: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
addInputOption();
addOutputOption();
addOption(MinhashOptionCreator.minClusterSizeOption().create());
addOption(MinhashOptionCreator.minVectorSizeOption().create());
addOption(MinhashOptionCreator.hashTypeOption().create());
addOption(MinhashOptionCreator.numHashFunctionsOption().create());
addOption(MinhashOptionCreator.keyGroupsOption().create());
addOption(MinhashOptionCreator.numReducersOption().create());
addOption(MinhashOptionCreator.debugOutputOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
int minClusterSize = Integer.valueOf(getOption(MinhashOptionCreator.MIN_CLUSTER_SIZE));
int minVectorSize = Integer.valueOf(getOption(MinhashOptionCreator.MIN_VECTOR_SIZE));
String hashType = getOption(MinhashOptionCreator.HASH_TYPE);
int numHashFunctions = Integer.valueOf(getOption(MinhashOptionCreator.NUM_HASH_FUNCTIONS));
int keyGroups = Integer.valueOf(getOption(MinhashOptionCreator.KEY_GROUPS));
int numReduceTasks = Integer.parseInt(getOption(MinhashOptionCreator.NUM_REDUCERS));
boolean debugOutput = hasOption(MinhashOptionCreator.DEBUG_OUTPUT);
runJob(input,
output,
minClusterSize,
minVectorSize,
hashType,
numHashFunctions,
keyGroups,
numReduceTasks,
debugOutput);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 42, Source: MinHashDriver.java
Example 17: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator
.clustersInOption()
.withDescription(
"The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. "
+ "If k is also specified, then a random set of vectors will be selected"
+ " and written out to this path first").create());
addOption(DefaultOptionCreator
.numClustersOption()
.withDescription(
"The k in k-Means. If specified, then a random selection of k Vectors will be chosen"
+ " as the Centroid and written to the clusters input path.").create());
addOption(DefaultOptionCreator.convergenceOption().create());
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(DefaultOptionCreator.methodOption().create());
addOption(DefaultOptionCreator.outlierThresholdOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION));
Path output = getOutputPath();
String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
if (measureClass == null) {
measureClass = SquaredEuclideanDistanceMeasure.class.getName();
}
double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
if (hasOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)) {
clusters = RandomSeedGenerator.buildRandom(getConf(), input, clusters,
Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)), measure);
}
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(
DefaultOptionCreator.SEQUENTIAL_METHOD);
if (getConf() == null) {
setConf(new Configuration());
}
double clusterClassificationThreshold = 0.0;
if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(getConf(), input, clusters, output, measure, convergenceDelta, maxIterations, runClustering,
clusterClassificationThreshold, runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 61, Source: KMeansDriver.java
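And the classic end-to-end case, as a hedged sketch: k-means with k random seeds, at most 10 iterations, and cluster classification at the end. The long option names follow the builders above; paths are placeholders, and the distance measure defaults to squared Euclidean as the code shows.
// Hypothetical launch; the seed path is written first because --numClusters is given.
String[] kmeansArgs = {
    "--input", "/vectors",
    "--output", "/kmeans-output",
    "--clusters", "/kmeans-seeds",
    "--numClusters", "10",
    "--maxIter", "10",
    "--convergenceDelta", "0.01",
    "--clustering",
    "--overwrite"
};
ToolRunner.run(new Configuration(), new KMeansDriver(), kmeansArgs);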
Example 18: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.convergenceOption().create());
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(DefaultOptionCreator.inputIsCanopiesOption().create());
addOption(DefaultOptionCreator.distanceMeasureOption().create());
addOption(DefaultOptionCreator.kernelProfileOption().create());
addOption(DefaultOptionCreator.t1Option().create());
addOption(DefaultOptionCreator.t2Option().create());
addOption(DefaultOptionCreator.clusteringOption().create());
addOption(DefaultOptionCreator.methodOption().create());
if (parseArguments(args) == null) {
return -1;
}
Path input = getInputPath();
Path output = getOutputPath();
if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
HadoopUtil.delete(getConf(), output);
}
String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
String kernelProfileClass = getOption(DefaultOptionCreator.KERNEL_PROFILE_OPTION);
double t1 = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
double t2 = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
boolean runClustering = hasOption(DefaultOptionCreator.CLUSTERING_OPTION);
double convergenceDelta = Double
.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
int maxIterations = Integer
.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
boolean inputIsCanopies = hasOption(INPUT_IS_CANOPIES_OPTION);
boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION)
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
IKernelProfile kernelProfile = ClassUtils.instantiateAs(kernelProfileClass, IKernelProfile.class);
run(getConf(), input, output, measure, kernelProfile, t1, t2,
convergenceDelta, maxIterations, inputIsCanopies, runClustering,
runSequential);
return 0;
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 45, Source: MeanShiftCanopyDriver.java
Example 19: run
import org.apache.mahout.common.commandline.DefaultOptionCreator; // import the required package/class
@Override
public int run(String[] args) throws Exception {
addInputOption();
addOutputOption();
addOption(DefaultOptionCreator.maxIterationsOption().create());
addOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION, "cd", "The convergence delta value", "0");
addOption(DefaultOptionCreator.overwriteOption().create());
addOption(NUM_TOPICS, "k", "Number of topics to learn", true);
addOption(NUM_TERMS, "nt", "Vocabulary size", false);
addOption(DOC_TOPIC_SMOOTHING, "a", "Smoothing for document/topic distribution", "0.0001");
addOption(TERM_TOPIC_SMOOTHING, "e", "Smoothing for topic/term distribution", "0.0001");
addOption(DICTIONARY, "dict", "Path to term-dictionary file(s) (glob expression supported)",
false);
addOption(DOC_TOPIC_OUTPUT, "dt", "Output path for the training doc/topic distribution",
false);
addOption(MODEL_TEMP_DIR, "mt", "Path to intermediate model path (useful for restarting)",
false);
addOption(ITERATION_BLOCK_SIZE, "block", "Number of iterations per perplexity check", "10");
addOption(RANDOM_SEED, "seed", "Random seed", false);
addOption(TEST_SET_FRACTION, "tf", "Fraction of data to hold out for testing", "0");
addOption(NUM_TRAIN_THREADS, "ntt", "number of threads per mapper to train with", "4");
addOption(NUM_UPDATE_THREADS, "nut", "number of threads per mapper to update the model with",
"1");
addOption(MAX_ITERATIONS_PER_DOC, "mipd",
"max number of iterations per doc for p(topic|doc) learning", "10");
addOption(NUM_REDUCE_TASKS, null,
"number of reducers to use during model estimation", "10");
addOption(buildOption(BACKFILL_PERPLEXITY, null,
"enable backfilling of missing perplexity values", false, false, null));
if (parseArguments(args) == null) {
return -1;
}
int numTopics = Integer.parseInt(getOption(NUM_TOPICS));
Path inputPath = getInputPath();
Path topicModelOutputPath = getOutputPath();
int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
int iterationBlockSize = Integer.parseInt(getOption(ITERATION_BLOCK_SIZE));
double convergenceDelta = Double.parseDouble(getOption(DefaultOptionCreator.CONVERGENCE_DELTA_OPTION));
double alpha = Double.parseDouble(getOption(DOC_TOPIC_SMOOTHING));
double eta = Double.parseDouble(getOption(TERM_TOPIC_SMOOTHING));
int numTrainThreads = Integer.parseInt(getOption(NUM_TRAIN_THREADS));
int numUpdateThreads = Integer.parseInt(getOption(NUM_UPDATE_THREADS));
int maxItersPerDoc = Integer.parseInt(getOption(MAX_ITERATIONS_PER_DOC));
Path dictionaryPath = hasOption(DICTIONARY) ? new Path(getOption(DICTIONARY)) : null;
int numTerms = hasOption(NUM_TERMS)
? Integer.parseInt(getOption(NUM_TERMS))
: getNumTerms(getConf(), dictionaryPath);
Path docTopicOutputPath = hasOption(DOC_TOPIC_OUTPUT) ? new Path(getOption(DOC_TOPIC_OUTPUT)) : null;
Path modelTempPath = hasOption(MODEL_TEMP_DIR)
? new Path(getOption(MODEL_TEMP_DIR))
: getTempPath("topicModelState");
long seed = hasOption(RANDOM_SEED)
? Long.parseLong(getOption(RANDOM_SEED))
: System.nanoTime() % 10000;
float testFraction = hasOption(TEST_SET_FRACTION)
? Float.parseFloat(getOption(TEST_SET_FRACTION))
: 0.0f;
int numReduceTasks = Integer.parseInt(getOption(NUM_REDUCE_TASKS));
boolean backfillPerplexity = hasOption(BACKFILL_PERPLEXITY);
return run(getConf(), inputPath, topicModelOutputPath, numTopics, numTerms, alpha, eta,
maxIterations, iterationBlockSize, convergenceDelta, dictionaryPath, docTopicOutputPath,
modelTempPath, seed, testFraction, numTrainThreads, numUpdateThreads, maxItersPerDoc,
numReduceTasks, backfillPerplexity);
}
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 69, Source: CVB0Driver.java