Mahout k-means clustering example
package mia.clustering.ch07;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
/**
 * Small driver that runs Mahout's {@link KMeansDriver} over a directory of
 * TF-IDF vectors and then prints each record found in the first
 * clustered-points part file as "vector belongs to cluster id".
 *
 * <p>All locations are hard-coded under {@link #WORK_DIR}; adjust the
 * constants below to point at your own data.
 */
public class MyKmeans
{
    /** Base working directory that holds the input vectors, seed clusters and output. */
    private static final String WORK_DIR = "/home/venkat/Desktop/work";

    /** Directory of input vectors passed to k-means. */
    private static final String INPUT_DIR = WORK_DIR + "/reuters-out-seqdir-sparse/tfidf-vectors";

    /** Directory of initial clusters passed to k-means. */
    private static final String CLUSTERS_DIR = WORK_DIR + "/clusters";

    /** Directory that k-means writes its results to. */
    private static final String OUTPUT_DIR = WORK_DIR + "/output";

    /**
     * Runs k-means and dumps the resulting point-to-cluster assignments to
     * standard output.
     *
     * <p>Any exception raised while clustering or while reading the result is
     * printed via {@link Exception#printStackTrace()} and otherwise ignored,
     * so the method always returns normally.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args)
    {
        System.out.println("venkat");
        try
        {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);

            // Roughly equivalent command line, kept for reference:
            //   ./bin/mahout kmeans
            //     -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/
            //     -c ./examples/bin/work/clusters
            //     -o ./examples/bin/work/reuters-kmeans
            //     -x 10 -k 20 -ow
            //
            // NOTE(review): the trailing arguments are presumably
            // convergenceDelta, maxIterations, runClustering and runSequential
            // (Mahout 0.5 KMeansDriver.run) -- confirm against the Mahout
            // version on the classpath.
            KMeansDriver.run(conf,
                    new Path(INPUT_DIR),
                    new Path(CLUSTERS_DIR),
                    new Path(OUTPUT_DIR),
                    new EuclideanDistanceMeasure(), // TanimotoDistanceMeasure was tried as an alternative
                    0.001,
                    1,
                    true,
                    false);

            System.out.println(Cluster.CLUSTERED_POINTS_DIR);

            Path clusteredPoints = new Path(
                    OUTPUT_DIR + "/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000");
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, clusteredPoints, conf);
            try
            {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                while (reader.next(key, value))
                {
                    System.out.println(value.toString() + " belongs to cluster " + key.toString());
                }
            }
            finally
            {
                // Close the reader even when reading fails part-way through,
                // so the underlying file handle is not leaked.
                reader.close();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
No comments:
Post a Comment