Sunday, July 1, 2012


Mahout k-means clustering example




package mia.clustering.ch07;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;

public class MyKmeans
{
public static void main(String s[])
{

     System.out.println("venkat");

 try
 {
 Configuration conf = new Configuration();
 FileSystem fs = FileSystem.get(conf);

 KMeansDriver.run( conf,
new Path("/home/venkat/Desktop/work/reuters-out-seqdir-sparse/tfidf-vectors"),
new Path("/home/venkat/Desktop/work/clusters"),
new Path("/home/venkat/Desktop/work/output"),
new EuclideanDistanceMeasure(),
// new TanimotoDistanceMeasure(),
0.001,
1,
true,
false);

  // SequenceFile.Reader reader = new SequenceFile.Reader(fs,new Path("/home/venkat/Desktop/mahout/mahout-distribution-0.5/examples/bin/work2/clusters/part-randomSeed"), conf);

System.out.println(Cluster.CLUSTERED_POINTS_DIR);

  /*
  ./bin/mahout kmeans
 
  -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/
  -c ./examples/bin/work/clusters
  -o ./examples/bin/work/reuters-kmeans
  -x 10
  -k 20
  -ow
 
  */

//SequenceFile.Reader reader = new SequenceFile.Reader(fs,new Path("/home/venkat/Desktop/work/output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);

   SequenceFile.Reader reader = new SequenceFile.Reader(fs,new Path("/home/venkat/Desktop/work/output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);

IntWritable key = new IntWritable();
WeightedVectorWritable value = new WeightedVectorWritable();

while (reader.next(key, value))
{
System.out.println(value.toString() + " belongs to cluster " + key.toString());
}

reader.close();

}
catch (Exception e)
{
e.printStackTrace();
}  

}
}

No comments:

Post a Comment