Saturday, July 28, 2012

Mahout Clustering 


# Step 1: pack the documents under reuters-out/ into SequenceFiles
# (-c: input charset, -chunk: chunk size).
./bin/mahout seqdirectory \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out/ \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir \
  -c UTF-8 \
  -chunk 5

# Step 2: turn the SequenceFiles into sparse vectors. The clustering commands
# in this file read the tfidf-vectors/ subdirectory of this output.
# (-ng 2: n-grams up to size 2, -nv: named vectors.)
./bin/mahout seq2sparse \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir/ \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse \
  -ng 2 \
  -nv

# Canopy clustering experiments over the TF-IDF vectors.
#
# run_canopy T1 T2 MEASURE
#   T1, T2   - canopy distance thresholds passed to -t1 / -t2
#   MEASURE  - class name inside org.apache.mahout.common.distance
#
# Every run uses sequential execution (-xm sequential) and writes to the same
# canopy-output directory with -ow, so each run replaces the previous result;
# only the last run's output is left on disk.
run_canopy() {
  ./bin/mahout canopy \
    -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
    -o /home/venkat/Downloads/cj4test/newscluster/canopy-output \
    -t1 "$1" \
    -t2 "$2" \
    -ow \
    -xm sequential \
    -dm "org.apache.mahout.common.distance.$3"
}

# Cosine distance with a range of thresholds.
run_canopy 0.8 0.7 CosineDistanceMeasure
run_canopy 3.0 2.8 CosineDistanceMeasure
# NOTE(review): t1 (0.5) is smaller than t2 (1.0) here; the canopy docs
# describe T1 > T2 - confirm this run is intentional.
run_canopy 0.5 1.0 CosineDistanceMeasure
run_canopy 2.0 1.0 CosineDistanceMeasure
run_canopy 0.2 0.1 CosineDistanceMeasure
run_canopy 0.1 0.1 CosineDistanceMeasure
run_canopy 0.8 0.8 CosineDistanceMeasure

# Same thresholds (3.0 / 2.8) with other distance measures.
run_canopy 3.0 2.8 EuclideanDistanceMeasure
run_canopy 3.0 2.8 ManhattanDistanceMeasure



# k-means experiments over the TF-IDF vectors. Every run caps iterations at 10
# (-x 10), overwrites reuters-kmeans (-ow) and clusters the points afterwards
# (-cl); the runs differ in the initial clusters (-c), -k and distance measure.
# NOTE(review): the clusters-0 subdirectory name may depend on the Mahout
# version that produced canopy-output - verify it exists before running.

# Seeded from the canopy centroids, cosine distance.
./bin/mahout kmeans \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
  -c /home/venkat/Downloads/cj4test/newscluster/canopy-output/clusters-0 \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-kmeans \
  -x 10 \
  -ow \
  -cl \
  -dm org.apache.mahout.common.distance.CosineDistanceMeasure

# k = 2, cosine distance.
# NOTE(review): per the Mahout k-means docs, when -k is given random seed
# vectors are written to the -c path first; this run therefore looks like it
# overwrites canopy-output, which later runs read - confirm.
./bin/mahout kmeans \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
  -c /home/venkat/Downloads/cj4test/newscluster/canopy-output \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-kmeans \
  -x 10 \
  -k 2 \
  -ow \
  -cl \
  -dm org.apache.mahout.common.distance.CosineDistanceMeasure

# k = 4, cosine distance, with a separate clusters directory for -c.
./bin/mahout kmeans \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
  -c /home/venkat/Downloads/cj4test/newscluster/clusters \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-kmeans \
  -x 10 \
  -k 4 \
  -ow \
  -cl \
  -dm org.apache.mahout.common.distance.CosineDistanceMeasure

# Seeded from the canopy centroids, Euclidean distance.
./bin/mahout kmeans \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
  -c /home/venkat/Downloads/cj4test/newscluster/canopy-output/clusters-0 \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-kmeans \
  -x 10 \
  -ow \
  -cl \
  -dm org.apache.mahout.common.distance.EuclideanDistanceMeasure

# Seeded from the canopy centroids, Manhattan distance.
./bin/mahout kmeans \
  -i /home/venkat/Downloads/cj4test/newscluster/reuters-out-seqdir-sparse/tfidf-vectors/ \
  -c /home/venkat/Downloads/cj4test/newscluster/canopy-output/clusters-0 \
  -o /home/venkat/Downloads/cj4test/newscluster/reuters-kmeans \
  -x 10 \
  -ow \
  -cl \
  -dm org.apache.mahout.common.distance.ManhattanDistanceMeasure

No comments:

Post a Comment