, , , . , - Mahout , . , , Mahout . , .
, Mahout, . 0.7, 0.7.
/**
 * Classifies a single raw entry with a previously trained Naive Bayes model
 * and prints the resulting score vector to standard output.
 *
 * <p>The model is materialized from {@code modelLocation}, the entry is
 * converted to its space-separated feature string, each feature token is
 * encoded into a sparse vector, and the vector returned by
 * {@code classifyFull} is printed via {@code asFormatString()}.
 *
 * @param modelLocation path of the trained model, passed to
 *        {@code NaiveBayesModel.materialize}
 * @param unclassifiedInstanceRaw the entry to classify
 * @throws IOException declared for the model-loading call
 */
public void classify(String modelLocation, RawEntry unclassifiedInstanceRaw) throws IOException {
    Configuration conf = new Configuration();
    NaiveBayesModel model = NaiveBayesModel.materialize(new Path(modelLocation), conf);
    AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(model);

    String unclassifiedInstanceFeatures = RawEntry.toNaiveBayesTrainingFormat(unclassifiedInstanceRaw);
    // Split once: the token array supplies both the vector size and the
    // features to encode. (A String cannot be iterated with for-each, and
    // arrays expose `length` as a field, not a method.)
    String[] features = unclassifiedInstanceFeatures.split(" ");

    FeatureVectorEncoder vectorEncoder = new AdaptiveWordValueEncoder("features");
    vectorEncoder.setProbes(1);

    // NOTE(review): the vector is sized by the number of tokens in this
    // instance; confirm this matches the feature cardinality the model
    // was trained with.
    Vector unclassifiedInstanceVector = new RandomAccessSparseVector(features.length);
    for (String feature : features) {
        vectorEncoder.addToVector(feature, unclassifiedInstanceVector);
    }

    Vector classificationResult = classifier.classifyFull(unclassifiedInstanceVector);
    System.out.println(classificationResult.asFormatString());
}
:
1) -, , , trainnb. , -o trainnb. .bin .
2) NaiveBayesClassifier
3) RawEntry - , . toNaiveBayesTrainingFormat , , "word1 word2 word3 word4". , .
4) Mahout Vector, Vector
5) - .
. Vector, ( ) . . ( , ) :
1) , ,
2) ( , StandardNaiveBayesClassifier )
3) , , ,
4) jC.set( "mapreduce.textoutputformat.separator", "); jC - JobConf. mapreduce. ",".
, Mahout 0.7. , , . , .
, Mahout , Mahout Java - .