#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
    Runs a collection of machine learning algorithms for data mining tasks available in Weka.

    Name: weka_classifiers.py
    Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

from weka.core.converters import Loader as WLoader
from weka.classifiers import Classifier as WClassifier
from weka.classifiers import Evaluation as WEvaluation
from weka.core.classes import Random as WRandom

from collections import OrderedDict

from util.config import Config
from util.file_utils import File

from classifier import Classifier


class WekaClassifiers(Classifier):
    """Adapter that exposes Weka classifiers through the project's Classifier interface."""

    def __init__(self, classname="weka.classifiers.functions.SMO", options='default'):
        self.classname = Config("ClassName", classname, 'classifier')
        self.options = Config("Options", options, str)

    def get_name(self):
        """Return the human-readable name of this classifier back-end."""
        return "Weka ML Algorithms"

    def get_config(self):
        """Return the current configuration as an ordered dict of Config objects."""
        weka_config = OrderedDict()

        weka_config["classname"] = self.classname
        weka_config["options"] = self.options

        return weka_config

    def set_config(self, configs):
        """Update the configuration from the given configs."""
        self.classname = Config.nvl_config(configs["classname"], self.classname)
        self.options = Config.nvl_config(configs["options"], self.options)

    def get_summary_config(self):
        """Return a printable summary of the current configuration."""
        weka_config = OrderedDict()

        weka_config[self.classname.label] = self.classname.value
        weka_config[self.options.label] = self.options.value.strip()

        summary = ''
        for config in weka_config:
            summary += "%s: %s\n" % (config, str(weka_config[config]))

        return summary

    def must_train(self):
        """This back-end requires an explicit training step."""
        return True

    def train(self, dataset, training_data):
        """Train the Weka classifier on <dataset>/<training_data>.arff."""
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        training_file = File.make_path(dataset, training_data + ".arff")
        self.data = loader.load_file(training_file)
        self.data.class_is_last()

        # 'default' means no explicit options; otherwise split the option string into a list.
        options = None if self.options.value == 'default' else self.options.value.split()
        self.classifier = WClassifier(classname=self.classname.value, options=options)

        self.classifier.build_classifier(self.data)

    def classify(self, dataset, test_data):
        """Classify <dataset>/<test_data>.arff and return the list of predicted class labels."""
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        test_file = File.make_path(dataset, test_data + ".arff")
        predict_data = loader.load_file(test_file)
        predict_data.class_is_last()

        # Collect the class labels declared by the ARFF class attribute.
        values = [str(predict_data.class_attribute.value(i))
                  for i in range(predict_data.class_attribute.num_values)]

        classes = []
        for inst in predict_data:
            # Pick the label with the highest predicted probability.
            prediction = self.classifier.distribution_for_instance(inst)
            classes.append(values[prediction.argmax()])

        return classes
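

# --- Usage sketch (not part of the original module) ---
# A minimal example of how this wrapper might be driven. It assumes that
# python-weka-wrapper's JVM must be started before any Weka call (weka.core.jvm
# is part of that package), and that "dataset_dir", "training" and "test" are
# hypothetical names for a directory containing training.arff and test.arff.
if __name__ == "__main__":
    import weka.core.jvm as jvm

    jvm.start()
    try:
        weka = WekaClassifiers(classname="weka.classifiers.functions.SMO")
        weka.train("dataset_dir", "training")                # loads dataset_dir/training.arff
        predictions = weka.classify("dataset_dir", "test")   # loads dataset_dir/test.arff
        print(weka.get_summary_config())
        print(predictions)
    finally:
        jvm.stop()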