weka_classifiers.py 3.12 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
    Runs a collection of machine learning algorithms for data mining tasks available in Weka.
    
    Name: weka_classifiers.py
    Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

from weka.core.converters import Loader as WLoader
from weka.classifiers import Classifier as WClassifier
from weka.classifiers import Evaluation as WEvaluation
from weka.core.classes import Random as WRandom

from collections import OrderedDict

from util.config import Config
from util.file_utils import File

from classifier import Classifier

class WekaClassifiers(Classifier):
    """
        Classifier that delegates training and prediction to a Weka algorithm.

        The Weka class to run and its options string are kept as two Config
        entries. train() builds a model from an ARFF file and classify()
        returns the predicted class label of every instance of another
        ARFF file.
    """

    def __init__(self, classname="weka.classifiers.functions.SMO", options='default'):
        """
            Store the initial configuration.

            classname -- name of the Weka classifier class to instantiate.
            options   -- whitespace separated options handed to the Weka
                         classifier, or 'default' to pass no options at all.
        """
        self.classname = Config("ClassName", classname, 'classifier')
        self.options = Config("Options", options, str)

    def get_name(self):
        """Return the display name of this classifier."""
        return "Weka ML Algorithms"

    def get_config(self):
        """
            Return the current configuration.

            The result is an OrderedDict mapping "classname" and "options"
            (in that order) to their Config objects.
        """
        weka_config = OrderedDict()

        weka_config["classname"] = self.classname
        weka_config["options"] = self.options

        return weka_config

    def set_config(self, configs):
        """
            Update the configuration from a mapping shaped like get_config().

            configs must contain the keys "classname" and "options"; each
            entry is merged with the current value through Config.nvl_config.
        """
        self.classname = Config.nvl_config(configs["classname"], self.classname)
        self.options = Config.nvl_config(configs["options"], self.options)

    def get_summary_config(self):
        """
            Return the configuration as text, one "label: value" line per entry.

            The options value is shown with surrounding whitespace removed.
        """
        weka_config = OrderedDict()

        weka_config[self.classname.label] = self.classname.value
        weka_config[self.options.label] = self.options.value.strip()

        return ''.join("%s: %s\n" % (label, str(value))
                       for label, value in weka_config.items())

    def must_train(self):
        """Return True: this classifier always has to be trained before use."""
        return True

    def train(self, dataset, training_data):
        """
            Build the Weka classifier from an ARFF training file.

            dataset       -- location passed to File.make_path together with
                             the file name.
            training_data -- name of the training file without the ".arff"
                             extension.

            The loaded instances are kept in self.data and the trained model
            in self.classifier. The last attribute of the file is used as the
            class attribute.
        """
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        training_file = File.make_path(dataset, training_data + ".arff")
        self.data = loader.load_file(training_file)
        self.data.class_is_last()

        # Strip before comparing so that a value such as "default " is treated
        # the same way get_summary_config() displays it, instead of being
        # forwarded to Weka as the literal option "default".
        options_text = self.options.value.strip()
        options = None if options_text == 'default' else options_text.split()

        self.classifier = WClassifier(classname=self.classname.value, options=options)
        self.classifier.build_classifier(self.data)

    def classify(self, dataset, test_data):
        """
            Predict the class label of every instance of an ARFF test file.

            dataset   -- location passed to File.make_path together with the
                         file name.
            test_data -- name of the test file without the ".arff" extension.

            Returns a list of class labels (strings), one per instance, in
            the order the instances appear in the file. Relies on
            self.classifier, which this class only assigns in train().
        """
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        test_file = File.make_path(dataset, test_data + ".arff")
        predict_data = loader.load_file(test_file)
        predict_data.class_is_last()

        # Labels of the class attribute, indexed like the entries of the
        # distribution returned for each instance.
        class_attribute = predict_data.class_attribute
        values = [str(class_attribute.value(i))
                  for i in range(class_attribute.num_values)]

        # The predicted label is the one with the highest score in the
        # distribution computed by the trained classifier.
        return [values[self.classifier.distribution_for_instance(inst).argmax()]
                for inst in predict_data]