Pynovisao - Adding Weka classifiers

parent eaec0d1b
......@@ -49,11 +49,13 @@ Inovisão Computer Vision Package.
- scikit-image
- Opencv 2.7
- tk/tk-dev
- python-weka-wrapper ( Classification )
### Windows
- Install [Anaconda](http://continuum.io/downloads), which contains all the dependencies, including Python. Just download the .exe file and run it.
- Opencv 2.7
- python-weka-wrapper ( Classification )
## How to install OpenCV
......@@ -115,3 +117,63 @@ $ sudo apt-get install tk tk-dev
## More information
- http://www.tkdocs.com/tutorial/install.html
## How to install python-weka-wrapper ( Optional )
### Ubuntu
First you need to be able to compile the C/C++ code and the Python modules:
$ sudo apt-get install build-essential python-dev
Now you can install the various packages we need in order to install python-weka-wrapper:
$ sudo apt-get install python-pip python-numpy
The following packages are optional, but necessary if you want a graphical output:
$ sudo apt-get install python-imaging python-matplotlib python-pygraphviz
Install OpenJDK to get all the headers that javabridge compiles against:
$ sudo apt-get install default-jdk
On my Ubuntu 14.04 I ran into dependency problems, so I ended up installing Oracle's Java following the instructions on this site: [installing Oracle Java](http://askubuntu.com/questions/521145/how-to-install-oracle-java-on-ubuntu-14-04)
Finally you can use pip to install the Python packages that are not available in the repositories:
$ sudo pip install javabridge
$ sudo pip install python-weka-wrapper
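To confirm that javabridge and python-weka-wrapper were installed correctly, you can run a quick sanity check (a minimal sketch, not part of Pynovisao) that only starts the bundled Weka JVM, instantiates a classifier and shuts the JVM down:

```python
# Minimal sanity check for the python-weka-wrapper installation.
import weka.core.jvm as jvm
from weka.classifiers import Classifier

jvm.start()                                                  # boots the JVM with the Weka jars
cls = Classifier(classname="weka.classifiers.functions.SMO")
print(cls)                                                   # printing it proves the Java bridge works
jvm.stop()
```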
### Windows
Please note: you need to make sure that the bitness of your environment is consistent. That is, if you install a 32-bit version of Python, you must install a 32-bit JDK and 32-bit numpy (or else they must all be 64-bit).
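The Python side of this can be checked quickly with the sketch below (run it with the same interpreter you intend to use, and compare with what `java -version` reports for the JDK):

```python
# Prints 32 or 64, matching the bitness of the Python interpreter running it.
import struct
print(struct.calcsize("P") * 8)
```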
Perform the following steps:
Install Python; make sure you tick Add python.exe to path during the installation
Add the Python scripts to your PATH environment variable, for example :\\Python27\\Scripts
Install pip with the following steps:
* download it from https://bootstrap.pypa.io/get-pip.py
* install it by running python get-pip.py
Install numpy
* download numpy 1.9.x MKL (or later) for Python 2.7 (cp27) and your bit configuration (32 or 64 bit)
* install the .whl file using pip: pip install numpy-X.Y.Z.whl
Install .Net 4.0 (if it is not already installed)
Install the Windows SDK 7.1
Open the Windows SDK command prompt (not the regular command prompt!) and install javabridge and python-weka-wrapper
> set MSSdk=1
> set DISTUTILS_USE_SDK=1
> pip install javabridge
> pip install python-weka-wrapper
Now you can run python-weka-wrapper from the regular command prompt as well.
If you want the graphical features, you also need to install matplotlib:
* download matplotlib for Python 2.7 (cp27) and your bit configuration (32 or 64 bit)
* install the .whl file using pip: pip install matplotlib-X.Y.Z.whl
## More information
- http://pythonhosted.org/python-weka-wrapper/install.html
- http://pythonhosted.org/python-weka-wrapper/troubleshooting.html
from .classifier import Classifier
from .weka_classifiers import WekaClassifiers

try:
    import weka.core.jvm as jvm
    jvm.start()
    weka_is_defined = True
except:
    weka_is_defined = False

__all__ = ["classifier",
           "weka_classifiers"]

from collections import OrderedDict
from util.config import Config

_classifier_list = OrderedDict( [
    ["weka_classifiers", Config(WekaClassifiers().get_name(), weka_is_defined, bool, meta=WekaClassifiers, hidden=not weka_is_defined)],
    ["invalid", Config("Invalid", True, bool, meta=None, hidden=True)]
] )

def get_classifier_config():
    return _classifier_list

def set_classifier_config(configs):
    _classifier_list["weka_classifiers"] = Config.nvl_config(configs["weka_classifiers"], _classifier_list["weka_classifiers"])
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
Abstract class for classifiers.
Name: classifier.py
Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

from abc import ABCMeta, abstractmethod


class Classifier(object):
    __metaclass__ = ABCMeta

    def get_name(self):
        return self.__class__.__name__

    @abstractmethod
    def get_config(self):
        pass

    @abstractmethod
    def set_config(self, configs):
        pass

    @abstractmethod
    def get_summary_config(self):
        pass

    def must_train(self):
        return False

    def train(self, dataset=None, training_data=None):
        pass

    @abstractmethod
    def classify(self, dataset, test_data):
        pass
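Concrete classifiers are expected to implement this contract. Purely as an illustration (a hypothetical example, not part of the project), a do-nothing subclass would look like this:

```python
# Hypothetical example: a trivial subclass satisfying the Classifier contract.
from collections import OrderedDict

from classifier import Classifier


class DummyClassifier(Classifier):

    def get_config(self):
        return OrderedDict()          # no configurable options

    def set_config(self, configs):
        pass

    def get_summary_config(self):
        return "DummyClassifier: no options"

    def classify(self, dataset, test_data):
        # A real classifier returns one predicted class name per test instance,
        # usually read from dataset/test_data.arff.
        return []
```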
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
Runs collection of machine learning algorithms for data mining tasks available in Weka.
Name: weka_classifiers.py
Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

from weka.core.converters import Loader as WLoader
from weka.classifiers import Classifier as WClassifier
from weka.classifiers import Evaluation as WEvaluation
from weka.core.classes import Random as WRandom

from collections import OrderedDict

from util.config import Config
from util.file_utils import File

from classifier import Classifier


class WekaClassifiers(Classifier):

    def __init__(self, classname="weka.classifiers.functions.SMO", options='default'):
        self.classname = Config("ClassName", classname, 'classifier')
        self.options = Config("Options", options, str)

    def get_name(self):
        return "Weka ML Algorithms"

    def get_config(self):
        weka_config = OrderedDict()

        weka_config["classname"] = self.classname
        weka_config["options"] = self.options

        return weka_config

    def set_config(self, configs):
        self.classname = Config.nvl_config(configs["classname"], self.classname)
        self.options = Config.nvl_config(configs["options"], self.options)

    def get_summary_config(self):
        weka_config = OrderedDict()

        weka_config[self.classname.label] = self.classname.value
        weka_config[self.options.label] = self.options.value.strip()

        summary = ''
        for config in weka_config:
            summary += "%s: %s\n" % (config, str(weka_config[config]))

        return summary

    def must_train(self):
        return True

    def train(self, dataset, training_data):
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        training_file = File.make_path(dataset, training_data + ".arff")
        self.data = loader.load_file(training_file)
        self.data.class_is_last()

        options = None if self.options.value == 'default' else self.options.value.split()
        self.classifier = WClassifier(classname=self.classname.value, options=options)
        self.classifier.build_classifier(self.data)

    def classify(self, dataset, test_data):
        loader = WLoader(classname="weka.core.converters.ArffLoader")

        test_file = File.make_path(dataset, test_data + ".arff")
        predict_data = loader.load_file(test_file)
        predict_data.class_is_last()

        #values = str(predict_data.class_attribute)[19:-1].split(',')
        values = [str(predict_data.class_attribute.value(i)) for i in range(0, predict_data.class_attribute.num_values)]

        classes = []

        for index, inst in enumerate(predict_data):
            #pred = self.classifier.classify_instance(inst)
            prediction = self.classifier.distribution_for_instance(inst)

            #cl = int(values[prediction.argmax()][7:])
            cl = values[prediction.argmax()]

            #print 'Classe:', cl
            classes.append(cl)

        return classes
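For reference, a rough sketch of how the application drives WekaClassifiers (the dataset path and ARFF base names here are hypothetical, and the JVM is normally started once by classification/__init__.py):

```python
# Hypothetical usage sketch: "my_dataset" is assumed to contain training.arff and test.arff.
import weka.core.jvm as jvm

from classification.weka_classifiers import WekaClassifiers

jvm.start()
classifier = WekaClassifiers(classname="weka.classifiers.trees.J48")
if classifier.must_train():
    classifier.train("my_dataset", "training")       # loads my_dataset/training.arff
labels = classifier.classify("my_dataset", "test")   # one predicted class name per instance
print(labels)
jvm.stop()
```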
......@@ -25,7 +25,7 @@ class FeatureExtractor(object):
def __init__(self, extractors):
self.extractors = extractors
def extract_all(self, dataset, output_file = None, classes = None, overwrite = True):
def extract_all(self, dataset, output_file = None, dirs = None, overwrite = True):
if len(self.extractors) == 0:
raise IException("Please select at least one extractor")
......@@ -35,16 +35,17 @@ class FeatureExtractor(object):
output_file = File.make_path(dataset, output_file + '.arff')
if overwrite == False and os.path.isfile(output_file):
return output_file
classes = sorted(File.list_dirs(dataset)) if classes is None else classes
return output_file, 0
start_time = TimeUtils.get_time()
classes = sorted(File.list_dirs(dataset))
dirs = classes if dirs is None else dirs
data = []
for cl in classes:
items = os.listdir( File.make_path(dataset, cl) )
for cl in dirs:
items = sorted(os.listdir( File.make_path(dataset, cl)))
print("Processing class %s - %d itens" % (cl, len(items)))
for item in items:
......@@ -62,7 +63,8 @@ class FeatureExtractor(object):
else:
labels, types, values = [ list(itertools.chain.from_iterable(ret))
for ret in zip(*([extractor().run(image) for extractor in self.extractors])) ]
data.append(values + [cl])
data.append(values + [cl if cl in classes else classes[0]])
if len(data) == 0:
raise IException("There are no images in dataset: %s" % dataset)
......@@ -73,12 +75,39 @@ class FeatureExtractor(object):
return output_file, (end_time - start_time)
def extract_one_file(self, dataset, image_path, output_file = None):
if len(self.extractors) == 0:
raise IException("Please select at least one extractor")
if output_file is None:
output_file = File.get_filename(dataset)
output_file = File.make_path(dataset, output_file + '.arff')
classes = sorted(File.list_dirs(dataset))
start_time = TimeUtils.get_time()
try:
image = File.open_image(image_path, rgb = False )
except:
raise IException("Image %s is possibly corrupt" % filepath)
labels, types, values = [ list(itertools.chain.from_iterable(ret))
for ret in zip(*([extractor().run(image) for extractor in self.extractors])) ]
self._save_output(File.get_filename(dataset), classes, labels, types, [values + [classes[0]]], output_file)
end_time = TimeUtils.get_time()
return output_file, (end_time - start_time)
def _save_output(self, relation, classes, labels, types, data, output_file):
arff = open(output_file,'wb')
arff.write("%s %s\n\n" % ('@relation ', relation))
arff.write("%s %s\n\n" % ('@relation', relation))
for label, t in zip(labels, types):
arff.write("%s %s %s\n" % ('@attribute', label, t))
......
......@@ -22,9 +22,9 @@ class RawCentralMoments(Extractor):
def run(self, image):
image_grayscale = ImageUtils.image_grayscale(image, bgr = True)
image_binary = ImageUtils.image_binary(image, bgr = True)
m = measure.moments(image_grayscale)
m = measure.moments(image_binary)
values_m = [m[p, q] for (p, q) in self._moments_order]
labels_m = [M+str(p)+str(q) for M,(p,q) in zip(['M_'] * len(self._moments_order), self._moments_order)]
......@@ -32,7 +32,7 @@ class RawCentralMoments(Extractor):
row = m[0, 1] / m[0, 0]
col = m[1, 0] / m[0, 0]
mu = measure.moments_central(image_grayscale, row, col)
mu = measure.moments_central(image_binary, row, col)
values_mu = [mu[p, q] for (p, q) in self._moments_order]
labels_mu = [M+str(p)+str(q) for M,(p,q) in zip(['Mu_'] * len(self._moments_order), self._moments_order)]
......@@ -53,14 +53,14 @@ class HuMoments(Extractor):
def run(self, image):
image_grayscale = ImageUtils.image_grayscale(image, bgr = True)
image_binary = ImageUtils.image_binary(image, bgr = True)
m = measure.moments(image_grayscale)
m = measure.moments(image_binary)
row = m[0, 1] / m[0, 0]
col = m[1, 0] / m[0, 0]
mu = measure.moments_central(image_grayscale, row, col)
mu = measure.moments_central(image_binary, row, col)
nu = measure.moments_normalized(mu)
hu = measure.moments_hu(nu)
......
......@@ -75,9 +75,10 @@ class Image(object):
self._show_toolbar()
def refresh(self, image):
def refresh(self, image = None):
if self._canvas is not None:
self.parent.image = image
if image is not None:
self.parent.image = image
self._im.set_data(self.parent.image)
self._fig.tight_layout()
......
......@@ -79,7 +79,7 @@ class TkInterface(Interface):
def toggle_image_axes(self):
self._image.toggle_axes()
def refresh_image(self, image, title = None):
def refresh_image(self, image = None, title = None):
self._image.refresh(image)
if title is not None:
......@@ -96,10 +96,12 @@ class TkInterface(Interface):
def write_log(self, fmt, *args):
if self._logger:
self._logger.write_logger(fmt, *args)
self.refresh_image()
def append_log(self, fmt, *args):
if self._logger:
self._logger.append_logger(fmt, *args)
self.refresh_image()
def clear_log(self):
if self._logger:
......
......@@ -33,6 +33,7 @@ if __name__ == "__main__":
tk.add_menu("File")
tk.add_command("Open a image", act.open_image, 'O')
tk.add_command("Restore image", act.restore_image, 'R')
tk.add_separator()
tk.add_command("Close image", act.close_image, 'W')
tk.add_separator()
......@@ -54,7 +55,6 @@ if __name__ == "__main__":
tk.add_command("Choose segmenter", act.select_segmenter)
tk.add_command("Configure", act.config_segmenter, 'g')
tk.add_separator()
tk.add_command("Clean", act.clean_segmentation, 'l')
tk.add_command("Execute", act.run_segmenter, 'S')
tk.add_menu("Feature Extraction")
......@@ -63,9 +63,10 @@ if __name__ == "__main__":
tk.add_command("Execute", act.run_extractors, 'F')
tk.add_menu("Classification")
tk.add_command("Configure", act.func_not_available)
tk.add_command("Choose classifier", act.select_classifier)
tk.add_command("Configure", act.configure_classifier)
tk.add_separator()
tk.add_command("Execute", act.func_not_available, 'C')
tk.add_command("Execute", act.run_classifier, 'C')
tk.render_menu()
......
......@@ -16,14 +16,15 @@ from interface.interface import InterfaceException as IException
import segmentation
import extraction
from extraction import FeatureExtractor
import classification
import util
from util.config import Config
from util.file_utils import File as f
from util.utils import TimeUtils
class Act(object):
def __init__(self, tk, args):
self.tk = tk
......@@ -32,17 +33,23 @@ class Act(object):
self.extractors = [extraction._extractor_list[extractor].meta for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True ]
try:
self.classifier = [classification._classifier_list[classifier].meta for classifier in classification._classifier_list
if classification._classifier_list[classifier].value == True ][0]()
except:
self.classifier = None
self._image = None
self._const_image = None
self._image_name = None
self._init_dataset(args["dataset"])
self._init_classes(args["classes"], args["colors"])
self._dataset_generator = True
def _init_dataset(self, directory):
if(directory[-1] == '/'):
directory = directory[:-1]
......@@ -64,7 +71,7 @@ class Act(object):
self.add_class(dialog = False, color='Yellow')
self._current_class = 0
def open_image(self, imagename = None):
......@@ -83,7 +90,7 @@ class Act(object):
self.tk.append_log("Painting segment: %0.3f seconds", run_time)
self.tk.refresh_image(self._image)
filepath = f.save_image(segment, self.dataset, self.classes[self._current_class]["name"].value, self._image_name, idx_segment)
filepath = f.save_class_image(segment, self.dataset, self.classes[self._current_class]["name"].value, self._image_name, idx_segment)
if filepath:
self.tk.append_log("\nSegment saved in %s", filepath)
......@@ -97,6 +104,15 @@ class Act(object):
self.tk.write_log("Opening %s...", self._image_name)
self.tk.add_image(self._image, self._image_name, onclick)
self._const_image = self._image
self.segmenter.clean()
def restore_image(self):
if self._const_image is not None:
self.tk.write_log("Restoring image...")
self.tk.refresh_image(self._const_image)
self.segmenter.clean()
def close_image(self):
......@@ -163,6 +179,14 @@ class Act(object):
def update_current_class(self, index):
self._current_class = index
def get_class_by_name(self, name):
name = name.strip()
for cl in self.classes:
if cl["name"].value == name:
return cl
raise Exception("Class not found")
def set_dataset_path(self):
......@@ -208,6 +232,7 @@ class Act(object):
self.segmenter.set_config(new_config)
self.tk.append_log("\nConfig updated:\n%s", str(self.segmenter.get_summary_config()))
self.segmenter.clean()
self.tk.dialogue_config(title, current_config, process_config)
......@@ -223,11 +248,6 @@ class Act(object):
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
self.tk.refresh_image(self._image)
def clean_segmentation(self):
if self._const_image is not None:
self.tk.write_log("Removing %s segmentation...", self.segmenter.get_name())
self.tk.refresh_image(self._const_image)
def select_extractors(self):
......@@ -265,6 +285,127 @@ class Act(object):
self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
def select_classifier(self):
if self.classifier is None:
raise IException("You must install python-weka-wrapper")
title = "Choosing a classifier"
self.tk.write_log(title)
current_config = classification.get_classifier_config()
def process_config():
new_config = self.tk.get_config_and_destroy()
self.classifier = [new_config[classifier].meta for classifier in new_config
if new_config[classifier].value == True ][0]()
self.tk.append_log("\nClassifier: %s\n%s", str(self.classifier.get_name()), str(self.classifier.get_summary_config()))
classification.set_classifier_config(new_config)
self.tk.dialogue_choose_one(title, current_config, process_config)
def configure_classifier(self):
if self.classifier is None:
raise IException("You must install python-weka-wrapper")
title = "Configuring %s" % self.classifier.get_name()
self.tk.write_log(title)
current_config = self.classifier.get_config()
def process_config():
new_config = self.tk.get_config_and_destroy()
self.classifier.set_config(new_config)
self.tk.append_log("\nConfig updated:\n%s", str(self.classifier.get_summary_config()))
self.tk.dialogue_config(title, current_config, process_config)
def run_classifier(self):
if self.classifier is None:
raise IException("You must install python-weka-wrapper")
if self._const_image is None:
raise IException("Image not found")
self.tk.write_log("Running %s...", self.classifier.get_name())
self.tk.append_log("\n%s", str(self.classifier.get_summary_config()))
start_time = TimeUtils.get_time()
list_segments = self.segmenter.get_list_segments()
if len(list_segments) == 0:
self.tk.append_log("Running %s... (%0.3f seconds)", self.segmenter.get_name(), (TimeUtils.get_time() - start_time))
self._image, _ = self.segmenter.run(self._const_image)
self.tk.refresh_image(self._image)
list_segments = self.segmenter.get_list_segments()
if self.classifier.must_train():
self.tk.append_log("Creating trainning data... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
fextractor = FeatureExtractor(self.extractors)
output_file, run_time = fextractor.extract_all(self.dataset, "trainning", overwrite = False)
self.tk.append_log("Training classifier... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
self.classifier.train(self.dataset, "trainning")
self._image = self._const_image
# Original classification from python-superpixel, deprecated, use only for debug purposes
if hasattr(self, 'python_superpixel'):
for idx_segment in list_segments:
segment, size_segment = self.segmenter.get_segment(self, idx_segment=idx_segment)[0:2]
filepath = f.save_image(segment, self.dataset, "test")
if self.classifier.must_train():
output_file, _ = fextractor.extract_one_file(self.dataset, filepath, "test")
labels = self.classifier.classify(self.dataset, "test")
cl = self.get_class_by_name(labels[0])
self._image, _ = self.segmenter.paint_segment(self._image, cl["color"].value, idx_segment=[idx_segment], border=False)
self.tk.refresh_image(self._image)
# New and optimized classification
else:
tmp = ".tmp"
f.remove_dir(f.make_path(self.dataset, tmp))
self.tk.append_log("Generating test images... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
for idx_segment in list_segments:
segment, size_segment, idx_segment = self.segmenter.get_segment(self, idx_segment=idx_segment)[:-1]
filepath = f.save_class_image(segment, self.dataset, tmp, self._image_name, idx_segment)
if self.classifier.must_train():
self.tk.append_log("Running extractors on test images... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
output_file, _ = fextractor.extract_all(self.dataset, "test", dirs=[tmp])
self.tk.append_log("Running classifier on test data... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
labels = self.classifier.classify(self.dataset, "test")
f.remove_dir(f.make_path(self.dataset, tmp))
self.tk.append_log("Painting segments... (%0.3f seconds)", (TimeUtils.get_time() - start_time))
for cl in self.classes:
idx_segment = [ list_segments[idx] for idx in range(0, len(labels)) if cl["name"].value == labels[idx]]
if len(idx_segment) > 0:
self._image, _ = self.segmenter.paint_segment(self._image, cl["color"].value, idx_segment=idx_segment, border=False)
self.tk.refresh_image(self._image)
end_time = TimeUtils.get_time()
self.tk.append_log("\nClassification finished")
self.tk.append_log("Time elapsed: %0.3f seconds", (end_time - start_time))
def func_not_available(self):
self.tk.write_log("This functionality is not available right now.")
......
......@@ -4,8 +4,8 @@ from .quickshift import Quickshift
from .slic import Slic
__all__ = ["segmenter",
"felzenszwalb"
"quickshift"
"felzenszwalb",
"quickshift",