Commit e29f2ef5 authored by Alexandre Cese's avatar Alexandre Cese
Browse files

mudancas com opcoes para salvar em csv e fazer histograma de superpixels

parent eb119d36
......@@ -125,6 +125,13 @@ Em uma das máquinas em que tentei instalar deu um erro que resolvi rodando o co
$ sudo apt-get build-dep python-matplotlib
$ sudo pip install cycler
```
### Como instalar o scikit-learn e pandas
```
sudo pip install -U scikit-learn
sudo pip install -U pandas
```
### Como instalar o tk/tk-dev
......
......@@ -10,3 +10,4 @@ javabridge
python-weka-wrapper
cycler
cython
scikit-learn
import os
import itertools
import pandas as pd
from sklearn.preprocessing import StandardScaler
from interface.interface import InterfaceException as IException
from util import File, TimeUtils
from sklearn.cluster import MiniBatchKMeans
class histoextraction(object):
    """Feature-extraction helpers that save image/superpixel features to CSV
    and build Bag-of-Superpixels histograms via MiniBatch K-means.

    NOTE(review): class name is lower-case; PEP 8 would use `HistoExtraction`,
    but renaming would break callers (e.g. `Act`), so it is kept as-is.
    """
    def __init__(self):
        # Scratch list for extracted feature rows; the extraction methods
        # below actually use their own local `data` lists.
        self.data=[]
def extract_all_superpixels_csv(self, dataset, segmenter, extractors, overwrite=True):
    """Segment every dataset image into superpixels, run the feature
    extractors on each superpixel and save all rows to <dataset>/data.csv.

    The first CSV row holds the extractor labels plus "file" and "class".

    Parameters
    ----------
    dataset : string
        Path to the dataset directory (one sub-directory per class).
    segmenter : Segmenter
        Segmenter taken from pynovisao; must provide run(),
        get_list_segments() and get_segment().
    extractors : list
        Feature extractors taken from pynovisao.
    overwrite : bool
        When False and the output file already exists, return it untouched.

    Returns
    -------
    string
        Path of the CSV file that was written (<dataset>/data.csv).

    Raises
    ------
    IException
        If no extractor was selected.
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    # BUG FIX: the overwrite guard used to test 'training_kmeans.pkl' while
    # the method actually writes 'data.csv', and returned a (path, 0) tuple
    # although every other path returns a single string (the caller unpacks
    # a single value). Check -- and return -- the real output file.
    output_file = File.make_path(dataset, "data.csv")
    if not overwrite and os.path.isfile(output_file):
        return output_file

    dirs = sorted(File.list_dirs(dataset))
    data = []

    # Feature extraction for every superpixel of every image of every class.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))
        for item in items:
            filepath = File.make_path(dataset, cl, item)
            print(filepath)
            image = File.open_image(filepath, rgb=False)
            segmenter.run(image)
            for i in segmenter.get_list_segments():
                segment, size_segment, idx_segment, run_time = segmenter.get_segment(idx_segment=i)
                if len(data) > 0:
                    # Extractors return (labels, types, values); keep values.
                    values = list(itertools.chain.from_iterable(
                        zip(*([extractor().run(segment) for extractor in extractors]))[2]))
                else:
                    # First segment: also build the CSV header row from the
                    # extractor labels.
                    labels, types, values = [list(itertools.chain.from_iterable(ret))
                                             for ret in
                                             zip(*([extractor().run(segment) for extractor in extractors]))]
                    header = ["%s" % label for label in labels]
                    header.append("file")
                    header.append("class")
                    data.append(header)
                values.append(item)
                values.append(cl)
                data.append(values)

    dataframe = pd.DataFrame(data)
    dataframe.to_csv(output_file, header=False, index=False)
    print("Saving data to file")
    return output_file
def extract_all_csv(self, dataset, extractors, output_file=None, dirs=None, overwrite=True):
    """Run the feature extractors on every image of the dataset and save one
    row per image to <dataset>/data.csv.

    The first CSV row holds the extractor labels plus "file" and "class".

    Parameters
    ----------
    dataset : string
        Path to the dataset directory (one sub-directory per class).
    extractors : list
        Feature extractors taken from pynovisao.
    output_file : string, optional
        Kept for backward compatibility; the data is always written to
        <dataset>/data.csv.
    dirs : list, optional
        Class directories to process; defaults to every class in the dataset.
    overwrite : bool
        When False and the output file already exists, return it untouched.

    Returns
    -------
    (string, float)
        Path of the CSV file actually written and elapsed time in seconds.

    Raises
    ------
    IException
        If no extractor was selected, an image is corrupt, or the dataset
        contains no images.
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    # BUG FIX: the method writes <dataset>/data.csv, but used to build (and
    # return) a '.arff' path that was never written to disk.
    csv_file = File.make_path(dataset, "data.csv")
    if not overwrite and os.path.isfile(csv_file):
        return csv_file, 0

    start_time = TimeUtils.get_time()
    classes = sorted(File.list_dirs(dataset))
    dirs = classes if dirs is None else dirs
    data = []

    # Feature extraction of all images in all classes inside the dataset.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))
        for item in items:
            if item.startswith('.'):  # skip hidden files
                continue
            try:
                filepath = File.make_path(dataset, cl, item)
                image = File.open_image(filepath, rgb=False)
            except Exception:
                raise IException("Image %s is possibly corrupt" % filepath)
            if len(data) > 0:
                values = list(itertools.chain.from_iterable(
                    zip(*([extractor().run(image) for extractor in extractors]))[2]))
            else:
                # First image: also build the CSV header from the labels.
                labels, types, values = [list(itertools.chain.from_iterable(ret))
                                         for ret in
                                         zip(*([extractor().run(image) for extractor in extractors]))]
                header = ["%s" % label for label in labels]
                header.append("file")
                header.append("class")
                data.append(header)
            values.append(item)
            values.append(cl)
            data.append(values)

    if len(data) == 0:
        raise IException("There are no images in dataset: %s" % dataset)

    pd.DataFrame(data).to_csv(csv_file, header=False, index=False)
    print("Saving data to file")
    end_time = TimeUtils.get_time()
    return csv_file, (end_time - start_time)
def extract_all_superpixels_arff(self, dataset, segmenter, extractors, output_file=None, dirs=None, overwrite=True):
    """Segment every image into superpixels, extract features from each one
    and save everything in ARFF format (via _save_output).

    Parameters
    ----------
    dataset : string
        Path to the dataset directory (one sub-directory per class).
    segmenter : Segmenter
        Segmenter taken from pynovisao.
    extractors : list
        Feature extractors taken from pynovisao.
    output_file : string, optional
        Base name of the output file; defaults to the dataset name.
    dirs : list, optional
        Class directories to process; defaults to every class in the dataset.
    overwrite : bool
        When False and the output file already exists, return it untouched.

    Returns
    -------
    (string, float)
        Path of the ARFF file written and elapsed time in seconds.

    Raises
    ------
    IException
        If no extractor was selected, an image is corrupt, or the dataset
        contains no images.
    """
    if len(extractors) == 0:
        raise IException("Please select at least one extractor")

    if output_file is None:
        output_file = File.get_filename(dataset)
    output_file = File.make_path(dataset, output_file + '.arff')

    # If a previous output file exists and must not be overridden, reuse it.
    if not overwrite and os.path.isfile(output_file):
        return output_file, 0

    start_time = TimeUtils.get_time()
    classes = sorted(File.list_dirs(dataset))
    dirs = classes if dirs is None else dirs
    data = []

    # Feature extraction of every superpixel in all images of every class.
    for cl in dirs:
        items = sorted(os.listdir(File.make_path(dataset, cl)))
        print("Processing class %s - %d itens" % (cl, len(items)))
        for item in items:
            if item.startswith('.'):  # skip hidden files
                continue
            try:
                filepath = File.make_path(dataset, cl, item)
                print(filepath)
                image = File.open_image(filepath, rgb=False)
            except Exception:
                raise IException("Image %s is possibly corrupt" % filepath)
            segmenter.run(image)
            for i in segmenter.get_list_segments():
                segment, size_segment, idx_segment, run_time = segmenter.get_segment(idx_segment=i)
                if len(data) > 0:
                    values = list(itertools.chain.from_iterable(
                        zip(*([extractor().run(segment) for extractor in extractors]))[2]))
                else:
                    # First segment: also capture attribute labels and types
                    # for the ARFF header.
                    labels, types, values = [list(itertools.chain.from_iterable(ret))
                                             for ret in
                                             zip(*([extractor().run(segment) for extractor in extractors]))]
                data.append(values + [cl if cl in classes else classes[0]])

    if len(data) == 0:
        raise IException("There are no images in dataset: %s" % dataset)

    # Save the output file in ARFF format.
    self._save_output(File.get_filename(dataset), classes, labels, types, data, output_file)
    end_time = TimeUtils.get_time()
    return output_file, (end_time - start_time)
def _save_output(self, relation, classes, labels, types, data, output_file):
"""Save output file in ARFF format.
Parameters
----------
relation : string
Name of relation.
classes : list of string
List of classes names.
labels : list of string
List of attributes names.
types : list of string
List of attributes types.
data : list of list of string
List of instances.
output_file : string
Path to output file.
"""
arff = open(output_file, 'wb')
arff.write("%s %s\n\n" % ('@relation', relation))
for label, t in zip(labels, types):
arff.write("%s %s %s\n" % ('@attribute', label, t))
arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(classes)))
arff.write('@data\n\n')
for instance in data:
instance = map(str, instance)
line = ",".join(instance)
arff.write(line + "\n")
arff.close()
##################################Parte de fazer histogramas######################################
def get_classes_superpixels_from_k_means(self, k, X):
    """Cluster the superpixel feature rows of X with MiniBatch K-means.

    Returns the predicted cluster label of every row and the fitted
    clusterer object.
    """
    km = MiniBatchKMeans(n_clusters=k)
    km.fit(X)
    return km.predict(X), km
def get_histogramas_de_dados(self,X_train, classes_superpixels, k):
'''
:DataFrame X_train: dados dos superpixels
:lista classes_superpixels: as classes de cada superpixel dada pelo K-means
:int k: k do k-means
:array[][]: histogramas_de_dados
'''
X_train_classes = list(set(X_train['class']))
histogramas_de_dados = []
total = len(X_train_classes)
for cl in X_train_classes: # Para cada classe no X_train
lista_cl = X_train[X_train['class'] == cl] ##tabela com a classe expecifica
lista_arq = set(lista_cl['file']) ##tabela com o arquivos de uma classe especifica
for arq in lista_arq: ##Para cada arquivo na tabela de arquivos daquela classe especifica
h = [0] * (k+2) ##cria o vetor para o novo histograma para a imagem
tab = lista_cl[lista_cl['file'] == arq] ##criamos a tabela de um arquivo so
for index, row in tab.iterrows(): ##para cada superpixel da imagem
h[classes_superpixels[index]] = h[classes_superpixels[
index]] + 1; ##adiciona a classe do superpixel no
##histograma
h[k]=arq
h[k+1]=cl
histogramas_de_dados.append(h) ##add o histograma pra lista de histogramas
return histogramas_de_dados
def norm_data(self, X_csv):
    """Standard-score normalize the feature columns of the histogram data.

    The last two columns ('file' and 'class') are excluded. Needed before
    running scikit-learn's K-means.
    """
    features = X_csv.iloc[:, 0:-2]
    return StandardScaler().fit_transform(features)
def make_histogram(self, dataset, csv, k):
    """Build the Bag-of-Superpixels histogram file for a dataset.

    Reads <dataset>/data.csv, clusters the superpixel features with
    MiniBatch K-means and writes one histogram per image.

    Parameters
    ----------
    dataset : string
        Path to the dataset directory.
    csv : bool
        When True save the histograms as CSV, otherwise as ARFF.
    k : int
        Number of K-means clusters (histogram bins).

    Returns
    -------
    string
        Path of the file that was written.
    """
    # BUG FIX: `k` used to be overwritten with the constant 10 right here,
    # silently ignoring the caller's argument.
    data = pd.read_csv(File.make_path(dataset, 'data.csv'))
    data_norm = self.norm_data(data)
    # Cluster every superpixel row to get its histogram bin.
    classes_superpixels, clusterer = self.get_classes_superpixels_from_k_means(k, data_norm)
    histogramas_de_dados = self.get_histogramas_de_dados(data, classes_superpixels, k)
    if not csv:
        return self.save_histogram(dataset, histogramas_de_dados, k=k)
    hist = pd.DataFrame(histogramas_de_dados)
    output_file = File.make_path(dataset, "histograma.csv")
    hist.to_csv(output_file, header=False, index=False)
    print("Saving data to file")
    return output_file
def save_histogram(self, dataset, imagens_histograma, k):
    """Save the Bag-of-Superpixels histograms in ARFF format.

    Parameters
    ----------
    dataset : string
        Path to the dataset; its sub-directories give the class names.
    imagens_histograma : list of list
        One row per image: k bin counts, then the file name, then the class.
    k : int
        Number of histogram bins.

    Returns
    -------
    string
        Path of the ARFF file written (training_histograma_<k>.arff).
    """
    training_file = File.make_path(dataset, "training_histograma_" + str(k) + ".arff")

    # One data line per image: the k bin counts plus the class. The
    # file-name column (inst[-2]) is deliberately left out of the ARFF data.
    lines = []
    for inst in imagens_histograma:
        lines.append(",".join(str(v) for v in inst[:-2]) + "," + str(inst[-1]))

    # BUG FIX: the original never closed the file -- its close() call was
    # placed after the return statement and was unreachable. `with`
    # guarantees closure. The inner loop also no longer shadows the outer
    # loop variable as in the original.
    with open(training_file, 'w') as arff:
        arff.write("%s \n\n" % ('@relation histogram'))
        for i in range(len(imagens_histograma[0]) - 2):
            arff.write("%s %s %s\n" % ('@attribute', str(i), 'numeric'))
        arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(sorted(File.list_dirs(dataset)))))
        arff.write('@data\n\n')
        arff.write("\n".join(lines) + "\n")
    return training_file
\ No newline at end of file
......@@ -72,10 +72,15 @@ if __name__ == "__main__":
tk.add_command("Execute", act.run_segmenter, 'S')
tk.add_menu("Feature Extraction")
tk.add_command("Select extractors", act.select_extractors, 'e')
tk.add_command("Select extractors", act.select_extractors, 'E')
tk.add_separator()
tk.add_command("Execute", act.run_extractors, 'F')
tk.add_command("Execute images extraction", act.run_extractors, 'F')
tk.add_command("Execute Superpixels extraction", act.superpixel_extraction, 'J')
tk.add_separator()
tk.add_check_button("Save in CSV", act.toggle_CSV,default_state=False)
tk.add_separator()
tk.add_command("Make Histogram(k=10) only from CSV", act.make_histogram, 'H')
tk.add_menu("Classification")
tk.add_command("Choose classifier", act.select_classifier)
tk.add_command("Configure", act.configure_classifier)
......
......@@ -25,6 +25,8 @@ from util.config import Config
from util.file_utils import File as f
from util.utils import TimeUtils
from extraction.histoextraction import histoextraction
class Act(object):
"""Store all actions of Pynovisao."""
......@@ -63,6 +65,8 @@ class Act(object):
self._ground_truth = False
self._gt_segments = None
self.csv=False;
def _init_dataset(self, directory):
"""Initialize the directory of image dataset.
......@@ -308,7 +312,16 @@ class Act(object):
"""
self._dataset_generator = not self._dataset_generator
def toggle_CSV(self):
    """Enable/disable the option to save the extracted features in CSV
    format instead of ARFF (flips the self.csv flag read by the
    extraction actions).
    """
    self.csv = not self.csv
def make_histogram(self):
    """Build the Bag-of-Superpixels histogram (k=10) from the previously
    saved data.csv; output format (CSV vs ARFF) follows the self.csv toggle.
    """
    h=histoextraction()
    output_file=h.make_histogram(self.dataset,self.csv,k=10)
    self.tk.append_log("\nOutput file saved in %s", output_file)
def select_segmenter(self):
"""Open a dialog to choose the segmenter.
"""
......@@ -403,18 +416,46 @@ class Act(object):
"""Perform a feature extraction on all images of dataset, using the current collection of extractors.
"""
self.tk.write_log("Running extractors on all images in %s", self.dataset)
#Alexandre Cese
#O feature adicionado por mim nesta parte do programa foi dar a opcao de salvar as extracoes das imagens de um
#dataset em CSV
#Nao foi implementado a classificacao destes dados em CSV
if self.csv==False:
fextractor = FeatureExtractor(self.extractors)
self.tk.append_log("%s", '\n'.join([extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True ]))
output_file, run_time = fextractor.extract_all(self.dataset, "training")
self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
if self.classifier: self.classifier.reset()
else:
h=histoextraction()
self.tk.append_log("%s", '\n'.join(
[extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True]))
output_file, run_time =h.extract_all_csv(dataset=self.dataset, extractors=self.extractors)
self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
if self.classifier: self.classifier.reset()
def superpixel_extraction(self):
    # Splits every dataset image into superpixels and extracts their features,
    # saving to ARFF (default) or CSV depending on the self.csv toggle.
    h=histoextraction()
    if self.csv==False:
        output_file, run_time =h.extract_all_superpixels_arff(self.dataset,self.segmenter,self.extractors,output_file='training')
        self.tk.append_log("\nOutput file saved in %s", output_file)
        self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
        if self.classifier: self.classifier.reset()
    else:
        output_file=h.extract_all_superpixels_csv(self.dataset, self.segmenter, self.extractors)
        self.tk.append_log("\nOutput file saved in %s", output_file)
        if self.classifier: self.classifier.reset()
    # NOTE(review): the block below duplicates run_extractors and re-runs a
    # whole-image extraction after the superpixel one -- it looks like
    # leftover code (or diff residue from the commit view); confirm whether
    # it should be removed.
    fextractor = FeatureExtractor(self.extractors)
    self.tk.append_log("%s", '\n'.join([extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
                                        if extraction._extractor_list[extractor].value == True ]))
    output_file, run_time = fextractor.extract_all(self.dataset, "training")
    self.tk.append_log("\nOutput file saved in %s", output_file)
    self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
    if self.classifier: self.classifier.reset()
def select_classifier(self):
"""Open a dialog to select the classifier.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment