Commit 8f7cf9d1 authored by Alexandre Cese
Browse files

Classe mudada para letra maiuscula, comentarios para ingles, csv para _csv

parent e29f2ef5
...@@ -13,16 +13,17 @@ from util import File, TimeUtils ...@@ -13,16 +13,17 @@ from util import File, TimeUtils
from sklearn.cluster import MiniBatchKMeans from sklearn.cluster import MiniBatchKMeans
from extraction.feature_extraction import FeatureExtractor
class histoextraction(object): class Histoextraction(FeatureExtractor):
def __init__(self):
    # Start every instance with an empty list in self.data.
    # NOTE(review): the class derives from FeatureExtractor, but this
    # constructor never calls the parent __init__ -- confirm that none of
    # the inherited methods rely on state the parent constructor sets up.
    self.data=[]
def extract_all_superpixels_csv(self, dataset, segmenter,extractors, overwrite=True): def extract_all_superpixels_csv(self, dataset, segmenter, extractors, overwrite=True):
''' '''
:String dataset: endereco do dataset :String dataset: dataset path
:Segmenter segmenter: segmenter tirado do pynovisao :Segmenter segmenter: segmenter from pynovisao
:Extractors extractors: extrator tirado do pynovisao :Extractors extractors: extrator from pynovisao
:bool overwrite: :bool overwrite:
:return: Address where csv was saved :return: Address where csv was saved
''' '''
...@@ -88,27 +89,22 @@ class histoextraction(object): ...@@ -88,27 +89,22 @@ class histoextraction(object):
data.append(values) data.append(values)
dataframe = pd.DataFrame(data) dataframe = pd.DataFrame(data)
dataframe.to_csv(File.make_path(dataset, "data.csv"), header=False, index=False) dataframe.to_csv(File.make_path(dataset, "Superpixels.csv"), header=False, index=False)
print "Saving data to file" print "Saving data to file"
return File.make_path(dataset, "data.csv") return File.make_path(dataset, "data.csv")
def extract_all_csv(self, dataset, extractors, output_file=None, dirs=None, overwrite=True): def extract_all_csv(self, dataset, extractors, output_file=None, dirs=None, overwrite=True):
''' '''
:string dataset: endereco do dataset usado :String dataset: dataset path
:extrator extractors: extratores do pynovisao
:string output_file: endereco do arquivo final :string output_file: endereco do arquivo final
:list dirs: nao usado :list dirs: not used
:bool overwrite: nao usado :bool overwrite: not used
:return: output_file, tempo de execucao :return: output_file, run time
''' '''
if len(extractors) == 0: if len(extractors) == 0:
raise IException("Please select at least one extractor") raise IException("Please select at least one extractor")
if output_file is None:
output_file = File.get_filename(dataset)
output_file = File.make_path(dataset, output_file + '.arff')
# if already exists a output file and must not override, return current file # if already exists a output file and must not override, return current file
if overwrite == False and os.path.isfile(output_file): if overwrite == False and os.path.isfile(output_file):
return output_file, 0 return output_file, 0
...@@ -164,7 +160,8 @@ class histoextraction(object): ...@@ -164,7 +160,8 @@ class histoextraction(object):
raise IException("There are no images in dataset: %s" % dataset) raise IException("There are no images in dataset: %s" % dataset)
dataframe = pd.DataFrame(data) dataframe = pd.DataFrame(data)
dataframe.to_csv(File.make_path(dataset, "data.csv"), header=False, index=False) output_file=File.make_path(dataset, "Images.csv")
dataframe.to_csv(output_file, header=False, index=False)
print "Saving data to file" print "Saving data to file"
end_time = TimeUtils.get_time() end_time = TimeUtils.get_time()
...@@ -172,15 +169,14 @@ class histoextraction(object): ...@@ -172,15 +169,14 @@ class histoextraction(object):
return output_file, (end_time - start_time) return output_file, (end_time - start_time)
def extract_all_superpixels_arff(self, dataset, segmenter,extractors,output_file=None, dirs=None, overwrite=True): def extract_all_superpixels_arff(self, dataset, segmenter, extractors, output_file=None, dirs=None, overwrite=True):
''' '''
:param dataset: dataset do pynovisao :String dataset: dataset path
:param segmenter: segmenter do pynovisao :Segmenter segmenter: segmenter from pynovisao
:param extractors: extratores do pynovisao :String output_file: final address to file
:param output_file: endereco do arquivo final :bool dirs: not used
:param dirs: nao usado :bool overwrite: not used
:param overwrite: nao usado
:return: output_file, tempo de execucao :return: output_file, tempo de execucao
''' '''
if len(extractors) == 0: if len(extractors) == 0:
...@@ -286,85 +282,84 @@ class histoextraction(object): ...@@ -286,85 +282,84 @@ class histoextraction(object):
def get_classes_superpixels_from_k_means(self, k, X):
    '''
    Cluster the rows of X with MiniBatchKMeans and label every row.

    :int k: number of clusters requested from MiniBatchKMeans
    :param X: samples handed first to ``fit`` and then to ``predict``
    :return: (labels predicted for X, the fitted MiniBatchKMeans instance)
    '''
    model = MiniBatchKMeans(n_clusters=k)
    model.fit(X)
    return model.predict(X), model
def get_data_histogram(self, X_train, classes_superpixels, k):
    '''
    Build one histogram of k-means labels for every (class, file) pair.

    :DataFrame X_train: superpixel rows; must contain 'class' and 'file' columns
    :list classes_superpixels: k-means label of each superpixel, looked up
        with the index label of the matching X_train row
    :int k: k from k-means, i.e. the number of histogram bins
    :return: list with one row per (class, file) pair, laid out as
        [count_0, ..., count_(k-1), file, class]
    '''
    data_histogram = []
    # unique() keeps first-appearance order, so the rows come out in a
    # reproducible order; iterating a set gives no such guarantee.
    for cl in X_train['class'].unique().tolist():
        class_rows = X_train[X_train['class'] == cl]  # rows of this class only
        for arq in class_rows['file'].unique().tolist():
            h = [0] * (k + 2)  # k bins plus two trailing slots (file, class)
            file_rows = class_rows[class_rows['file'] == arq]
            # Only the index labels are needed to look up each superpixel's
            # cluster, so iterate the index instead of iterrows().
            for index in file_rows.index:
                h[classes_superpixels[index]] += 1
            h[k] = arq
            h[k + 1] = cl
            data_histogram.append(h)
    return data_histogram
def norm_data(self, X_csv):
    '''
    Standardize the feature columns of X_csv.

    Every column except the last two is run through a freshly fitted
    StandardScaler. make_histogram calls this before running k-means.

    :DataFrame X_csv: table whose last two columns are left out of the scaling
    :return: the result of StandardScaler.fit_transform on the remaining columns
    '''
    features = X_csv.iloc[:, :-2]
    return StandardScaler().fit_transform(features)
def make_histogram(self, dataset, csv, k):
    '''
    Build the per-image histogram of superpixel clusters and save it.

    Reads "Superpixels.csv" from the dataset directory, normalizes it,
    clusters the superpixels with k-means and counts the cluster labels
    per image.

    :String dataset: dataset path
    :bool csv: when false the histograms are written through save_histogram
        (ARFF); otherwise they are written to "histograma_<k>.csv"
    :int k: number of k-means clusters, i.e. histogram bins
    :return: path of the file that was written
    '''
    # k comes from the caller. The former hard-coded "k=10" at this point
    # silently discarded whatever value was passed in.
    data = pd.read_csv(File.make_path(dataset, 'Superpixels.csv'))
    data_norm = self.norm_data(data)

    # Generate the superpixel classes
    classes_superpixels, _ = self.get_classes_superpixels_from_k_means(k, data_norm)

    # Generate the data histogram
    data_histogram = self.get_data_histogram(data, classes_superpixels, k)

    if not csv:
        return self.save_histogram(dataset, data_histogram, k=k)

    hist = pd.DataFrame(data_histogram)
    output_file = File.make_path(dataset, "histograma_" + str(k) + ".csv")
    hist.to_csv(output_file, header=False, index=False)
    # Parenthesized single-argument form prints the same text on Python 2 and 3.
    print("Saving data to file")
    return output_file
def save_histogram(self,dataset,imagens_histograma,k): def save_histogram(self, dataset, images_histogram, k):
''' '''
:param dataset: endereco do dataset :param dataset: dataset path
:param imagens_histograma: BoS de cada imagem :param images_histogram: each image's histogram
:param k: numero de posicoes no histograma de superpixels :param k: number of slots in histogram of superpixels
:return:training_file - Endereco do arquivo final :return:training_file - address to final file
''' '''
training_file=File.make_path(dataset, "training_histograma_" + str(k) + ".arff") training_file=File.make_path(dataset, "training_histograma_" + str(k) + ".arff")
values="" values=""
for i,inst in enumerate(imagens_histograma): for i,inst in enumerate(images_histogram):
for i in range(0,len(inst)-2): for i in range(0,len(inst)-2):
values=values+str(inst[i])+"," values=values+str(inst[i])+","
values=values+str(inst[len(inst)-1])+"\n" values=values+str(inst[len(inst)-1])+"\n"
...@@ -375,7 +370,7 @@ class histoextraction(object): ...@@ -375,7 +370,7 @@ class histoextraction(object):
arff.write("%s \n\n" % ('@relation histogram')) arff.write("%s \n\n" % ('@relation histogram'))
for i in range(0,len(imagens_histograma[0])-2): for i in range(0, len(images_histogram[0])-2):
arff.write("%s %s %s\n" % ('@attribute', str(i),'numeric')) arff.write("%s %s %s\n" % ('@attribute', str(i),'numeric'))
arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(sorted(File.list_dirs(dataset))))) arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(sorted(File.list_dirs(dataset)))))
......
...@@ -25,7 +25,7 @@ from util.config import Config ...@@ -25,7 +25,7 @@ from util.config import Config
from util.file_utils import File as f from util.file_utils import File as f
from util.utils import TimeUtils from util.utils import TimeUtils
from extraction.histoextraction import histoextraction from extraction.histoextraction import Histoextraction
class Act(object): class Act(object):
"""Store all actions of Pynovisao.""" """Store all actions of Pynovisao."""
...@@ -65,7 +65,7 @@ class Act(object): ...@@ -65,7 +65,7 @@ class Act(object):
self._ground_truth = False self._ground_truth = False
self._gt_segments = None self._gt_segments = None
self.csv=False; self._csv=False;
def _init_dataset(self, directory): def _init_dataset(self, directory):
...@@ -315,11 +315,12 @@ class Act(object): ...@@ -315,11 +315,12 @@ class Act(object):
def toggle_CSV(self):
    """Enable/disable the option to save extraction output as CSV.

    Flips the boolean flag ``self._csv`` each time it is called.
    """
    self._csv = not self._csv
def make_histogram(self):
    """Create the histogram for the current dataset and log where it was saved.

    Delegates to Histoextraction.make_histogram with k=10, forwarding the
    CSV flag so the helper picks the output format.
    """
    saved_path = Histoextraction().make_histogram(self.dataset, self._csv, k=10)
    self.tk.append_log("\nOutput file saved in %s", saved_path)
def select_segmenter(self): def select_segmenter(self):
...@@ -416,11 +417,10 @@ class Act(object): ...@@ -416,11 +417,10 @@ class Act(object):
"""Perform a feature extraction on all images of dataset, using the current collection of extractors. """Perform a feature extraction on all images of dataset, using the current collection of extractors.
""" """
self.tk.write_log("Running extractors on all images in %s", self.dataset) self.tk.write_log("Running extractors on all images in %s", self.dataset)
#Alexandre Cese #code from Alexandre Cese
#O feature adicionado por mim nesta parte do programa foi dar a opcao de salvar as extracoes das imagens de um #Now you can extract the all images from the dataset in arff or CSV
#dataset em CSV #It wasn't implemented the part to classify this data in CSV
#Nao foi implementado a classificacao destes dados em CSV if self._csv==False:
if self.csv==False:
fextractor = FeatureExtractor(self.extractors) fextractor = FeatureExtractor(self.extractors)
self.tk.append_log("%s", '\n'.join([extraction._extractor_list[extractor].label for extractor in extraction._extractor_list self.tk.append_log("%s", '\n'.join([extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True ])) if extraction._extractor_list[extractor].value == True ]))
...@@ -431,26 +431,27 @@ class Act(object): ...@@ -431,26 +431,27 @@ class Act(object):
if self.classifier: self.classifier.reset() if self.classifier: self.classifier.reset()
else: else:
h=histoextraction() h=Histoextraction()
self.tk.append_log("%s", '\n'.join( self.tk.append_log("%s", '\n'.join(
[extraction._extractor_list[extractor].label for extractor in extraction._extractor_list [extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True])) if extraction._extractor_list[extractor].value == True]))
output_file, run_time =h.extract_all_csv(dataset=self.dataset, extractors=self.extractors) output_file, run_time =h.extract_all_csv(dataset=self.dataset,extractors=self.extractors)
self.tk.append_log("\nOutput file saved in %s", output_file) self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time) self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
if self.classifier: self.classifier.reset() if self.classifier: self.classifier.reset()
def superpixel_extraction(self):
    """Split the dataset images into superpixels and extract their features.

    The output is written as CSV when the CSV option is enabled and as ARFF
    otherwise; the ARFF path also logs the elapsed time. The classifier, if
    any, is reset afterwards in both cases.
    """
    extractor = Histoextraction()
    if self._csv:
        saved_path = extractor.extract_all_superpixels_csv(
            self.dataset, self.segmenter, self.extractors)
        self.tk.append_log("\nOutput file saved in %s", saved_path)
    else:
        saved_path, elapsed = extractor.extract_all_superpixels_arff(
            self.dataset, self.segmenter, self.extractors, output_file='training')
        self.tk.append_log("\nOutput file saved in %s", saved_path)
        self.tk.append_log("Time elapsed: %0.3f seconds", elapsed)

    # Both branches finish by resetting the classifier, when one is set.
    if self.classifier:
        self.classifier.reset()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment