Commit 8f7cf9d1 authored by Alexandre Cese

Class name changed to start with an uppercase letter, comments translated to English, csv renamed to _csv

parent e29f2ef5
......@@ -13,16 +13,17 @@ from util import File, TimeUtils
from sklearn.cluster import MiniBatchKMeans
from extraction.feature_extraction import FeatureExtractor
class histoextraction(object):
class Histoextraction(FeatureExtractor):
def __init__(self):
self.data=[]
def extract_all_superpixels_csv(self, dataset, segmenter,extractors, overwrite=True):
def extract_all_superpixels_csv(self, dataset, segmenter, extractors, overwrite=True):
'''
:String dataset: endereco do dataset
:Segmenter segmenter: segmenter tirado do pynovisao
:Extractors extractors: extrator tirado do pynovisao
:String dataset: dataset path
:Segmenter segmenter: segmenter from pynovisao
:Extractors extractors: extractor from pynovisao
:bool overwrite:
:return: path where the CSV was saved
'''
......@@ -88,27 +89,22 @@ class histoextraction(object):
data.append(values)
dataframe = pd.DataFrame(data)
dataframe.to_csv(File.make_path(dataset, "data.csv"), header=False, index=False)
dataframe.to_csv(File.make_path(dataset, "Superpixels.csv"), header=False, index=False)
print "Saving data to file"
return File.make_path(dataset, "Superpixels.csv")
def extract_all_csv(self, dataset, extractors, output_file=None, dirs=None, overwrite=True):
'''
:string dataset: endereco do dataset usado
:extrator extractors: extratores do pynovisao
:String dataset: dataset path
:string output_file: endereco do arquivo final
:list dirs: nao usado
:bool overwrite: nao usado
:return: output_file, tempo de execucao
:list dirs: not used
:bool overwrite: not used
:return: output_file, run time
'''
if len(extractors) == 0:
raise IException("Please select at least one extractor")
if output_file is None:
output_file = File.get_filename(dataset)
output_file = File.make_path(dataset, output_file + '.arff')
# if an output file already exists and overwrite is disabled, return the current file
if overwrite == False and os.path.isfile(output_file):
return output_file, 0
......@@ -164,7 +160,8 @@ class histoextraction(object):
raise IException("There are no images in dataset: %s" % dataset)
dataframe = pd.DataFrame(data)
dataframe.to_csv(File.make_path(dataset, "data.csv"), header=False, index=False)
output_file=File.make_path(dataset, "Images.csv")
dataframe.to_csv(output_file, header=False, index=False)
print "Saving data to file"
end_time = TimeUtils.get_time()
......@@ -172,15 +169,14 @@ class histoextraction(object):
return output_file, (end_time - start_time)
def extract_all_superpixels_arff(self, dataset, segmenter,extractors,output_file=None, dirs=None, overwrite=True):
def extract_all_superpixels_arff(self, dataset, segmenter, extractors, output_file=None, dirs=None, overwrite=True):
'''
:param dataset: dataset do pynovisao
:param segmenter: segmenter do pynovisao
:param extractors: extratores do pynovisao
:param output_file: endereco do arquivo final
:param dirs: nao usado
:param overwrite: nao usado
:String dataset: dataset path
:Segmenter segmenter: segmenter from pynovisao
:String output_file: path to the output file
:list dirs: not used
:bool overwrite: not used
:return: output_file, run time
'''
if len(extractors) == 0:
......@@ -286,85 +282,84 @@ class histoextraction(object):
def get_classes_superpixels_from_k_means(self,k, X):
#este metodo aplica o K-means pelo miniBatch kmeans e retorna as classes dos superpixels
#This method applies K-means via MiniBatchKMeans and returns each superpixel's class plus the fitted clusterer
clusterer = MiniBatchKMeans(n_clusters=k)
clusterer.fit(X)
classes_superpixels = clusterer.predict(X)
return classes_superpixels, clusterer
def get_histogramas_de_dados(self,X_train, classes_superpixels, k):
def get_data_histogram(self,X_train, classes_superpixels, k):
'''
:DataFrame X_train: dados dos superpixels
:lista classes_superpixels: as classes de cada superpixel dada pelo K-means
:int k: k do k-means
:array[][]: histogramas_de_dados
:DataFrame X_train: superpixel data
:list classes_superpixels: the class assigned to each superpixel by k-means
:int k: k from k-means
:return: data_histogram (array[][]), one histogram per image
'''
X_train_classes = list(set(X_train['class']))
histogramas_de_dados = []
data_histogram = []
total = len(X_train_classes)
for cl in X_train_classes: # Para cada classe no X_train
lista_cl = X_train[X_train['class'] == cl] ##tabela com a classe expecifica
lista_arq = set(lista_cl['file']) ##tabela com o arquivos de uma classe especifica
for arq in lista_arq: ##Para cada arquivo na tabela de arquivos daquela classe especifica
h = [0] * (k+2) ##cria o vetor para o novo histograma para a imagem
tab = lista_cl[lista_cl['file'] == arq] ##criamos a tabela de um arquivo so
for index, row in tab.iterrows(): ##para cada superpixel da imagem
for cl in X_train_classes: # for each class in X_train
lista_cl = X_train[X_train['class'] == cl] ##rows belonging to this specific class
file_list = set(lista_cl['file']) ##set of files belonging to this class
for arq in file_list: ##for each file of this class
h = [0] * (k+2) ##creates the array for the new image histogram
tab = lista_cl[lista_cl['file'] == arq] ##creates the table for one file only
for index, row in tab.iterrows(): ##for each superpixel in image
h[classes_superpixels[index]] = h[classes_superpixels[
index]] + 1; ##adiciona a classe do superpixel no
##histograma
index]] + 1; ##increment the bin for this superpixel's cluster
h[k]=arq
h[k+1]=cl
histogramas_de_dados.append(h) ##add o histograma pra lista de histogramas
data_histogram.append(h) ##adds the histogram to the histogram list
return histogramas_de_dados
return data_histogram
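# A condensed sketch of the per-image count built by the loop above, assuming
# `labels` holds the k-means label of every superpixel row of one image
# (function and argument names here are illustrative, not part of the module):
import numpy as np
def sketch_image_histogram(labels, k, arq, cl):
    h = np.bincount(labels, minlength=k).tolist()  # one count per cluster, as in h[...] + 1 above
    return h + [arq, cl]                           # file name and class appended, like h[k], h[k+1]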
def norm_data(self,X_csv):
#Este metodo normaliza os dados dos histogramas
#necessario para fazer o k-means do sklearn
#This method normalizes the feature columns (all but the last two)
#required before running sklearn's k-means
scaler_KM = StandardScaler()
X_csv_norm = scaler_KM.fit_transform(X_csv.iloc[:, 0:-2])
return X_csv_norm
def make_histogram(self,dataset,csv,k):
k=10
data=pd.read_csv(File.make_path(dataset,'data.csv'))
data=pd.read_csv(File.make_path(dataset,'Superpixels.csv'))
data_norm=self.norm_data(data)
# Gerar as classes_superpixels
# Generates the superpixel classes
classes_superpixels, clusterer = self.get_classes_superpixels_from_k_means(k, data_norm)
# Gera o histograma de dados
histogramas_de_dados = self.get_histogramas_de_dados(data, classes_superpixels,k)
# Generate the data_histogram
data_histogram = self.get_data_histogram(data, classes_superpixels,k)
if csv==False:
path=self.save_histogram(dataset,histogramas_de_dados,k=k)
path=self.save_histogram(dataset,data_histogram,k=k)
return path
else:
hist=pd.DataFrame(histogramas_de_dados)
output_file=File.make_path(dataset, "histograma.csv")
hist=pd.DataFrame(data_histogram)
output_file=File.make_path(dataset, "histograma_"+str(k)+".csv")
hist.to_csv(output_file, header=False, index=False)
print "Saving data to file"
return output_file
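# A minimal usage sketch of make_histogram, assuming the dataset folder already
# contains the Superpixels.csv written by extract_all_superpixels_csv
# (the path and helper name are illustrative only):
def sketch_make_histogram_usage(dataset="/path/to/dataset"):
    h = Histoextraction()
    arff_path = h.make_histogram(dataset, csv=False, k=10)  # writes training_histograma_10.arff
    csv_path = h.make_histogram(dataset, csv=True, k=10)    # writes histograma_10.csv
    return arff_path, csv_path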
def save_histogram(self,dataset,imagens_histograma,k):
def save_histogram(self, dataset, images_histogram, k):
'''
:param dataset: endereco do dataset
:param imagens_histograma: BoS de cada imagem
:param k: numero de posicoes no histograma de superpixels
:return:training_file - Endereco do arquivo final
:param dataset: dataset path
:param images_histogram: each image's histogram
:param k: number of bins in the superpixel histogram
:return: training_file - path to the output file
'''
training_file=File.make_path(dataset, "training_histograma_" + str(k) + ".arff")
values=""
for i,inst in enumerate(imagens_histograma):
for i,inst in enumerate(images_histogram):
for i in range(0,len(inst)-2):
values=values+str(inst[i])+","
values=values+str(inst[len(inst)-1])+"\n"
......@@ -375,7 +370,7 @@ class histoextraction(object):
arff.write("%s \n\n" % ('@relation histogram'))
for i in range(0,len(imagens_histograma[0])-2):
for i in range(0, len(images_histogram[0])-2):
arff.write("%s %s %s\n" % ('@attribute', str(i),'numeric'))
arff.write("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(sorted(File.list_dirs(dataset)))))
......
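For reference, with k=3 and two hypothetical class folders named soil and weed, the header written by save_histogram above would come out roughly as:

@relation histogram

@attribute 0 numeric
@attribute 1 numeric
@attribute 2 numeric
@attribute classe {soil, weed}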
......@@ -25,7 +25,7 @@ from util.config import Config
from util.file_utils import File as f
from util.utils import TimeUtils
from extraction.histoextraction import histoextraction
from extraction.histoextraction import Histoextraction
class Act(object):
"""Store all actions of Pynovisao."""
......@@ -65,7 +65,7 @@ class Act(object):
self._ground_truth = False
self._gt_segments = None
self.csv=False;
self._csv=False;
def _init_dataset(self, directory):
......@@ -315,11 +315,12 @@ class Act(object):
def toggle_CSV(self):
"""Enable/disable the option to save the softwares in CSV
"""
self.csv = not self.csv
self._csv = not self._csv
def make_histogram(self):
h=histoextraction()
output_file=h.make_histogram(self.dataset,self.csv,k=10)
#Method that creates the histogram from the CSV file in the dataset
h=Histoextraction()
output_file=h.make_histogram(self.dataset,self._csv,k=10)
self.tk.append_log("\nOutput file saved in %s", output_file)
def select_segmenter(self):
......@@ -416,11 +417,10 @@ class Act(object):
"""Perform a feature extraction on all images of dataset, using the current collection of extractors.
"""
self.tk.write_log("Running extractors on all images in %s", self.dataset)
#Alexandre Cese
#O feature adicionado por mim nesta parte do programa foi dar a opcao de salvar as extracoes das imagens de um
#dataset em CSV
#Nao foi implementado a classificacao destes dados em CSV
if self.csv==False:
#code from Alexandre Cese
#Features of all images in the dataset can now be extracted to ARFF or CSV
#Classification of the CSV output has not been implemented
if self._csv==False:
fextractor = FeatureExtractor(self.extractors)
self.tk.append_log("%s", '\n'.join([extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True ]))
......@@ -431,26 +431,27 @@ class Act(object):
if self.classifier: self.classifier.reset()
else:
h=histoextraction()
h=Histoextraction()
self.tk.append_log("%s", '\n'.join(
[extraction._extractor_list[extractor].label for extractor in extraction._extractor_list
if extraction._extractor_list[extractor].value == True]))
output_file, run_time =h.extract_all_csv(dataset=self.dataset, extractors=self.extractors)
output_file, run_time =h.extract_all_csv(dataset=self.dataset,extractors=self.extractors)
self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
if self.classifier: self.classifier.reset()
def superpixel_extraction(self):
#Este metodo divide as imagens dos datasets em superpixels e os extrai
h=histoextraction()
if self.csv==False:
#This method splits the dataset images into superpixels and extracts features from them
#The extracted features can be saved as ARFF or CSV
h=Histoextraction()
if self._csv==False:
output_file, run_time =h.extract_all_superpixels_arff(self.dataset,self.segmenter,self.extractors,output_file='training')
self.tk.append_log("\nOutput file saved in %s", output_file)
self.tk.append_log("Time elapsed: %0.3f seconds", run_time)
if self.classifier: self.classifier.reset()
else:
output_file=h.extract_all_superpixels_csv(self.dataset, self.segmenter, self.extractors)
output_file=h.extract_all_superpixels_csv(self.dataset, self.segmenter,self.extractors)
self.tk.append_log("\nOutput file saved in %s", output_file)
if self.classifier: self.classifier.reset()
......
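A minimal usage sketch of the new _csv flow at the Act level, assuming `act` is a fully initialized Act instance with dataset, segmenter and extractors already configured (the helper name is illustrative):

def sketch_csv_flow(act):
    act.toggle_CSV()             # flip self._csv from False to True
    act.superpixel_extraction()  # writes Superpixels.csv into the dataset folder
    act.make_histogram()         # builds histograma_10.csv from Superpixels.csv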