#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
    Runs feature extraction algorithms.

    Name: feature_extractor.py
    Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

import io
import itertools
import os

from interface.interface import InterfaceException as IException

from util.file_utils import File
from util.utils import ImageUtils
from util.utils import TimeUtils

from extractor import Extractor


class FeatureExtractor(object):
    """Handle the feature extraction."""

    def __init__(self, extractors):
        """Constructor.

        Parameters
        ----------
        extractors : list of Extractor
            Initial set of active extractors. Each entry is called with no
            arguments to build an extractor whose ``run(image)`` method is
            then invoked.
        """
        self.extractors = extractors

    def extract_all(self, dataset, output_file = None, dirs = None, overwrite = True):
        """Runs the feature extraction algorithms on all images of dataset.

        Parameters
        ----------
        dataset : string
            Path to dataset.
        output_file : string, optional, default = None
            Name of output file containing the features. If not informed is
            considered the name of dataset.
        dirs : list of string, optional, default = None
            List of directories to be searched. If not informed search in all
            directories with images inside dataset.
        overwrite : boolean, optional, default = True
            If False check if already exists a file containing the features.

        Returns
        -------
        out : tuple
            Returns a tuple containing the name of output file and the time
            spent, computed as the difference of two TimeUtils.get_time()
            readings (0 when an existing file is reused).

        Raises
        ------
        IException 'Please select at least one extractor'
            Empty list of extractors.
        IException 'Image %s is possibly corrupt'
            Error opening some image inside dataset.
        IException 'There are no images in dataset: %s'
            Dataset does not contain any image.
        """
        if len(self.extractors) == 0:
            raise IException("Please select at least one extractor")

        if output_file is None:
            output_file = File.get_filename(dataset)
        output_file = File.make_path(dataset, output_file + '.arff')

        # If an output file already exists and must not be overwritten,
        # return the current file. The equality test (rather than
        # `not overwrite`) is kept on purpose so that non-boolean arguments
        # behave exactly as they always did.
        if overwrite == False and os.path.isfile(output_file):  # noqa: E712
            return output_file, 0

        start_time = TimeUtils.get_time()

        classes = sorted(File.list_dirs(dataset))
        dirs = classes if dirs is None else dirs

        # Attribute names/types are taken from the first processed image.
        labels = None
        types = None
        data = []

        # Runs the feature extraction for all classes inside the dataset
        for cl in dirs:
            items = sorted(os.listdir(File.make_path(dataset, cl)))
            print("Processing class %s - %d itens" % (cl, len(items)))

            for item in items:
                # Skip hidden files (e.g. '.DS_Store').
                if item.startswith('.'):
                    continue

                # Built outside the try so the error message below can
                # always reference it.
                filepath = File.make_path(dataset, cl, item)
                try:
                    image = File.open_image(filepath, rgb = False)
                except Exception:
                    raise IException("Image %s is possibly corrupt" % filepath)

                item_labels, item_types, values = self._run_extractors(image)
                if labels is None:
                    labels, types = item_labels, item_types

                # Directories that are not a known class are filed under the
                # first class.
                data.append(values + [cl if cl in classes else classes[0]])

        if len(data) == 0:
            raise IException("There are no images in dataset: %s" % dataset)

        # Save the output file in ARFF format
        self._save_output(File.get_filename(dataset), classes, labels, types, data, output_file)

        end_time = TimeUtils.get_time()

        return output_file, (end_time - start_time)

    def extract_one_file(self, dataset, image_path, output_file = None):
        """Runs the feature extraction algorithms on specific image.

        Parameters
        ----------
        dataset : string
            Path to dataset.
        image_path : string
            Path to image.
        output_file : string, optional, default = None
            Name of output file containing the features. If not informed is
            considered the name of dataset.

        Returns
        -------
        out : tuple
            Returns a tuple containing the name of output file and the time
            spent, computed as the difference of two TimeUtils.get_time()
            readings.

        Raises
        ------
        IException 'Please select at least one extractor'
            Empty list of extractors.
        IException 'Image %s is possibly corrupt'
            Error opening image.
        """
        if len(self.extractors) == 0:
            raise IException("Please select at least one extractor")

        if output_file is None:
            output_file = File.get_filename(dataset)
        output_file = File.make_path(dataset, output_file + '.arff')

        classes = sorted(File.list_dirs(dataset))

        start_time = TimeUtils.get_time()

        try:
            image = File.open_image(image_path, rgb = False)
        except Exception:
            raise IException("Image %s is possibly corrupt" % image_path)

        labels, types, values = self._run_extractors(image)

        # The single instance is written with the first class as its label.
        self._save_output(File.get_filename(dataset), classes, labels, types,
                          [values + [classes[0]]], output_file)

        end_time = TimeUtils.get_time()

        return output_file, (end_time - start_time)

    def _run_extractors(self, image):
        """Run every active extractor on one image and merge their results.

        Parameters
        ----------
        image : object
            Image as returned by File.open_image.

        Returns
        -------
        out : list
            Three lists [labels, types, values], each one the concatenation,
            in extractor order, of the corresponding element returned by each
            extractor's run(image).
        """
        results = [extractor().run(image) for extractor in self.extractors]
        # zip(*results) regroups the per-extractor results into
        # (all labels, all types, all values); each group is then flattened.
        labels, types, values = [list(itertools.chain.from_iterable(ret))
                                 for ret in zip(*results)]
        return [labels, types, values]

    def _save_output(self, relation, classes, labels, types, data, output_file):
        """Save output file in ARFF format.

        Parameters
        ----------
        relation : string
            Name of relation.
        classes : list of string
            List of classes names.
        labels : list of string
            List of attributes names.
        types : list of string
            List of attributes types.
        data : list of list of string
            List of instances.
        output_file : string
            Path to output file.
        """
        lines = ["%s %s\n\n" % ('@relation', relation)]

        for label, t in zip(labels, types):
            lines.append("%s %s %s\n" % ('@attribute', label, t))

        lines.append("%s %s {%s}\n\n" % ('@attribute', 'classe', ', '.join(classes)))

        lines.append('@data\n\n')

        for instance in data:
            lines.append(",".join(map(str, instance)) + "\n")

        content = "".join(lines)
        # The file is written in binary mode so line endings are never
        # translated. Text that is not already a byte string (always the case
        # on Python 3) is encoded explicitly.
        if not isinstance(content, bytes):
            content = content.encode('utf-8')

        # The context manager closes the file even if the write fails.
        with open(output_file, 'wb') as arff:
            arff.write(content)