Commit 3b33e917 authored by Gabriel Kirsten's avatar Gabriel Kirsten

added pseudo label

parent d424033a
......@@ -23,6 +23,12 @@ except Exception as e:
CNNKeras = None
print e.message
try:
from .cnn_pseudo_label_keras import CNNPseudoLabel
except Exception as e:
CNNPseudoLabel = None
print e.message
try:
from .segnet_keras import SEGNETKeras
......@@ -34,6 +40,7 @@ except Exception as e:
__all__ = ["classifier",
"cnn_caffe",
"cnn_keras",
"cnn_pseudo_label_keras",
"segnet_keras",
"weka_classifiers",
"syntactic"
......@@ -49,6 +56,8 @@ _classifier_list = OrderedDict( [
WekaClassifiers is None and CNNCaffe is not None, bool, meta=CNNCaffe, hidden=CNNCaffe is None)],
["cnn_keras", Config("Invalid" if CNNKeras is None else CNNKeras.__name__,
CNNKeras is not None, bool, meta=CNNKeras, hidden=CNNKeras is None)],
["cnn_pseudo_label_keras", Config("Invalid" if CNNPseudoLabel is None else CNNPseudoLabel.__name__,
CNNPseudoLabel is not None, bool, meta=CNNPseudoLabel, hidden=CNNPseudoLabel is None)],
["segnet_keras", Config("Invalid" if SEGNETKeras is None else SEGNETKeras.__name__,
SEGNETKeras is not None, bool, meta=SEGNETKeras, hidden=SEGNETKeras is None)],
["weka_classifiers", Config("Invalid" if WekaClassifiers is None else WekaClassifiers.__name__,
......@@ -63,6 +72,7 @@ def get_classifier_config():
def set_classifier_config(configs):
_classifier_list["cnn_caffe"] = Config.nvl_config(configs["cnn_caffe"], _classifier_list["cnn_caffe"])
_classifier_list["cnn_keras"] = Config.nvl_config(configs["cnn_keras"], _classifier_list["cnn_keras"])
_classifier_list["cnn_pseudo_label_keras"] = Config.nvl_config(configs["cnn_pseudo_label_keras"], _classifier_list["cnn_pseudo_label_keras"])
_classifier_list["segnet_keras"] = Config.nvl_config(configs["segnet_keras"], _classifier_list["segnet_keras"])
_classifier_list["weka_classifiers"] = Config.nvl_config(configs["weka_classifiers"], _classifier_list["weka_classifiers"])
_classifier_list["syntactic"] = Config.nvl_config(configs["syntactic"], _classifier_list["syntactic"])
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
Pseudo label classifier with multiple models
Models -> (Xception, VGG16, VGG19, ResNet50, InceptionV3, MobileNet)
Name: cnn_pseudo_label_keras.py
Author: Gabriel Kirsten Menezes (gabriel.kirsten@hotmail.com)
"""
import time
import os
import shutil
import random
import numpy as np
import json
import logging
import sys
from PIL import Image
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Model, load_model
from keras.layers import Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import backend as K
from numpy import resize, expand_dims
from keras.preprocessing.image import load_img, img_to_array
from interface.interface import InterfaceException as IException
from classification.classifier import Classifier
from classification.pseudo_label import PseudoLabel
from collections import OrderedDict
from util.config import Config
from util.file_utils import File
from util.utils import TimeUtils
# Silence PIL's verbose debug logging; only warnings and above are shown.
logger = logging.getLogger('PIL')
logger.setLevel(logging.WARNING)
# =========================================================
# Constants
# =========================================================
# Input size expected by the network (images are resized to this).
IMG_WIDTH, IMG_HEIGHT = 256, 256
# NOTE(review): this module-level name is never assigned elsewhere in this
# file; CNNPseudoLabel reads self.weight_path instead — confirm whether this
# global is still needed.
weight_path = None
# Wall-clock start, kept for timing/logging purposes.
START_TIME = time.time()
class CNNPseudoLabel(Classifier):
    """Semi-supervised CNN classifier trained with the pseudo-label approach.

    Configuration follows the same Config-based pattern as the other
    classifiers in this package; model construction and training are
    delegated to classification.pseudo_label.PseudoLabel.
    """

    def __init__(self,
                 architecture="ResNet50",
                 learning_rate=0.001,
                 momentum=0.9,
                 batch_size=32,
                 epochs=50,
                 fine_tuning_rate=100,
                 transfer_learning=False,
                 save_weights=True,
                 perc_train=80,
                 perc_validation=20,
                 recreate_dataset=False,
                 train_data_directory="",
                 validation_data_directory="",
                 test_data_directory="",
                 no_label_data_directory=""):
        """Constructor of CNNPseudoLabel.

        Parameters
        ----------
        architecture : string, optional, default = "ResNet50"
            Keras application name (validated later by PseudoLabel).
        learning_rate : float, optional, default = 0.001
            Learning rate shown in the UI.
        momentum : float, optional, default = 0.9
            Momentum shown in the UI.
        batch_size : integer, optional, default = 32
            Labeled-data batch size.
        epochs : integer, optional, default = 50
            Number of training epochs.
        fine_tuning_rate : integer, optional, default = 100
            Percentage of layers to fine-tune under transfer learning.
        transfer_learning : boolean, optional, default = False
            Whether to start from imagenet weights.
        save_weights : boolean, optional, default = True
            Whether to save the trained weights.
        perc_train : float, optional, default = 80
            Train split percentage.
        perc_validation : float, optional, default = 20
            Validation split percentage.
        recreate_dataset : boolean, optional, default = False
            Whether to rebuild the train/validation split.
        train_data_directory : string, optional, default = ""
            Directory with labeled training images.
        validation_data_directory : string, optional, default = ""
            Directory with validation images.
        test_data_directory : string, optional, default = ""
            Directory with test images.
        no_label_data_directory : string, optional, default = ""
            Directory with unlabeled images used for pseudo-labeling.
        """
        self.architecture = Config("Architecture", architecture, str)
        self.learning_rate = Config("Learning rate", learning_rate, float)
        self.momentum = Config("Momentum", momentum, float)
        self.batch_size = Config("Batch size", batch_size, int)
        self.epochs = Config("Epochs", epochs, int)
        self.fine_tuning_rate = Config("Fine Tuning Rate", fine_tuning_rate, int)
        self.transfer_learning = Config("Transfer Learning", transfer_learning, bool)
        self.save_weights = Config("Save weights", save_weights, bool)
        self.perc_train = Config("Perc Train", perc_train, float)
        self.perc_validation = Config("Perc Validation", perc_validation, float)
        self.recreate_dataset = Config("Recreate Dataset", recreate_dataset, bool)
        self.train_data_directory = Config(
            "Train data directory", train_data_directory, str)
        self.validation_data_directory = Config(
            "Validation data directory", validation_data_directory, str)
        self.test_data_directory = Config(
            "Test data directory", test_data_directory, str)
        self.no_label_data_directory = Config(
            "No label data directory", no_label_data_directory, str)

        self.model = None
        # NOTE(review): nothing in this class ever sets trained to True, so
        # must_train() always returns True — confirm this is intended.
        self.trained = False

    def get_config(self):
        """Return configuration of classifier.

        Returns
        -------
        config : OrderedDict
            Current configs of classifier.
        """
        keras_config = OrderedDict()
        keras_config["Architecture"] = self.architecture
        keras_config["Learning rate"] = self.learning_rate
        keras_config["Momentum"] = self.momentum
        keras_config["Batch size"] = self.batch_size
        keras_config["Epochs"] = self.epochs
        keras_config["Fine Tuning rate"] = self.fine_tuning_rate
        keras_config["Transfer Learning"] = self.transfer_learning
        keras_config["Save weights"] = self.save_weights
        keras_config["Perc Train"] = self.perc_train
        keras_config["Perc Validation"] = self.perc_validation
        keras_config["Recreate Dataset"] = self.recreate_dataset
        keras_config["Train data directory"] = self.train_data_directory
        keras_config["Validation data directory"] = self.validation_data_directory
        keras_config["Test data directory"] = self.test_data_directory
        keras_config["No label data directory"] = self.no_label_data_directory
        return keras_config

    def set_config(self, configs):
        """Update configuration of classifier.

        Parameters
        ----------
        configs : OrderedDict
            New configs of classifier (same keys as get_config()).
        """
        self.architecture = Config.nvl_config(
            configs["Architecture"], self.architecture)
        self.learning_rate = Config.nvl_config(
            configs["Learning rate"], self.learning_rate)
        self.momentum = Config.nvl_config(configs["Momentum"], self.momentum)
        self.batch_size = Config.nvl_config(
            configs["Batch size"], self.batch_size)
        self.epochs = Config.nvl_config(configs["Epochs"], self.epochs)
        self.fine_tuning_rate = Config.nvl_config(
            configs["Fine Tuning rate"], self.fine_tuning_rate)
        self.transfer_learning = Config.nvl_config(
            configs["Transfer Learning"], self.transfer_learning)
        self.save_weights = Config.nvl_config(
            configs["Save weights"], self.save_weights)
        self.perc_train = Config.nvl_config(
            configs["Perc Train"], self.perc_train)
        self.perc_validation = Config.nvl_config(
            configs["Perc Validation"], self.perc_validation)
        self.recreate_dataset = Config.nvl_config(
            configs["Recreate Dataset"], self.recreate_dataset)
        self.train_data_directory = Config.nvl_config(
            configs["Train data directory"], self.train_data_directory)
        self.validation_data_directory = Config.nvl_config(
            configs["Validation data directory"], self.validation_data_directory)
        self.test_data_directory = Config.nvl_config(
            configs["Test data directory"], self.test_data_directory)
        self.no_label_data_directory = Config.nvl_config(
            configs["No label data directory"], self.no_label_data_directory)

    def get_summary_config(self):
        """Return formatted summary of configuration.

        Returns
        -------
        summary : string
            Formatted string with summary of configuration.
        """
        keras_config = OrderedDict()
        keras_config[self.architecture.label] = self.architecture.value
        keras_config[self.learning_rate.label] = self.learning_rate.value
        keras_config[self.momentum.label] = self.momentum.value
        keras_config[self.batch_size.label] = self.batch_size.value
        keras_config[self.epochs.label] = self.epochs.value
        keras_config[self.fine_tuning_rate.label] = self.fine_tuning_rate.value
        keras_config[self.transfer_learning.label] = self.transfer_learning.value
        keras_config[self.save_weights.label] = self.save_weights.value
        keras_config[self.perc_train.label] = self.perc_train.value
        keras_config[self.perc_validation.label] = self.perc_validation.value
        keras_config[self.recreate_dataset.label] = self.recreate_dataset.value
        keras_config[self.train_data_directory.label] = self.train_data_directory.value
        keras_config[self.validation_data_directory.label] = self.validation_data_directory.value
        keras_config[self.test_data_directory.label] = self.test_data_directory.value
        keras_config[self.no_label_data_directory.label] = self.no_label_data_directory.value
        summary = ''
        for config in keras_config:
            summary += "%s: %s\n" % (config, str(keras_config[config]))
        return summary

    def classify(self, dataset, test_dir, test_data, image):
        """Perform the classification.

        Parameters
        ----------
        dataset : string
            Path to image dataset.
        test_dir : string
            Directory (relative to dataset) with the images to classify.
        test_data : string
            Name of test data file. Not used.
        image : image, optional
            Not used.

        Returns
        -------
        one_hot_output : list of string
            Predicted class name for each instance, in generator order.

        Raises
        ------
        IException
            If no weight file is set or the model cannot be loaded.
        """
        predict_directory = File.make_path(dataset, test_dir)

        # Keras generators read images from a directory tree and the source
        # images may be TIFF; stage PNG copies/links under a 'png' subdir.
        png_directory = File.make_path(predict_directory, "png")
        if not os.path.exists(png_directory):
            os.makedirs(png_directory)

        for file in os.listdir(predict_directory):
            source_path = File.make_path(predict_directory, file)
            print(source_path)
            # Bug fix: skip directories — in particular the freshly created
            # 'png' staging dir, which the original symlinked into itself.
            if os.path.isdir(source_path):
                continue
            if os.path.splitext(file)[-1] == ".tif":
                try:
                    img = Image.open(source_path)
                    new_file = os.path.splitext(file)[0] + ".png"
                    img.save(File.make_path(predict_directory,
                                            'png', new_file), "PNG", quality=100)
                except Exception as e:
                    print(e)
            else:
                print(source_path)
                link_path = File.make_path(predict_directory, 'png', file)
                # Bug fix: a re-run used to crash because the link existed.
                if not os.path.exists(link_path):
                    os.symlink(source_path, link_path)

        classify_datagen = ImageDataGenerator()
        classify_generator = classify_datagen.flow_from_directory(
            File.make_path(predict_directory, 'png'),
            # Bug fix: keyword was misspelled 'taet_size', so images were
            # never resized to the network input size.
            target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=1,
            shuffle=False,
            class_mode=None)

        # Bug fix: when no weight file was set the original fell through to
        # a NameError on CLASS_NAMES; fail with a clear message instead.
        if self.weight_path is None:
            raise IException("Can't load the model: no weight file was set")
        try:
            K.clear_session()
            self.model = load_model(self.weight_path)
            path_classes = self.weight_path.replace(
                "_model.h5", "_classes.npy")
            # list() keeps the result indexable on Python 3 as well.
            CLASS_NAMES = list(np.load(path_classes).item().keys())
        except Exception as e:
            raise IException("Can't load the model in " +
                             self.weight_path + str(e))

        output_classification = self.model.predict_generator(
            classify_generator, classify_generator.samples, verbose=2)

        # Map each argmax index back to its class name.
        one_hot_output = np.argmax(output_classification, axis=1).tolist()
        return [CLASS_NAMES[index] for index in one_hot_output]

    def train(self, dataset, training_data, force=False):
        """Perform the training of classifier.

        Parameters
        ----------
        dataset : string
            Path to image dataset. Not used; directories come from config.
        training_data : string
            Name of ARFF training file. Not used.
        force : boolean, optional, default = False
            If False don't perform new training if there is trained data.
            Currently not checked by this implementation.
        """
        # NOTE(review): the learning_rate and momentum configs are not
        # forwarded to PseudoLabel (it falls back to its own defaults) —
        # confirm this is intended.
        pseudo_label = PseudoLabel(
            image_width=IMG_WIDTH,
            image_height=IMG_HEIGHT,
            image_channels=3,
            train_data_directory=self.train_data_directory.value,
            validation_data_directory=self.validation_data_directory.value,
            test_data_directory=self.test_data_directory.value,
            no_label_data_directory=self.no_label_data_directory.value,
            epochs=self.epochs.value,
            batch_size=self.batch_size.value,
            # Unlabeled batches are twice the labeled batch size.
            pseudo_label_batch_size=self.batch_size.value * 2,
            transfer_learning={
                'use_transfer_learning': self.transfer_learning.value,
                'fine_tuning': self.fine_tuning_rate.value
            },
            architecture=self.architecture.value,
            alpha=0.1)
        self.model = pseudo_label.model
        pseudo_label.fit_with_pseudo_label(
            steps_per_epoch=pseudo_label.train_generator.samples // self.batch_size.value,
            validation_steps=pseudo_label.validation_generator.samples // self.batch_size.value)

    def must_train(self):
        """Return if classifier must be trained.

        Returns
        -------
        boolean
            True while no training has been recorded.
        """
        return not self.trained

    def must_extract_features(self):
        """Return if classifier must be extracted features.

        Returns
        -------
        boolean
            Always False: the CNN consumes raw images directly.
        """
        return False

    def single_classify(self, image_path, directory, extractors, dict_classes):
        """Classify a single image file and return its class label.

        NOTE(review): `dict_preprocessing` and `self.app` are not defined
        anywhere in this module (the code appears copied from cnn_keras.py);
        calling this method raises NameError until they are provided.
        """
        preprocess_input, decode_predictions = dict_preprocessing[self.app]
        pil_image = load_img(image_path)
        np_image = img_to_array(pil_image)
        res_image = resize(np_image, (IMG_HEIGHT, IMG_WIDTH, 3))
        tensor = expand_dims(res_image, axis=0)
        tensor = preprocess_input(tensor)
        predict = self.model.predict(tensor)
        predict = np.argmax(predict, axis=1)
        return dict_classes[predict[0]]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import warnings
import numpy as np
from keras import applications
from keras import backend as K
from keras import callbacks as cbks
from keras.applications import (VGG16, VGG19, InceptionV3, MobileNet, ResNet50,
Xception)
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, Flatten
from keras.models import Model
from keras.optimizers import (SGD, Adadelta, Adagrad, Adam, Adamax, Nadam,
RMSprop, TFOptimizer)
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import GeneratorEnqueuer, OrderedEnqueuer, Sequence
# CONSTANTS
# Map from the architecture name accepted in configuration to the Keras
# application constructor that builds it.
LIST_OF_ACCEPTABLES_ARCHITECTURES = {
    'Xception': Xception,
    'VGG16': VGG16,
    'VGG19': VGG19,
    'ResNet50': ResNet50,
    'InceptionV3': InceptionV3,
    'MobileNet': MobileNet
}
# Map from optimizer name to Keras optimizer class.
# NOTE(review): Nadam is imported above but absent from this map — confirm
# whether it was left out deliberately.
LIST_OF_ACCEPTABLES_OPTIMIZERS = {
    'SGD': SGD,
    'Adagrad': Adagrad,
    'RMSprop': RMSprop,
    'Adadelta': Adadelta,
    'Adam': Adam,
    'Adamax': Adamax,
    'TFOptimizer': TFOptimizer
}
# Metric names accepted by make_model's validation.
LIST_OF_ACCEPTABLES_METRICS = [
    'acc',
    'accuracy',
    'binary_accuracy',
    'categorical_accuracy',
    'sparse_categorical_accuracy',
    'top_k_categorical_accuracy',
    'sparse_top_k_categorical_accuracy'
]
class PseudoLabel:
"""
Pseudo-label Class
"""
def __init__(self,
             image_width=256,
             image_height=256,
             train_data_directory="../data/train",
             validation_data_directory="../data/validation",
             test_data_directory="../data/test",
             no_label_data_directory="../data/no_label",
             batch_size=8,
             pseudo_label_batch_size=16,
             epochs=1,
             architecture="VGG16",
             image_channels=3,
             learnin_rate=0.001,
             save_heights=False,
             transfer_learning={'use_transfer_learning': False,
                                'fine_tuning': None},
             optimizer='SGD',
             metrics_list=['acc'],
             h5_filename=None,
             class_labels=None,
             alpha=0.5,
             print_pseudo_generate=True ,
             disconsider_no_label=False):
    """Pseudo-label class constructor.

    Builds the data generators, the Keras model and the .h5 filename as
    construction side effects.

    NOTE(review): the parameter names ``learnin_rate`` (sic) and
    ``save_heights`` are part of the public interface and are kept as-is;
    ``save_heights`` is never read in the visible code. The mutable
    defaults (``transfer_learning`` dict, ``metrics_list`` list) are only
    read, never mutated, so sharing them across calls is harmless here.
    """
    # Atributes declarations
    self.image_width = image_width
    self.image_height = image_height
    self.train_data_directory = train_data_directory
    self.validation_data_directory = validation_data_directory
    self.test_data_directory = test_data_directory
    self.no_label_data_directory = no_label_data_directory
    self.batch_size = batch_size
    self.pseudo_label_batch_size = pseudo_label_batch_size
    self.epochs = epochs
    self.architecture = architecture
    self.image_channels = image_channels
    self.learning_rate = learnin_rate
    self.use_transfer_learning = transfer_learning.get('use_transfer_learning')
    self.fine_tuning_rate = transfer_learning.get('fine_tuning')
    self.optimizer = optimizer
    self.metrics_list = metrics_list
    self.model = None
    self.train_generator = None
    self.validation_generator = None
    self.h5_filename = h5_filename
    self.class_labels = class_labels
    self.alpha = alpha
    self.print_pseudo_generate = print_pseudo_generate
    self.disconsider_no_label = disconsider_no_label
    # Make your model and dataset.
    # Order matters: make_model reads self.train_generator (for the number
    # of classes), so the generators must be created first.
    self.make_data_generators()
    self.make_model(architecture=self.architecture,
                    use_transfer_learning=self.use_transfer_learning,
                    fine_tuning_rate=self.fine_tuning_rate,
                    optimizer=self.optimizer,
                    metrics_list=self.metrics_list)
    self.generate_h5_filename()
def make_model(self,
               architecture=None,
               use_transfer_learning=False,
               fine_tuning_rate=None,
               optimizer='SGD',
               metrics_list=['accuracy']):
    """Create the CNN Keras model and compile it with the pseudo-label loss.

    Arguments:
        architecture (str): key of LIST_OF_ACCEPTABLES_ARCHITECTURES.
        use_transfer_learning (bool): start from imagenet weights and
            freeze part of the base network when True.
        fine_tuning_rate (int): percentage (0-100) of layers to fine-tune;
            required when use_transfer_learning is True.
        optimizer (str): key of LIST_OF_ACCEPTABLES_OPTIMIZERS.
        metrics_list (list): metric names from LIST_OF_ACCEPTABLES_METRICS.

    Raises:
        ValueError: for an unsupported metric/optimizer/architecture or an
            invalid/missing fine tuning rate.
    """
    # Validations
    for metric in metrics_list:
        if metric not in LIST_OF_ACCEPTABLES_METRICS:
            raise ValueError("The specified metric \'" +
                             metric + "\' is not supported")

    if optimizer not in LIST_OF_ACCEPTABLES_OPTIMIZERS:
        raise ValueError("The specified optimizer \'" +
                         optimizer + "\' is not supported!")

    if architecture not in LIST_OF_ACCEPTABLES_ARCHITECTURES:
        raise ValueError("The specified architecture \'" +
                         architecture + "\' is not supported!")

    if use_transfer_learning:
        # Bug fix: the missing-rate check must run BEFORE the range test.
        # The original evaluated `0 <= None <= 100` first, which is falsy
        # in Python 2, so a None rate produced the (wrong) range error and
        # the dedicated None check was unreachable.
        if fine_tuning_rate is None:
            raise ValueError(
                "You need to specify a fine tuning rate if you're using transfer learning!")
        if not 0 <= fine_tuning_rate <= 100:
            raise ValueError("The fine tuning rate must be beetween 0 and 100!")

    # With transfer learning
    if use_transfer_learning:
        self.model = LIST_OF_ACCEPTABLES_ARCHITECTURES.get(architecture)(
            weights="imagenet",
            include_top=False,
            input_shape=(self.image_height, self.image_width, self.image_channels))
        # Freeze all layers except the last fine_tuning_rate percent.
        last_layers = len(self.model.layers) - \
            int(len(self.model.layers) * (fine_tuning_rate / 100.))
        for layer in self.model.layers[:last_layers]:
            layer.trainable = False
    # Without transfer learning
    else:
        self.model = LIST_OF_ACCEPTABLES_ARCHITECTURES.get(architecture)(
            weights=None,
            include_top=False,
            input_shape=(self.image_height, self.image_width, self.image_channels))
        for layer in self.model.layers:
            layer.trainable = True

    # Adding the custom Layers
    new_custom_layers = self.model.output
    new_custom_layers = Flatten()(new_custom_layers)
    new_custom_layers = Dense(1024, activation="relu")(new_custom_layers)
    new_custom_layers = Dropout(0.5)(new_custom_layers)
    new_custom_layers = Dense(1024, activation="relu")(new_custom_layers)
    # Older/newer Keras versions expose the class count differently.
    try:
        predictions = Dense(self.train_generator.num_classes,
                            activation="softmax")(new_custom_layers)
    except AttributeError:
        predictions = Dense(self.train_generator.num_class,
                            activation="softmax")(new_custom_layers)

    # Create the final model
    self.model = Model(inputs=self.model.input, outputs=predictions)

    # Compile model
    self.model.compile(loss=self.pseudo_label_loss_function,
                       optimizer=LIST_OF_ACCEPTABLES_OPTIMIZERS.get(optimizer)(
                           lr=self.learning_rate
                       ),
                       metrics=metrics_list)
def pseudo_label_loss_function(self, y_true, y_pred):
    """Combined loss for a mixed batch: the first `batch_size` rows are
    labeled samples, the remainder are pseudo-labeled; the unlabeled term
    is weighted by `alpha`."""
    split = self.batch_size
    labeled_loss = self.cross_entropy(y_true[:split], y_pred[:split])
    unlabeled_loss = self.cross_entropy(y_true[split:], y_pred[split:])
    return (labeled_loss / self.batch_size) + \
        (self.alpha * (unlabeled_loss / self.pseudo_label_batch_size))
def cross_entropy(self, targets, predictions, epsilon=1e-12):
    """Symbolic cross-entropy between targets and predictions (Keras tensors).

    Returns the mean over the batch when the static batch dimension is
    known, otherwise the unnormalized sum.
    """
    # Clip to avoid log(0).
    predictions = K.clip(predictions, epsilon, 1. - epsilon)
    # Static batch dimension; a backend Dimension object whose .value may
    # be None when the batch size is dynamic.
    N = predictions.shape[0]
    # NOTE(review): the extra 1e-9 after clipping looks redundant but is
    # kept — presumably defensive; confirm before removing.
    log1 = K.log(predictions+1e-9)
    sum1 = K.sum(targets*log1)
    # sum1 is already a scalar, so the outer K.sum is a no-op kept as-is.
    if(predictions.shape[0].value is not None):
        return -K.sum(sum1)/N
    else:
        return -K.sum(sum1)
def make_data_generators(self, use_data_augmentation=False):
    """Initialize the train, validation, test and no-label generators.

    Parameters
    ----------
    use_data_augmentation : boolean, optional, default = False
        Currently unused; kept for interface compatibility.

    Side effects: sets self.train_generator, self.test_generator,
    self.validation_generator and self.no_label_generator (None when no
    unlabeled data directory is usable).
    """
    self.train_generator = ImageDataGenerator().flow_from_directory(
        self.train_data_directory,
        target_size=(self.image_height, self.image_width),
        color_mode='rgb',
        classes=self.class_labels,
        batch_size=self.batch_size,
        shuffle=True,
        class_mode="categorical")
    # Test data is read one image at a time, unshuffled, so predictions
    # can be matched back to files.
    self.test_generator = ImageDataGenerator().flow_from_directory(
        self.test_data_directory,
        target_size=(self.image_height, self.image_width),
        color_mode='rgb',
        batch_size=1,
        shuffle=False,
        class_mode="categorical")
    self.validation_generator = ImageDataGenerator().flow_from_directory(
        self.validation_data_directory,
        target_size=(self.image_height, self.image_width),
        color_mode='rgb',
        batch_size=self.batch_size,
        shuffle=True,
        class_mode="categorical")
    try:
        self.no_label_generator = ImageDataGenerator().flow_from_directory(
            self.no_label_data_directory,
            target_size=(self.image_height, self.image_width),
            color_mode='rgb',
            batch_size=self.pseudo_label_batch_size,
            shuffle=False,
            class_mode="categorical")
        # The unlabeled directory has no class subfolders, so borrow the
        # class count from the validation generator (attribute name varies
        # across Keras versions).
        try:
            self.no_label_generator.num_classes = self.validation_generator.num_classes
        except AttributeError:
            self.no_label_generator.num_class = self.validation_generator.num_class
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt. Best-effort behavior is kept: with no
        # usable unlabeled data, training proceeds without pseudo-labels.
        self.no_label_generator = None
def generate_h5_filename(self):
"""
Generate the .h5 filename. The .h5 file is the file that contains your trained model
"""
if self.fine_tuning_rate == 100:
self.h5_filename = self.architecture + \
'_transfer_learning'
elif self.fine_tuning_rate == None:
self.h5_filename = self.architecture + \
'_without_transfer_learning'
else:
self.h5_filename = self.architecture + \
'_fine_tunning_' + str(self.fine_tuning_rate)
################################################################################
# Semi-supervised - Pseudo label approach
################################################################################
def fit_with_pseudo_label(self,
steps_per_epoch,
validation_steps=None,
use_checkpoints=True,
class_labels=None,
verbose=1,
use_multiprocessing=False,
shuffle=False,
workers=1,
max_queue_size=10):