#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
    Generic classifier with multiple models
    Models -> (Xception, VGG16, VGG19, ResNet50, InceptionV3, MobileNet)

    Name: cnn_keras.py
    Author: Gabriel Kirsten Menezes (gabriel.kirsten@hotmail.com)

"""
import os
import random
import shutil
import time
from collections import OrderedDict

import numpy as np
from PIL import Image
from keras import applications
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.layers import Dropout, Flatten, Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

from interface.interface import InterfaceException as IException
from classification.classifier import Classifier
from util.config import Config
from util.file_utils import File
from util.utils import TimeUtils
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow warnings
START_TIME = time.time()

# =========================================================
# Constants
# =========================================================

IMG_WIDTH, IMG_HEIGHT = 256, 256

# Class names in the order of the one-hot indexes produced by training.
CLASS_NAMES = ['FolhasLargas', 'Gramineas',
               'Soja', 'Solo']
class CNNKeras(Classifier):
    """Class for CNN classifiers based on Keras applications."""

    def __init__(self, architecture="VGG16", learning_rate=0.0001, momentum=0.9, batch_size=16, epochs=1, fine_tuning_rate=100, transfer_learning=False, save_weights=True):
        """Constructor of CNNKeras.

        Parameters
        ----------
        architecture : string, optional, default = "VGG16"
            Keras application name (Xception, VGG16, VGG19, ResNet50,
            InceptionV3 or MobileNet).
        learning_rate : float, optional, default = 0.0001
            Learning rate of the SGD optimizer.
        momentum : float, optional, default = 0.9
            Momentum of the SGD optimizer.
        batch_size : integer, optional, default = 16
            Number of images per gradient update.
        epochs : integer, optional, default = 1
            Number of training epochs.
        fine_tuning_rate : integer, optional, default = 100
            Percentage of base-model layers to freeze; -1 means training
            from scratch (no pre-trained weights).
        transfer_learning : boolean, optional, default = False
            Transfer-learning flag (stored as configuration).
        save_weights : boolean, optional, default = True
            If True, persist the model weights during/after training.
        """
        self.architecture = Config(
            "Architecture", architecture, str)
        self.learning_rate = Config(
            "Learning rate", learning_rate, float)
        self.momentum = Config(
            "Momentum", momentum, float)
        self.batch_size = Config(
            "Batch size", batch_size, int)
        self.epochs = Config(
            "Epochs", epochs, int)
        self.fine_tuning_rate = Config(
            "Fine Tuning Rate", fine_tuning_rate, int)
        self.transfer_learning = Config(
            "Transfer Learning", transfer_learning, bool)
        self.save_weights = Config(
            "Save weights", save_weights, bool)

        # Base name for the .h5 weights file; train() overwrites it with a
        # name derived from the architecture and fine-tuning configuration.
        self.file_name = "kerasCNN"

        self.model = None
        self.trained = False

    def get_config(self):
        """Return configuration of classifier.

        Returns
        -------
        config : OrderedDict
            Current configs of classifier.
        """
        keras_config = OrderedDict()

        keras_config["Architecture"] = self.architecture
        keras_config["Learning rate"] = self.learning_rate
        keras_config["Momentum"] = self.momentum
        keras_config["Batch size"] = self.batch_size
        keras_config["Epochs"] = self.epochs
        keras_config["Fine Tuning rate"] = self.fine_tuning_rate
        keras_config["Transfer Learning"] = self.transfer_learning
        keras_config["Save weights"] = self.save_weights

        return keras_config

    def set_config(self, configs):
        """Update configuration of classifier.

        Parameters
        ----------
        configs : OrderedDict
            New configs of classifier.
        """
        # NOTE: the original built an OrderedDict of resolved values but
        # never stored them, so set_config was a no-op; apply each value.
        self.architecture = Config.nvl_config(
            configs["Architecture"], self.architecture)

        self.learning_rate = Config.nvl_config(
            configs["Learning rate"], self.learning_rate)

        self.momentum = Config.nvl_config(
            configs["Momentum"], self.momentum)

        self.batch_size = Config.nvl_config(
            configs["Batch size"], self.batch_size)

        self.epochs = Config.nvl_config(
            configs["Epochs"], self.epochs)

        self.fine_tuning_rate = Config.nvl_config(
            configs["Fine Tuning rate"], self.fine_tuning_rate)

        self.transfer_learning = Config.nvl_config(
            configs["Transfer Learning"], self.transfer_learning)

        self.save_weights = Config.nvl_config(
            configs["Save weights"], self.save_weights)

    def get_summary_config(self):
        """Return formatted summary of configuration.

        Returns
        -------
        summary : string
            Formatted string with summary of configuration.
        """
        keras_config = OrderedDict()

        keras_config[self.architecture.label] = self.architecture.value
        keras_config[self.learning_rate.label] = self.learning_rate.value
        keras_config[self.momentum.label] = self.momentum.value
        keras_config[self.batch_size.label] = self.batch_size.value
        keras_config[self.epochs.label] = self.epochs.value
        keras_config[self.fine_tuning_rate.label] = self.fine_tuning_rate.value
        keras_config[self.transfer_learning.label] = self.transfer_learning.value
        keras_config[self.save_weights.label] = self.save_weights.value

        summary = ''
        for config in keras_config:
            summary += "%s: %s\n" % (config, str(keras_config[config]))

        return summary

    def classify(self, dataset, test_dir, test_data):
        """Perform the classification.

        Parameters
        ----------
        dataset : string
            Path to image dataset.
        test_dir : string
            Subdirectory of dataset containing the images to classify.
        test_data : string
            Name of test data file (not used).

        Returns
        -------
        summary : list of string
            List of predicted classes for each instance in test data in
            ordered way.
        """
        predict_directory = File.make_path(dataset, test_dir)

        # Convert every .tif image to .png so Keras can read it.
        for root, dirs, files in os.walk(predict_directory, topdown=False):
            for name in files:
                print(os.path.join(root, name))
                base_path = os.path.splitext(os.path.join(root, name))[0]
                if os.path.splitext(os.path.join(root, name))[1].lower() == ".tif":
                    if os.path.isfile(base_path + ".png"):
                        print("A png file already exists for %s" % name)
                    # If a png is *NOT* present, create one from the tiff.
                    else:
                        outfile = base_path + ".png"
                        try:
                            im = Image.open(os.path.join(root, name))
                            print("Generating png for %s" % name)
                            im.thumbnail(im.size)
                            im.save(outfile, "PNG", quality=100)
                        except Exception as e:
                            print(e)

        # flow_from_directory expects one subdirectory per class; create a
        # single "png" pseudo-class directory for the images to predict.
        png_directory = os.path.join(predict_directory, "png")
        if not os.path.exists(png_directory):
            os.makedirs(png_directory)

        # Move the .png images inside the pseudo-class directory.
        for _, _, files in os.walk(predict_directory, topdown=False):
            for name in files:
                if name.endswith('.png'):
                    shutil.move(os.path.join(predict_directory, name),
                                os.path.join(png_directory, name))

        classify_datagen = ImageDataGenerator()

        classify_generator = classify_datagen.flow_from_directory(
            predict_directory,
            target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=1,
            shuffle=False,
            class_mode=None)

        validation_datagen = ImageDataGenerator()

        validation_generator = validation_datagen.flow_from_directory(
            dataset,
            target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=self.batch_size.value,
            shuffle=False,
            class_mode=None)

        # TODO - A better solution to num_classes - 1
        # (the dataset root contains one extra non-class directory).
        self.model = self.select_model_params(
            validation_generator.num_classes - 1)

        weights_path = "../models_checkpoints/" + self.file_name + ".h5"
        try:
            self.model.load_weights(weights_path)
        except Exception as e:
            raise IException("Can't load the model in " +
                             weights_path + str(e))

        output_classification = self.model.predict_generator(
            classify_generator, classify_generator.samples, verbose=2)

        # Winning class index per image, mapped to human-readable names.
        one_hot_output = np.argmax(output_classification, axis=1)

        return [CLASS_NAMES[class_index]
                for class_index in one_hot_output.tolist()]

    def train(self, dataset, training_data, force=False):
        """Perform the training of classifier.

        Parameters
        ----------
        dataset : string
            Path to image dataset.
        training_data : string
            Name of ARFF training file.
        force : boolean, optional, default = False
            If False don't perform new training if there is trained data.
        """
        # Select the .h5 filename from the fine-tuning configuration.
        # (The original tested transfer_learning.value — a boolean — against
        # 100/-1, which made the first two branches unreachable.)
        if self.fine_tuning_rate.value == 100:
            file_name = str(self.architecture.value) + \
                '_transfer_learning'
        elif self.fine_tuning_rate.value == -1:
            file_name = str(self.architecture.value) + \
                '_without_transfer_learning'
        else:
            file_name = str(self.architecture.value) + \
                '_fine_tunning_' + str(self.fine_tuning_rate.value)
        # Remember the name so classify() loads the weights trained here.
        self.file_name = file_name

        File.remove_dir(File.make_path(dataset, ".tmp"))

        train_generator, validation_generator = self.make_dataset(dataset)

        self.model = self.select_model_params(train_generator.num_classes)

        # compile the model
        self.model.compile(loss="categorical_crossentropy",
                           optimizer=optimizers.SGD(
                               lr=self.learning_rate.value,
                               momentum=self.momentum.value),
                           metrics=["accuracy"])

        # Save the best model during training, according to the conditions.
        # (self.save_weights is a Config object and always truthy — the
        # boolean lives in .value.  Also avoid passing [None] as callbacks.)
        callbacks = []
        if self.save_weights.value:
            callbacks.append(ModelCheckpoint(
                "../models_checkpoints/" + self.file_name + ".h5",
                monitor='val_acc', verbose=1, save_best_only=True,
                save_weights_only=False, mode='auto', period=1))

        # Train the model, validating on the held-out split produced by
        # make_dataset (the original validated on the training generator and
        # called train_test_split on a generator, discarding the result).
        self.model.fit_generator(
            train_generator,
            steps_per_epoch=train_generator.samples // self.batch_size.value,
            epochs=self.epochs.value,
            callbacks=callbacks,
            validation_data=validation_generator,
            validation_steps=validation_generator.samples // self.batch_size.value)

        if self.save_weights.value:
            self.model.save_weights(
                "../models_checkpoints/" + self.file_name + ".h5")

        self.trained = True

    def must_train(self):
        """Return if classifier must be trained.

        Returns
        -------
        True if there is no trained model yet.
        """
        return not self.trained

    def must_extract_features(self):
        """Return if classifier must be extracted features.

        Returns
        -------
        False
        """
        return False

    def select_model_params(self, num_classes):
        """Build the CNN for the configured architecture.

        Parameters
        ----------
        num_classes : integer
            Number of output classes of the final softmax layer.

        Returns
        -------
        model : keras.models.Model
            Base network (optionally pre-trained/frozen) topped with the
            custom dense classification head.

        Raises
        ------
        IException
            If the configured architecture name is unknown.
        """
        architectures = {
            "Xception": applications.Xception,
            "VGG16": applications.VGG16,
            "VGG19": applications.VGG19,
            "ResNet50": applications.ResNet50,
            "InceptionV3": applications.InceptionV3,
            "MobileNet": applications.MobileNet,
        }
        try:
            base_constructor = architectures[self.architecture.value]
        except KeyError:
            # The original left `model` unbound here and crashed later
            # with a NameError; fail with an explicit message instead.
            raise IException(
                "Unknown architecture: " + str(self.architecture.value))

        if self.fine_tuning_rate.value != -1:
            # Transfer learning / fine-tuning: start from ImageNet weights
            # and freeze the first fine_tuning_rate% of the layers.
            model = base_constructor(
                weights="imagenet", include_top=False,
                input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))
            # Use float division: under Python 2, value/100 truncates to 0
            # for every rate below 100, freezing nothing.
            frozen = int(len(model.layers) *
                         (self.fine_tuning_rate.value / 100.0))
            for layer in model.layers[:frozen]:
                layer.trainable = False
        else:  # without transfer learning: random init, all layers trainable
            model = base_constructor(
                weights=None, include_top=False,
                input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))
            for layer in model.layers:
                layer.trainable = True

        # Adding custom Layers
        new_custom_layers = model.output
        new_custom_layers = Flatten()(new_custom_layers)
        new_custom_layers = Dense(1024, activation="relu")(new_custom_layers)
        new_custom_layers = Dropout(0.5)(new_custom_layers)
        new_custom_layers = Dense(1024, activation="relu")(new_custom_layers)
        predictions = Dense(num_classes,
                            activation="softmax")(new_custom_layers)

        # creating the final model
        model = Model(inputs=model.input, outputs=predictions)

        return model

    def make_dataset(self, dataset):
        """Split the dataset into train/validation trees of symbolic links.

        Parameters
        ----------
        dataset : string
            Path to image dataset; one subdirectory per class.

        Returns
        -------
        train_generator, validation_generator : DirectoryIterator
            Keras generators over the train and validation splits.
        """
        KERAS_DATASET_DIR_NAME = "keras_dataset"
        KERAS_DIR_TRAIN_NAME = "train"
        KERAS_DIR_VALIDATION_NAME = "validation"
        PERC_TRAIN = 60

        # Create keras_dataset/, keras_dataset/train/ and
        # keras_dataset/validation/ under the dataset root.
        for path in (
                File.make_path(dataset, KERAS_DATASET_DIR_NAME),
                File.make_path(dataset, KERAS_DATASET_DIR_NAME,
                               KERAS_DIR_TRAIN_NAME),
                File.make_path(dataset, KERAS_DATASET_DIR_NAME,
                               KERAS_DIR_VALIDATION_NAME)):
            if not os.path.exists(path):
                os.makedirs(path)

        for root, dirs, files in os.walk(dataset, topdown=False):
            # shuffle so the split is random
            random.shuffle(files)
            quant_files = len(files)
            class_name = root.split("/")[-1]

            # Skip the keras_dataset tree itself and the dataset root.
            if KERAS_DATASET_DIR_NAME in root.split("/"):
                continue
            if class_name == dataset.split("/")[-1]:
                continue

            train_class_dir = File.make_path(
                dataset, KERAS_DATASET_DIR_NAME,
                KERAS_DIR_TRAIN_NAME, class_name)
            validation_class_dir = File.make_path(
                dataset, KERAS_DATASET_DIR_NAME,
                KERAS_DIR_VALIDATION_NAME, class_name)
            if not os.path.exists(train_class_dir):
                os.makedirs(train_class_dir)
            if not os.path.exists(validation_class_dir):
                os.makedirs(validation_class_dir)

            # First PERC_TRAIN% of the shuffled files go to train, the rest
            # to validation.  Multiply before dividing: the original
            # (quant_files / 100) * PERC_TRAIN floors to 0 under integer
            # division whenever a class has fewer than 100 files.  Using
            # enumerate also fixes the original counter, which only advanced
            # when a new symlink was actually created.
            train_limit = (quant_files * PERC_TRAIN) // 100
            for file_index, file in enumerate(files):
                if file_index <= train_limit:
                    link_path = File.make_path(
                        dataset, KERAS_DATASET_DIR_NAME,
                        KERAS_DIR_TRAIN_NAME, class_name, file)
                else:
                    link_path = File.make_path(
                        dataset, KERAS_DATASET_DIR_NAME,
                        KERAS_DIR_VALIDATION_NAME, class_name, file)
                if not os.path.islink(link_path):
                    os.symlink(File.make_path(root, file), link_path)

        train_datagen = ImageDataGenerator()

        train_generator = train_datagen.flow_from_directory(
            File.make_path(dataset, KERAS_DATASET_DIR_NAME,
                           KERAS_DIR_TRAIN_NAME),
            target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=self.batch_size.value,
            shuffle=True,
            class_mode="categorical")

        validation_datagen = ImageDataGenerator()

        validation_generator = validation_datagen.flow_from_directory(
            File.make_path(dataset, KERAS_DATASET_DIR_NAME,
                           KERAS_DIR_VALIDATION_NAME),
            target_size=(IMG_HEIGHT, IMG_WIDTH),
            batch_size=self.batch_size.value,
            shuffle=True,
            class_mode="categorical")

        return train_generator, validation_generator