#!/usr/bin/python
# -*- coding: utf-8 -*-
#
"""
    Runs ImageNet Convolutional Neural Network implemented in software Caffe.
    This module only implements the classification. The network must be trained previously using caffe.
    
    Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton, Imagenet classification with deep convolutional neural networks, Advances in neural information processing systems, 2012.
    Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor, Caffe: Convolutional Architecture for Fast Feature Embedding, arXiv preprint arXiv:1408.5093, 2014.
    
    Name: cnn_caffe.py
    Author: Alessandro dos Santos Ferreira ( santosferreira.alessandro@gmail.com )
"""

# Make sure that caffe is on the python path:
caffe_root = '/var/tmp/caffe/' 
import sys
sys.path.insert(0, caffe_root + 'python')
import caffe

import cv2
import numpy as np
import os

from collections import OrderedDict

from util.config import Config
from util.file_utils import File
from util.utils import TimeUtils

from classifier import Classifier

class CNNCaffe(Classifier):
    """Classifier that runs a Caffe convolutional neural network.

    The network must have been trained previously with Caffe; this class only
    performs classification, either through the Python interface directly or
    through an LMDB backend built on the fly (see CREATE_LMDB).
    """

    # I tried to use the default python interface to perform the classification as explained here:
    # http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
    # But for some unknown reason it didn't work as expected, it generated poor results.
    # I kept the implementation anyway; to use it, CREATE_LMDB must be set to False.
    # Otherwise another approach that generates optimal results will be used.
    CREATE_LMDB = True

    def __init__(self):
        """Constructor.

        Loads the default paths for the model definition, trained weights,
        mean image and labels file, then builds the network.
        """
        self.model_def = Config("ModelDef", '../examples/deploy.prototxt', str)
        self.model_weights = Config("ModelWeights", '../examples/caffenet_train_iter_15000.caffemodel', str)
        self.mean_image = Config("MeanImage", '../examples/imagenet_mean.binaryproto', str)
        self.labels_file = Config("LabelsFile", '../examples/labels.txt', str)

        self._create_net()

    def _create_net(self):
        """Instantiate the Caffe net and the input transformer from the current configs."""
        self.net = caffe.Net(self.model_def.value,        # defines the structure of the model
                             self.model_weights.value,    # contains the trained weights
                             caffe.TEST)                  # use test mode (e.g., don't perform dropout)

        # create transformer for the input called 'data'
        self.transformer = caffe.io.Transformer({'data': self.net.blobs['data'].data.shape})

        self.transformer.set_transpose('data', (2, 0, 1))     # move image channels to outermost dimension
        self.transformer.set_raw_scale('data', 255)           # rescale from [0, 1] to [0, 255]
        self.transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR

    def get_config(self):
        """Return configuration of classifier.

        Returns
        -------
        config : OrderedDict
            Current configs of classifier.
        """
        caffe_config = OrderedDict()

        caffe_config["model_def"] = self.model_def
        caffe_config["model_weights"] = self.model_weights
        caffe_config["mean_image"] = self.mean_image
        caffe_config["labels_file"] = self.labels_file

        return caffe_config

    def set_config(self, configs):
        """Update configuration of classifier and rebuild the net.

        Parameters
        ----------
        configs : OrderedDict
            New configs of classifier.
        """
        self.model_def = Config.nvl_config(configs["model_def"], self.model_def)
        self.model_weights = Config.nvl_config(configs["model_weights"], self.model_weights)
        self.mean_image = Config.nvl_config(configs["mean_image"], self.mean_image)
        self.labels_file = Config.nvl_config(configs["labels_file"], self.labels_file)

        self._create_net()

    def get_summary_config(self):
        """Return formatted summary of configuration.

        Returns
        -------
        summary : string
            Formatted string with summary of configuration.
        """
        caffe_config = OrderedDict()

        caffe_config[self.model_def.label] = self.model_def.value
        caffe_config[self.model_weights.label] = self.model_weights.value
        caffe_config[self.mean_image.label] = self.mean_image.value
        caffe_config[self.labels_file.label] = self.labels_file.value

        summary = ''
        for label, value in caffe_config.items():
            summary += "%s: %s\n" % (label, str(value))

        return summary

    @staticmethod
    def _save_resized_jpeg(filepath, image):
        """Pad the segment into a 512x512 canvas, crop the top-left 256x256
        region and save it next to the original with a .jpeg extension.
        """
        resized_image = np.zeros((512, 512, image.shape[2]), dtype="uint8")
        resized_image[0:image.shape[0], 0:image.shape[1]] = image[:, :]
        resized_image = resized_image[0:256, 0:256]
        cv2.imwrite(filepath.replace('.tif', '.jpeg'), resized_image)

    def _load_mean_pixel(self, test_dir):
        """Convert mean.binaryproto to mean.npy inside *test_dir* and return
        the mean (BGR) pixel values averaged over all pixels.
        """
        blob = caffe.proto.caffe_pb2.BlobProto()
        # read the binaryproto with a context manager so the handle is not leaked
        with open(self.mean_image.value, 'rb') as mean_file:
            blob.ParseFromString(mean_file.read())
        np.save(File.make_path(test_dir, 'mean.npy'),
                np.array(caffe.io.blobproto_to_array(blob))[0])

        mu = np.load(File.make_path(test_dir, 'mean.npy'))
        return mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values

    def classify(self, dataset, test_dir, test_data, image):
        """Perform the classification.

        Parameters
        ----------
        dataset : string
            Path to image dataset.
        test_dir : string
            Name of test data directory.
        test_data : string
            Not used.
        image : opencv image
            Not used.

        Returns
        -------
        summary : list of string
            List of predicted classes for each instance in test data in ordered way.
        """
        # if CNNCaffe.CREATE_LMDB = True use the alternative approach.
        if CNNCaffe.CREATE_LMDB:
            return self._classify_lmdb(dataset, test_dir, test_data)

        test_dir = File.make_path(dataset, test_dir)

        classes = []
        labels = np.loadtxt(self.labels_file.value, dtype=str)

        images = sorted(os.listdir(File.make_path(test_dir)))

        # subtract the dataset-mean value in each channel
        self.transformer.set_mean('data', self._load_mean_pixel(test_dir))

        self.net.blobs['data'].reshape(1,          # batch size
                                       3,          # 3-channel (BGR) images
                                       227, 227)   # image size is 227x227

        for im in images:
            filepath = File.make_path(test_dir, im)
            image = cv2.imread(filepath)

            # resize the segment and save it in jpeg format
            self._save_resized_jpeg(filepath, image)

            # NOTE(review): the original file path is loaded here, not the
            # resized .jpeg written just above — confirm this is intended.
            input_image = caffe.io.load_image(filepath)
            transformed_image = self.transformer.preprocess('data', input_image)

            # copy the image data into the memory allocated for the net
            self.net.blobs['data'].data[...] = [transformed_image]

            # perform classification
            output = self.net.forward()

            # the output probability vector for the each image in the batch
            prediction = output['prob'][0]
            print(["%0.4f" % pr for pr in prediction])

            # append the class with max probability.
            classes.append(labels[prediction.argmax()])

        return classes

    def _classify_lmdb(self, dataset, test_dir, test_data):
        """Perform the alternative classification creating LMDB backend.

        Parameters
        ----------
        dataset : string
            Path to image dataset.
        test_dir : string
            Name of test data directory.
        test_data : string
            Not used.

        Returns
        -------
        summary : list of string
            List of predicted classes for each instance in test data in ordered way.
        """
        test_dir = File.make_path(dataset, test_dir)

        classes = []
        labels = np.loadtxt(self.labels_file.value, dtype=str)

        images = sorted(os.listdir(File.make_path(test_dir)))

        # create LMDB listfile (the label 0 is a placeholder; presumably it is
        # ignored at test time — verify against convert_imageset usage)
        with open(File.make_path(test_dir, 'listfile.txt'), 'w') as listfile:
            for im in images:
                filepath = File.make_path(test_dir, im)

                # resize the segment and save in jpeg format
                self._save_resized_jpeg(filepath, cv2.imread(filepath))

                # append imagename in listfile
                listfile.write("%s %d\n" % (im.replace('.tif', '.jpeg'), 0))

        # create LMDB backend to be used as source of data
        from subprocess import call

        call([caffe_root + 'build/tools/convert_imageset',
              File.make_path(test_dir, ''),
              File.make_path(test_dir, 'listfile.txt'),
              File.make_path(test_dir, 'lmdb')])

        # read model_def
        with open(self.model_def.value, 'r') as model_def:
            prototxt = model_def.read()

        # change structure of layer data so it reads from the LMDB just built
        layers = prototxt.split('layer')
        layers[1] = (' {\n'
                     '    name: "data"\n'
                     '    type: "Data"\n'
                     '    top: "data"\n'
                     '    top: "label"\n'
                     '    transform_param {\n'
                     '        mirror: false\n'
                     '        crop_size: 227\n'
                     '        mean_file: "' + self.mean_image.value + '"\n'
                     '    }\n'
                     '    data_param {\n'
                     '        source: "' + File.make_path(test_dir, 'lmdb') + '"\n'
                     '        batch_size: 1\n'
                     '        backend: LMDB\n'
                     '    }\n'
                     '}\n')
        prototxt = 'layer'.join(layers)

        # create new model_def
        new_model_def_path = File.make_path(test_dir, 'deploy.prototxt')
        with open(new_model_def_path, 'w') as new_model_def:
            new_model_def.write(prototxt)

        net = caffe.Net(new_model_def_path,         # defines the structure of the model
                        self.model_weights.value,   # contains the trained weights
                        caffe.TEST)                 # use test mode (e.g., don't perform dropout)

        for im in images:
            # perform classification; the data layer feeds one image per forward pass
            output = net.forward()

            # the output probability vector for the first image in the batch
            prediction = output['prob'][0]
            print(["%0.4f" % pr for pr in prediction])

            classes.append(labels[prediction.argmax()])

        return classes