Image Classification using Data Augmentation

We want to build a Convolutional Neural Network to classify images. We will then try to improve the model by implementing data augmentation.

For this we are going to use the CIFAR-10 dataset. It consists of 60,000 low-resolution 32×32 color images divided into 10 categories (classes), with 6,000 images per class:

  • Airplanes
  • Cars
  • Cats
  • Birds
  • Deer
  • Dogs
  • Frogs
  • Horses
  • Ships
  • Trucks

Data Source: https://www.cs.toronto.edu/~kriz/cifar.html

1- Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn 

Load dataset and Train/Test split

from keras.datasets import cifar10

# load_data() returns two (images, labels) pairs, unpacked here as the train and test sets.
(X_train, y_train) , (X_test, y_test) = cifar10.load_data()
X_train.shape
(50000, 32, 32, 3)
  • We have (number of samples, height (px), width (px), color channels (3))
X_test.shape
(10000, 32, 32, 3)
y_train.shape
(50000, 1)
y_test.shape
(10000, 1)

2 – Data visualization

# Show one hand-picked training image together with its label.
img = 20547
plt.imshow(X_train[img])
print(y_train[img])

# Then draw a grid of randomly chosen training images, each titled with its label.
W_grid = 12
L_grid = 12
len_train = len(X_train)

fig, axes = plt.subplots(L_grid, W_grid, figsize=(20, 20))
axes = axes.ravel()  # flatten the 2-D array of axes so a single loop can fill it

for ax in axes:
    index = np.random.randint(0, len_train)  # random position in the training set
    ax.imshow(X_train[index])
    ax.set_title(y_train[index])
    ax.axis('off')

plt.subplots_adjust(hspace=0.2)
len_train
50000

3- Data preparation

# Cast both image arrays to float32 so later scaling yields fractional values.
X_train, X_test = (images.astype('float32') for images in (X_train, X_test))
# Number of categories in the dataset
n_cat = 10
y_train
array([[6],
       [9],
       [9],
       ...,
       [9],
       [1],
       [1]], dtype=uint8)
  • We can see which class each image belongs to

Let’s convert the labels to categorical (one-hot) data.

import keras

# One-hot encode the integer training labels: one column per category.
y_train = keras.utils.to_categorical(y_train, num_classes=n_cat)
y_train
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)
# One-hot encode the integer test labels the same way as the training labels.
y_test = keras.utils.to_categorical(y_test, num_classes=n_cat)
y_test
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)

Normalize

X_train[0]
array([[[ 59.,  62.,  63.],
        [ 43.,  46.,  45.],
        [ 50.,  48.,  43.],
        ...,
        [158., 132., 108.],
        [152., 125., 102.],
        [148., 124., 103.]],

       [[ 16.,  20.,  20.],
        [  0.,   0.,   0.],
        [ 18.,   8.,   0.],
        ...,
        [123.,  88.,  55.],
        [119.,  83.,  50.],
        [122.,  87.,  57.]],

       [[ 25.,  24.,  21.],
        [ 16.,   7.,   0.],
        [ 49.,  27.,   8.],
        ...,
        [118.,  84.,  50.],
        [120.,  84.,  50.],
        [109.,  73.,  42.]],

       ...,

       [[208., 170.,  96.],
        [201., 153.,  34.],
        [198., 161.,  26.],
        ...,
        [160., 133.,  70.],
        [ 56.,  31.,   7.],
        [ 53.,  34.,  20.]],

       [[180., 139.,  96.],
        [173., 123.,  42.],
        [186., 144.,  30.],
        ...,
        [184., 148.,  94.],
        [ 97.,  62.,  34.],
        [ 83.,  53.,  34.]],

       [[177., 144., 116.],
        [168., 129.,  94.],
        [179., 142.,  87.],
        ...,
        [216., 184., 140.],
        [151., 118.,  84.],
        [123.,  92.,  72.]]], dtype=float32)

We can see a bunch of numbers giving the pixel values (from 0 to 255), so we are going to normalize them to the range 0–1.

# Scale pixel intensities from the 0-255 range down to 0-1.
X_train, X_test = X_train / 255, X_test / 255

X_train[5]
array([[[0.62352943, 0.4       , 0.39607844],
        [0.5882353 , 0.35686275, 0.37254903],
        [0.6       , 0.37254903, 0.38039216],
        ...,
        [0.35686275, 0.2784314 , 0.21960784],
        [0.2901961 , 0.24705882, 0.21568628],
        [0.29803923, 0.22745098, 0.21568628]],

       [[0.5568628 , 0.29411766, 0.26666668],
        [0.57254905, 0.28235295, 0.25882354],
        [0.60784316, 0.29803923, 0.25490198],
        ...,
        [0.49803922, 0.4117647 , 0.2784314 ],
        [0.47843137, 0.43529412, 0.3647059 ],
        [0.3372549 , 0.27058825, 0.23921569]],

       [[0.42745098, 0.2627451 , 0.29411766],
        [0.3882353 , 0.22745098, 0.23529412],
        [0.4117647 , 0.23137255, 0.20392157],
        ...,
        [0.5372549 , 0.4392157 , 0.3137255 ],
        [0.6392157 , 0.5176471 , 0.4117647 ],
        [0.3647059 , 0.28235295, 0.2784314 ]],

       ...,

       [[0.95686275, 0.5058824 , 0.27450982],
        [0.9411765 , 0.48235294, 0.25490198],
        [0.94509804, 0.47843137, 0.25490198],
        ...,
        [0.6117647 , 0.16470589, 0.05882353],
        [0.7019608 , 0.23137255, 0.10196079],
        [0.78431374, 0.28627452, 0.14117648]],

       [[0.9647059 , 0.52156866, 0.2901961 ],
        [0.9529412 , 0.5019608 , 0.28235295],
        [0.9529412 , 0.49803922, 0.27450982],
        ...,
        [0.63529414, 0.17254902, 0.05490196],
        [0.69803923, 0.21960784, 0.08627451],
        [0.7529412 , 0.25490198, 0.10588235]],

       [[0.9647059 , 0.54509807, 0.32156864],
        [0.9529412 , 0.52156866, 0.30588236],
        [0.95686275, 0.5176471 , 0.3019608 ],
        ...,
        [0.6509804 , 0.18431373, 0.05490196],
        [0.6784314 , 0.2       , 0.06666667],
        [0.7137255 , 0.22352941, 0.07450981]]], dtype=float32)
X_train.shape
(50000, 32, 32, 3)
# Per-image shape: everything after the leading sample axis.
input_shape = X_train.shape[1:]
input_shape
(32, 32, 3)

4 – Training the model

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Dense, Flatten, Dropout
from keras.callbacks import TensorBoard

Convolutional Neural Network building

# Two convolutional stages (64 filters, then 128), each followed by 2x2 max
# pooling and dropout, feeding two 1024-unit dense layers and a 10-way softmax.
model = Sequential()

model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.4))  # during training, 40% of the units are randomly turned off in each iteration

model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.4))

model.add(Flatten())

model.add(Dense(units=1024, activation='relu'))
model.add(Dense(units=1024, activation='relu'))

# One output unit per class; softmax turns the outputs into class probabilities.
model.add(Dense(units=10, activation='softmax'))

# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported argument name.
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)
history = model.fit(X_train, y_train, batch_size=32, epochs=2, shuffle=True)
Epoch 1/2
1563/1563 [==============================] - 552s 351ms/step - loss: 1.9038 - accuracy: 0.2989
Epoch 2/2
1563/1563 [==============================] - 553s 354ms/step - loss: 1.2795 - accuracy: 0.5521
history.history["accuracy"]
[0.4060400128364563, 0.5683599710464478]

5 – Testing and saving the model

# evaluate() returns the loss followed by the compiled metrics, so index 1 is the accuracy.
evaluation = model.evaluate(X_test, y_test)

print(f'Accuracy: {evaluation[1]}')
313/313 [==============================] - 38s 122ms/step - loss: 1.0831 - accuracy: 0.6546
Accuracy: 0.6546000242233276
  • We get 65.5% accuracy on the test set!

Predicting categories

# Sequential.predict_classes() has been removed from recent Keras/TensorFlow
# releases; the argmax over the softmax outputs gives the same class indices.
pred_classes = np.argmax(model.predict(X_test), axis=1)
pred_classes
array([3, 1, 8, ..., 5, 1, 7])
y_test
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)
# Collapse the one-hot rows back to integer class indices so they can be compared with pred_classes.
y_test = np.argmax(y_test, axis=1)
y_test
array([3, 8, 8, ..., 5, 1, 7])

Visualization test data

# 8x8 grid of the first test images, each titled with its predicted and true class index.
L = 8
W = 8
fig, axes = plt.subplots(L, W, figsize=(20, 20))
axes = axes.ravel()  # flatten so the axes can be walked in a single loop

for i, ax in enumerate(axes):
    ax.imshow(X_test[i])
    ax.set_title('Prediction = {}\n True = {}'.format(pred_classes[i], y_test[i]))
    ax.axis('off')

plt.subplots_adjust(wspace=0.5, hspace=0.15)

Confusion Matrix

# The file's plain `import seaborn` never binds the `sns` alias used below,
# so import it under that name here.
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_test, pred_classes)
cm
plt.figure(figsize=(10, 10))
# fmt='d' prints the integer counts in full instead of the default short float format.
sns.heatmap(cm, annot=True, fmt='d')

Saving data

import os

# Persist the trained network under ./saved_models, creating the folder if it is missing.
directory = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(directory, exist_ok=True)

model_path = os.path.join(directory, 'cifar10_trained_model.h5')
model.save(model_path)

6 – Data Augmentation

We can try to improve our model by training it with data augmentation:

  • Image Augmentation is the process of artificially increasing the variations of the images in the datasets by flipping, enlarging, rotating the original images.
  • Augmentations also include shifting and changing the brightness of the images.

Source: https://machinelearningmastery.com/how-to-configure-image-data-augmentation-when-training-deep-learning-neural-networks/

import keras
from keras.datasets import cifar10

# Reload the raw dataset. NOTE: this rebinds X_train/X_test/y_train/y_test, so the
# /255 scaling and one-hot encoding applied earlier no longer apply to these arrays.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train, X_test = (images.astype('float32') for images in (X_train, X_test))
X_train.shape
(50000, 32, 32, 3)
n = 8  # number of training images used for the augmentation preview
X_train_sample = X_train[:n]
X_train_sample.shape
(8, 32, 32, 3)
from keras.preprocessing.image import ImageDataGenerator, img_to_array
import numpy as np

# Other augmentations that can be previewed by swapping the generator below:
#   ImageDataGenerator(rotation_range = 40)        # change rotation
#   ImageDataGenerator(vertical_flip=True)         # change vertical flip
#   ImageDataGenerator(height_shift_range=0.8)     # change height shift
dataget_train = ImageDataGenerator(brightness_range=(1, 3))  # change brightness
dataget_train.fit(X_train_sample)

# Draw a single augmented batch and show its n images side by side.
fig = plt.figure(figsize=(20, 2))
x_batch = next(dataget_train.flow(X_train_sample, batch_size=n))
for i in range(n):
    ax = fig.add_subplot(1, n, i + 1)
    ax.imshow(img_to_array(x_batch[i]).astype('uint8'))
fig.suptitle('Augmented images (change brightness from 1 to 3)')
plt.show()

Training model with Data Augmentation

from keras.preprocessing.image import ImageDataGenerator

# The dataset was reloaded for this section, so X_train holds raw 0-255 pixels and
# y_train holds integer labels of shape (N, 1). The model was built for 0-1 inputs
# and is compiled with categorical_crossentropy, which needs one-hot targets.
# Feeding the raw arrays makes the loss diverge to nan, so prepare them first.
X_train_scaled = X_train / 255
y_train_onehot = keras.utils.to_categorical(y_train, num_classes=n_cat)

datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.4,
    horizontal_flip=True,
    vertical_flip=True,
)

# datagen.fit() is only required for the featurewise/ZCA options, none of which
# are enabled here, so it is skipped. Model.fit() accepts the generator directly;
# fit_generator() is deprecated.
model.fit(datagen.flow(X_train_scaled, y_train_onehot, batch_size=32), epochs=2)
Epoch 1/2
1563/1563 [==============================] - 553s 352ms/step - loss: 2854752474026213376.0000 - accuracy: 0.0019
Epoch 2/2
1563/1563 [==============================] - 482s 308ms/step - loss: nan - accuracy: 0.9398
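Note: the 93% accuracy shown above is not a valid result. The loss diverged to nan, which happens when the images are fed in the raw 0–255 range and the labels are not one-hot encoded, so the reported accuracy is meaningless. The model must be trained on normalized images and one-hot labels to obtain a real accuracy figure.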

Save the model

# Persist the augmentation-trained network under ./saved_models, creating the folder if it is missing.
directory = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(directory, exist_ok=True)

model_path = os.path.join(directory, 'cifar10_trained_model_Augmentation.h5')
model.save(model_path)