We want to build a Convolutional Neural Network to classify some images. We will also try to improve the model by applying data augmentation.

For this we are going to use the CIFAR-10 dataset. It consists of 60,000 low-resolution 32×32 color images divided into 10 categories (classes), with 6,000 images in each class.

Airplanes
Cars
Cats
Birds
Deer
Dogs
Frogs
Horses
Ships
Trucks

Data Source: https://www.cs.toronto.edu/~kriz/cifar.html

1- Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn

Load dataset and Train/Test split

from keras.datasets import cifar10

# cifar10.load_data() returns the train split and the test split, each as an
# (images, labels) pair.
train_split, test_split = cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

X_train.shape

(50000, 32, 32, 3)

We have (number of samples, height (px), width (px), color channels (3))

X_test.shape

(10000, 32, 32, 3)

y_train.shape

(50000, 1)

y_test.shape

(10000, 1)

2 – Data visualization

# Display a single training image and print the label stored for it.
img = 20547
sample_image, sample_label = X_train[img], y_train[img]
plt.imshow(sample_image)
print(sample_label)

# Draw a 12 x 12 grid of randomly chosen training images, each titled with
# the label stored for it.
L_grid = 12
W_grid = 12
len_train = len(X_train)

fig, axes = plt.subplots(L_grid, W_grid, figsize=(20, 20))
axes = axes.ravel()  # flatten the 2-D array of axes so it can be walked linearly

for cell in axes:
    index = np.random.randint(0, len_train)  # random sample position in [0, len_train)
    cell.imshow(X_train[index])
    cell.set_title(y_train[index])
    cell.axis('off')

plt.subplots_adjust(hspace=0.2)

len_train

3- Data preparation

# Cast both image arrays to float32 and record the number of target classes.
X_train, X_test = (images.astype('float32') for images in (X_train, X_test))

n_cat = 10  # CIFAR-10 has ten categories

y_train

array([[6],
       [9],
       [9],
       ...,
       [9],
       [1],
       [1]], dtype=uint8)

Each entry is the class index that the corresponding image belongs to.

Let’s convert the labels to categorical (one-hot encoded) data.

import keras

# One-hot encode the training labels: one column per category.
y_train = keras.utils.to_categorical(y_train, num_classes=n_cat)
y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)

# One-hot encode the test labels with the same number of categories.
y_test = keras.utils.to_categorical(y_test, num_classes=n_cat)
y_test

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)

Normalize

X_train[0]

array([[[ 59.,  62.,  63.],
        [ 43.,  46.,  45.],
        [ 50.,  48.,  43.],
        ...,
        [158., 132., 108.],
        [152., 125., 102.],
        [148., 124., 103.]],

       [[ 16.,  20.,  20.],
        [  0.,   0.,   0.],
        [ 18.,   8.,   0.],
        ...,
        [123.,  88.,  55.],
        [119.,  83.,  50.],
        [122.,  87.,  57.]],

       [[ 25.,  24.,  21.],
        [ 16.,   7.,   0.],
        [ 49.,  27.,   8.],
        ...,
        [118.,  84.,  50.],
        [120.,  84.,  50.],
        [109.,  73.,  42.]],

       ...,

       [[208., 170.,  96.],
        [201., 153.,  34.],
        [198., 161.,  26.],
        ...,
        [160., 133.,  70.],
        [ 56.,  31.,   7.],
        [ 53.,  34.,  20.]],

       [[180., 139.,  96.],
        [173., 123.,  42.],
        [186., 144.,  30.],
        ...,
        [184., 148.,  94.],
        [ 97.,  62.,  34.],
        [ 83.,  53.,  34.]],

       [[177., 144., 116.],
        [168., 129.,  94.],
        [179., 142.,  87.],
        ...,
        [216., 184., 140.],
        [151., 118.,  84.],
        [123.,  92.,  72.]]], dtype=float32)

We can see a bunch of numbers giving the pixel values (from 0 to 255), so we are going to normalize them to the range 0–1.

# Rescale pixel intensities from the 0-255 range to 0-1.
PIXEL_MAX = 255
X_train, X_test = X_train / PIXEL_MAX, X_test / PIXEL_MAX

X_train[5]

array([[[0.62352943, 0.4       , 0.39607844],
        [0.5882353 , 0.35686275, 0.37254903],
        [0.6       , 0.37254903, 0.38039216],
        ...,
        [0.35686275, 0.2784314 , 0.21960784],
        [0.2901961 , 0.24705882, 0.21568628],
        [0.29803923, 0.22745098, 0.21568628]],

       [[0.5568628 , 0.29411766, 0.26666668],
        [0.57254905, 0.28235295, 0.25882354],
        [0.60784316, 0.29803923, 0.25490198],
        ...,
        [0.49803922, 0.4117647 , 0.2784314 ],
        [0.47843137, 0.43529412, 0.3647059 ],
        [0.3372549 , 0.27058825, 0.23921569]],

       [[0.42745098, 0.2627451 , 0.29411766],
        [0.3882353 , 0.22745098, 0.23529412],
        [0.4117647 , 0.23137255, 0.20392157],
        ...,
        [0.5372549 , 0.4392157 , 0.3137255 ],
        [0.6392157 , 0.5176471 , 0.4117647 ],
        [0.3647059 , 0.28235295, 0.2784314 ]],

       ...,

       [[0.95686275, 0.5058824 , 0.27450982],
        [0.9411765 , 0.48235294, 0.25490198],
        [0.94509804, 0.47843137, 0.25490198],
        ...,
        [0.6117647 , 0.16470589, 0.05882353],
        [0.7019608 , 0.23137255, 0.10196079],
        [0.78431374, 0.28627452, 0.14117648]],

       [[0.9647059 , 0.52156866, 0.2901961 ],
        [0.9529412 , 0.5019608 , 0.28235295],
        [0.9529412 , 0.49803922, 0.27450982],
        ...,
        [0.63529414, 0.17254902, 0.05490196],
        [0.69803923, 0.21960784, 0.08627451],
        [0.7529412 , 0.25490198, 0.10588235]],

       [[0.9647059 , 0.54509807, 0.32156864],
        [0.9529412 , 0.52156866, 0.30588236],
        [0.95686275, 0.5176471 , 0.3019608 ],
        ...,
        [0.6509804 , 0.18431373, 0.05490196],
        [0.6784314 , 0.2       , 0.06666667],
        [0.7137255 , 0.22352941, 0.07450981]]], dtype=float32)

X_train.shape

(50000, 32, 32, 3)

# Per-image input shape: everything after the leading sample axis.
_, *image_dims = X_train.shape
input_shape = tuple(image_dims)
input_shape

(32, 32, 3)

4 – Training the model

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Dense, Flatten, Dropout
from keras.callbacks import TensorBoard

Convolutional Neural Network building

# Two convolutional stages (64 then 128 filters), each followed by max pooling
# and dropout, feeding two 1024-unit dense layers and a 10-way softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.4),  # during training, 40% of the activations are randomly turned off

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.4),

    Flatten(),
    Dense(units=1024, activation='relu'),
    Dense(units=1024, activation='relu'),
    Dense(units=10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',  # labels are one-hot encoded
    # `learning_rate` is the supported argument name; the older `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=keras.optimizers.RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

# Keep the returned History object so per-epoch metrics can be inspected.
history = model.fit(X_train, y_train, batch_size=32, epochs=2, shuffle=True)

Epoch 1/2
1563/1563 [==============================] - 552s 351ms/step - loss: 1.9038 - accuracy: 0.2989
Epoch 2/2
1563/1563 [==============================] - 553s 354ms/step - loss: 1.2795 - accuracy: 0.5521

history.history["accuracy"]

[0.4060400128364563, 0.5683599710464478]

5 – Testing and saving the model

# Score the trained network on the test set and report its accuracy.
evaluation = model.evaluate(X_test, y_test)
test_accuracy = evaluation[1]  # index 1 is the 'accuracy' metric; index 0 is the loss
print(f'Accuracy: {test_accuracy}')

313/313 [==============================] - 38s 122ms/step - loss: 1.0831 - accuracy: 0.6546
Accuracy: 0.6546000242233276

We have 65.5% of Accuracy!!

Predicting categories

# `Sequential.predict_classes` has been removed from recent Keras/TensorFlow
# releases; the argmax over the softmax output yields the same class indices.
pred_classes = np.argmax(model.predict(X_test), axis=-1)
pred_classes

array([3, 1, 8, ..., 5, 1, 7])

y_test

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]], dtype=float32)

# Collapse the one-hot test labels back to integer class indices.
y_test = np.argmax(y_test, axis=1)
y_test

array([3, 8, 8, ..., 5, 1, 7])

Visualization test data

# Show the first 64 test images in an 8 x 8 grid, titled with the predicted
# and the true class index.
L = 8
W = 8
fig, axes = plt.subplots(L, W, figsize=(20, 20))
axes = axes.ravel()

for i, panel in enumerate(axes):
    panel.imshow(X_test[i])
    panel.set_title(f'Prediction = {pred_classes[i]}\n True = {y_test[i]}')
    panel.axis('off')

plt.subplots_adjust(wspace=0.5, hspace=0.15)

Confusion Matrix

from sklearn.metrics import confusion_matrix
import seaborn as sns  # the file only does `import seaborn`, so `sns` must be bound here

# Compare the true class indices with the predicted ones and plot the counts.
cm = confusion_matrix(y_test, pred_classes)
plt.figure(figsize=(10, 10))
# fmt='d' prints the cell counts as plain integers instead of scientific notation.
sns.heatmap(cm, annot=True, fmt='d')

Saving data

import os

# Save the trained model under ./saved_models. exist_ok=True creates the
# folder only when it is missing, without a separate check-then-create step.
directory = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(directory, exist_ok=True)

model_path = os.path.join(directory, 'cifar10_trained_model.h5')
model.save(model_path)

6 – Data Augmentation

We can improve our model if we set a data augmentation:

Image Augmentation is the process of artificially increasing the variations of the images in the datasets by flipping, enlarging, rotating the original images.
Augmentations also include shifting and changing the brightness of the images.

Source: https://machinelearningmastery.com/how-to-configure-image-data-augmentation-when-training-deep-learning-neural-networks/

import keras
from keras.datasets import cifar10

# Reload the raw dataset and cast both image arrays to float32.
# Pixels stay in the 0-255 range and labels stay as integer class indices here.
train_split, test_split = cifar10.load_data()
(X_train, y_train), (X_test, y_test) = train_split, test_split
X_train, X_test = X_train.astype('float32'), X_test.astype('float32')

X_train.shape

(50000, 32, 32, 3)

# Keep only the first n training images for the augmentation preview.
n = 8
X_train_sample = X_train[0:n]
X_train_sample.shape

(8, 32, 32, 3)

from keras.preprocessing.image import ImageDataGenerator

# Other augmentations that can be previewed by swapping the generator below:
#   ImageDataGenerator(rotation_range = 40)        -> change rotation
#   ImageDataGenerator(vertical_flip=True)         -> change vertical flip
#   ImageDataGenerator(height_shift_range=0.8)     -> change height shift
brightness_limits = (1, 3)
dataget_train = ImageDataGenerator(brightness_range=brightness_limits)  # change brightness

dataget_train.fit(X_train_sample)

from keras.preprocessing.image import img_to_array
import numpy as np

# Take one augmented batch from the generator and plot its images in a single row.
x_batch = next(iter(dataget_train.flow(X_train_sample, batch_size=n)))

fig = plt.figure(figsize=(20, 2))
for slot in range(1, n + 1):
    ax = fig.add_subplot(1, n, slot)
    # Cast to uint8 so imshow treats the values as 0-255 intensities.
    ax.imshow(img_to_array(x_batch[slot - 1]).astype('uint8'))
fig.suptitle('Augmented images (change brightness from 1 to 3)')
plt.show()

Training model with Data Augmentation

from keras.preprocessing.image import ImageDataGenerator

# The dataset was reloaded above as raw 0-255 pixels with integer labels, but
# the network was compiled for 0-1 inputs and one-hot targets
# (categorical_crossentropy). Feeding the raw arrays makes the loss diverge to
# NaN, so restore the same preprocessing used for the first training run.
X_train = X_train / 255
X_test = X_test / 255
y_train = keras.utils.to_categorical(y_train, n_cat)
y_test = keras.utils.to_categorical(y_test, n_cat)

# Random rotations, horizontal shifts and flips applied on the fly to each batch.
datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.4,
    horizontal_flip=True,
    vertical_flip=True,
)

datagen.fit(X_train)

# `fit` accepts generators directly; `fit_generator` is deprecated.
model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=2)

Epoch 1/2
1563/1563 [==============================] - 553s 352ms/step - loss: 2854752474026213376.0000 - accuracy: 0.0019
Epoch 2/2
1563/1563 [==============================] - 482s 308ms/step - loss: nan - accuracy: 0.9398

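Note that the 0.94 "accuracy" in the log above is not a real result: the loss diverged to nan, so the metric is meaningless. This run was fed the freshly reloaded data, with unscaled 0–255 pixels and integer labels instead of one-hot vectors. The images must be rescaled to 0–1 and the labels one-hot encoded before training, and the model should then be judged on the test set.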

Save the model

# Save the augmentation-trained model under ./saved_models. exist_ok=True
# creates the folder only when it is missing, without a separate check.
directory = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(directory, exist_ok=True)

model_path = os.path.join(directory, 'cifar10_trained_model_Augmentation.h5')
model.save(model_path)

Image Classification using Data Augmentation