In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator

np.random.seed(25)
Using TensorFlow backend.
In [5]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("X_train original shape", X_train.shape)
print("y_train original shape", y_train.shape)
print("X_test original shape", X_test.shape)
print("y_test original shape", y_test.shape)
Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
11493376/11490434 [==============================] - 11s 1us/step
X_train original shape (60000, 28, 28)
y_train original shape (60000,)
X_test original shape (10000, 28, 28)
y_test original shape (10000,)
In [6]:
plt.imshow(X_train[0], cmap='gray')
plt.title('Class '+ str(y_train[0]))
Out[6]:
Text(0.5,1,'Class 5')
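For a broader look at the data, a small grid of samples can be plotted the same way (a quick sketch, not part of the original run; at this point X_train is still (60000, 28, 28), so imshow works directly):

fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for i, ax in enumerate(axes.flat):
    ax.imshow(X_train[i], cmap='gray')
    ax.set_title('Class ' + str(y_train[i]))
    ax.axis('off')
plt.tight_layout()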
In [7]:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_test /= 255

X_train.shape
Out[7]:
(60000, 28, 28, 1)
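A quick sanity check that the rescaling worked; pixel values should now lie in [0, 1]:

print(X_train.min(), X_train.max())  # expect 0.0 1.0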
In [9]:
number_of_classes = 10

Y_train = np_utils.to_categorical(y_train, number_of_classes)
Y_test = np_utils.to_categorical(y_test, number_of_classes)

y_train[0], Y_train[0]
Out[9]:
(5, array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]))
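np_utils.to_categorical simply builds a one-hot matrix; the same effect can be had by indexing an identity matrix (a minimal NumPy sketch, not the Keras implementation):

Y_train_manual = np.eye(number_of_classes)[y_train]  # row i of eye() is the one-hot vector for class i
assert (Y_train_manual == Y_train).all()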
In [10]:
# Three steps to convolution:
# 1. Convolution
# 2. Activation
# 3. Pooling
# Repeat steps 1-3 to add more hidden layers

# 4. Then add a fully connected network
# The fully connected layers give the CNN the ability
# to classify the samples

model = Sequential()

model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
model.add(Activation('relu'))
BatchNormalization(axis=-1)  # NOTE: not wrapped in model.add(), so the layer is never attached
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

BatchNormalization(axis=-1)  # no-op, as above
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
BatchNormalization(axis=-1)  # no-op, as above
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
# Fully connected layers

BatchNormalization()  # no-op, as above
model.add(Dense(512))
model.add(Activation('relu'))
BatchNormalization()  # no-op, as above
model.add(Dropout(0.2))
model.add(Dense(10))

# model.add(Conv2D(10, (3, 3), padding='same'))
# model.add(GlobalAveragePooling2D())
model.add(Activation('softmax'))
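The bare BatchNormalization(...) calls above only construct a layer object; without model.add(...) nothing is attached to the model, which is why no batch-norm layers appear in the summary below. If batch normalization were actually wanted, the first block would look like this (a sketch of the presumed intent, not the model trained here):

model_bn = Sequential()
model_bn.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
model_bn.add(Activation('relu'))
model_bn.add(BatchNormalization(axis=-1))  # attached this time
model_bn.add(Conv2D(32, (3, 3)))
model_bn.add(Activation('relu'))
model_bn.add(MaxPooling2D(pool_size=(2, 2)))
# ... remaining conv blocks and dense layers as in the model above ...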
In [11]:
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_3 (Activation)    (None, 10, 10, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 8, 64)          36928     
_________________________________________________________________
activation_4 (Activation)    (None, 8, 8, 64)          0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
activation_5 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
=================================================================
Total params: 594,922
Trainable params: 594,922
Non-trainable params: 0
_________________________________________________________________
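The parameter counts follow directly from the shapes: a Conv2D layer holds kernel_h * kernel_w * in_channels * filters weights plus filters biases, and a Dense layer holds in_units * out_units weights plus out_units biases. A quick check of the figures above:

# conv2d_1: 3*3*1*32  + 32  = 320
# conv2d_2: 3*3*32*32 + 32  = 9248
# conv2d_3: 3*3*32*64 + 64  = 18496
# conv2d_4: 3*3*64*64 + 64  = 36928
# dense_1:  1024*512  + 512 = 524800   (flatten of 4*4*64 = 1024)
# dense_2:  512*10    + 10  = 5130
print(320 + 9248 + 18496 + 36928 + 524800 + 5130)  # 594922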
In [12]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
In [13]:
gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)

test_gen = ImageDataGenerator()
In [14]:
train_generator = gen.flow(X_train, Y_train, batch_size=64)
test_generator = test_gen.flow(X_test, Y_test, batch_size=64)
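To see what the augmentation produces, one batch can be pulled from the generator and a few samples plotted (a quick sketch; the channel axis has to be squeezed out for imshow):

X_batch, Y_batch = next(train_generator)
fig, axes = plt.subplots(1, 6, figsize=(12, 2))
for ax, img, label in zip(axes, X_batch, Y_batch):
    ax.imshow(img.reshape(28, 28), cmap='gray')
    ax.set_title(str(label.argmax()))
    ax.axis('off')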
In [15]:
# model.fit(X_train, Y_train, batch_size=128, epochs=1, validation_data=(X_test, Y_test))

model.fit_generator(train_generator, steps_per_epoch=60000//64, epochs=5, 
                    validation_data=test_generator, validation_steps=10000//64)
Epoch 1/5
937/937 [==============================] - 111s 119ms/step - loss: 0.2141 - acc: 0.9319 - val_loss: 0.0382 - val_acc: 0.9871
Epoch 2/5
937/937 [==============================] - 112s 120ms/step - loss: 0.0632 - acc: 0.9808 - val_loss: 0.0222 - val_acc: 0.9919
Epoch 3/5
937/937 [==============================] - 113s 120ms/step - loss: 0.0479 - acc: 0.9857 - val_loss: 0.0235 - val_acc: 0.9923
Epoch 4/5
937/937 [==============================] - 115s 123ms/step - loss: 0.0412 - acc: 0.9870 - val_loss: 0.0192 - val_acc: 0.9932
Epoch 5/5
937/937 [==============================] - 112s 120ms/step - loss: 0.0348 - acc: 0.9894 - val_loss: 0.0142 - val_acc: 0.9952
Out[15]:
<keras.callbacks.History at 0x2aba4134400>
In [16]:
score = model.evaluate(X_test, Y_test)  # returns [loss, accuracy]
print()
print('Test accuracy: ', score[1])
10000/10000 [==============================] - 6s 609us/step

Test accuracy:  0.9952
In [18]:
predictions = model.predict_classes(X_test)

predictions = list(predictions)
actuals = list(y_test)

sub = pd.DataFrame({'Actual': actuals, 'Predictions': predictions})
sub.to_csv('./output_cnn.csv', index=False)
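A single accuracy number hides which digits get confused with which; pandas.crosstab builds a confusion matrix straight from the two columns just saved:

# Rows are true digits, columns are predicted digits
confusion = pd.crosstab(sub['Actual'], sub['Predictions'],
                        rownames=['actual'], colnames=['predicted'])
print(confusion)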

Pseudo-Labelling

Pseudo-labelling reuses the trained model's predictions on the unlabelled test set as (soft) labels, mixing those samples into the training batches so the model can also learn from the test-set distribution.

In [19]:
class MixIterator(object):
    """Draws one batch from each wrapped iterator and concatenates them,
    so every combined batch mixes real-labelled and pseudo-labelled samples."""
    def __init__(self, iters):
        self.iters = iters
        self.N = sum([it.n for it in self.iters])  # total samples across all iterators

    def reset(self):
        for it in self.iters: it.reset()

    def __iter__(self):
        return self

    def __next__(self, *args, **kwargs):
        # Pull the next (X, y) batch from each iterator and stack them
        nexts = [next(it) for it in self.iters]
        n0 = np.concatenate([n[0] for n in nexts])
        n1 = np.concatenate([n[1] for n in nexts])
        return (n0, n1)
In [20]:
predictions = model.predict(X_test, batch_size=64)

predictions[:5]
Out[20]:
array([[1.5049880e-11, 8.4843137e-09, 5.7679959e-08, 1.1252340e-10,
        2.0104309e-11, 2.7545119e-12, 9.6150654e-17, 9.9999988e-01,
        5.3873722e-10, 4.6261897e-09],
       [5.7154570e-10, 2.7290243e-08, 1.0000000e+00, 6.1176050e-11,
        2.7241825e-08, 1.2642251e-14, 4.3024220e-09, 1.0676515e-09,
        2.8205413e-10, 1.7703314e-08],
       [1.5563334e-08, 9.9999785e-01, 3.3040717e-08, 5.4056804e-11,
        1.2001974e-06, 4.9535043e-08, 7.2000375e-08, 5.8842977e-07,
        2.5030573e-07, 1.2015522e-08],
       [9.9978000e-01, 3.1680694e-09, 1.7188404e-08, 3.3866943e-10,
        3.4800298e-08, 1.6274764e-07, 2.1943208e-04, 1.4814659e-10,
        2.5864750e-07, 2.0843450e-07],
       [1.2516747e-10, 3.2045364e-09, 1.7159338e-11, 1.0174864e-13,
        9.9999261e-01, 7.5005967e-11, 2.7405916e-10, 6.2196845e-11,
        1.7501790e-10, 7.3918263e-06]], dtype=float32)
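Each row above is a softmax distribution, and these soft predictions are used directly as pseudo-labels below. A common variant instead keeps only confident predictions, converted to hard one-hot labels; a sketch of that filtering (the 0.99 threshold is an illustrative choice, not from this notebook):

confident = predictions.max(axis=1) > 0.99   # mask of high-confidence test samples
X_pseudo = X_test[confident]
Y_pseudo = np.eye(number_of_classes)[predictions[confident].argmax(axis=1)]
print(confident.sum(), 'of', len(X_test), 'test samples kept')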
In [21]:
# gen = ImageDataGenerator()

batches = gen.flow(X_train, Y_train, batch_size=48)               # real labels, augmented
test_batches = test_gen.flow(X_test, predictions, batch_size=16)  # soft pseudo-labels
In [22]:
mi = MixIterator([batches, test_batches])

mi.N
Out[22]:
70000
In [23]:
# Each combined batch holds 48 real + 16 pseudo-labelled samples (64 total),
# so one pass over all 70000 samples takes mi.N//64 = 1093 steps
model.fit_generator(mi, steps_per_epoch=mi.N//64, epochs=5, validation_data=(X_test, Y_test))
Epoch 1/5
1093/1093 [==============================] - 130s 119ms/step - loss: 0.0299 - acc: 0.9917 - val_loss: 0.0187 - val_acc: 0.9941
Epoch 2/5
1093/1093 [==============================] - 131s 120ms/step - loss: 0.0297 - acc: 0.9918 - val_loss: 0.0189 - val_acc: 0.9943
Epoch 3/5
1093/1093 [==============================] - 134s 122ms/step - loss: 0.0289 - acc: 0.9923 - val_loss: 0.0181 - val_acc: 0.9949
Epoch 4/5
1093/1093 [==============================] - 135s 124ms/step - loss: 0.0263 - acc: 0.9928 - val_loss: 0.0196 - val_acc: 0.9946
Epoch 5/5
1093/1093 [==============================] - 132s 121ms/step - loss: 0.0254 - acc: 0.9929 - val_loss: 0.0220 - val_acc: 0.9941
Out[23]:
<keras.callbacks.History at 0x2aba5903320>