Chapter 3: Data Preprocessing, Optimization, and Visualization

Feature standardization of image data

Show images without standardization

In [42]:
from keras.datasets import mnist
import matplotlib.pyplot as plt

(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Display the first nine training digits in a 3x3 grid
for i in range(0, 9):
    ax = plt.subplot(330+1+i)
    plt.tight_layout()
    ax.tick_params(axis='x', colors='blue')
    ax.tick_params(axis='y', colors='blue')
    plt.imshow(X_train[i], cmap=plt.get_cmap('gray'))
plt.show()

Initializing ImageDataGenerator

ImageDataGenerator accepts several standardization options. The featurewise options standardize using statistics computed over the whole training set (which is why datagen.fit is called below), while the samplewise options standardize each image by its own mean and standard deviation; a samplewise sketch follows the featurewise example.

  • featurewise_center
  • samplewise_center
  • featurewise_std_normalization
  • samplewise_std_normalization
In [ ]:
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
# 'th' = Theano-style channels-first ordering (in newer Keras:
# K.set_image_data_format('channels_first'))
K.set_image_dim_ordering('th')

# Reshape to (samples, channels, height, width) for channels-first ordering
X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
# fit() computes the dataset-wide mean and std that the featurewise options need
datagen.fit(X_train)
for X_batch, y_batch in datagen.flow(X_train, y_train, batch_size=9):
    for i in range(0, 9):
        ax = plt.subplot(330+1+i)
        plt.tight_layout()
        ax.tick_params(axis='x', colors='blue')
        ax.tick_params(axis='y', colors='blue')
        plt.imshow(X_batch[i].reshape(28,28), cmap=plt.get_cmap('gray'))
    plt.show()
    break
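
Only the featurewise options are exercised above; a minimal sketch of the samplewise variant (not part of the original notebook) reuses the reshaped X_train and needs no fit step:

In [ ]:
# Samplewise standardization (illustrative sketch): each image is
# centered and scaled by its own mean and std, so no datagen.fit()
# call is required.
datagen_sw = ImageDataGenerator(samplewise_center=True, samplewise_std_normalization=True)
for X_batch, y_batch in datagen_sw.flow(X_train, y_train, batch_size=9):
    for i in range(0, 9):
        ax = plt.subplot(330 + 1 + i)
        plt.imshow(X_batch[i].reshape(28, 28), cmap=plt.get_cmap('gray'))
    plt.show()
    break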

Sequence padding

In [ ]:
# Pre-padding with default 0.0 padding [p.51]

from keras.preprocessing.sequence import pad_sequences

sequences = [
    [1, 2, 3, 4],
    [5, 6, 7],
    [5]
]

# Every sequence is left-padded with zeros to the longest length (4)
padded = pad_sequences(sequences)
print(padded)
# -> [[1 2 3 4]
#     [0 5 6 7]
#     [0 0 0 5]]
In [ ]:
# Post-padding [p.51]

# Zeros are appended instead of prepended
padded_post = pad_sequences(sequences, padding='post')
print(padded_post)
# -> [[1 2 3 4]
#     [5 6 7 0]
#     [5 0 0 0]]
In [ ]:
# Padding with truncation [p.51]

# maxlen=3 truncates longer sequences: truncating='pre' drops values
# from the beginning, truncating='post' from the end
padded_maxlen_truncating_pre = pad_sequences(sequences, maxlen=3, truncating='pre')
print(padded_maxlen_truncating_pre)
# -> [[2 3 4]
#     [5 6 7]
#     [0 0 5]]

padded_maxlen_truncating_post = pad_sequences(sequences, maxlen=3, truncating='post')
print(padded_maxlen_truncating_post)
# -> [[1 2 3]
#     [5 6 7]
#     [0 0 5]]
In [ ]:
# Padding with a non-default value [p.52]

padded_value = pad_sequences(sequences, value=1.0)
print(padded_value)
# -> [[1 2 3 4]
#     [1 5 6 7]
#     [1 1 1 5]]
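
These arguments combine freely; for example, post-padding to a fixed length with post-truncation and a custom fill value (an illustrative sketch, not in the original):

In [ ]:
# Combined options (illustrative): fixed length 5, pad at the end, fill with 9
padded_fixed = pad_sequences(sequences, maxlen=5, padding='post', truncating='post', value=9)
print(padded_fixed)
# -> [[1 2 3 4 9]
#     [5 6 7 9 9]
#     [5 9 9 9 9]]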

Model Visualization

Keras provides several ways to visualize a model; the highest-level of these is built on graphviz.

Running the code below requires graphviz and pydot to be installed.

In [43]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

num_classes = 10  # ten digit classes
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

print(model.summary())
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_44 (Dense)             (None, 512)               401920    
_________________________________________________________________
dropout_27 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_45 (Dense)             (None, 512)               262656    
_________________________________________________________________
dropout_28 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_46 (Dense)             (None, 10)                5130      
=================================================================
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
None
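
The parameter counts follow from (inputs + 1) × units per Dense layer: 785 × 512 = 401,920, then 513 × 512 = 262,656, and 513 × 10 = 5,130, for 669,706 in total.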
In [44]:
from keras.utils import plot_model

plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
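
plot_model writes the graph to a file; in a notebook the same graph can also be rendered inline via model_to_dot (a sketch, assuming pydot and graphviz are installed):

In [ ]:
# Inline rendering of the model graph (illustrative sketch)
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))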

Optimization

  • SGD
  • RMSprop
  • Adam
  • AdaDelta
  • TFOptimizer
  • AdaGrad

The cells below run SGD, RMSprop, Adam, and AdaDelta with a shared harness; a minimal AdaGrad invocation is sketched after the AdaDelta run.
In [34]:
# Common code for samples

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
import matplotlib.pyplot as plt
%matplotlib inline

def runMnist(opt, epochs=12, batch_size=128):
    num_classes = 10
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten the 28x28 images to 784-dimensional vectors and scale to [0, 1]
    x_train = x_train.reshape(60000, 784).astype('float32') / 255
    x_test = x_test.reshape(10000, 784).astype('float32') / 255

    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # One-hot encode the labels
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # print(model.summary())

    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                        verbose=1, validation_data=(x_test, y_test))

    print(history.history.keys())

    plot(history, 'acc')
    plot(history, 'loss')

def plot(history, item):
    plt.plot(history.history[item])
    plt.plot(history.history['val_' + item])
    plt.title(item)
    plt.ylabel(item)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
In [36]:
# SGD
from keras.optimizers import SGD

runMnist(SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True))
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 6s 96us/step - loss: 0.4838 - acc: 0.8601 - val_loss: 0.2145 - val_acc: 0.9395
Epoch 2/12
60000/60000 [==============================] - 5s 85us/step - loss: 0.2186 - acc: 0.9365 - val_loss: 0.1504 - val_acc: 0.9553
Epoch 3/12
60000/60000 [==============================] - 5s 85us/step - loss: 0.1622 - acc: 0.9525 - val_loss: 0.1217 - val_acc: 0.9624
Epoch 4/12
60000/60000 [==============================] - 5s 85us/step - loss: 0.1306 - acc: 0.9618 - val_loss: 0.1034 - val_acc: 0.9673
Epoch 5/12
60000/60000 [==============================] - 5s 88us/step - loss: 0.1095 - acc: 0.9670 - val_loss: 0.0935 - val_acc: 0.9698
Epoch 6/12
60000/60000 [==============================] - 5s 85us/step - loss: 0.0949 - acc: 0.9716 - val_loss: 0.0814 - val_acc: 0.9741
Epoch 7/12
60000/60000 [==============================] - 5s 89us/step - loss: 0.0822 - acc: 0.9753 - val_loss: 0.0769 - val_acc: 0.9754
Epoch 8/12
60000/60000 [==============================] - 5s 85us/step - loss: 0.0733 - acc: 0.9774 - val_loss: 0.0726 - val_acc: 0.9764
Epoch 9/12
60000/60000 [==============================] - 5s 84us/step - loss: 0.0662 - acc: 0.9802 - val_loss: 0.0673 - val_acc: 0.9790
Epoch 10/12
60000/60000 [==============================] - 5s 84us/step - loss: 0.0592 - acc: 0.9822 - val_loss: 0.0669 - val_acc: 0.9781
Epoch 11/12
60000/60000 [==============================] - 5s 84us/step - loss: 0.0548 - acc: 0.9837 - val_loss: 0.0629 - val_acc: 0.9785
Epoch 12/12
60000/60000 [==============================] - 5s 84us/step - loss: 0.0491 - acc: 0.9854 - val_loss: 0.0594 - val_acc: 0.9802
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
In [37]:
# RMSProp
from keras.optimizers import RMSprop

runMnist(RMSprop())
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 7s 112us/step - loss: 0.2451 - acc: 0.9245 - val_loss: 0.1203 - val_acc: 0.9619
Epoch 2/12
60000/60000 [==============================] - 6s 98us/step - loss: 0.1033 - acc: 0.9691 - val_loss: 0.0941 - val_acc: 0.9708
Epoch 3/12
60000/60000 [==============================] - 6s 101us/step - loss: 0.0737 - acc: 0.9775 - val_loss: 0.0719 - val_acc: 0.9795
Epoch 4/12
60000/60000 [==============================] - 6s 99us/step - loss: 0.0586 - acc: 0.9825 - val_loss: 0.0726 - val_acc: 0.9791
Epoch 5/12
60000/60000 [==============================] - 6s 100us/step - loss: 0.0500 - acc: 0.9856 - val_loss: 0.0778 - val_acc: 0.9791
Epoch 6/12
60000/60000 [==============================] - 6s 99us/step - loss: 0.0436 - acc: 0.9869 - val_loss: 0.0692 - val_acc: 0.9825
Epoch 7/12
60000/60000 [==============================] - 6s 101us/step - loss: 0.0375 - acc: 0.9897 - val_loss: 0.0764 - val_acc: 0.9817
Epoch 8/12
60000/60000 [==============================] - 6s 101us/step - loss: 0.0352 - acc: 0.9896 - val_loss: 0.0820 - val_acc: 0.9833
Epoch 9/12
60000/60000 [==============================] - 6s 100us/step - loss: 0.0296 - acc: 0.9912 - val_loss: 0.1113 - val_acc: 0.9779
Epoch 10/12
60000/60000 [==============================] - 6s 100us/step - loss: 0.0274 - acc: 0.9917 - val_loss: 0.0865 - val_acc: 0.9827
Epoch 11/12
60000/60000 [==============================] - 6s 100us/step - loss: 0.0273 - acc: 0.9923 - val_loss: 0.1028 - val_acc: 0.9800
Epoch 12/12
60000/60000 [==============================] - 6s 99us/step - loss: 0.0245 - acc: 0.9933 - val_loss: 0.1086 - val_acc: 0.9811
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
In [39]:
# Adam
from keras.optimizers import Adam

runMnist(Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False))
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 8s 131us/step - loss: 0.2494 - acc: 0.9256 - val_loss: 0.1038 - val_acc: 0.9683
Epoch 2/12
60000/60000 [==============================] - 7s 113us/step - loss: 0.1000 - acc: 0.9691 - val_loss: 0.0862 - val_acc: 0.9733
Epoch 3/12
60000/60000 [==============================] - 7s 112us/step - loss: 0.0724 - acc: 0.9770 - val_loss: 0.0703 - val_acc: 0.9775
Epoch 4/12
60000/60000 [==============================] - 7s 112us/step - loss: 0.0578 - acc: 0.9817 - val_loss: 0.0711 - val_acc: 0.9778
Epoch 5/12
60000/60000 [==============================] - 7s 113us/step - loss: 0.0442 - acc: 0.9856 - val_loss: 0.0682 - val_acc: 0.9799
Epoch 6/12
60000/60000 [==============================] - 7s 113us/step - loss: 0.0378 - acc: 0.9879 - val_loss: 0.0732 - val_acc: 0.9782
Epoch 7/12
60000/60000 [==============================] - 7s 113us/step - loss: 0.0359 - acc: 0.9880 - val_loss: 0.0699 - val_acc: 0.9805
Epoch 8/12
60000/60000 [==============================] - 7s 115us/step - loss: 0.0286 - acc: 0.9905 - val_loss: 0.0626 - val_acc: 0.9827
Epoch 9/12
60000/60000 [==============================] - 7s 114us/step - loss: 0.0296 - acc: 0.9896 - val_loss: 0.0738 - val_acc: 0.9807
Epoch 10/12
60000/60000 [==============================] - 7s 116us/step - loss: 0.0267 - acc: 0.9912 - val_loss: 0.0683 - val_acc: 0.9814
Epoch 11/12
60000/60000 [==============================] - 7s 117us/step - loss: 0.0221 - acc: 0.9929 - val_loss: 0.0738 - val_acc: 0.9803
Epoch 12/12
60000/60000 [==============================] - 7s 115us/step - loss: 0.0212 - acc: 0.9926 - val_loss: 0.0675 - val_acc: 0.9842
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
In [40]:
# AdaDelta
from keras.optimizers import Adadelta

runMnist(Adadelta(lr=0.1, rho=0.95, epsilon=None, decay=0.0))
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 9s 144us/step - loss: 0.6453 - acc: 0.8233 - val_loss: 0.2800 - val_acc: 0.9228
Epoch 2/12
60000/60000 [==============================] - 8s 126us/step - loss: 0.2964 - acc: 0.9149 - val_loss: 0.2170 - val_acc: 0.9392
Epoch 3/12
60000/60000 [==============================] - 8s 126us/step - loss: 0.2329 - acc: 0.9331 - val_loss: 0.1786 - val_acc: 0.9476
Epoch 4/12
60000/60000 [==============================] - 8s 128us/step - loss: 0.1947 - acc: 0.9431 - val_loss: 0.1535 - val_acc: 0.9546
Epoch 5/12
60000/60000 [==============================] - 8s 127us/step - loss: 0.1694 - acc: 0.9512 - val_loss: 0.1348 - val_acc: 0.9601
Epoch 6/12
60000/60000 [==============================] - 8s 127us/step - loss: 0.1491 - acc: 0.9564 - val_loss: 0.1194 - val_acc: 0.9646
Epoch 7/12
60000/60000 [==============================] - 8s 128us/step - loss: 0.1341 - acc: 0.9615 - val_loss: 0.1112 - val_acc: 0.9660
Epoch 8/12
60000/60000 [==============================] - 8s 130us/step - loss: 0.1222 - acc: 0.9641 - val_loss: 0.1027 - val_acc: 0.9685
Epoch 9/12
60000/60000 [==============================] - 8s 130us/step - loss: 0.1126 - acc: 0.9671 - val_loss: 0.0951 - val_acc: 0.9708
Epoch 10/12
60000/60000 [==============================] - 8s 132us/step - loss: 0.1020 - acc: 0.9696 - val_loss: 0.0907 - val_acc: 0.9721
Epoch 11/12
60000/60000 [==============================] - 8s 131us/step - loss: 0.0953 - acc: 0.9716 - val_loss: 0.0852 - val_acc: 0.9739
Epoch 12/12
60000/60000 [==============================] - 8s 129us/step - loss: 0.0873 - acc: 0.9746 - val_loss: 0.0837 - val_acc: 0.9738
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
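
AdaGrad appears in the optimizer list above but is not run here; a minimal invocation through the same runMnist harness would look like this (illustrative, using the Keras 2 Adagrad defaults):

In [ ]:
# AdaGrad (illustrative sketch; not run in the original notebook)
from keras.optimizers import Adagrad

runMnist(Adagrad(lr=0.01, epsilon=None, decay=0.0))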

Evaluation

If validation data is available, the model can be evaluated with the following code.

    score = model.evaluate(x_val, y_val, verbose=0)
    print('Validation loss:', score[0])
    print('Validation acc:', score[1])
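
The ordering of score follows model.metrics_names; printing it shows which index corresponds to which metric (for the compile configuration above this is ['loss', 'acc']):

    print(model.metrics_names)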