Chapter 7: Developing the Training Functionality for the Classification Model

Copy over the code we need from Chapter 5.

In [1]:
import numpy as np

relu_fn = lambda Z: np.maximum(Z, 0)   # ReLU Function
In [2]:
from scipy.special import softmax

softmax_fn = lambda Z: softmax(Z, axis=1)
In [3]:
import mnist

X_train, Y_train = mnist.train_images(), mnist.train_labels()
X_test, Y_test = mnist.test_images(), mnist.test_labels()
X_train, X_test = X_train / 255.0, X_test / 255.0
In [4]:
# list 5.18 Flatten the image data
# p.102

X_train_flat = X_train.reshape(-1, 28*28)
X_test_flat = X_test.reshape(-1, 28*28)

print(X_train_flat.shape, X_test_flat.shape)
(60000, 784) (10000, 784)
In [5]:
# list 5.18 Convert the labels into one-hot vectors
# p.102

num_classes = 10   # classifying the digits 0 through 9, so this is a 10-class classification

Y_train_ohe = np.eye(num_classes)[Y_train]
Y_test_ohe = np.eye(num_classes)[Y_test]

print(Y_train_ohe.shape, Y_test_ohe.shape)
(60000, 10) (10000, 10)

7.1 Preparing the Utility Functions

In [6]:
# list 7.1 Cross-entropy loss
# p.145

def cross_entropy_loss(Y_hat, Y):      # Y_hat: predictions, Y: ground-truth labels (one-hot vectors)
    batch_size = Y_hat.shape[0]
    return -np.sum(Y*np.log(Y_hat + 1e-7)) / batch_size
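
A quick sanity check of the loss (my own addition, not one of the book's listings): with one-hot targets the formula reduces to the negative log of the probability assigned to the correct class, averaged over the batch.

# Sanity check (illustrative): two samples, three classes
Y_hat_demo = np.array([[0.7, 0.2, 0.1],
                       [0.1, 0.8, 0.1]])
Y_demo = np.array([[1, 0, 0],
                   [0, 1, 0]])
# Expected: -(log(0.7) + log(0.8)) / 2 ≈ 0.290
print(cross_entropy_loss(Y_hat_demo, Y_demo))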
In [7]:
# list 7.2 Derivative of Softmax with Loss
# p.146

def d_softmax_with_loss_fn(Y_hat, Y):
    batch_size = Y_hat.shape[0]
    return (Y_hat - Y) / batch_size
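
To convince ourselves that this compact expression really is the gradient of the combined softmax + cross-entropy loss with respect to Z, here is a small numerical gradient check (my own addition, using the softmax_fn and cross_entropy_loss defined above).

# Numerical gradient check (illustrative): compare the analytic gradient with
# central differences of J(Z) = cross_entropy_loss(softmax_fn(Z), Y)
np.random.seed(0)
Z_demo = np.random.randn(2, 3)
Y_demo = np.eye(3)[[0, 2]]   # one-hot targets for the two samples

analytic = d_softmax_with_loss_fn(softmax_fn(Z_demo), Y_demo)

eps = 1e-5
numeric = np.zeros_like(Z_demo)
for i in range(Z_demo.shape[0]):
    for j in range(Z_demo.shape[1]):
        Z_plus, Z_minus = Z_demo.copy(), Z_demo.copy()
        Z_plus[i, j] += eps
        Z_minus[i, j] -= eps
        numeric[i, j] = (cross_entropy_loss(softmax_fn(Z_plus), Y_demo)
                         - cross_entropy_loss(softmax_fn(Z_minus), Y_demo)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))   # the two gradients should agree to several decimal places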
In [8]:
# list 7.3 Derivative of the ReLU function
# p.146

def d_relu_fn(Z):
    return np.where(Z > 0, 1, 0)
In [9]:
# list 7.5 Dummy derivative for the output layer
# p.147

def d_output_fn(dJ_dZ):
    # d_softmax_with_loss_fn already gives dJ/dZ for the output layer,
    # so this dummy derivative returns 1 and backward passes dJ/dZ through unchanged
    return 1

7.2 Revising the Layer Class

In [16]:
# list 7.6 Layer Class
# p.147

import numpy as np

class Layer:
    
    def __init__(self, prev_layer_size, layer_size, activation_fn, d_activation_fn):
        np.random.seed(1)
        self.W = np.random.randn(prev_layer_size, layer_size) / np.sqrt(prev_layer_size)   # Xavier initialization
        self.b = np.zeros((layer_size))
        self.activation_fn = activation_fn
        self.d_activation_fn = d_activation_fn   # derivative of the activation function

    def forward(self, A_prev):
        Z = np.dot(A_prev, self.W) + self.b
        A = self.activation_fn(Z)
        self.Z = Z   # store this layer's pre-activation output for the backward pass
        self.A_prev = A_prev   # store the input to this layer for the backward pass
        return A
    
    def backward(self, dJ_dA):
        dJ_dZ = dJ_dA * self.d_activation_fn(self.Z)
        self.dJ_dW = np.dot(self.A_prev.T, dJ_dZ)     # shape: (prev_layer_size, layer_size)
        self.dJ_db = np.dot(np.ones(dJ_dA.shape[0]), dJ_dZ)    # shape: (layer_size, )
        dJ_dA_prev = np.dot(dJ_dZ, self.W.T)   # shape: (batch_size, prev_layer_size)
        return dJ_dA_prev
    
    def update_params(self, learning_rate):
        self.W -= learning_rate * self.dJ_dW
        self.b -= learning_rate * self.dJ_db
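
A small shape check (my own sketch, not a listing from the book): push a random mini-batch through a single Layer and confirm that forward, backward, and the stored gradients all have the expected shapes.

# Shape check (illustrative): batch of 4, 784 inputs, 32 units
layer_demo = Layer(784, 32, relu_fn, d_relu_fn)
A_demo = layer_demo.forward(np.random.randn(4, 784))
dJ_dA_prev_demo = layer_demo.backward(np.ones_like(A_demo))

print(A_demo.shape)              # (4, 32)
print(dJ_dA_prev_demo.shape)     # (4, 784)
print(layer_demo.dJ_dW.shape)    # (784, 32)
print(layer_demo.dJ_db.shape)    # (32,)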

7.3 Revising the SimpleClassifier Class

In [25]:
# list 7.7 SimpleClassifier Class
# p.150-153

class SimpleClassifier:
 
    def __init__(self, input_layer_size, output_layer_size, hidden_layers_sizes, activation_fn, d_activation_fn):
        layer_sizes = [ input_layer_size, *hidden_layers_sizes ]
        self.layers = [ Layer(layer_sizes[i], layer_sizes[i+1], activation_fn, d_activation_fn) for i in range(len(layer_sizes)-1) ]
        output_layer = Layer(layer_sizes[-1], output_layer_size, softmax_fn, d_output_fn)
        self.layers.append(output_layer)
        
    def forward(self, A0):
        A = A0
        for layer in self.layers:
            A = layer.forward(A)
        Y_hat = A
        return Y_hat
    
    def predict(self, X):
        Y_hat = self.forward(X)
        return Y_hat
    
    def evaluate_accuracy(self, X, Y):
        predict = np.argmax(self.predict(X), axis=1)
        actual = np.argmax(Y, axis=1)
        num_corrects = len(np.where(predict == actual)[0])
        accuracy = num_corrects / len(X)
        return accuracy

    def backward(self, dJ_dZ):
        dJ_dA = dJ_dZ
        for layer in reversed(self.layers):
            dJ_dA = layer.backward(dJ_dA)
        return dJ_dA
    
    def update_params(self, learning_rate):
        for layer in self.layers:
            layer.update_params(learning_rate)
            
    def train(self, X_train, Y_train, X_test, Y_test, mini_batch_size, num_epochs, learning_rate):
        # num_mini_batches_per_epoch = (X_train.shape[0] - 1) // mini_batch_size + 1
        num_mini_batches_per_epoch = (X_train.shape[0] + mini_batch_size - 1) // mini_batch_size ### !!! [note to self] this ceiling-division form is easier to read
        costs, accuracies = [], []
        for epoch in range(num_epochs):
            epoch_cost = 0
            for b in range(num_mini_batches_per_epoch):
                start = b * mini_batch_size
                end = start + mini_batch_size
                X, Y = X_train[start:end], Y_train[start:end]
                Y_hat = self.forward(X)
                epoch_cost += cross_entropy_loss(Y_hat, Y)
                self.backward(d_softmax_with_loss_fn(Y_hat, Y))
                self.update_params(learning_rate)  ### !!! [note to self] update the parameters once per mini-batch
            costs.append(epoch_cost / num_mini_batches_per_epoch)
            accuracies.append(self.evaluate_accuracy(X_test, Y_test))
            print(f'Epoch {epoch:4d}: training cost = {costs[epoch]:.6f} accuracy = {accuracies[epoch]*100:.3f} %')
        self.costs = np.array(costs)
        self.accuracies = np.array(accuracies)
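
Before the full training run in the next section, a short smoke test (my own addition, not in the book) on a small slice of the data confirms that forward, backward, and the parameter update run end to end.

# Smoke test (illustrative): one epoch on a 500-sample slice of the data
smoke_classifier = SimpleClassifier(X_train_flat.shape[1], num_classes, [16], relu_fn, d_relu_fn)
smoke_classifier.train(X_train_flat[:500], Y_train_ohe[:500],
                       X_test_flat[:500], Y_test_ohe[:500],
                       mini_batch_size=50, num_epochs=1, learning_rate=0.01)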
            

7.4 Training

In [26]:
# list 7.8 Creating and training the SimpleClassifier
# p.154

mnist_classifier = SimpleClassifier(X_test_flat.shape[1], num_classes, [64, 32], relu_fn, d_relu_fn)
mnist_classifier.train(X_train_flat, Y_train_ohe, X_test_flat, Y_test_ohe, mini_batch_size=50, num_epochs=20, learning_rate = 0.01)
Epoch    0: training cost = 0.975552 accuracy = 87.970 %
Epoch    1: training cost = 0.381819 accuracy = 90.660 %
Epoch    2: training cost = 0.316514 accuracy = 91.720 %
Epoch    3: training cost = 0.283249 accuracy = 92.310 %
Epoch    4: training cost = 0.258692 accuracy = 92.950 %
Epoch    5: training cost = 0.238035 accuracy = 93.490 %
Epoch    6: training cost = 0.220105 accuracy = 93.820 %
Epoch    7: training cost = 0.204623 accuracy = 94.140 %
Epoch    8: training cost = 0.191142 accuracy = 94.430 %
Epoch    9: training cost = 0.179337 accuracy = 94.770 %
Epoch   10: training cost = 0.168937 accuracy = 94.950 %
Epoch   11: training cost = 0.159691 accuracy = 95.200 %
Epoch   12: training cost = 0.151409 accuracy = 95.450 %
Epoch   13: training cost = 0.143864 accuracy = 95.650 %
Epoch   14: training cost = 0.137004 accuracy = 95.780 %
Epoch   15: training cost = 0.130707 accuracy = 95.910 %
Epoch   16: training cost = 0.124923 accuracy = 96.030 %
Epoch   17: training cost = 0.119596 accuracy = 96.090 %
Epoch   18: training cost = 0.114674 accuracy = 96.250 %
Epoch   19: training cost = 0.110119 accuracy = 96.350 %
In [29]:
# list 7.9 Visualizing how accuracy and cost change over the epochs
# p.155
# [note to self] replaced with my own code.

%matplotlib inline
import matplotlib.pyplot as plt

N = len(mnist_classifier.accuracies)
xs_shifted = np.arange(1, N+1)   # metrics are recorded after epochs 1..N

W, H = 6, 4
fig, ax = plt.subplots(1,2,figsize=(W*2, H))

ax[0].plot(xs_shifted, mnist_classifier.accuracies * 100)
ax[0].set_xticks(xs_shifted)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Accuracy (%)')

ax[1].plot(xs_shifted, mnist_classifier.costs)
ax[1].set_xticks(xs_shifted)
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Costs')

plt.show()