Updated 21/Nov/2021 by Yoshihisa Nitta
Continue training a Variational Autoencoder on the CelebA dataset. This notebook assumes that VAE_CelebA_Train.ipynb has already been executed and that the state it saved is available.
CelebA データセットに対して変分オートエンコーダをさらに学習させる。 VAE_CelebA_Train.ipynb を実行した後の状態であることを前提としている。
# Uncomment to pin TensorFlow to version 2.7.0.
#! pip install tensorflow==2.7.0
# Colab line magic requesting TensorFlow 2.x.
%tensorflow_version 2.x
import tensorflow as tf
print(tf.__version__)
# Print GPU, CPU, OS release and memory information for the current runtime.
! nvidia-smi
! cat /proc/cpuinfo
! cat /etc/issue
! free -h
# Mount Google Drive at /content/drive and list it to confirm the mount.
from google.colab import drive
drive.mount('/content/drive')
! ls /content/drive
By default, download the source file from Google Drive with `gdown`.
Download from nw.tsuda.ac.jp only if the specifications of Google Drive change and you can no longer download from Google Drive.
基本的に Google Drive から `gdown` してください。
Google Drive の仕様が変わってダウンロードができない場合にのみ、nw.tsuda.ac.jp からダウンロードしてください。
# Download source file
nw_path = './nw'
! rm -rf {nw_path}
! mkdir -p {nw_path}
if True: # from Google Drive
url_model = 'https://drive.google.com/uc?id=1ZCihR7JkMOity4wCr66ZCp-3ZOlfwwo3'
! (cd {nw_path}; gdown {url_model})
else: # from nw.tsuda.ac.jp
URL_NW = 'https://nw.tsuda.ac.jp/lec/GoogleColab/pub'
url_model = f'{URL_NW}/models/VariationalAutoEncoder.py'
! wget -nd {url_model} -P {nw_path}
! cat {nw_path}/VariationalAutoEncoder.py
Official WWW of CelebA dataset: https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
Google Drive of CelebA dataset: https://drive.google.com/drive/folders/0B7EVK8r0v71pWEZsZE9oNnFzTm8?resourcekey=0-5BR16BdXnb8hVj6CNHKzLg
img_align_celeba.zip mirrored on my Google Drive:
https://drive.google.com/uc?id=1LFKeoI-hb96jlV0K10dO1o04iQPBoFdx
CelebA データセットの公式ページ: https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
CelebA データセットのGoogle Drive: https://drive.google.com/drive/folders/0B7EVK8r0v71pWEZsZE9oNnFzTm8?resourcekey=0-5BR16BdXnb8hVj6CNHKzLg
自分の Google Drive 上にミラーした img_align_celeba.zip:
https://drive.google.com/uc?id=1LFKeoI-hb96jlV0K10dO1o04iQPBoFdx
# Download img_align_celeba.zip from Google Drive into the current directory.
MIRRORED_URL = 'https://drive.google.com/uc?id=1LFKeoI-hb96jlV0K10dO1o04iQPBoFdx'
! gdown {MIRRORED_URL}
! ls -l
# Remove any previous DATA_DIR, then unzip the archive into it quietly (-q).
DATA_DIR = 'data'
DATA_SUBDIR = 'img_align_celeba'
! rm -rf {DATA_DIR}
! unzip -d {DATA_DIR} -q {DATA_SUBDIR}.zip
# List the first few extracted entries and count the entries in the folder.
! ls -l {DATA_DIR}/{DATA_SUBDIR} | head
! ls {DATA_DIR}/{DATA_SUBDIR} | wc
# Collect the path of every .jpg file in the extracted dataset folder.
import os
import glob
import numpy as np
jpg_pattern = os.path.join(DATA_DIR, DATA_SUBDIR, '*.jpg')
all_file_paths = np.array(glob.glob(jpg_pattern))
n_all_images = all_file_paths.shape[0]
print(n_all_images)
# Select a few image files at random (indices are drawn with replacement).
n_to_show = 10
selected_indices = np.random.choice(n_all_images, size=n_to_show)
selected_paths = all_file_paths[selected_indices]
# Display some images.
%matplotlib inline
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, n_to_show, figsize=(1.4 * n_to_show, 1.4))
for i, path in enumerate(selected_paths):
img = tf.keras.preprocessing.image.load_img(path)
ax[i].imshow(img)
ax[i].axis('off')
plt.show()
TRAIN_DATA_DIR = 'train_data'
TEST_DATA_DIR = 'test_data'
import os
split = 0.05
indices = np.arange(n_all_images)
np.random.shuffle(indices)
train_indices = indices[: -int(n_all_images * split)]
test_indices = indices[-int(n_all_images * split):]
! rm -rf {TRAIN_DATA_DIR} {TEST_DATA_DIR}
dst=f'{TRAIN_DATA_DIR}/celeba'
if not os.path.exists(dst):
os.makedirs(dst)
for idx in train_indices:
path = all_file_paths[idx]
dpath, fname = os.path.split(path)
os.symlink(f'../../{path}', f'{dst}/{fname}')
dst=f'{TEST_DATA_DIR}/celeba'
if not os.path.exists(dst):
os.makedirs(dst)
for idx in test_indices:
path = all_file_paths[idx]
dpath, fname = os.path.split(path)
os.symlink(f'../../{path}', f'{dst}/{fname}')
INPUT_DIM = (128, 128, 3)
BATCH_SIZE = 32
# The only preprocessing is multiplying pixel values by 1/255.
data_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)
# Options shared by the training and validation iterators: images are resized
# to the first two entries of INPUT_DIM, and class_mode='input' is used.
flow_options = dict(
    target_size=INPUT_DIM[:2],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='input',
)
data_flow = data_gen.flow_from_directory(TRAIN_DATA_DIR, **flow_options)
val_data_flow = data_gen.flow_from_directory(TEST_DATA_DIR, **flow_options)
# Print the length of each iterator.
print(len(data_flow))
print(len(val_data_flow))
save_path3 = '/content/drive/MyDrive/ColabRun/VAE_CelebA03/'
# Load the parameters and model weights that were saved before.
from nw.VariationalAutoEncoder import VariationalAutoEncoder
vae3 = VariationalAutoEncoder.load(save_path3)
print(vae3.epoch)
import os
import pickle
# The loss history is stored in save_path3 as loss_<epoch - 1>.pkl.
var_path = f'{save_path3}/loss_{vae3.epoch-1}.pkl'
# NOTE: unpickling can execute arbitrary code; only load files you wrote yourself.
with open(var_path, 'rb') as history_file:
    saved_losses = pickle.load(history_file)
# The pickle holds six series: total / reconstruction / KL loss, then the
# same three for validation.
loss3_1, rloss3_1, kloss3_1, val_loss3_1, val_rloss3_1, val_kloss3_1 = saved_losses
print(len(loss3_1))
LEARNING_RATE = 0.0005
# ExponentialDecay with the default staircase=False computes
#   initial_learning_rate * decay_rate ** (step / decay_steps)
# (the exponent is floored to an integer only when staircase=True).
# decay_steps is len(data_flow), so the rate is multiplied by 0.96 every
# len(data_flow) optimizer steps -- presumably once per epoch, if
# train_tf_generator applies one optimizer step per batch (TODO confirm).
steps_per_decay = len(data_flow)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    decay_rate=0.96,
    decay_steps=steps_per_decay,
    initial_learning_rate=LEARNING_RATE,
)
optimizer3 = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
# Train with validation; run_folder and save_epoch_interval are passed
# through to train_tf_generator, which returns the training log.
train_options = dict(
    epochs=200,
    run_folder=save_path3,
    optimizer=optimizer3,
    save_epoch_interval=50,
    validation_data_flow=val_data_flow,
)
log3_2 = vae3.train_tf_generator(data_flow, **train_options)
# Pull the six loss series of the latest training run out of its log.
(loss3_2, rloss3_2, kloss3_2,
 val_loss3_2, val_rloss3_2, val_kloss3_2) = [
    log3_2[key]
    for key in (
        'loss', 'reconstruction_loss', 'kl_loss',
        'val_loss', 'val_reconstruction_loss', 'val_kl_loss',
    )
]
# Append each new series to the matching series loaded from the earlier run.
(loss3, rloss3, kloss3,
 val_loss3, val_rloss3, val_kloss3) = [
    np.concatenate([earlier, latest], axis=0)
    for earlier, latest in (
        (loss3_1, loss3_2),
        (rloss3_1, rloss3_2),
        (kloss3_1, kloss3_2),
        (val_loss3_1, val_loss3_2),
        (val_rloss3_1, val_rloss3_2),
        (val_kloss3_1, val_kloss3_2),
    )
]
# Plot each training series together with its validation counterpart:
# total loss, reconstruction loss and KL loss, in that order.
total_series = [loss3, val_loss3]
total_labels = ['total_loss', 'val_total_loss']
VariationalAutoEncoder.plot_history(total_series, total_labels)
reconstruction_series = [rloss3, val_rloss3]
reconstruction_labels = ['reconstruction_loss', 'val_reconstruction_loss']
VariationalAutoEncoder.plot_history(reconstruction_series, reconstruction_labels)
kl_series = [kloss3, val_kloss3]
kl_labels = ['kl_loss', 'val_kl_loss']
VariationalAutoEncoder.plot_history(kl_series, kl_labels)
x_, _ = next(val_data_flow)
selected_images = x_[:10]
z_mean3, z_log_var3, z3 = vae3.encoder(selected_images)
reconst_images3 = vae3.decoder(z3).numpy() # decoder() returns Tensor for @tf.function declaration. Convert the Tensor to numpy array.
txts3 = [f'{p[0]:.3f}, {p[1]:.3f}' for p in z3 ]
%matplotlib inline
VariationalAutoEncoder.showImages(selected_images, reconst_images3, txts3, 1.4, 1.4)
# Save the combined loss series so a future training session can load them
# and keep extending the same history.
import os
import pickle
var_path = f'{save_path3}/loss_{vae3.epoch-1}.pkl'
dpath = os.path.dirname(var_path)
if dpath:
    # exist_ok=True replaces the former exists()-then-makedirs() check, which
    # could fail if the directory appeared between the two calls.
    os.makedirs(dpath, exist_ok=True)
with open(var_path, 'wb') as f:
    # Order matters: the loading code unpacks the list positionally as
    # total, reconstruction, KL, then the three validation series.
    pickle.dump(
        [loss3, rloss3, kloss3, val_loss3, val_rloss3, val_kloss3],
        f,
    )