# Explore the platform of the execution environment
# 実行環境のプラットホームを調べる
import os
import platform

# Detect the execution platform; later cells use these flags to choose
# setup steps and file locations.
try:
    # get_ipython() is injected by IPython/Jupyter and is undefined when this
    # code runs as a plain Python script.
    _shell = get_ipython()
except NameError:
    _shell = None

is_colab = 'google.colab' in str(_shell)          # Google Colab
is_win = (os.name == 'nt')                        # Windows
# platform.system() is 'Darwin' on macOS regardless of the Python version,
# whereas the platform.platform() string does not always contain 'macOS'.
is_mac = (platform.system() == 'Darwin')          # mac


## [CHECK] If you get errors when running this page, please install specific versions of the packages.
### [CHECK] このページの実行でエラーが起きる場合は、特定のバージョンのパッケージをインストールしてください。

#! pip install tensorflow==2.8.2
#! pip install numpy==1.21.6
#! pip install matplotlib==3.2.2


# Colab-only setup. The lines below are IPython magics / shell escapes, so this
# cell only runs inside a notebook.
if is_colab:
    # Request TensorFlow 2.x (the recorded output of this cell reports that
    # the magic has no effect because Colab only includes TensorFlow 2.x).
    %tensorflow_version 2.x
    # Install the specific versions of GPU Libraries
    !apt install --allow-change-held-packages libcudnn8=8.4.1.50-1+cuda11.6

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
libcudnn8 is already the newest version (8.4.1.50-1+cuda11.6).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


import tensorflow as tf
print(tf.__version__)

2.8.2


import numpy as np
print(np.__version__)

1.21.6


%matplotlib inline
import matplotlib.pyplot as plt

# version check
import matplotlib
print(matplotlib.__version__)

3.2.2


# Base directory for files used by this notebook; defaults to the current directory.
FILE_PREFIX = '.'

if is_colab:
    # On Colab, mount Google Drive and keep files under MyDrive.
    from google.colab import drive
    drive.mount('/content/drive')

    FILE_PREFIX = '/content/drive/MyDrive/'
elif is_win or is_mac:
    # On Windows / macOS, use the Documents folder in the user's home directory.
    FILE_PREFIX = os.path.join(os.path.expanduser('~'), 'Documents')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### [CHECK] If you want to change the value of the FILE_PREFIX variable, please do it here.

#FILE_PREFIX = "."        ### [CHECK] ###


### [CHECK] If you want to change the saving path, please change it here.

# Directory that receives the generated images and weight files.
# The components are passed separately so os.path.join inserts the platform's
# own separator instead of mixing a hard-coded '/' into a Windows path.
SAVE_PREFIX = os.path.join(FILE_PREFIX, "tf", "styletransfer_run")

print(SAVE_PREFIX)

/content/drive/MyDrive/tf/styletransfer_run


# Setup

import tensorflow as tf
import numpy as np

# Fetch the content image and the style reference image with Keras' get_file helper.
base_image_path = tf.keras.utils.get_file(
    fname="paris.jpg", origin="https://i.imgur.com/F28w3Ac.jpg"
)
style_reference_image_path = tf.keras.utils.get_file(
    fname="starry_night.jpg", origin="https://i.imgur.com/9ooB60I.jpg"
)

# File-name prefix used when the generated images are saved.
result_prefix = "paris_generated"

# Weights of the individual loss terms.
content_weight = 2.5e-8
style_weight = 1e-6
total_variation_weight = 1e-6

# Size of the generated picture: the height is fixed at 400 rows and the width
# is scaled so that the content image keeps its aspect ratio.
base_img = tf.keras.preprocessing.image.load_img(base_image_path)
img_nrows = 400
width, height = base_img.size
img_ncols = int(width * img_nrows / height)

# The style image at its original size (used for display).
style_img = tf.keras.preprocessing.image.load_img(style_reference_image_path)


# 画像を表示する
%matplotlib inline
import matplotlib.pyplot as plt

# Show the content image (top) and the style image (bottom) with the axes hidden.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8, 6 * 2))

for axis, picture in zip(ax, (base_img, style_img)):
    axis.imshow(picture)
    axis.axis('off')

plt.show()


# 画像の前処理および後処理のための関数
def preprocess_image(image_path, rows=400, cols=640):
    """Load an image file and convert it into a VGG19 input tensor.

    The image is loaded at size (rows, cols), converted to an array, given a
    leading batch axis, run through the VGG19 ``preprocess_input`` function
    and finally wrapped in a TensorFlow tensor.
    """
    keras_image = tf.keras.preprocessing.image
    pixels = keras_image.img_to_array(
        keras_image.load_img(image_path, target_size=(rows, cols))
    )
    batch = np.expand_dims(pixels, axis=0)
    prepared = tf.keras.applications.vgg19.preprocess_input(batch)
    return tf.convert_to_tensor(prepared)

def deprocess_image(x, rows=400, cols=640):
    """Convert a VGG19-preprocessed array back into a displayable RGB image.

    Args:
        x: Array-like holding rows * cols * 3 values, in BGR channel order
            with the VGG mean pixel subtracted.
        rows: Image height in pixels.
        cols: Image width in pixels.

    Returns:
        A uint8 array of shape (rows, cols, 3) in RGB channel order.

    The input array is left untouched.
    """
    # Work on a private copy: reshape() may return a view, and the in-place
    # additions below would otherwise write into the caller's buffer.
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        # In-place addition of fractional means is rejected for integer arrays.
        x = x.astype(np.float32)
    x = x.reshape((rows, cols, 3))
    # Remove zero-center by mean pixel, VGG_MEAN=[103.939, 116.779, 123.68]
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR' -> 'RGB'
    x = x[:, :, ::-1]
    # Saturate to the valid pixel range before the cast to 8-bit.
    return np.clip(x, 0, 255).astype('uint8')


def gram_matrix(x):
    """Return the Gram matrix of a (rows, cols, channels) feature map.

    The channel axis is moved to the front, every channel is flattened into
    one row, and that matrix is multiplied by its own transpose.
    """
    channels_first = tf.transpose(x, (2, 0, 1))
    channel_count = tf.shape(channels_first)[0]
    flat = tf.reshape(channels_first, (channel_count, -1))
    return tf.matmul(flat, tf.transpose(flat))

def style_loss(style, combination, rows=400, cols=640):
    """Return the scaled squared distance between two Gram matrices.

    The sum of squared differences between the Gram matrices of ``style`` and
    ``combination`` is divided by 4 * channels**2 * size**2, where channels
    is fixed at 3 and size is rows * cols.
    """
    gram_gap = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = rows * cols
    normalizer = 4.0 * (channels ** 2) * (size ** 2)
    return tf.reduce_sum(tf.square(gram_gap)) / normalizer

def content_loss(base, combination):
    """Return the sum of squared differences between the two feature maps."""
    difference = combination - base
    return tf.reduce_sum(tf.square(difference))

def total_variation_loss(x, rows=400, cols=640):
    """Return the total variation loss of a 4-D image tensor.

    For every pixel except those in the last row and column, the squared
    differences with the pixel below and the pixel to the right are added,
    raised to the power 1.25 and summed.
    """
    anchor = x[:, :rows - 1, :cols - 1, :]
    below = x[:, 1:, :cols - 1, :]
    right = x[:, :rows - 1, 1:, :]
    vertical_sq = tf.square(anchor - below)     # difference with the pixel below
    horizontal_sq = tf.square(anchor - right)   # difference with the pixel to the right
    return tf.reduce_sum(tf.pow(vertical_sq + horizontal_sq, 1.25))


# VGG19 with ImageNet weights and without the top classification layers.
model = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')

# A model that returns the output of every VGG19 layer, keyed by layer name.
output_dict = {layer.name: layer.output for layer in model.layers}
feature_extractor = tf.keras.models.Model(inputs=model.inputs, outputs=output_dict)


model.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0         
                                                                 
 block3_conv1 (Conv2D)       (None, None, None, 256)   295168    
                                                                 
 block3_conv2 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_conv3 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_conv4 (Conv2D)       (None, None, None, 256)   590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, None, None, 256)   0         
                                                                 
 block4_conv1 (Conv2D)       (None, None, None, 512)   1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_conv4 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, None, None, 512)   0         
                                                                 
 block5_conv1 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_conv4 (Conv2D)       (None, None, None, 512)   2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, None, None, 512)   0         
                                                                 
=================================================================
Total params: 20,024,384
Trainable params: 20,024,384
Non-trainable params: 0
_________________________________________________________________


# Layers used for the style loss: the first convolution of each of the
# five VGG19 blocks.
style_layer_names = [f'block{block}_conv1' for block in range(1, 6)]

# Layer used for the content loss.
content_layer_name = 'block5_conv2'

def compute_loss(combination_image, base_image, style_reference_image):
    """Return the weighted sum of content, style and total variation losses.

    The three images are stacked into one batch and passed through
    ``feature_extractor`` once; the per-layer activations are then split
    back by batch index to evaluate each loss term.
    """
    # Batch order: 0 = base image, 1 = style reference, 2 = combination.
    batch = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0
    )
    features = feature_extractor(batch)

    # Start from a scalar zero and accumulate the terms.
    loss = tf.zeros(shape=())

    # Content term: base image vs. combination at the content layer.
    content_activations = features[content_layer_name]
    loss += content_weight * content_loss(
        content_activations[0, :, :, :],
        content_activations[2, :, :, :],
    )

    # Style term: style reference vs. combination, weighted equally per layer.
    per_layer_weight = style_weight / len(style_layer_names)
    for name in style_layer_names:
        activations = features[name]
        layer_style_loss = style_loss(
            activations[1, :, :, :],
            activations[2, :, :, :],
            img_nrows,
            img_ncols,
        )
        loss += per_layer_weight * layer_style_loss

    # Total variation term on the combination image itself.
    loss += total_variation_weight * total_variation_loss(
        combination_image, img_nrows, img_ncols
    )

    return loss


@tf.function
def compute_loss_and_grads(combination_image, base_image, style_reference_image):
    """Return the loss and its gradient with respect to ``combination_image``."""
    with tf.GradientTape() as grad_tape:
        total = compute_loss(combination_image, base_image, style_reference_image)
    return total, grad_tape.gradient(total, combination_image)


# SGD whose learning rate starts at 100.0 and follows an exponential decay
# schedule (decay rate 0.96, decay steps 100).
learning_rate_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96
)
optimizer = tf.keras.optimizers.SGD(learning_rate_schedule)

# Preprocessed content and style images at the target size.
base_image = preprocess_image(base_image_path, img_nrows, img_ncols)
style_reference_image = preprocess_image(
    style_reference_image_path, img_nrows, img_ncols
)
# The image being optimised, initialised from the content image.
combination_image = tf.Variable(
    preprocess_image(base_image_path, img_nrows, img_ncols)
)


import datetime

iterations = 4000

# Create the output directory once, before the loop. exist_ok=True replaces
# the racy os.path.exists() + os.makedirs() pair. An empty SAVE_PREFIX means
# "current directory" and needs no creation.
if SAVE_PREFIX != '':
    os.makedirs(SAVE_PREFIX, exist_ok=True)

start_time = datetime.datetime.now()
for i in range(1, iterations + 1):
    # One optimisation step applied to the pixels of combination_image.
    loss, grads = compute_loss_and_grads(
        combination_image,
        base_image,
        style_reference_image
    )
    optimizer.apply_gradients([(grads, combination_image)])

    # Report progress and save a snapshot for the first nine steps and then
    # every 100th step.
    if i < 10 or i % 100 == 0:
        elapsed_time = datetime.datetime.now() - start_time
        print(f'{i}/{iterations} loss={loss:.3f} {elapsed_time}')
        img = deprocess_image(combination_image.numpy(), img_nrows, img_ncols)
        tf.keras.preprocessing.image.save_img(os.path.join(SAVE_PREFIX, f'{result_prefix}_{i}.png'), img)
        # NOTE(review): only combination_image is passed to the optimizer, so
        # the VGG19 weights written here look identical in every snapshot;
        # consider saving them once or dropping this call.
        model.save_weights(os.path.join(SAVE_PREFIX, f'weights_{i}.h5'))

1/4000 loss=129682.633 0:00:01.149144
2/4000 loss=88181.367 0:00:02.910192
3/4000 loss=61806.898 0:00:04.398269
4/4000 loss=47752.000 0:00:10.524030
5/4000 loss=40212.648 0:00:17.073770
6/4000 loss=35858.645 0:00:23.019150
7/4000 loss=33105.453 0:00:25.040859
8/4000 loss=31132.023 0:00:32.059365
9/4000 loss=29602.184 0:00:37.971021
100/4000 loss=11018.412 0:00:45.993662
200/4000 loss=8514.305 0:00:54.847238
300/4000 loss=7571.876 0:01:07.920508
400/4000 loss=7064.247 0:01:16.529676
500/4000 loss=6736.470 0:01:25.113611
600/4000 loss=6501.880 0:01:33.521652
700/4000 loss=6323.258 0:01:42.468605
800/4000 loss=6181.448 0:01:51.694051
900/4000 loss=6065.218 0:02:01.481402
1000/4000 loss=5967.510 0:02:10.867779
1100/4000 loss=5884.394 0:02:19.447000
1200/4000 loss=5812.719 0:02:29.956758
1300/4000 loss=5750.338 0:02:43.472120
1400/4000 loss=5695.677 0:02:52.073777
1500/4000 loss=5647.251 0:03:05.595724
1600/4000 loss=5604.086 0:03:19.349829
1700/4000 loss=5565.345 0:03:33.077539
1800/4000 loss=5530.551 0:03:41.716354
1900/4000 loss=5499.004 0:03:55.812361
2000/4000 loss=5470.346 0:04:04.402788
2100/4000 loss=5444.279 0:04:13.045851
2200/4000 loss=5420.424 0:04:21.637045
2300/4000 loss=5398.479 0:04:35.719099
2400/4000 loss=5378.232 0:04:44.319641
2500/4000 loss=5359.581 0:04:58.410412
2600/4000 loss=5342.394 0:05:07.233716
2700/4000 loss=5326.505 0:05:21.431637
2800/4000 loss=5311.761 0:05:30.048280
2900/4000 loss=5298.058 0:05:44.231710
3000/4000 loss=5285.284 0:05:52.824746
3100/4000 loss=5273.375 0:06:01.459197
3200/4000 loss=5262.232 0:06:15.210375
3300/4000 loss=5251.796 0:06:23.811187
3400/4000 loss=5242.021 0:06:32.486263
3500/4000 loss=5232.848 0:06:46.351155
3600/4000 loss=5224.250 0:07:00.320290
3700/4000 loss=5216.194 0:07:14.161143
3800/4000 loss=5208.595 0:07:22.754298
3900/4000 loss=5201.430 0:07:36.649543
4000/4000 loss=5194.661 0:07:50.508291


# 作成された画像を表示する

%matplotlib inline
import matplotlib.pyplot as plt

# Iteration numbers whose saved snapshots are displayed, one per row.
indices = [1, 9, 100, 500, 1000, 2000, 3000, 4000]
n = len(indices)

fig, ax = plt.subplots(n, 1, figsize=(5.99*4, 4.00*4 * n))

for axis, step in zip(ax, indices):
    fname = f'{result_prefix}_{step}.png'
    img = tf.keras.preprocessing.image.load_img(os.path.join(SAVE_PREFIX, fname))
    axis.imshow(img)
    axis.set_title(fname)
    axis.axis('off')

plt.show()

事前準備

保存に関する設定

5.5 ニューラルスタイル変換

5.5.1 コンテンツ損失

5.5.2 スタイル損失

5.5.3 全変動損失

Neural style transfer

Introduction

画像の前処理と後処理

スタイル損失の計算

VGG19の途中までの層を抜き出す

loss と gradient の計算に tf.function デコレータを追加する

training