In [1]:
# [Note to self] Setup required before music21 can render scores.
import os
from music21 import environment

# On Windows, point music21 at the MuseScore 3 executable so that
# Score.show() can render PNG / MusicXML output.
if os.name == 'nt':
    us = environment.UserSettings()
    us['musescoreDirectPNGPath']='C:/Program Files/MuseScore 3/bin/MuseScore3.exe'
    us['musicxmlPath']='C:/Program Files/MuseScore 3/bin/MuseScore3.exe'

7.7 MuseGAN の解析

MuseGANを使って楽譜を生成し、その後、出力への影響を見るために入力のノイズパラメータを変更して実験を行う。

Generatorの出力は、最終層が $activation=\tanh$であるため $[-1, 1]$ の範囲の値となる。 これを各トラックの一つの音符に変換するため、84音程のすべての中で最大値を持つ音符を選択する。 MuseGANの原論文では、各トラックが複数の音符を持てるように閾値0を使っている、とのこと。 しかし、この本の例では、単に最大値を採用して、トラック毎に、各タイムステップ毎に一つの音符を生成している。

Data

=============================== Start of code from gdl_ch07_04 ===============================

In [2]:
import os

# Location of the JSB chorales dataset archive and the run folder where
# the trained weights / generated samples are stored.
DATA_PATH = '../local/GDL_code/data/'
SECTION = 'compose'
DATA_NAME = 'chorales'
FILENAME = 'Jsb16thSeparated.npz'

DATAFILE_PATH = os.path.join(DATA_PATH, SECTION, DATA_NAME, FILENAME)

RUN_FOLDER=f'run/{SECTION}/{DATA_NAME}'
In [3]:
# GDL_code/utils/loaders.py
# [自分へのメモ 1] 元コードでは load_music(path, filename, bars, steps_per_bar) を指定するが、ここではloadだけを行う関数を用意した。
# [自分へのメモ 2] 元コードではパス './data/' を足してロードするが、ここでは f'{data_path}/{filename}' をロードするように変更した。
# また、パスとファイル名をまとめて関数に渡すように変更した。
# [自分へのメモ 3]「小節」を意味する英語は bar, measure 
# [自分へのメモ 4] numpy load() の返り値について
#  引数が.npy ファイルのとき a single array
#  引数が .npz ファイルのとき 辞書オブジェクト {filename: array} key-value pairs。このとき f[key]形式でデータにアクセスできる。

import os
import numpy as np

def get_music_data(file_path):
    """Load the JSB chorales .npz archive and return (train, valid, test).

    The archive behaves like a mapping of split name -> array; every key
    found in the file is printed as a quick sanity check before the three
    expected splits are extracted.
    """
    with np.load(file_path, encoding='bytes', allow_pickle=True) as archive:
        for split_name in archive.keys():
            print(split_name)
        splits = (archive['train'], archive['valid'], archive['test'])
    return splits
In [4]:
train_data, val_data, test_data = get_music_data(DATAFILE_PATH)
test
train
valid
In [5]:
# [自分へのメモ]
# np.any(x) : 引数xのいずれかが Trueならば True
# np.isnan(x): 引数xを要素ごとにnanであるか、真理値の配列を返す
# np.delete(arr, object, axis=None) : 配列から行や列を削除する

def make_data(data, n_bars, n_steps_per_bar):
    """Convert raw chorale arrays into one-hot training tensors.

    Parameters
    ----------
    data : iterable of float arrays, each of shape (timesteps, n_tracks);
        rests / padding are encoded as NaN.
    n_bars : int
        Number of bars kept per song.
    n_steps_per_bar : int
        Number of 16th-note steps per bar.

    Returns
    -------
    data_binary : float array (n_songs, n_bars, n_steps_per_bar, 84, n_tracks)
        One-hot pitch encoding with values in {-1, 1} (NaN class dropped).
    data_ints : int array (n_songs, n_bars, n_steps_per_bar, n_tracks)
        Integer pitches (84 marks a former NaN).
    data : the input, returned unchanged.
    """
    n_steps = n_bars * n_steps_per_bar
    data_ints = []
    for x in data:
        # Skip leading timesteps (4 rows at a time) until a window with no
        # NaN is found; an empty slice yields np.any(...) == False, so the
        # loop also terminates on all-NaN songs.
        counter = 0
        while np.any(np.isnan(x[counter:(counter + 4)])):
            counter += 4

        # Keep the song only if a full window fits AFTER the skip offset.
        # Bug fix: the original tested `n_steps < x.shape[0]` without
        # accounting for `counter`, which could append a short slice and
        # make the later reshape fail on a ragged array.
        if counter + n_steps < x.shape[0]:
            data_ints.append(x[counter:(counter + n_steps), :])

    data_ints = np.array(data_ints)

    n_songs = data_ints.shape[0]
    n_tracks = data_ints.shape[2]

    data_ints = data_ints.reshape([n_songs, n_bars, n_steps_per_bar, n_tracks])

    max_note = 83  # ordinary pitches are <= 83; NaN is mapped to class 84

    where_are_NaNs = np.isnan(data_ints)  # guard: assign a value to any remaining NaN
    data_ints[where_are_NaNs] = max_note + 1  # value assigned to NaN
    max_note = max_note + 1

    data_ints = data_ints.astype(int)

    num_classes = max_note + 1

    # One-hot encode, map 0 -> -1 (to match the generator's tanh range),
    # then drop the NaN class column.
    data_binary = np.eye(num_classes)[data_ints]
    data_binary[data_binary == 0] = -1
    data_binary = np.delete(data_binary, max_note, -1)

    # (songs, bars, steps, tracks, pitches) -> (songs, bars, steps, pitches, tracks)
    data_binary = data_binary.transpose([0, 1, 2, 4, 3])

    return data_binary, data_ints, data
In [6]:
# Score dimensions used throughout: 2 bars of 16 sixteenth-note steps,
# 84 possible pitches, 4 voices (tracks).
BATCH_SIZE = 64
n_bars = 2
n_steps_per_bar = 16
n_pitches = 84
n_tracks = 4
In [7]:
# Build the one-hot training tensors from the raw chorales.
data_binary, data_ints, raw_data = make_data(train_data, n_bars, n_steps_per_bar)
print(data_binary.shape)
(229, 2, 16, 84, 4)
In [8]:
data_binary = np.squeeze(data_binary)   # drop any size-1 dimensions (there are none here)
print(data_binary.shape)    # unchanged
(229, 2, 16, 84, 4)
In [9]:
# GDL_code/models/MuseGAN.py
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

from functools import partial

import matplotlib.pyplot as plt

import music21
import random
import os
import pickle

import datetime


def grad(y, x):
    """Return d(y)/d(x) computed inside a Keras Lambda layer (TF1 graph mode)."""
    gradient_layer = tf.keras.layers.Lambda(
        lambda tensors: tf.keras.backend.gradients(tensors[0], tensors[1]),
        output_shape=[1]
    )
    return gradient_layer([y, x])


class RandomWeightedAverage(tf.keras.layers.Layer):
    """Per-sample random convex combination of two equally-shaped inputs.

    Used to build the interpolated samples for the WGAN-GP gradient penalty.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

    def call(self, inputs):
        real, fake = inputs[0], inputs[1]
        # One mixing coefficient per sample, broadcast over the score dims.
        weight = tf.keras.backend.random_uniform((self.batch_size, 1, 1, 1, 1))
        return weight * real + (1 - weight) * fake

    
class MuseGAN():
    """Multi-track, multi-bar music GAN trained with the WGAN-GP objective.

    The generator combines four noise sources — chords (per-bar, shared
    across tracks), style (shared across bars and tracks), melody (per-bar,
    per-track) and groove (per-track, shared across bars) — into a
    piano-roll score of shape (n_bars, n_steps_per_bar, n_pitches, n_tracks).
    The critic is a 3D-convolutional network that scores whole scores.
    Based on Dong et al. (2017) / Foster, "Generative Deep Learning", ch. 7.
    """

    def __init__(
            self,
            input_dim,
            critic_learning_rate,
            generator_learning_rate,
            optimizer,                 # [nitta's memo] spelling
            grad_weight,
            z_dim,
            batch_size,
            n_tracks,
            n_bars,
            n_steps_per_bar,
            n_pitches):
        self.name = 'MuseGAN'

        self.input_dim = input_dim
        self.critic_learning_rate = critic_learning_rate
        self.generator_learning_rate = generator_learning_rate
        self.optimizer = optimizer
        self.grad_weight = grad_weight          # weight of the gradient-penalty loss term
        self.z_dim = z_dim
        self.batch_size = batch_size
        self.n_tracks = n_tracks
        self.n_bars = n_bars
        self.n_steps_per_bar = n_steps_per_bar
        self.n_pitches = n_pitches

        self.weight_init = tf.keras.initializers.RandomNormal(mean=0., stddev=0.02) # he_normal

        self.d_losses = []
        self.g_losses = []
        self.epoch = 0

        self._build_critic()
        self._build_generator()
        self._build_adversarial()


    def gradient_penalty_loss(self, y_true, y_pred, interpolated_samples):
        '''
        Computes the WGAN-GP gradient penalty: the mean squared deviation of
        the critic-gradient L2 norm (w.r.t. the interpolated samples) from 1.
        '''
        gradients = grad(y_pred, interpolated_samples)[0]
        gradients_sqr = tf.keras.backend.square(gradients)
        gradients_sqr_sum = tf.keras.backend.sum(
            gradients_sqr,
            axis=np.arange(1, len(gradients_sqr.shape))  # sum over all non-batch axes
        )
        gradient_l2_norm = tf.keras.backend.sqrt(gradients_sqr_sum)
        gradient_penalty = tf.keras.backend.square(1 - gradient_l2_norm)
        return tf.keras.backend.mean(gradient_penalty)


    def wasserstein(self, y_true, y_pred):
        """Wasserstein loss: labels are +1 (real) / -1 (fake)."""
        return - tf.keras.backend.mean(y_true * y_pred)


    def get_activation(self, activation):
        """Map an activation name to a layer. (Not referenced elsewhere in
        this file; kept for parity with the book's code.)"""
        if activation == 'leaky_relu':
            layer = tf.keras.layers.LeakyReLU(alpha=0.2)
        else:
            layer = tf.keras.layers.Activation(activation)
        return layer


    def conv(self, x, f, k, s, a, p):
        """Conv3D block: f filters, kernel k, strides s, padding p,
        followed by activation a ('relu' | 'lrelu' | anything else = none)."""
        x = tf.keras.layers.Conv3D(
            filters = f,
            kernel_size = k,
            padding = p,
            strides = s,
            kernel_initializer = self.weight_init
        )(x)
        if a == 'relu':
            x = tf.keras.layers.Activation(a)(x)
        elif a == 'lrelu':
            x = tf.keras.layers.LeakyReLU()(x)
        return x


    def _build_critic(self):
        """Build self.critic: 3D convolutions over (bars, steps, pitches),
        then Dense layers down to a single unbounded Wasserstein score."""
        critic_input = tf.keras.layers.Input(shape=self.input_dim, name='critic_input')

        x = critic_input
        x = self.conv(x, f=128, k=(2,1,1), s=(1,1,1), a='lrelu', p='valid')
        x = self.conv(x, f=128, k=(self.n_bars-1,1,1), s=(1,1,1), a='lrelu', p='valid')
        x = self.conv(x, f=128, k=(1,1,12), s=(1,1,12), a='lrelu', p='same')
        x = self.conv(x, f=128, k=(1,1,7), s=(1,1,7), a='lrelu', p='same')
        x = self.conv(x, f=128, k=(1,2,1), s=(1,2,1), a='lrelu', p='same')
        x = self.conv(x, f=128, k=(1,2,1), s=(1,2,1), a='lrelu', p='same')
        x = self.conv(x, f=256, k=(1,4,1), s=(1,2,1), a='lrelu', p='same')
        x = self.conv(x, f=512, k=(1,3,1), s=(1,2,1), a='lrelu', p='same')

        x = tf.keras.layers.Flatten()(x)
        x = tf.keras.layers.Dense(1024, kernel_initializer=self.weight_init)(x)
        x = tf.keras.layers.LeakyReLU()(x)
        # No activation: a WGAN critic outputs an unbounded score, not a probability.
        critic_output = tf.keras.layers.Dense(1, activation=None, kernel_initializer=self.weight_init)(x)

        self.critic = tf.keras.models.Model(critic_input, critic_output)


    def conv_t(self, x, f, k, s, a, p, bn):
        """Conv2DTranspose block with optional BatchNorm (bn) and
        activation a ('relu' | 'lrelu' | 'tanh' via caller's choice of a)."""
        x = tf.keras.layers.Conv2DTranspose(
            filters=f,
            kernel_size=k,
            padding=p,
            strides=s,
            kernel_initializer=self.weight_init
            )(x)

        if bn:
            x = tf.keras.layers.BatchNormalization(momentum=0.9)(x)

        if a == 'relu':
            x = tf.keras.layers.Activation(a)(x)
        elif a == 'lrelu':
            x = tf.keras.layers.LeakyReLU()(x)

        return x


    def TemporalNetwork(self):
        """Expand a single z_dim vector into one z_dim vector per bar:
        (z_dim,) -> (n_bars, z_dim), via transposed convolutions."""
        input_layer = tf.keras.layers.Input(shape=(self.z_dim,), name='temporal_input')

        x = tf.keras.layers.Reshape([1,1,self.z_dim])(input_layer)
        x = self.conv_t(x, f=1024, k=(2,1), s=(1,1), a='relu', p='valid', bn=True)
        x = self.conv_t(x, f=self.z_dim, k=(self.n_bars-1,1), s=(1,1), a='relu', p='valid', bn=True)

        output_layer = tf.keras.layers.Reshape([self.n_bars, self.z_dim])(x)

        return tf.keras.models.Model(input_layer, output_layer)


    def BarGenerator(self):
        """Generate a single bar for a single track from the concatenated
        (chords, style, melody, groove) vector:
        (4*z_dim,) -> (1, n_steps_per_bar, n_pitches, 1), tanh output in [-1, 1]."""
        input_layer = tf.keras.layers.Input(shape=(self.z_dim*4,), name='bar_generator_input')

        x = tf.keras.layers.Dense(1024)(input_layer)
        x = tf.keras.layers.BatchNormalization(momentum=0.9)(x)
        x = tf.keras.layers.Activation('relu')(x)

        x = tf.keras.layers.Reshape([2,1,512])(x)
        x = self.conv_t(x, f=512, k=(2,1), s=(2,1), a='relu', p='same', bn=True)
        x = self.conv_t(x, f=256, k=(2,1), s=(2,1), a='relu', p='same', bn=True)
        x = self.conv_t(x, f=256, k=(2,1), s=(2,1), a='relu', p='same', bn=True)
        x = self.conv_t(x, f=256, k=(1,7), s=(1,7), a='relu', p='same', bn=True)
        x = self.conv_t(x, f=1, k=(1,12), s=(1,12), a='tanh', p='same', bn=False)

        output_layer = tf.keras.layers.Reshape([1, self.n_steps_per_bar, self.n_pitches, 1])(x)

        return tf.keras.models.Model(input_layer, output_layer)


    def _build_generator(self):
        """Build self.generator from the four noise inputs; output shape is
        (n_bars, n_steps_per_bar, n_pitches, n_tracks)."""
        chords_input = tf.keras.layers.Input(shape=(self.z_dim,), name='chords_input')
        style_input = tf.keras.layers.Input(shape=(self.z_dim,), name='style_input')
        melody_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='melody_input')
        groove_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='groove_input')

        # chords -> temporal network
        self.chords_tempNetwork = self.TemporalNetwork()
        chords_over_time = self.chords_tempNetwork(chords_input)  # [n_bars, z_dim]

        # melody -> one temporal network per track
        melody_over_time = [None] * self.n_tracks
        self.melody_tempNetwork = [None] * self.n_tracks
        for track in range(self.n_tracks):
            self.melody_tempNetwork[track] = self.TemporalNetwork()
            # The Lambda is applied immediately inside the loop, so `track`
            # is captured at its current value.
            melody_track = tf.keras.layers.Lambda(lambda x: x[:,track,:])(melody_input)
            melody_over_time[track] = self.melody_tempNetwork[track](melody_track)

        # create bar generator for each track
        self.barGen = [None] * self.n_tracks
        for track in range(self.n_tracks):
            self.barGen[track] = self.BarGenerator()

        # create output for every track and bar
        bars_output = [None] * self.n_bars
        for bar in range(self.n_bars):
            track_output = [None] * self.n_tracks
            c = tf.keras.layers.Lambda(
                lambda x: x[:,bar,:],
                name='chords_input_bar_' + str(bar)
            )(chords_over_time)
            s = style_input

            for track in range(self.n_tracks):
                m = tf.keras.layers.Lambda(
                    lambda x: x[:, bar,:]
                )(melody_over_time[track])
                g = tf.keras.layers.Lambda(
                    lambda x: x[:, track, :]
                )(groove_input)
                z_input = tf.keras.layers.Concatenate(
                    axis=1,
                    name=f'total_input_bar_{bar}_track_{track}'
                )([c,s,m,g])
                track_output[track] = self.barGen[track](z_input)

            bars_output[bar] = tf.keras.layers.Concatenate(axis=-1)(track_output)

        generator_output = tf.keras.layers.Concatenate(axis=1, name='concat_bars')(bars_output)
        self.generator = tf.keras.models.Model(
            [chords_input, style_input, melody_input, groove_input],
            generator_output
        )


    def get_opti(self, lr):
        """Return a fresh optimizer instance according to self.optimizer."""
        if self.optimizer == 'adam':
            opti = tf.keras.optimizers.Adam(lr=lr, beta_1=0.5, beta_2=0.9)
        elif self.optimizer == 'rmsprop':
            opti = tf.keras.optimizers.RMSprop(lr=lr)
        else :
            opti = tf.keras.optimizers.Adam(lr=lr)
        return opti


    def set_trainable(self, m, val):
        """Set trainability of model m and all of its layers."""
        m.trainable = val
        for layer in m.layers:
            layer.trainable = val


    def _build_adversarial(self):
        """Build the two compiled training models:
        self.critic_model (critic update, generator frozen; WGAN-GP losses)
        and self.model (generator update, critic frozen; Wasserstein loss)."""

        #############################
        # Critic
        #############################
        self.set_trainable(self.generator, False)

        # real
        real_img = tf.keras.layers.Input(shape=self.input_dim)

        # fake
        chords_input = tf.keras.layers.Input(shape=(self.z_dim,), name='chords_input')
        style_input = tf.keras.layers.Input(shape=(self.z_dim,), name='style_input')
        melody_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='melody_input')
        groove_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='groove_input')

        fake_img = self.generator([chords_input, style_input, melody_input, groove_input])

        #----
        # critic's predicts
        #----
        fake = self.critic(fake_img)
        valid = self.critic(real_img)

        # weighted average between real and fake images
        interpolated_img = RandomWeightedAverage(self.batch_size)([real_img, fake_img])
        validity_interpolated = self.critic(interpolated_img)

        # partial loss function (Keras losses take only (y_true, y_pred),
        # so the interpolated samples are bound in via functools.partial)
        self.partial_gp_loss = partial(
            self.gradient_penalty_loss,
            interpolated_samples=interpolated_img
        )
        self.partial_gp_loss.__name__ = 'gradient_penalty'

        self.critic_model = tf.keras.models.Model(
            inputs=[real_img, chords_input, style_input, melody_input, groove_input],
            outputs=[valid, fake, validity_interpolated]
        )
        self.critic_model.compile(
            loss=[self.wasserstein, self.wasserstein, self.partial_gp_loss],
            optimizer=self.get_opti(self.critic_learning_rate),
            loss_weights=[1, 1, self.grad_weight]
        )

        #############################
        # Generator
        #############################
        self.set_trainable(self.critic, False)
        self.set_trainable(self.generator, True)

        chords_input = tf.keras.layers.Input(shape=(self.z_dim,), name='chords_input')
        style_input = tf.keras.layers.Input(shape=(self.z_dim,), name='style_input')
        melody_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='melody_input')
        groove_input = tf.keras.layers.Input(shape=(self.n_tracks, self.z_dim), name='groove_input')

        # generate image
        img = self.generator([chords_input, style_input, melody_input, groove_input])
        # critic for the image
        model_output = self.critic(img)

        self.model = tf.keras.models.Model(
            [chords_input, style_input, melody_input, groove_input],
            model_output
        )
        self.model.compile(
            optimizer=self.get_opti(self.generator_learning_rate),
            loss=self.wasserstein
        )

        self.set_trainable(self.critic, True)


    def train_critic(self, x_train, batch_size, using_generator):
        """One critic update on a batch of real and generated scores.
        Returns the list of losses from train_on_batch."""

        valid = np.ones((batch_size, 1), dtype=np.float32)
        fake = -np.ones((batch_size, 1), dtype=np.float32)
        dummy = np.zeros((batch_size, 1), dtype=np.float32) # dummy gt for gradient penalty

        if using_generator:
            true_imgs = next(x_train)[0]
            # NOTE(review): a short batch is retried only once; a second
            # short batch would still pass through — confirm acceptable.
            if true_imgs.shape[0] != batch_size:
                true_imgs = next(x_train)[0]
        else:
            idx = np.random.randint(0, x_train.shape[0], batch_size) # low, high, size
            true_imgs = x_train[idx]

        chords_noise = np.random.normal(0, 1, (batch_size, self.z_dim))
        style_noise = np.random.normal(0, 1, (batch_size, self.z_dim))
        melody_noise = np.random.normal(0, 1, (batch_size, self.n_tracks, self.z_dim))
        groove_noise = np.random.normal(0, 1, (batch_size, self.n_tracks, self.z_dim))

        d_loss = self.critic_model.train_on_batch(
            [true_imgs, chords_noise, style_noise, melody_noise, groove_noise],
            [valid, fake, dummy]
        )

        return d_loss


    def train_generator(self, batch_size):
        """One generator update: push critic scores of generated scores
        towards the 'real' label (+1). Returns the scalar loss."""
        valid = np.ones((batch_size, 1), dtype=np.float32)

        chords_noise = np.random.normal(0, 1, (batch_size, self.z_dim))
        style_noise = np.random.normal(0, 1, (batch_size, self.z_dim))
        melody_noise = np.random.normal(0, 1, (batch_size, self.n_tracks, self.z_dim))
        groove_noise = np.random.normal(0, 1, (batch_size, self.n_tracks, self.z_dim))

        return self.model.train_on_batch(
            [chords_noise, style_noise, melody_noise, groove_noise],
            valid
        )


    def train(self,
              x_train,
              batch_size,
              epochs,
              run_folder,
              print_every_n_batches=10,
              n_critic=5,
              using_generator=False):
        """WGAN-GP training loop: n_critic critic updates per generator
        update; periodically samples scores and checkpoints weights."""
        start_time = datetime.datetime.now()
        for epoch in range(self.epoch+1, self.epoch+epochs+1):  # [nitta] changed for adding 1
            if epoch % 100 == 0:
                critic_loops = 5
            else:
                critic_loops = n_critic

            for _ in range(critic_loops):
                d_loss = self.train_critic(x_train, batch_size, using_generator)

            g_loss = self.train_generator(batch_size)

            elapsed_time = datetime.datetime.now() - start_time

            print(f'{epoch} {critic_loops} [D loss: {d_loss[0]:.3f} R {d_loss[1]:.3f} F {d_loss[2]:.3f} G {d_loss[3]:.3f}] [G loss: {g_loss:.3f}] {elapsed_time}')

            self.d_losses.append(d_loss)
            self.g_losses.append(g_loss)

            if epoch % print_every_n_batches == 0:
                self.sample_images(run_folder)
                self.generator.save_weights(os.path.join(run_folder, 'weights/weights-g.h5'))
                self.generator.save_weights(os.path.join(run_folder, f'weights/weights-g-{epoch}.h5'))
                self.critic.save_weights(os.path.join(run_folder, 'weights/weights-c.h5'))
                self.critic.save_weights(os.path.join(run_folder, f'weights/weights-c-{epoch}.h5'))
                self.save_model(run_folder)

            self.epoch += 1


    def sample_images(self, run_folder):
        """Generate r sample scores from fresh noise; save the raw tensor
        as .npy and the rendered scores as MIDI files."""
        r = 5

        chords_noise = np.random.normal(0, 1, (r, self.z_dim))
        style_noise = np.random.normal(0, 1, (r, self.z_dim))
        melody_noise = np.random.normal(0, 1, (r, self.n_tracks, self.z_dim))
        groove_noise = np.random.normal(0, 1, (r, self.n_tracks, self.z_dim))

        gen_scores = self.generator.predict([chords_noise, style_noise, melody_noise, groove_noise])
        np.save(os.path.join(run_folder, f'images/sample_{self.epoch}.npy'), gen_scores)
        self.notes_to_midi(run_folder, gen_scores)


    def binaries_output(self, output):  # [memo] spelling of 'binaries' is wrong in the source code.
        """Pick, per step and track, the pitch with the maximum score.
        `output` is (batch, n_bars, n_steps_per_bar, n_pitches, n_tracks);
        axis=3 is the pitch axis."""
        max_pitches = np.argmax(output, axis=3)
        return max_pitches


    def notes_to_midi(self, run_folder, output, filename=None):
        """Convert each generated score in `output` to a music21 Score and
        write it as a MIDI file under run_folder/samples/.

        Consecutive equal pitches are merged into one longer note, but a
        note is always restarted on a beat boundary (idx % 4 == 0)."""
        for score_num in range(len(output)):
            max_pitches = self.binaries_output(output)
            midi_note_score = max_pitches[score_num].reshape([
                self.n_bars * self.n_steps_per_bar,
                self.n_tracks
            ])
            parts = music21.stream.Score()
            parts.append(music21.tempo.MetronomeMark(number=66))

            for i in range(self.n_tracks):
                last_x = int(midi_note_score[:,i][0])
                s = music21.stream.Part()
                dur = 0

                for idx, x in enumerate(midi_note_score[:, i]):
                    x = int(x)

                    if (x != last_x or idx % 4 == 0) and idx > 0:
                        n = music21.note.Note(last_x)
                        n.duration = music21.duration.Duration(dur)
                        s.append(n)
                        dur = 0

                    last_x = x
                    dur = dur + 0.25

                n = music21.note.Note(last_x)
                n.duration = music21.duration.Duration(dur)
                s.append(n)

                parts.append(s)

            if filename is None:
                parts.write('midi', fp=os.path.join(run_folder, f'samples/sample_{self.epoch}_{score_num}.midi'))
            else:
                # Bug fix: the f-string previously did not interpolate
                # `filename`, so every named score went to the same
                # placeholder path instead of samples/<filename>.midi.
                parts.write('midi', fp=os.path.join(run_folder, f'samples/{filename}.midi'))


    def plot_model(self, run_folder):
        """Write architecture diagrams of the three models to run_folder/viz/."""
        tf.keras.utils.plot_model(
            self.model,
            to_file=os.path.join(run_folder, 'viz/model.png'),  # fixed typo: was 'viz/mode.png'
            show_shapes=True,
            show_layer_names=True
        )
        tf.keras.utils.plot_model(
            self.critic,
            to_file=os.path.join(run_folder, 'viz/critic.png'),
            show_shapes=True,
            show_layer_names=True
        )
        tf.keras.utils.plot_model(
            self.generator,
            to_file=os.path.join(run_folder, 'viz/generator.png'),
            show_shapes=True,
            show_layer_names=True
        )


    def save(self, folder):
        """Pickle the constructor hyper-parameters and save model diagrams."""

        with open(os.path.join(folder, 'params.pkl'), 'wb') as f:
            pickle.dump([
                self.input_dim,
                self.critic_learning_rate,
                self.generator_learning_rate,
                self.optimizer,
                self.grad_weight,
                self.z_dim,
                self.batch_size,
                self.n_tracks,
                self.n_bars,
                self.n_steps_per_bar,
                self.n_pitches
            ], f)

        self.plot_model(folder)


    def save_model(self, run_folder):
        """Save the combined model, critic and generator as .h5 files."""
        self.model.save(os.path.join(run_folder, 'model.h5'))
        self.critic.save(os.path.join(run_folder, 'critic.h5'))
        self.generator.save(os.path.join(run_folder, 'generator.h5'))


    def load_weights(self, run_folder, epoch=None):
        """Load generator/critic weights; a specific checkpoint when `epoch`
        is given, otherwise the latest (un-numbered) weight files."""

        if epoch is None:

            self.generator.load_weights(os.path.join(run_folder, 'weights', 'weights-g.h5'))
            self.critic.load_weights(os.path.join(run_folder, 'weights', 'weights-c.h5'))
        else:
            # Bug fix: previously interpolated self.epoch instead of the
            # `epoch` argument, so the argument had no effect.
            self.generator.load_weights(os.path.join(run_folder, 'weights', f'weights-g-{epoch}.h5'))
            self.critic.load_weights(os.path.join(run_folder, 'weights', f'weights-c-{epoch}.h5'))


    def draw_bar(self, data, score_num, bar, part):
        """Show one bar of one track as a (pitch x step) piano-roll image."""
        fig, ax = plt.subplots(1, 1)
        ax.imshow(
            data[score_num, bar, :, :, part].transpose([1,0]),
            origin='lower',
            cmap='Greys',
            vmin=-1,
            vmax=1
        )
        plt.show()


    def draw_score(self, data, score_num):
        """Show the whole score as an (n_tracks x n_bars) grid of piano rolls."""
        fig, ax = plt.subplots(self.n_tracks, self.n_bars, figsize=(12, 8), sharey=True, sharex=True)
        #plt.subplots_adjust(0, 0, 0.2, 1.5, 0, 0)   # left, bottom, right, top, wspace, hspace
        for bar in range(self.n_bars):
            for track in range(self.n_tracks):
                if self.n_bars > 1:
                    ax[track][bar].imshow(
                        data[score_num, bar, :, :, track].transpose([1,0]),
                        origin='lower',
                        cmap='Greys',
                        vmin=-1,
                        vmax=1
                    )
                else:
                    # With a single bar, plt.subplots returns a 1-D axes array.
                    ax[track].imshow(
                        data[score_num, bar, :, :, track].transpose([1,0]),
                        origin='lower',
                        cmap='Greys',
                        vmin=-1,
                        vmax=1
                    )
In [10]:
# Re-create the MuseGAN with the hyper-parameters used during training,
# so that the saved weights can be loaded below.
gan = MuseGAN(
    input_dim=data_binary.shape[1:],   # (n_bars, n_steps_per_bar, n_pitches, n_tracks)
    critic_learning_rate = 0.001,
    generator_learning_rate = 0.001,
    optimizer='adam',     # [nitta's memo] spelling
    grad_weight=10,
    z_dim=32,
    batch_size=BATCH_SIZE,
    n_tracks=n_tracks,
    n_bars=n_bars,
    n_steps_per_bar = n_steps_per_bar,
    n_pitches=n_pitches
)
WARNING:tensorflow:From D:\sys\Anaconda3\envs\generative\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py:1666: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.

=============================== End of code from gdl_ch07_04 ===============================

training 済みの重みをファイルから読み込む

In [11]:
# 第一引数のパスから以下の重みを読み込む
# weights-c.h5
# weights-g.h5
gan.load_weights(RUN_FOLDER, None)

サンプルとして楽譜を生成する

In [14]:
# Sample the four latent inputs for a single score (batch of 1):
# shapes are (1, z_dim) or (1, n_tracks, z_dim).
chords_noise = np.random.normal(0, 1, (1, gan.z_dim))
style_noise = np.random.normal(0, 1, (1, gan.z_dim))
melody_noise = np.random.normal(0, 1, (1, gan.n_tracks, gan.z_dim))
groove_noise = np.random.normal(0, 1, (1, gan.n_tracks, gan.z_dim))
In [15]:
gen_scores = gan.generator.predict([chords_noise, style_noise, melody_noise, groove_noise])
In [16]:
print(gen_scores.shape)  # 1曲, 2小節, 1小節内の音符, 音符ベクタ, 4声
(1, 2, 16, 84, 4)
In [18]:
# Save the generated score to a MIDI file, to be reloaded for display below.
midi_filename = 'analyze01'
gan.notes_to_midi(RUN_FOLDER, gen_scores, midi_filename)
In [36]:
import music21

# Parse the saved MIDI back with music21 and render the score.
music_gen = music21.converter.parse(os.path.join(RUN_FOLDER, f'samples/{midi_filename}.midi'))
music_gen.show()
In [37]:
music_gen.show('midi')
In [24]:
gan.draw_score(gen_scores, 0) # data, score_num

生成した楽譜と最も近い元の楽譜を探す

In [25]:
def find_closest(candidates, target):
    """Return the index of the candidate nearest to `target` in Euclidean
    (L2) distance.

    Parameters
    ----------
    candidates : iterable of array-likes broadcast-compatible with target
    target : array-like

    Returns
    -------
    int : index of the nearest candidate, or -1 if `candidates` is empty.
    """
    best_idx = -1
    # Bug fix: the original used the magic sentinel 99999999, which would
    # silently return -1 whenever every distance exceeded 1e8.
    best_distance = float('inf')
    for i, c in enumerate(candidates):
        d = np.sqrt(np.sum((c - target) ** 2))
        if d < best_distance:
            best_distance = d
            best_idx = i
    return best_idx
In [26]:
# Find the training score nearest (L2) to the generated one.
closest_idx = find_closest(data_binary, gen_scores[0])
closest_data = data_binary[closest_idx]   # [nitta's memo] changed from original source
print(closest_idx)
21
In [29]:
# [Self-study] scalar indexing drops a dimension; indexing with a
# one-element list keeps it.
print(data_binary.shape)
print(data_binary[closest_idx].shape)
print(data_binary[[closest_idx]].shape) # extra leading dimension
(229, 2, 16, 84, 4)
(2, 16, 84, 4)
(1, 2, 16, 84, 4)
In [30]:
# Write the closest training score to a MIDI file as well.
closest_filename = 'closest'
gan.notes_to_midi(RUN_FOLDER, [closest_data], closest_filename) # wrapping in [closest_data] restores the batch dimension, which I find clearer
In [38]:
music_closest = music21.converter.parse(os.path.join(RUN_FOLDER,f'samples/{closest_filename}.midi'))
music_closest.show()
In [39]:
music_closest.show('midi')

chords の初期ノイズを変更する

In [35]:
chords_noise2 = 5 * np.ones((1, gan.z_dim))
In [41]:
gen_scores2 = gan.generator.predict([chords_noise2, style_noise, melody_noise, groove_noise])
In [42]:
chords2_filename='changing_chords'
gan.notes_to_midi(RUN_FOLDER, gen_scores2, chords2_filename)
In [43]:
music_chords2 = music21.converter.parse(os.path.join(RUN_FOLDER,f'samples/{chords2_filename}.midi'))
music_chords2.show()
In [44]:
music_chords2.show('midi')

style の初期noise を変更する

In [48]:
# Replace the style noise with constant 5s and regenerate.
style_noise2 = 5 * np.ones((1, gan.z_dim))
gen_scores3 = gan.generator.predict([chords_noise, style_noise2, melody_noise, groove_noise])
In [49]:
style2_filename = 'changing_style'
gan.notes_to_midi(RUN_FOLDER, gen_scores3, style2_filename)
In [50]:
music_style2 = music21.converter.parse(os.path.join(RUN_FOLDER, f'samples/{style2_filename}.midi'))
music_style2.show()
In [51]:
music_style2.show('midi')

melody の小節毎のノイズを変更する

In [52]:
# Overwrite the melody noise of track 0 with constant 5s and regenerate.
melody_noise2 = np.copy(melody_noise)
melody_noise2[0,0,:] = 5 * np.ones(gan.z_dim)

gen_scores4 = gan.generator.predict([chords_noise, style_noise, melody_noise2, groove_noise])
In [53]:
melody2_filename = 'changing_melody'
gan.notes_to_midi(RUN_FOLDER, gen_scores4, melody2_filename)
In [54]:
music_melody2 = music21.converter.parse(os.path.join(RUN_FOLDER, f'samples/{melody2_filename}.midi'))
music_melody2.show()
In [55]:
music_melody2.show('midi')

groove の小節毎のノイズを変更する

In [57]:
groove_noise2 = np.copy(groove_noise)
groove_noise2[0,3,:] = 5 * np.ones(gan.z_dim)

gen_scores5 = gan.generator.predict([chords_noise, style_noise, melody_noise, groove_noise2])
In [59]:
groove2_filename = 'changing_groove'
gan.notes_to_midi(RUN_FOLDER, gen_scores5, groove2_filename)
In [60]:
music_groove2 = music21.converter.parse(os.path.join(RUN_FOLDER, f'samples/{groove2_filename}.midi'))
music_groove2.show()
In [61]:
music_groove2.show('midi')
In [ ]: