In [9]:
# [Note to self] Setup required to use music21 on Windows:
# point music21 at the MuseScore executable for rendering.
import os
from music21 import environment

if os.name == 'nt':
    settings = environment.UserSettings()
    settings['musescoreDirectPNGPath'] = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'
    settings['musicxmlPath'] = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'

7.2.3 アテンション機構つき RNN の解析

本が配布している 07_03_lstm_compose_predict.ipynb に相当するコード

次の手順で、音楽の一節を生成する。

  1. モデルへの入力として現在の音程(pitch)と持続時間(duration)のシーケンスを与える。モデルは次の音符の音程と持続時間の確率分布を出力する。
  2. 最初に音程のシーケンスとして与えるのは、全ての要素が START トークンのシーケンスである。
  3. 最初に持続時間のシーケンスとして与えるのは、全ての要素が0トークンのシーケンスである。
  4. 出力される2つの確率分布からサンプリングする。変分を与えるために temperature パラメータを使う。
  5. 選択された音程と持続時間をシーケンスに追加する。
  6. シーケンスの長さは、前を刈り込んで、モデルを訓練したときの長さに合わせる。
  7. 新しいシーケンスをモデルに与える処理を、必要な回数だけ繰り返す。

ここでの解析は、主として音程の予測に焦点を当てている。

In [10]:
# Name of the data set / run; selects the run/compose/<music_name> folder below.
music_name = 'cello'
In [11]:
# path to the run results
# store_folder holds the pickled notes/durations/distincts/lookups artifacts.

section = 'compose'
run_folder = f'run/{section}/{music_name}'
store_folder = os.path.join(run_folder, 'store')
In [12]:
# [self-study] Inspect the directory layout under run_folder and store_folder.
import glob

for folder in (run_folder, store_folder):
    print(glob.glob(os.path.join(folder, '*')))
['run/compose/cello\\store', 'run/compose/cello\\output', 'run/compose/cello\\weights', 'run/compose/cello\\viz']
['run/compose/cello\\store\\notes', 'run/compose/cello\\store\\durations', 'run/compose/cello\\store\\distincts', 'run/compose/cello\\store\\lookups']

Load Lookup Table

In [13]:
# load lookup tables
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# files produced by our own training run, never untrusted data.
import pickle

# distincts: vocabularies and their sizes for both token streams
with open(os.path.join(store_folder, 'distincts'), 'rb') as f:
    distincts = pickle.load(f)
    note_names, n_notes, duration_names, n_durations = distincts
    
# lookups: token <-> integer index mappings in both directions
with open(os.path.join(store_folder, 'lookups'), 'rb') as f:
    lookups = pickle.load(f)
    note_to_int, int_to_note, duration_to_int, int_to_duration = lookups

Build and Load the Model

In [14]:
# [self-study] Keras layer notes: RepeatVector.
import tensorflow as tf

# tf.keras.layers.RepeatVector(n)
# repeats the input n times:
#   input shape  (num_samples, features)
#   output shape (num_samples, n, features)

demo = tf.keras.models.Sequential()
demo.add(tf.keras.layers.Dense(32, input_dim=32))  # input_shape == (None, 32)
demo.add(tf.keras.layers.RepeatVector(3))          # output_shape == (None, 3, 32)

demo.summary()
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 3, 32)             0         
=================================================================
Total params: 1,056
Trainable params: 1,056
Non-trainable params: 0
_________________________________________________________________
In [15]:
# [self-study] Keras layer notes: Permute.
import tensorflow as tf

# tf.keras.layers.Permute(dims)
# Permutes the dimensions of the input according to the given pattern.
# Useful e.g. for connecting RNNs and convnets together.

tmp = tf.keras.models.Sequential()
tmp.add(tf.keras.layers.Permute((2,1), input_shape=(10, 64))) # swap dims 1 and 2 of batch_size x 10 x 64; dims are 1-based and the batch dimension cannot be specified

tmp.summary()
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
permute_1 (Permute)          (None, 64, 10)            0         
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
In [16]:
# [self-study] Keras layer notes: Lambda.
import tensorflow as tf

# tf.keras.layers.Lambda(function, output_shape=None, mask=None, arguments=None)
# Wraps an arbitrary expression as a Layer object.

square_net = tf.keras.models.Sequential()
square_net.add(tf.keras.layers.Lambda(lambda v: v ** 2))

# Element-wise squaring of the input batch.
print(square_net.predict([[1, 2, 3], [4, 5, 6]]))
[[ 1  4  9]
 [16 25 36]]
In [17]:
# copied from gdl_ch07_02.ipynb
def create_network(n_notes, n_durations, embed_size=100, rnn_units=256, use_attention=False): # models/RNNAttention.py
    """Build the two-headed (pitch + duration) LSTM prediction model.

    Args:
        n_notes: size of the pitch vocabulary (width of the 'pitch' softmax).
        n_durations: size of the duration vocabulary ('duration' softmax).
        embed_size: embedding dimension used for both token streams.
        rnn_units: hidden size of each LSTM layer.
        use_attention: if True, stack a second LSTM plus an additive-attention
            head, and also build a model exposing the attention weights.

    Returns:
        (model, att_model): ``model`` is compiled with two categorical
        cross-entropy losses; ``att_model`` outputs the attention weights
        ``alpha`` (or None when ``use_attention`` is False).
    """
    notes_in = tf.keras.layers.Input(shape=(None,))
    durations_in = tf.keras.layers.Input(shape=(None,))

    # Embed each token stream, then merge along the feature axis.
    x1 = tf.keras.layers.Embedding(n_notes, embed_size)(notes_in)
    x2 = tf.keras.layers.Embedding(n_durations, embed_size)(durations_in)

    x = tf.keras.layers.Concatenate()([x1, x2])

    x = tf.keras.layers.LSTM(rnn_units, return_sequences=True)(x)
    # x = tf.keras.layers.Dropout(0.2)(x)

    if use_attention:
        x = tf.keras.layers.LSTM(rnn_units, return_sequences=True)(x)
        # x = tf.keras.layers.Dropout(0.2)(x)

        # Additive attention: score each timestep, softmax over time...
        e = tf.keras.layers.Dense(1, activation='tanh')(x)
        e = tf.keras.layers.Reshape([-1])(e)   # batch_size * N
        alpha = tf.keras.layers.Activation('softmax')(e)

        # ...then take the alpha-weighted sum of the LSTM outputs over time.
        alpha_repeated = tf.keras.layers.Permute([2,1])(tf.keras.layers.RepeatVector(rnn_units)(alpha))

        c = tf.keras.layers.Multiply()([x, alpha_repeated])
        c = tf.keras.layers.Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1), output_shape=(rnn_units,))(c)

    else:

        # Without attention, just take the final LSTM state.
        c = tf.keras.layers.LSTM(rnn_units)(x)
        #c = tf.keras.layers.Dropout(0.2)(c)

    notes_out = tf.keras.layers.Dense(n_notes, activation='softmax', name='pitch')(c)
    durations_out = tf.keras.layers.Dense(n_durations, activation='softmax', name='duration')(c)

    model = tf.keras.models.Model([notes_in, durations_in], [notes_out, durations_out])

    if use_attention:
        att_model = tf.keras.models.Model([notes_in, durations_in], alpha)
    else:
        att_model = None

    # FIX: `lr` is the deprecated alias of `learning_rate` and has been removed
    # in newer Keras releases; use the canonical argument name.
    opti = tf.keras.optimizers.RMSprop(learning_rate=0.001)

    model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], optimizer=opti)

    return model, att_model
In [18]:
# Model hyperparameters; these must match the configuration the saved
# weights were trained with, otherwise load_weights below will fail.
embed_size = 100
rnn_units = 256
use_attention = True
In [19]:
# build the model
# att_model shares weights with model and exposes the attention weights.
import tensorflow as tf

model, att_model = create_network(n_notes, n_durations, embed_size, rnn_units, use_attention)
In [38]:
# load weights to each node

weights_folder = os.path.join(run_folder, 'weights')
weights_file = 'weights_gdl_ch07_02.h5'   # 'weights.h5'  # 手動で保存した学習済み重みファイルをロードしてみる。

weight_source = os.path.join(weights_folder, weights_file)
In [39]:
# Load the trained weights and print the architecture to confirm it matches.
model.load_weights(weight_source)
#model.load_weights(os.path.join("D:/tmp", 'weights_myself.h5')) # try loading from somewhere other than Google Drive

model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 100)    46100       input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 100)    1900        input_2[0][0]                    
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, None, 200)    0           embedding[0][0]                  
                                                                 embedding_1[0][0]                
__________________________________________________________________________________________________
lstm (LSTM)                     (None, None, 256)    467968      concatenate[0][0]                
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (None, None, 256)    525312      lstm[0][0]                       
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, None, 1)      257         lstm_1[0][0]                     
__________________________________________________________________________________________________
reshape (Reshape)               (None, None)         0           dense_2[0][0]                    
__________________________________________________________________________________________________
activation (Activation)         (None, None)         0           reshape[0][0]                    
__________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)  (None, 256, None)    0           activation[0][0]                 
__________________________________________________________________________________________________
permute_2 (Permute)             (None, None, 256)    0           repeat_vector_2[0][0]            
__________________________________________________________________________________________________
multiply (Multiply)             (None, None, 256)    0           lstm_1[0][0]                     
                                                                 permute_2[0][0]                  
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 256)          0           multiply[0][0]                   
__________________________________________________________________________________________________
pitch (Dense)                   (None, 461)          118477      lambda_2[0][0]                   
__________________________________________________________________________________________________
duration (Dense)                (None, 19)           4883        lambda_2[0][0]                   
==================================================================================================
Total params: 1,164,897
Trainable params: 1,164,897
Non-trainable params: 0
__________________________________________________________________________________________________

build your own phrase

In [40]:
# Sampling temperatures (0 would mean greedy argmax in sample_with_temp below)
# and generation lengths.
notes_temp = 0.5
duration_temp = 0.5
max_extra_notes = 50   # number of new notes to generate
max_seq_len = 32       # model input window; older tokens are pruned beyond this
seq_len = 32           # length of the initial START / 0 seed sequence
In [41]:
# Seed the generator: a window of START pitch tokens and 0-duration tokens.
notes = ['START']
durations = [0]

if seq_len is not None:
    # Left-pad both streams up to seq_len.
    pad_notes = seq_len - len(notes)
    pad_durations = seq_len - len(durations)
    notes = ['START'] * pad_notes + notes
    durations = [0] * pad_durations + durations

sequence_length = len(notes)
In [42]:
# [self-study] Show the seed sequences.
print(notes)
print(durations)
['START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START', 'START']
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Generate notes from the neural network based on a sequence of notes

In [43]:
# Initialize the rolling input sequences and the prediction log from the seed.
# FIX: `np` is only imported further down (In [47]) and `note` (music21) is not
# imported anywhere visible above — import both here so a fresh-kernel
# Restart & Run All works.
import numpy as np
from music21 import note

prediction_output = []          # [pitch_name, duration] pairs: seed + generated
notes_input_sequence = []       # integer-encoded pitches fed to the model
durations_input_sequence = []   # integer-encoded durations fed to the model

overall_preds = []              # 128-wide MIDI-pitch activation rows

for n, d in zip(notes, durations):
    note_int = note_to_int[n]
    duration_int = duration_to_int[d]

    notes_input_sequence.append(note_int)
    durations_input_sequence.append(duration_int)

    prediction_output.append([n, d])

    # The seed is all START tokens, so this only fires for real notes.
    if n != 'START':
        midi_note = note.Note(n)

        new_note = np.zeros(128)
        new_note[midi_note.pitch.midi] = 1
        overall_preds.append(new_note)
In [44]:
# Inspect the prediction log — all ['START', 0] pairs at this point.
print(prediction_output)
[['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0]]
In [45]:
# Show the integer code for START and the encoded seed pitch sequence.
print(note_to_int['START'])
print(notes_input_sequence)
460
[460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460, 460]
In [46]:
# Show the integer code for duration 0 and the encoded seed duration sequence.
print(duration_to_int[0])
print(durations_input_sequence)
0
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
In [47]:
import numpy as np

# Attention-weight history: one column per generated note, rows spanning the
# full (seed + generated) timeline.
att_matrix = np.zeros((max_extra_notes + sequence_length, max_extra_notes))
In [48]:
def sample_with_temp(preds, temperature):    # models/RNNAttention.py
    """Sample an index from a probability vector with temperature scaling.

    temperature == 0 degenerates to a greedy argmax; otherwise the
    distribution is sharpened (t < 1) or flattened (t > 1) before sampling.
    """
    if temperature == 0:
        return np.argmax(preds)

    scaled = np.log(preds) / temperature
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    return np.random.choice(len(probs), p=probs)
In [49]:
# Autoregressive generation: feed the current window, sample the next
# pitch/duration pair, append it, and slide the window.
for note_index in range(max_extra_notes):
    prediction_input = [
        np.array([notes_input_sequence]),
        np.array([durations_input_sequence])
    ]
    notes_prediction, durations_prediction = model.predict(prediction_input, verbose=0)
    if use_attention:
        # Record this step's attention weights into the matching column.
        # NOTE(review): the slice start assumes len(att_prediction) equals the
        # current input length — confirm once the window starts sliding.
        att_prediction = att_model.predict(prediction_input, verbose=0)[0]
        att_matrix[(note_index-len(att_prediction)+sequence_length):(note_index+sequence_length), note_index] = att_prediction
        
    new_note = np.zeros(128)
    
    # Build a 128-wide MIDI-pitch activation row from the pitch distribution.
    for idx, n_i in enumerate(notes_prediction[0]):
        try:
            note_name = int_to_note[idx]
            midi_note = note.Note(note_name)
            new_note[midi_note.pitch.midi] = n_i
        except:
            # NOTE(review): bare except silently skips tokens music21 cannot
            # parse (e.g. START); consider narrowing the exception type.
            pass
        
    overall_preds.append(new_note)
    
    # Sample the next pitch/duration with the configured temperatures.
    i1 = sample_with_temp(notes_prediction[0], notes_temp)
    i2 = sample_with_temp(durations_prediction[0], duration_temp)
    
    note_result = int_to_note[i1]
    duration_result = int_to_duration[i2]
    
    prediction_output.append([note_result, duration_result])
    
    notes_input_sequence.append(i1)
    durations_input_sequence.append(i2)
    
    # Keep the input window at most max_seq_len by dropping the oldest token.
    if len(notes_input_sequence) > max_seq_len:
        notes_input_sequence = notes_input_sequence[1:]
        durations_input_sequence = durations_input_sequence[1:]
        
    # The model emitting START marks the end of the piece.
    if note_result == 'START':
        break
In [50]:
# Rows become MIDI pitches and columns become time steps, for plotting below.
overall_preds = np.transpose(np.array(overall_preds))
print(f'generated sequence of {len(prediction_output)} notes')
generated sequence of 82 notes
In [51]:
%matplotlib inline
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(8,8))
ax.set_yticks([int(j) for j in range(35, 70)])

plt.imshow(overall_preds[35:70, :], origin='lower', cmap='coolwarm', vmin=-0.5, vmax = 0.5, extent=[0, max_extra_notes, 35, 70])

#plt.imshow(overall_preds[35:70, :], origin='low', cmap='coolwarm', extent=[0, max_extra_notes, 35, 70])

plt.show()
In [52]:
# Full generated sequence: 32 seed START tokens followed by sampled notes.
print(prediction_output)
[['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['START', 0], ['G3', 0.5], ['E-3', 0.5], ['B2', 0.5], ['C3', 0.5], ['G#2', 1.0], ['C4', 0.5], ['G#3', 0.5], ['E3', 0.5], ['F3', 0.5], ['B2', 1.0], ['D4', 1.0], ['G#3', 0.5], ['E3', 0.5], ['F3', 0.5], ['G2', 0.5], ['G3', 0.5], ['F3', 0.5], ['G3', 0.5], ['B2', 0.5], ['B2', 0.5], ['C2', 1.0], ['C3', 1.0], ['F3', 0.5], ['E3', 0.5], ['G3', 0.5], ['E3', 0.5], ['G3', 0.5], ['C3', 0.5], ['C4', 0.5], ['B-3', 0.5], ['G#3', 0.5], ['G#3', 0.5], ['E3', 0.5], ['G#3', 0.5], ['G3', 0.5], ['D3', 0.5], ['E-3', 0.5], ['D3', 0.5], ['C3', 0.5], ['C3', 0.5], ['G3', 0.5], ['G2', 0.5], ['E-3', 0.5], ['F3', 0.5], ['G3', 0.5], ['B-3', 0.5], ['C4', 0.5], ['C4', 0.5], ['D4', 0.5], ['D4', 0.5]]
In [53]:
# NOTE(review): the all-zero rows in the output indicate the pitch predictions
# are degenerate — consistent with the weight-loading problem described below.
print(overall_preds[35:70,:])
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
In [ ]:
 

中止

gdl_ch07_02 での重みのセーブか、 gdl_ch07_03 での重みのロードがうまくいっていない。

gdl_ch07_02 で続きを行う。

gdl_ch07_02 で保存したモデルを使ってみる

run/compose/cello/weights/weights_gdl_ch07_02.h5

In [ ]:
# load weights to each node

weights_folder = os.path.join(run_folder, 'weights')
weights_file = 'weights_gdl_ch07_02.h5'   # 'weights.h5'  # try loading the manually saved trained-weights file

weight_source = os.path.join(weights_folder, weights_file)

model.load_weights(weight_source) # saved at the end of gdl_ch07_02