<i>Updated 10/Dec/2021</i> by Yoshihisa Nitta &nbsp; <img src="https://nw.tsuda.ac.jp/icons/nitta-email.gif" />

# LSTM_Music_Train をローカルのWindows上で動作するJupyterで実行する

https://nw.tsuda.ac.jp/lec/GoogleColab/pub/html/LSTM_Music_Train.html

jupyter notebook が使うpython仮想環境: <a href="https://nw.tsuda.ac.jp/lec/python/anaconda/win/generative">generative</a> <br />

In [1]:
import tensorflow as tf
print(tf.__version__)

2.2.0


In [2]:
import numpy as np

# Seed every RNG the notebook relies on so that "Restart & Run All" is
# repeatable: NumPy drives the index shuffle of the train/validation split,
# TensorFlow drives weight initialisation and tf.random.shuffle.
SEED = 2022
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# Windows で music21 を使うための余分なコード
import os
from music21 import environment

# On Windows only: point music21's user settings at the MuseScore executable.
if os.name == 'nt':
    musescore_exe = 'C:/Program Files/MuseScore 3/bin/MuseScore3.exe'
    us = environment.UserSettings()
    # Both settings are given the same executable path.
    for setting_key in ('musescoreDirectPNGPath', 'musicxmlPath'):
        us[setting_key] = musescore_exe

# LSTMMusic クラスの定義

In [4]:
#from LSTMMusic import LSTMMusic

#rnn = LSTMMusic()

## Midi ファイルをデータセットとして用意する

http://www.jsbach.net/midi/midi_solo_cello.html

<pre>
bwv1007.zip
bwv1008.zip
bwv1009.zip
bwv1010.zip
bwv1011.zip
bwv1012.zip
</pre>

<font color="red"><b>[注意]</b> 上記の URL から bwv1007.zip ～ bwv1012.zip をブラウザを使って手動でダウンロードして、以下のフォルダに解凍したものとする。</font>

In [5]:
# Names of the sub-folders, one per extracted archive (bwv1007 ... bwv1012).
# NOTE(review): the variable name looks carried over from an unrelated notebook
# and nothing in the visible cells reads it — confirm before renaming/removing.
VidTIMIT_fnames = [ 'bwv1007', 'bwv1008', 'bwv1009', 'bwv1010', 'bwv1011', 'bwv1012' ]
# Root folder holding the extracted MIDI files (one sub-folder per archive).
data_dir = 'D:\\data\\gdl_book14\\MIDI\\jsbach'

In [6]:
!dir /s {data_dir}

 ドライブ D のボリューム ラベルがありません。
 ボリューム シリアル番号は 606C-349E です

 D:\data\gdl_book14\MIDI\jsbach のディレクトリ

2021/12/11  10:53    <DIR>          .
2021/12/11  10:53    <DIR>          ..
2021/12/11  10:53    <DIR>          bwv1007
2021/12/11  10:53    <DIR>          bwv1008
2021/12/11  10:53    <DIR>          bwv1009
2021/12/11  10:53    <DIR>          bwv1010
2021/12/11  10:53    <DIR>          bwv1011
2021/12/11  10:53    <DIR>          bwv1012
               0 個のファイル                   0 バイト

 D:\data\gdl_book14\MIDI\jsbach\bwv1007 のディレクトリ

2021/12/11  10:53    <DIR>          .
2021/12/11  10:53    <DIR>          ..
1997/02/22  21:59             4,922 cs1-1pre.mid
1997/02/22  21:59             7,088 cs1-2all.mid
1997/02/22  22:00             6,159 cs1-3cou.mid
1997/02/22  22:25             3,209 cs1-4sar.mid
1997/02/22  23:13             5,704 cs1-5men.mid
1997/02/22  22:01             3,966 cs1-6gig.mid
               6 個のファイル              31,048 バイト

 D:\data\gdl_book14\MIDI\jsbach\bwv1008 のディレク

In [7]:
import music21

def extractMidi(midi_path):
    """Parse one MIDI file into parallel lists of note tokens and durations.

    The parsed score is chordified, then its flattened elements are scanned in
    order. Elements that are neither a Note nor a Chord are ignored.

    Parameters
    ----------
    midi_path : path of the MIDI file, handed to ``music21.converter.parse``.

    Returns
    -------
    (notes, durations) : two lists of equal length.
        ``notes[i]`` is the pitch name with octave for a single note, or the
        chord's pitch names (with octave) joined by '.'.
        ``durations[i]`` is the element's ``duration.quarterLength``.
    """
    notes = []
    durations = []
    score = music21.converter.parse(midi_path).chordify()
    for element in score.flat:
        if isinstance(element, music21.note.Note):  # single note
            # NOTE(review): music21 rests are presumably not Note instances, so
            # this isRest branch may never run — confirm before relying on it.
            if element.isRest:
                notes.append(str(element.name))   # a rest has no octave
            else:
                notes.append(str(element.nameWithOctave))
            # Fixed: the attribute is `quarterLength` (was misspelled
            # `quaterLength`, which raised AttributeError for every plain Note).
            durations.append(element.duration.quarterLength)
        elif isinstance(element, music21.chord.Chord):  # chord
            notes.append('.'.join(n.nameWithOctave for n in element.pitches))
            durations.append(element.duration.quarterLength)

    return notes, durations

In [8]:
import os
import glob

# Collect every MIDI file one level below data_dir. glob makes no ordering
# guarantee, so sort: the piece order determines every sample index derived
# from it, and must be stable across runs and machines.
midi_paths = sorted(glob.glob(os.path.join(data_dir, '*/*.mid')))

print(midi_paths[:3])
print(len(midi_paths))

['D:\\data\\gdl_book14\\MIDI\\jsbach\\bwv1007\\cs1-1pre.mid', 'D:\\data\\gdl_book14\\MIDI\\jsbach\\bwv1007\\cs1-2all.mid', 'D:\\data\\gdl_book14\\MIDI\\jsbach\\bwv1007\\cs1-3cou.mid']
36


In [9]:
import music21

# [notes1, ..., notesN], [durations1, ..., durationsN]
def makeMusicData(midi_paths):
    """Run extractMidi on every path and collect the results piece by piece.

    Returns two lists aligned with ``midi_paths``: the note sequences and the
    duration sequences, one inner list per file.
    """
    extracted = [extractMidi(midi_path) for midi_path in midi_paths]
    notes_list = [piece_notes for piece_notes, _ in extracted]
    durations_list = [piece_durations for _, piece_durations in extracted]
    return notes_list, durations_list

In [10]:
notes_list, durations_list = makeMusicData(midi_paths)

In [11]:
print([len(x) for x in notes_list])

[654, 916, 812, 260, 642, 420, 634, 688, 736, 334, 522, 732, 974, 850, 984, 338, 872, 956, 812, 1050, 914, 326, 1319, 966, 1304, 686, 422, 216, 1197, 436, 1334, 778, 1278, 332, 801, 1166]


In [12]:
import itertools

# Vocabulary of distinct note tokens / distinct durations over all pieces,
# sorted so that the order of the entries is deterministic.
notes_set = sorted(set().union(*notes_list))
durations_set = sorted(set().union(*durations_list))

In [13]:
# Lookup Table
def createLookups(names):
    """Build the two-way mapping between vocabulary items and integer ids.

    An item's id is its position in ``names``.
    Returns ``(element_to_index, index_to_element)``.
    """
    element_to_index = {element: position for position, element in enumerate(names)}
    index_to_element = dict(enumerate(names))
    return element_to_index, index_to_element

In [14]:
# Two-way lookup tables for note tokens and for durations.
note_to_index, index_to_note = createLookups(notes_set)
duration_to_index, index_to_duration = createLookups(durations_set)

# Vocabulary sizes of the two lookup tables.
c_notes, c_durations = len(note_to_index), len(duration_to_index)

for vocab_size in (c_notes, c_durations):
    print(vocab_size)

460
18


In [15]:
def convertIndex(data, element_to_index):
    """Translate every element of a 2-D nested sequence into its integer id.

    ``data`` is a sequence of sequences; each element is looked up in
    ``element_to_index``. Returns a list of lists with the same layout.
    """
    converted = []
    for sequence in data:
        row = []
        for element in sequence:
            row.append(element_to_index[element])
        converted.append(row)
    return converted

In [16]:
notes_index_list = convertIndex(notes_list, note_to_index)
durations_index_list = convertIndex(durations_list, duration_to_index)

In [17]:
print(notes_index_list[0][:5])

[400, 206, 100, 29, 100]


In [18]:
class ScoreDataset(tf.keras.utils.Sequence):
    """Sliding-window view over several index-encoded pieces of music.

    A sample consists of ``seq_len`` consecutive note / duration indices of one
    piece (the input) and the element that directly follows them (the target).
    Windows never cross a piece boundary. Samples are numbered consecutively:
    all windows of piece 0 first, then those of piece 1, and so on.

    ``len()`` counts individual samples, and integer indexing returns a single
    sample (not a batch). Indexing with a slice or any iterable of integers
    returns lists of samples.
    """

    def __init__(self, notes_list, durations_list, c_notes, c_durations, seq_len=32):
        """Store the data and precompute the per-piece window counts.

        notes_list, durations_list : 2-D nested lists ``[[int, ...], ...]``,
            one inner list per piece.
        c_notes, c_durations : number of note / duration classes (stored only).
        seq_len : length of each input window.
        """
        self.notes_list = notes_list
        self.durations_list = durations_list
        self.c_notes = c_notes
        self.c_durations = c_durations
        self.seq_len = seq_len

        self.n_music = len(notes_list)
        self.index = 0          # cursor used by __next__

        self._build()


    def _build(self):
        """Compute the running total of windows per piece.

        ``cumulative_freq[i]`` is the number of samples contained in pieces
        0..i. Uses this instance's data (previously the module-level
        ``notes_list`` was read by mistake). A piece no longer than ``seq_len``
        contributes zero windows instead of a negative count, which keeps the
        table non-decreasing.
        """
        running_total = 0
        cumulative = []
        for piece in self.notes_list:
            running_total += max(0, len(piece) - self.seq_len)
            cumulative.append(running_total)
        self.cumulative_freq = cumulative


    def searchTbl(self, index):
        """Return the number of the piece that contains sample ``index``.

        ``index`` is wrapped modulo the dataset size. Raises ``IndexError``
        when the dataset holds no samples.
        """
        n_samples = self.__len__()
        if n_samples == 0:
            raise IndexError('ScoreDataset is empty')
        index = index % n_samples
        # Binary search for the first entry strictly greater than index.
        # Because index < cumulative_freq[-1], such an entry always exists.
        low, high = 0, self.n_music
        while low < high:
            mid = (low + high) // 2
            if self.cumulative_freq[mid] > index:
                high = mid
            else:
                low = mid + 1
        return low


    def __len__(self):
        """Total number of samples over all pieces (0 for an empty dataset)."""
        return self.cumulative_freq[-1] if self.cumulative_freq else 0


    def __getitem__(self, index):
        """Dispatch on the index type: slice, single integer, or iterable."""
        if isinstance(index, slice):
            # slice.indices() already resolves None and negative bounds.
            return self.__getitemList__(range(*index.indices(self.__len__())))

        # NumPy integer scalars are accepted as single indices as well.
        if isinstance(index, (int, np.integer)):
            return self.__getitemInt__(index)

        return self.__getitemList__(index)


    def __getitemList__(self, indices):
        """Collect the samples for every index in ``indices``.

        Returns ``(x_notes, x_durations), (y_notes, y_durations)`` where each
        entry is a list with one element per requested sample.
        """
        x_notes, x_durations, y_notes, y_durations = [], [], [], []
        for i in indices:
            [x_note, x_duration], [y_note, y_duration] = self.__getitemInt__(i)
            x_notes.append(x_note)
            x_durations.append(x_duration)
            y_notes.append(y_note)
            y_durations.append(y_duration)

        return (x_notes, x_durations), (y_notes, y_durations)


    def __getitemInt__(self, index):
        """Return one sample ``(x_note, x_duration), (y_note, y_duration)``.

        ``index`` is wrapped modulo the dataset size, so negative and
        too-large values are valid. The x values are lists of ``seq_len``
        class indices; the y values are single class indices (not one-hot).
        Raises ``IndexError`` when the dataset holds no samples.
        """
        n_samples = self.__len__()
        if n_samples == 0:
            raise IndexError('ScoreDataset is empty')
        # int() also normalises NumPy integers and scalar tensors.
        index = int(index) % n_samples
        tbl_idx = self.searchTbl(index)

        # Offset of the window inside the selected piece.
        tgt = index
        if tbl_idx > 0:
            tgt -= self.cumulative_freq[tbl_idx - 1]
        stop = tgt + self.seq_len

        x_note = self.notes_list[tbl_idx][tgt:stop]
        y_note = self.notes_list[tbl_idx][stop]
        x_duration = self.durations_list[tbl_idx][tgt:stop]
        y_duration = self.durations_list[tbl_idx][stop]

        return (x_note, x_duration), (y_note, y_duration)


    def __next__(self):
        """Return the sample at the internal cursor and advance the cursor.

        Never raises StopIteration: the index wraps around at the end.
        """
        self.index += 1
        return self.__getitem__(self.index-1)


In [19]:
# Wrap the index-encoded pieces in a dataset of 32-step windows; the sizes of
# the two lookup tables are passed as the class counts.
data_flow = ScoreDataset(notes_index_list, durations_index_list, len(note_to_index), len(duration_to_index), 32)

### train用のデータを全部書き出す

trainの最中に訓練データを動的に生成する方がメモリ効率が良くなるが、この例では出力が2個あり、それぞれが長さの異なる one-hot vector で表されているため、np.array() でデータ型 'float32' を指定するとエラーを起こす。
そのため、fit() 関数に generator を渡せなくなっている。

Tensor を作り出すためには np.array(..., dtype='float32') である必要があるが、要素のサイズが等しくないと ('object' 型になってしまうため) np.array で 'float32' や 'int32' などの型を指定できない。

In [20]:
# Materialise every sample in one go: indexing the dataset with a range
# returns lists holding all input windows and all targets.
[all_x_notes, all_x_durations], [all_y_notes, all_y_durations] = data_flow[range(len(data_flow))]
print(len(all_x_notes))

26509


In [21]:
import numpy as np

# Hold out a random fraction of the samples for validation.
# NOTE(review): the model.fit call further down trains on the full arrays with
# validation_split rather than on the t_* / v_* arrays built here — confirm
# which split is intended.
val_split = 0.05

N = len(all_x_notes)
V_N = int(N * val_split)     # number of validation samples (0 when N is tiny)

arr = np.arange(N)
np.random.shuffle(arr)

# Split on an explicit boundary. The former arr[:-V_N] / arr[-V_N:] slices
# swap meaning when V_N == 0: the training set came out empty and every sample
# landed in the validation set.
split_at = N - V_N
t_indices = sorted(arr[:split_at])
v_indices = sorted(arr[split_at:])

all_x_notes = np.array(all_x_notes)
all_x_durations = np.array(all_x_durations)
all_y_notes = np.array(all_y_notes)
all_y_durations = np.array(all_y_durations)

t_x_notes = all_x_notes[t_indices]
t_x_durations = all_x_durations[t_indices]
t_y_notes = all_y_notes[t_indices]
t_y_durations = all_y_durations[t_indices]

v_x_notes = all_x_notes[v_indices]
v_x_durations = all_x_durations[v_indices]
v_y_notes = all_y_notes[v_indices]
v_y_durations = all_y_durations[v_indices]

print(len(t_x_notes))
print(len(v_x_notes))

25184
1325


In [22]:
# One-hot encode the targets with c_notes / c_durations classes respectively.
all_y_notes_ohv = tf.keras.utils.to_categorical(all_y_notes, c_notes)
all_y_durations_ohv = tf.keras.utils.to_categorical(all_y_durations, c_durations)

# ニューラルネットワーク・モデルを定義する

In [23]:
embed_size = 100
rnn_units = 256
use_attention = True

In [24]:
def create_network(n_notes, n_durations, embed_size=100, rnn_units=256, use_attention=False):
    """Build and compile the two-input / two-output LSTM model.

    Note indices and duration indices are embedded separately, concatenated
    and fed through stacked LSTM layers. The final hidden representation feeds
    two softmax heads named 'pitch' and 'duration'.

    Parameters
    ----------
    n_notes, n_durations : vocabulary sizes; used for the embedding inputs and
        for the widths of the two softmax outputs.
    embed_size : dimension of each embedding.
    rnn_units : number of units of every LSTM layer.
    use_attention : when True, a second sequence-returning LSTM is followed by
        an attention mechanism (Dense(1, tanh) score per time step, softmax
        over the time steps, weighted sum of the LSTM outputs). When False, a
        second LSTM returning only its last output is used instead.

    Returns
    -------
    (model, att_model) : the compiled training model and, when
        ``use_attention`` is True, a second model sharing the same inputs that
        outputs the attention weights; otherwise ``att_model`` is None.
    """
    notes_in = tf.keras.layers.Input(shape=(None,))
    durations_in = tf.keras.layers.Input(shape=(None,))

    x1 = tf.keras.layers.Embedding(n_notes, embed_size)(notes_in)
    x2 = tf.keras.layers.Embedding(n_durations, embed_size)(durations_in)

    x = tf.keras.layers.Concatenate()([x1, x2])

    x = tf.keras.layers.LSTM(rnn_units, return_sequences=True)(x)

    if use_attention:
        x = tf.keras.layers.LSTM(rnn_units, return_sequences=True)(x)

        # One scalar score per time step, normalised over the time steps.
        e = tf.keras.layers.Dense(1, activation='tanh')(x)
        e = tf.keras.layers.Reshape([-1])(e)   # (batch, time)
        alpha = tf.keras.layers.Activation('softmax')(e)

        # Repeat the weights along the feature axis so they can be multiplied
        # element-wise with the LSTM outputs.
        alpha_repeated = tf.keras.layers.Permute([2,1])(tf.keras.layers.RepeatVector(rnn_units)(alpha))

        # Attention-weighted sum over the time axis.
        c = tf.keras.layers.Multiply()([x, alpha_repeated])
        c = tf.keras.layers.Lambda(lambda xin: tf.keras.backend.sum(xin, axis=1), output_shape=(rnn_units,))(c)

    else:
        c = tf.keras.layers.LSTM(rnn_units)(x)

    notes_out = tf.keras.layers.Dense(n_notes, activation='softmax', name='pitch')(c)
    durations_out = tf.keras.layers.Dense(n_durations, activation='softmax', name='duration')(c)

    model = tf.keras.models.Model([notes_in, durations_in], [notes_out, durations_out])

    if use_attention:
        att_model = tf.keras.models.Model([notes_in, durations_in], alpha)
    else:
        att_model = None

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    opti = tf.keras.optimizers.RMSprop(learning_rate=0.001)

    model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], optimizer=opti)

    return model, att_model

In [25]:
model, att_model = create_network(c_notes, c_durations, embed_size, rnn_units, use_attention)

In [26]:
import tensorflow as tf
import numpy as np


# 訓練する

In [27]:
# Train on all samples at once; the targets are the one-hot encoded next note
# and next duration.
# NOTE(review): per the Keras documentation, validation_split holds out the
# LAST 5% of the arrays (taken before shuffling), so the random t_* / v_* split
# prepared in an earlier cell is not what this call uses — confirm intent.
history = model.fit(
    [np.array(all_x_notes), np.array(all_x_durations)],
    [np.array(all_y_notes_ohv), np.array(all_y_durations_ohv)],
    epochs=1,
    batch_size=32,
    shuffle=True,
    validation_split=0.05
)



In [28]:
print(history.history.keys())

dict_keys(['loss', 'pitch_loss', 'duration_loss', 'val_loss', 'val_pitch_loss', 'val_duration_loss'])


In [29]:
def loss_fn(y_true, y_pred):
    """Evaluate Keras' CategoricalCrossentropy on ``(y_true, y_pred)``.

    Returns the result of calling ``.numpy()`` on the computed loss.
    """
    return tf.keras.losses.CategoricalCrossentropy()(y_true, y_pred).numpy()

def train_step(model, x_notes, x_durations, y_notes, y_durations, optimizer):
    """Run one gradient update of ``model`` on a single batch.

    The model is called on ``[x_notes, x_durations]``; a categorical
    cross-entropy is computed for each of its two outputs against ``y_notes``
    and ``y_durations``, and the sum of both is minimised with ``optimizer``.

    Returns ``(loss, note_loss, duration_loss)``.
    """
    make_criterion = tf.keras.losses.CategoricalCrossentropy
    note_criterion = make_criterion()
    duration_criterion = make_criterion()

    # Record the forward pass so the gradient of the summed loss can be taken.
    with tf.GradientTape() as tape:
        pred_notes, pred_durations = model([x_notes, x_durations])
        note_loss = note_criterion(y_notes, pred_notes)
        duration_loss = duration_criterion(y_durations, pred_durations)
        loss = tf.add(note_loss, duration_loss)

    trainable = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(loss, trainable), trainable))
    return loss, note_loss, duration_loss
        
        
def train(flow, epochs=1, batch_size=32, optimizer=None, shuffle=False):
    """Custom training loop that draws its batches from ``flow``.

    Relies on the notebook-level names ``model``, ``c_notes``, ``c_durations``
    and ``train_step``.

    Parameters
    ----------
    flow : indexable dataset; ``len(flow)`` is the number of samples and
        ``flow[indices]`` returns
        ``(x_notes, x_durations), (y_notes, y_durations)`` for those samples.
    epochs : number of passes over the data.
    batch_size : samples per step. Only ``len(flow) // batch_size`` full
        batches are used per epoch; a trailing partial batch is dropped.
    optimizer : optimizer handed to ``train_step``. When None, a new
        ``Adam(learning_rate=0.001)`` is created for this call. (Previously the
        default instance was built once at definition time and therefore
        shared its state between calls.)
    shuffle : when True, the sample order is reshuffled every epoch.

    Returns
    -------
    dict with the keys 'loss', 'pitch_loss' and 'duration_loss'; each value is
    a list holding the mean of that loss over the steps of every epoch (NaN
    for an epoch without any step).
    """
    if optimizer is None:
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    history_keys = ('loss', 'pitch_loss', 'duration_loss')
    history = {key: [] for key in history_keys}

    steps = len(flow) // batch_size
    for epoch in range(epochs):
        indices = tf.range(len(flow), dtype=tf.int32)
        if shuffle:
            indices = tf.random.shuffle(indices)

        totals = [0.0, 0.0, 0.0]
        for step in range(steps):
            start = batch_size * step
            end = start + batch_size
            idxs = indices[start:end]
            (x_notes, x_durations), (y_notes, y_durations) = flow[idxs]
            n_ = np.array(x_notes, dtype='float32')
            d_ = np.array(x_durations, dtype='float32')
            # The dataset yields class indices; the loss expects one-hot rows.
            y_notes_ohv = tf.keras.utils.to_categorical(y_notes, c_notes)
            y_durations_ohv = tf.keras.utils.to_categorical(y_durations, c_durations)

            step_losses = train_step(model, n_, d_, y_notes_ohv, y_durations_ohv, optimizer)
            for k, value in enumerate(step_losses):
                totals[k] += float(value)

        for key, total in zip(history_keys, totals):
            history[key].append(total / steps if steps else float('nan'))

    return history

In [30]:
his = train(data_flow, epochs=1, batch_size=32, shuffle=False)

In [31]:
(x_n, x_d), (y_n, y_d) = data_flow[0,1,2]

In [32]:
y_n_ohv = tf.keras.utils.to_categorical(y_n, c_notes)
print(y_n_ohv)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [33]:
y_d_ohv = tf.keras.utils.to_categorical(y_d, c_durations)
print(y_d_ohv)

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [34]:
n_ = np.array(x_n, dtype='float32')
d_ = np.array(x_d, dtype='float32')
yn_ = np.array(y_n, dtype='int32')
yd_ = np.array(y_d, dtype='int32')
print(n_.shape)

(3, 32)


In [35]:
outputs = model([n_, d_])

In [36]:
print(outputs)

[<tf.Tensor: shape=(3, 460), dtype=float32, numpy=
array([[3.0682784e-02, 5.3443538e-05, 2.0402975e-04, ..., 2.0894455e-02,
        4.1645773e-05, 8.3751569e-05],
       [3.0460177e-02, 5.8205740e-05, 2.2089636e-04, ..., 2.1843564e-02,
        4.8628892e-05, 9.7930686e-05],
       [3.0708479e-02, 5.4561318e-05, 2.1162932e-04, ..., 2.1224793e-02,
        4.5615932e-05, 9.1532966e-05]], dtype=float32)>, <tf.Tensor: shape=(3, 18), dtype=float32, numpy=
array([[1.36665197e-03, 1.33941928e-03, 8.84538829e-01, 6.04659843e-04,
        6.32519805e-05, 8.81990120e-02, 2.66816554e-04, 8.74954555e-03,
        8.31269100e-03, 6.66071661e-04, 4.78503935e-05, 2.70399614e-03,
        9.85994411e-05, 1.84305455e-03, 7.04583872e-05, 1.00363359e-04,
        8.03445000e-04, 2.25342999e-04],
       [1.43944845e-03, 1.42998830e-03, 8.86375010e-01, 6.41274557e-04,
        7.52371343e-05, 8.57009217e-02, 2.89972784e-04, 8.88304319e-03,
        8.30828026e-03, 7.19360542e-04, 5.28744349e-05, 2.74102646e-03,
 

In [37]:
losses = train_step(model, n_, d_, y_n_ohv, y_d_ohv, tf.keras.optimizers.Adam(learning_rate=0.001))