我在用 DTW 算法对齐两个音频文件中的语音时,其中一步是可视化规整路径(warping path)。预期的效果图应该是这样的,但我试了几遍都没有成功,不知道该怎么做。这样的图要用什么方法才能得到?
我目前的代码如下:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from fastdtw import fastdtw
# Load both recordings (resampled to 16 kHz) and compute their magnitude STFTs.
example_path = "./LJ001-0048.wav"
myself_path = "./Re_MyVoice.wav"


def _load_with_spectrogram(wav_path):
    """Return (samples, sample_rate, magnitude STFT) for the file at wav_path."""
    samples, rate = librosa.load(wav_path, sr=16000)
    # The FFT window length equals the sample rate, i.e. one second of audio
    # per analysis window.
    magnitude = np.abs(librosa.stft(y=samples, n_fft=rate))
    return samples, rate, magnitude


exam_audio, exam_rate, exam_spect = _load_with_spectrogram(example_path)
my_audio, my_rate, my_spect = _load_with_spectrogram(myself_path)

# Align the two recordings with FastDTW; the spectrograms are transposed so
# that the sequences being aligned run along the first axis.
distance, path = fastdtw(exam_spect.T, my_spect.T)
# Visualize the dB-scaled magnitude spectrograms of the reference speech and
# my own recording. Both are shown as recorded; no DTW warping is applied here.
plt.figure(figsize=(6, 6))
plt.subplot(2, 1, 1)
plt.title('Spectrogram of LJ001-0048.wav')
# Pass the sample rate the audio was loaded with: without it specshow labels
# the log-frequency axis for its own default rate instead of this signal's.
librosa.display.specshow(
    librosa.amplitude_to_db(exam_spect), sr=exam_rate, y_axis='log'
)
plt.colorbar(format='%+2.0f dB')
plt.subplot(2, 1, 2)
plt.title('Spectrogram of Re_MyVoice.wav')
librosa.display.specshow(
    librosa.amplitude_to_db(my_spect), sr=my_rate, y_axis='log'
)
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()
plt.show()
# Visualize the warping path returned by the DTW algorithm.
#
# fastdtw returns only the path (a list of (exam_frame, my_frame) index pairs),
# not a cost matrix, so the path array itself must not be drawn as an image.
# The background is instead the local frame-to-frame cost between the two
# spectrograms, using the L1 distance (presumably the metric fastdtw applies
# when `dist` is not given -- verify against the installed version).
warp_path = np.asarray(path)
exam_frames = exam_spect.T
my_frames = my_spect.T
# Build the matrix one reference frame at a time so peak memory stays at
# (my frames x frequency bins) rather than a full 3-D difference tensor.
local_cost = np.array(
    [np.abs(my_frames - frame).sum(axis=1) for frame in exam_frames]
)

plt.figure(figsize=(6, 6))
# Frame-index axes share units with the path coordinates, so the overlay
# lines up with the image.
cost_image = librosa.display.specshow(
    local_cost, x_axis='frames', y_axis='frames'
)
plt.colorbar(cost_image, format='%+2.0f')
# Path column 0 indexes the reference recording (rows of local_cost) and
# column 1 indexes my recording (columns of local_cost).
plt.plot(warp_path[:, 1], warp_path[:, 0], color='r')
# Limits come from the number of frames in each recording, not from the
# shape of the path array.
plt.xlim([0, local_cost.shape[1]])
plt.ylim([0, local_cost.shape[0]])
plt.xlabel('Re_MyVoice.wav (frames)')
plt.ylabel('LJ001-0048.wav (frames)')
plt.title('DTW')
plt.tight_layout()
plt.show()