这是我的代码,是提取fbank特征的
代码如下:
# -*- coding: utf-8 -*-
#导入相关的库
from keras.models import Model
from keras.layers import Input, Activation, Conv1D, Lambda, Add, Multiply, BatchNormalization
from keras.optimizers import Adam, SGD
from keras import backend as K
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
import pickle
import glob
from tqdm import tqdm
import os
from python_speech_features import fbank
import scipy.io.wavfile as wav
import librosa
from IPython.display import Audio
#读取数据集文件
text_paths = glob.glob('E:\叶儿\课程课程\毕设\data2try/*.trn')
total = len(text_paths)
print(total)
with open(text_paths[0], 'r', encoding='utf8') as fr:
lines = fr.readlines()
print(lines)
#数据集文件trn内容读取保存到数组中
texts = []
paths = []
for path in text_paths:
with open(path, 'r', encoding='utf8') as fr:
lines = fr.readlines()
line = lines[0].strip('\n').replace(' ', '')
texts.append(line)
paths.append(path.rstrip('.trn'))
print(paths[0], texts[0])
#根据数据集标定的音素读入
def load_and_trim(path):
audio, sr = librosa.load(path)
energy = librosa.feature.rms(audio)
frames = np.nonzero(energy >= np.max(energy) / 5)
indices = librosa.core.frames_to_samples(frames)[1]
audio = audio[indices[0]:indices[-1]] if indices.size else audio[0:0]
return audio, sr
#可视化,显示语音文件的Fbank图
def visualize(index):
path = paths[index]
text = texts[index]
print('Audio Text:', text)
audio, sr = load_and_trim(path)
plt.figure(figsize=(12, 3))
plt.plot(np.arange(len(audio)), audio)
plt.title('Raw Audio Signal')
plt.xlabel('Time')
plt.ylabel('Audio Amplitude')
plt.show()
feature = fbank(audio, sr,nfft=512)
print('Shape of Fbank:', feature.shape)
就是最后一行代码有报错,报错如下:
File "", line 91, in
Audio(visualize(0))
File "", line 76, in visualize
print('Shape of Fbank:', feature.shape)
AttributeError: 'tuple' object has no attribute 'shape'