下面是对样本数据的预处理代码:数据包含九种故障类型和一种正常类型,每类取 1000 个样本,按 7:2:1 的比例划分为训练集、测试集和验证集。如果要进行不均衡样本分类,将正常类样本与故障样本的比例设置为 200:1,应该怎么修改代码呢?
# Build STFT spectrogram samples for the 10 classes held in `all_data`.
# For every class, overlapping windows are cut from the record, turned into a
# Gaussian-smoothed STFT magnitude image, and appended to the train / test /
# validation lists together with the class index as label.

# Parameters that do not depend on the class, so they are set once.
fs = 12000  # sampling frequency handed to signal.stft
N = 1024  # number of points in one window
opt = "0-"  # not read in this section; kept in case code elsewhere uses it
stride = 100  # distance between the starts of two consecutive windows

# (number of windows, image list, label list) per split. The splits are
# consumed in this order from one running offset, so each split takes a
# contiguous run of windows from the record: 700 / 200 / 100 = 7:2:1.
# Every class currently gets the same counts; to build an imbalanced data
# set, choose the counts per `data_type` at this point instead.
splits = (
    (700, stfts_train_pics, stfts_train_labels),
    (200, stfts_test_pics, stfts_test_labels),
    (100, stfts_validation_pics, stfts_validation_labels),
)

# Minimum record length so that the very last window is still N points long.
required_len = (sum(count for count, _, _ in splits) - 1) * stride + N

for data_type in range(10):
    load_data = all_data[data_type]

    # Slicing past the end of a record does not raise; it silently returns a
    # shorter (or empty) window. Fail early with a clear message instead.
    if len(load_data) < required_len:
        raise ValueError(
            f"class {data_type}: record has {len(load_data)} points, "
            f"but {required_len} are needed for all windows"
        )

    # NOTE(review): stride < N, so the last windows of one split overlap the
    # first windows of the next split (shared raw samples across splits).
    start = 0
    for count, pics, labels in splits:
        for i in range(count):
            temp = load_data[start:start + N]
            start += stride
            # Flatten one nesting level; each element of the slice is itself
            # iterable (presumably rows of an (n, 1) array - TODO confirm).
            temp = [value for row in temp for value in row]
            f, t, nd = signal.stft(
                temp, fs=fs, window='hann', nperseg=64, noverlap=None,
                nfft=None, detrend=False, return_onesided=True,
                boundary='zeros', padded=True, axis=-1,
            )
            # Magnitude of the complex STFT, smoothed with a 3x3 Gaussian
            # kernel (sigma 1).
            Gaussian = cv2.GaussianBlur(abs(nd), (3, 3), 1)
            pics.append(Gaussian)
            labels.append(data_type)