我是想用LSTM做个时序数据的多分类问题,最终目标类别有三种
先放一下数据结构和报错代码
X_train, X_test, Y_train, Y_test (3449, 64, 9) (1699, 64, 9) (3449, 1) (1699, 1)
y_train, y_test (3449, 3) (1699, 3) #经过one_hot转变了一下
#报错内容
#WARNING:tensorflow:Model was constructed with shape (None, 64) for input KerasTensor(type_spec=TensorSpec(shape=(None, 64), dtype=tf.float32, name='embedding_17_input'), name='embedding_17_input', description="created by layer 'embedding_17_input'"), but it was called on an input with incompatible shape (None, 64, 9).
X_train的数据样式如下:
print(X_train)
[[[-1.20000000e-02 8.90000000e-02 -6.50000000e-02 ... -5.65103125e-01
1.01594729e+00 -1.30111378e+00]
[-3.50000000e-02 9.40000000e-02 -7.00000000e-02 ... -6.28892400e-01
8.63049905e-01 -1.29727921e+00]
[-2.60000000e-02 8.60000000e-02 -5.90000000e-02 ... -6.53028518e-01
8.31202442e-01 -1.29352155e+00]
...
[-1.00000000e-02 -1.70000000e-02 -1.10000000e-02 ... -6.14332537e-01
-9.90024734e-01 -1.32757610e+00]
[-4.00000000e-03 -1.40000000e-02 -4.00000000e-03 ... -6.13891972e-01
-9.88219437e-01 -1.33221045e+00]
[-1.30000000e-02 -1.30000000e-02 0.00000000e+00 ... -6.11675136e-01
-9.60667708e-01 -1.38080713e+00]]
...
[[-4.40000000e-02 3.00000000e-03 0.00000000e+00 ... 1.03778140e+00
3.10468117e-01 8.45172798e-01]
[-1.80000000e-02 8.00000000e-03 1.10000000e-02 ... 1.15684726e+00
3.61290484e-01 8.35110613e-01]
[-2.70000000e-02 -3.00000000e-03 -4.00000000e-03 ... 1.19378916e+00
3.73164160e-01 8.17015112e-01]
...
[-2.70000000e-02 -3.50000000e-02 4.00000000e-03 ... 7.82883921e-01
1.37620405e+00 6.17715226e-01]
[-1.80000000e-02 -3.30000000e-02 -2.00000000e-03 ... 7.76200044e-01
1.34245390e+00 6.07810177e-01]
[-1.30000000e-02 -3.30000000e-02 -9.00000000e-03 ... 7.70533406e-01
1.29560074e+00 6.04281932e-01]]]
我的代码如下所示,刚开始学习,写的不好请见谅:
import tensorflow as tf
import os
from tensorflow import keras
import numpy as np
import _pickle as cPickle
from sklearn.model_selection import train_test_split
#超参数设置
n_steps = 64 #时间窗口内的数据行数
n_hidden = 100 #隐藏层的神经元数,隐藏层输出h的维数
n_classes = 3 #最终分类类别数量
learning_rate = 0.0025 #学习速度
n_inputs = 9 #每个step内的数据维度,当前实验内为9个,即paper中选取的9个数据维度
training_epochs = 2000 #整个数据集历遍次数
batch_size = 1500 #单次迭代训练样本数
#定义LSTM模型
'''
1. embedding()函数为结构的第一层,规定了输入数据后转化的shape,输出结果为(None, 64, 100),在原paper中时间窗口的size为64,初始的隐藏层h的维数为100
2. SpatialDropout1D与Dropout的形式不太一样,后者为随机关闭20%的神经元,前者为关闭总量为20%的区域
3. 原paper为两层LSTM结构,所以写了两个LSTM的cell,需要在第一个cell中声明,recurrent_activation = 'relu'即为在cell内的循环激活函数为'relu',
函数的目的是,将输入小于0的值幅值为0,输入大于0的值不变,以及return_sequences = True,目的是将第一个LSTM Cell的输出作为第二个Cell的输入,且
shape与第一层(embedding层)输出的保持一致,而第二个Cell的return_sequences = False,是因为这一层的输出是要接到全连接层根据激活函数给出output
4. use_bias = 'True'表示LSTM的计算方式为WX+B,默认也是True,可以省略
5. unit_forget_bias = 'True'在初始情况下C的状态为全保留,True对应Tensorflow1中的forget_bias = 1的情况,即全都不遗忘
6. dropout和recurrent_dropout与上述的drop类似,均是防止过拟合情况出现
7. Dense()因为前面已经定义过输入的shape,所以这里不需要定义了,驾驶行为分为3类(n_class),因为是多分类问题,所以激活函数选择softmax
8. compile()在原paper中已经说到使用Adam进行优化,而激活函数选择的softmax,多分类问题选择的损失函数为分类-交叉熵函数,categorical_crossentropy
对于metrics参数来讲,独热码就是one_hot,y和y_都是数值 ‘accuracy’,y_和y都是独热码(概率分布y)使用 ‘categorical_accuracy’,
y_是数值,y是独热码(概率分布)则使用 ‘sparse_categorical_accuracy’
'''
def lstm_model():
model = tf.keras.Sequential([
keras.layers.Embedding(input_dim = n_inputs, output_dim = n_hidden, input_length = n_steps),
keras.layers.SpatialDropout1D(0.2),
keras.layers.LSTM(n_hidden, use_bias = 'True', unit_forget_bias = 'True', dropout = 0.2, recurrent_dropout = 0.2, recurrent_activation = 'relu',return_sequences = True),
keras.layers.LSTM(n_hidden, return_sequences = False),
keras.layers.Dense(n_classes, activation='softmax')
])
model.compile( #使用model.compile()方法来配置训练方法
optimizer = tf.keras.optimizers.Adam(learning_rate = learning_rate), #使用Adam优化器,学习率为0.0025
loss = 'categorical_crossentropy', #配置损失函数
metrics = ['‘sparse_categorical_accuracy’']
)
return model
model = lstm_model()
model.summary()
def one_hot(y_):
y_ = y_.reshape(len(y_))
n_values = np.max(y_) + 1
return np.eye(n_values)[np.array(y_, dtype=np.int32)] # Returns FLOATS,np.eye()转化为one_hot数组
#定义数据导入
def load_motorway_dataset(data_path='data'):
# Function to load the motorway dataset only
#open()中的'rb'代表着以二进制格式打开一个文件用于只读。文件指针将会放在文件的开头。这是默认模式。一般用于非文本文件如图片等
with open(os.path.join(data_path, 'motorway_dataset_window_64_proc_veh_DtA.pkl'), 'rb') as f:
save = cPickle.load(f, encoding='bytes')
dt = {}
#从print(data)看键的名称分别是b'labels', b'dataset',而引用时用的dataset,labels,名称不一样,所以没有找到对应的键。需要对data的键decode一下
for k,v in save.items():
dt.update({k.decode():v})
motorway_dataset = dt['dataset']
motorway_labels = dt['labels']
del save
del dt
print('Motorway set', motorway_dataset.shape, motorway_labels.shape)
X_train, X_test, y_train, y_test = train_test_split(motorway_dataset, motorway_labels, test_size=0.33, random_state=42)
return X_train, X_test, y_train, y_test
def load_secondary_dataset(data_path='data'):
# Function to load the secondary dataset only
with open(os.path.join(data_path,'secondary_dataset_window_64_proc_veh_DtA.pkl'), 'rb') as f:
save = cPickle.load(f, encoding='bytes')
dt = {}
for k,v in save.items():
dt.update({k.decode():v})
secondary_dataset = dt['dataset']
secondary_labels = dt['labels']
del save
del dt
print('Secondary set', secondary_dataset.shape, secondary_labels.shape)
X_train, X_test, y_train, y_test = train_test_split(secondary_dataset, secondary_labels, test_size=0.33, random_state=42)
return X_train, X_test, y_train, y_test
def load_full_dataset(data_path='data'):
# Function to load the full dataset (motorway+secondary roads)
with open(os.path.join(data_path, 'motorway_dataset_window_64_proc_veh_DtA.pkl'), 'rb') as f:
save = cPickle.load(f, encoding='bytes')
dt = {}
for k,v in save.items():
dt.update({k.decode():v})
motorway_dataset = dt['dataset']
motorway_labels = dt['labels']
del save
del dt
print('Motorway set', motorway_dataset.shape, motorway_labels.shape)
with open(os.path.join(data_path,'secondary_dataset_window_64_proc_veh_DtA.pkl'), 'rb') as f:
save = cPickle.load(f, encoding='bytes')
dt = {}
for k,v in save.items():
dt.update({k.decode():v})
secondary_dataset = dt['dataset']
secondary_labels = dt['labels']
del save
del dt
print('Secondary set', secondary_dataset.shape, secondary_labels.shape)
dataset = np.concatenate((motorway_dataset,secondary_dataset), axis=0)
labels = np.concatenate((motorway_labels,secondary_labels), axis=0)
X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.33, random_state=42)
return X_train, X_test, y_train, y_test
#载入数据
'''
载入全部数据
X_train, X_test, y_train, y_test = load_full_dataset()
载入高速公路数据
X_train, X_test, y_train, y_test = load_motorway_dataset()
载入二级道路数据
X_train, X_test, y_train, y_test = load_secondary_dataset()
查看是否有空值
np.isnan(X_test).sum()
'''
X_train, X_test, Y_train, Y_test = load_motorway_dataset()
print('X_train, X_test, Y_train, Y_test',X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
#将labels转为one_hot独热编码,与分类列别数对应上,原labels二位数组为[xxx,1],转化后为[xxx,3]
Y_train = one_hot(Y_train)
Y_test = one_hot(Y_test)
print('Y_train, Y_test', Y_train.shape, Y_test.shape)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',min_delta=0.0001, patience=10, restore_best_weights=True)
history = model.fit(X_train, Y_train, epochs = training_epochs, batch_size = batch_size, validation_split=0.1, callbacks=[callback])
目前的问题就是输入数据格式不匹配,我尝试用下面的方法解决,但是又报错说Y的样本格式不匹配,烦请帮忙解决一下,谢谢。
X_train = tf.reshape(X_train,[len(X_train),-1])
X_test = tf.reshape(X_test,[len(X_test),-1])
#报错的形式内容:Can not squeeze dim[1], expected a dimension of 1, got 3 for '{{node Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](IteratorGetNext:1)' with input shapes: [?,3].