m0_74420115 2024-05-06 21:00 采纳率: 71.9%
浏览 4
已结题

优化LSTM预测股票价格的代码

这是我的代码,用lstm预测股票的价格,在这个代码的基础上,优化这个代码,使预测的结果更加清晰

import pandas as pd

def parse_date(date_string):
    """Convert a 'YYYY_MM_DD' date string into a pandas Timestamp."""
    return pd.Timestamp(date_string.replace('_', '-'))

# `date_parser=` was deprecated in pandas 2.0 (and later removed); read the
# index as plain strings and convert it afterwards — same result, forward
# compatible.
df = pd.read_csv('D:/LSTMdata.csv', index_col='Date')
df.index = pd.to_datetime(df.index.str.replace('_', '-'))
df.sort_index(inplace=True)
def Stock_Price_LSTM_Data_Precesing(df, mem_his_days, pre_days):
    """Turn a price DataFrame into sliding-window LSTM training data.

    NOTE: mutates `df` in place (dropna + adds a 'label' column), matching
    the original behavior.

    Parameters
    ----------
    df : DataFrame indexed by date, containing at least a 'Close' column.
    mem_his_days : int, length of the look-back window per sample.
        (The original body overwrote this parameter with a hard-coded 10,
        which silently disabled the mem_days grid search — fixed here.)
    pre_days : int, how many days ahead the label looks.

    Returns
    -------
    X : ndarray of shape (n_samples, mem_his_days, n_features)
    y : ndarray of shape (n_samples,) — Close price pre_days ahead.
    X_lately : ndarray of the last pre_days windows (no labels yet);
        previously returned as a plain list, now an ndarray for consistency.
    """
    import numpy as np
    from collections import deque

    df.dropna(inplace=True)
    df.sort_index(inplace=True)
    df['label'] = df['Close'].shift(-pre_days)

    # Standardize features (all columns except the trailing 'label').
    # Equivalent to sklearn's StandardScaler: population std (ddof=0),
    # zero-variance columns left unscaled.
    features = df.iloc[:, :-1].to_numpy(dtype=float)
    mean = features.mean(axis=0)
    std = features.std(axis=0)
    std[std == 0.0] = 1.0
    sca_X = (features - mean) / std

    # Sliding window over the scaled rows.
    deq = deque(maxlen=mem_his_days)
    X = []
    for row in sca_X:
        deq.append(list(row))
        if len(deq) == mem_his_days:
            X.append(list(deq))

    # The last pre_days windows have no label yet — keep them for prediction.
    X_lately = np.array(X[-pre_days:])
    X = np.array(X[:-pre_days])
    y = np.array(df['label'].values[mem_his_days - 1:-pre_days])
    return X, y, X_lately
# Quick sanity check of the windowing: 5-day history, predict 10 days ahead.
X, y, X_lately = Stock_Price_LSTM_Data_Precesing(df, 5, 10)
for seq in (X, y, X_lately):
    print(len(seq))
pre_days = 10
# Hyper-parameter grid searched below (3 * 3 * 3 * 2 = 54 model fits).
mem_days = [5, 10, 15]
lstm_layers = [1, 2, 3]
dense_layers = [1, 2, 3]
units = [16, 32]
# Reduced grid for a quick smoke test:
# mem_days = [10]; lstm_layers = [1]; dense_layers = [1]; units = [32]

# Hoisted out of the loop body: re-importing on every iteration is wasted work.
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

for the_mem_days in mem_days:
    for the_lstm_layers in lstm_layers:
        for the_dense_layers in dense_layers:
            for the_units in units:
                # One checkpoint file per hyper-parameter combination;
                # {val_mape}/{epoch} are filled in by Keras at save time.
                # (The original read `filepath=filepath=f"..."` — a
                # duplicated assignment, harmless but confusing.)
                filepath = (f"./theLSTMbestmodel1/{{val_mape:.2f}}{{epoch:02d}}"
                            f"men{the_mem_days}lstm{the_lstm_layers}"
                            f"dense{the_dense_layers}unit{the_units}.keras")
                checkpoint = ModelCheckpoint(
                    filepath=filepath,
                    save_weights_only=False,
                    monitor='val_mape',
                    mode='min',
                    save_best_only=True)

                X, y, X_lately = Stock_Price_LSTM_Data_Precesing(df, the_mem_days, pre_days)
                # Time series: keep chronological order, so no shuffling.
                X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.1)

                model = Sequential()
                model.add(LSTM(the_units, input_shape=X.shape[1:], activation='relu', return_sequences=True))
                model.add(Dropout(0.1))
                # the_lstm_layers extra stacked LSTM layers.
                for _ in range(the_lstm_layers):
                    model.add(LSTM(the_units, activation='relu', return_sequences=True))
                    model.add(Dropout(0.1))

                # Final LSTM collapses the time dimension.
                model.add(LSTM(the_units, activation='relu'))
                model.add(Dropout(0.1))
                for _ in range(the_dense_layers):
                    model.add(Dense(the_units, activation='relu'))
                    model.add(Dropout(0.1))

                model.add(Dense(1))
                model.compile(optimizer='adam',
                              loss='mse',
                              metrics=['mape'])
                model.fit(X_train, y_train, batch_size=32, epochs=50,
                          validation_data=(X_test, y_test), callbacks=[checkpoint])

  • 写回答

2条回答 默认 最新

  • 专家-赤兔[在线] 优质创作者: 编程框架技术领域 2024-05-06 21:02
    关注

    引自免费微信小程序:皆我百晓生

    在您提供的代码基础上,我们可以进行一些改进以提高模型的性能并使得预测结果更清晰。以下是具体的建议和修改后的代码:

    1. 数据标准化:确保输入数据范围在相似尺度上对模型的训练很有帮助。您已经使用了StandardScaler对数据进行了预处理,但是我们还可以尝试其他的归一化方法,比如MinMaxScaler。

    2. 调整学习率:尝试使用不同的学习率,可能会影响模型的收敛速度和最终表现。

    3. 早停法(EarlyStopping):添加一个早停回调,当验证损失不再降低时停止训练,防止过拟合。

    4. 模型复杂度:减少过度复杂的模型,尝试只保留最优的层结构组合,例如只保留一个LSTM层和一个全连接层。

    5. 正则化:增加权重衰减(L1、L2),有助于防止过拟合。

    6. 批量大小:调整批量大小,看看是否能改善训练效果。

    7. 优化器:除了Adam外,可以尝试其他优化器,如RMSprop或SGD。

    8. 随机种子:设置随机种子以确保实验可重复性。

    9. 可视化:绘制训练过程中的损失和验证损失曲线,以便观察模型的训练趋势。

    下面是根据上述建议修改后的代码:

    import pandas as pd
    import numpy as np
    import tensorflow as tf
    import matplotlib.pyplot as plt  # needed for the loss curves plotted below
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Dropout
    from tensorflow.keras.optimizers import Adam, RMSprop, SGD
    from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    # Fix the RNG seeds so runs are reproducible.
    np.random.seed(42)
    tf.random.set_seed(42)

    def parse_date(date_string):
        """Convert a 'YYYY_MM_DD' string into a pandas Timestamp."""
        return pd.Timestamp(date_string.replace('_', '-'))

    # `date_parser=` was deprecated in pandas 2.0; parse the index after
    # reading instead — equivalent and forward-compatible.
    df = pd.read_csv('D:/LSTMdata.csv', index_col='Date')
    df.index = pd.to_datetime(df.index.str.replace('_', '-'))
    df.sort_index(inplace=True)
    
    def Stock_Price_LSTM_Data_Precesing(df, mem_his_days, pre_days):
        """Build sliding-window LSTM samples with min-max scaled features.

        Mutates `df` in place (dropna + 'label' column). Returns
        (X, y, X_lately) where X has shape (n, mem_his_days, n_features),
        y is the Close price pre_days ahead, and X_lately holds the last
        pre_days unlabeled windows.
        """
        df.dropna(inplace=True)
        df.sort_index(inplace=True)
        df['label'] = df['Close'].shift(-pre_days)

        # Min-max scale the feature columns only. Equivalent to fitting
        # MinMaxScaler on df.iloc[:, :-1]; the label column (which contains
        # NaNs from the shift) is excluded from scaling entirely.
        # The original `mem_his_days = 10` override is removed — it silently
        # ignored the caller's window size.
        features = df.iloc[:, :-1].to_numpy(dtype=float)
        col_min = features.min(axis=0)
        col_range = features.max(axis=0) - col_min
        col_range[col_range == 0.0] = 1.0  # constant columns stay unscaled
        scaled = (features - col_min) / col_range

        from collections import deque
        deq = deque(maxlen=mem_his_days)

        X = []
        for row in scaled:
            deq.append(list(row))
            if len(deq) == mem_his_days:
                X.append(list(deq))

        # Last pre_days windows have no label yet — kept for prediction.
        X_lately = np.array(X[-pre_days:])
        X = np.array(X[:-pre_days])
        y = np.array(df['label'].values[mem_his_days - 1:-pre_days])
        return X, y, X_lately
    
    pre_days = 10
    mem_days = [10]      # single values keep the search space small
    lstm_layers = [1]
    dense_layers = [1]
    units = [32]

    # Additional dimensions of the hyper-parameter search.
    batch_sizes = [32, 64]
    learning_rates = [0.001, 0.0001]
    optimizer_names = ['adam', 'rmsprop', 'sgd']
    weight_regularization = [0.001, 0.0001]

    # getattr(tf.keras.optimizers, 'adam') raises AttributeError — the class
    # attributes are capitalized (Adam, RMSprop, SGD) — so map the lowercase
    # names to the classes imported above.
    optimizer_classes = {'adam': Adam, 'rmsprop': RMSprop, 'sgd': SGD}

    import matplotlib.pyplot as plt  # the original used plt without importing it

    for the_mem_days in mem_days:
        for the_lstm_layers in lstm_layers:
            for the_dense_layers in dense_layers:
                for the_units in units:
                    for batch_size in batch_sizes:
                        for learning_rate in learning_rates:
                            for optimizer_name in optimizer_names:
                                for weight_reg in weight_regularization:

                                    X, y, X_lately = Stock_Price_LSTM_Data_Precesing(df, the_mem_days, pre_days)
                                    # Time series: no shuffling.
                                    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.1)

                                    # Per-combination checkpoint path. {val_mape}/{epoch} are
                                    # filled in by Keras at save time; the loop variables are
                                    # interpolated here. (The original left literal '{mem_days}'
                                    # keys in the path, which makes Keras raise KeyError when
                                    # it formats the filename.)
                                    filepath = (f"./theLSTMbestmodel1/val_mape_{{val_mape:.2f}}_epoch_{{epoch:02d}}"
                                                f"_mem{the_mem_days}_lstm{the_lstm_layers}"
                                                f"_dense{the_dense_layers}_unit{the_units}.h5")
                                    checkpoint = ModelCheckpoint(filepath, save_best_only=True, monitor='val_loss')
                                    early_stopping = EarlyStopping(monitor='val_loss', patience=5,
                                                                   restore_best_weights=True)

                                    # L2 weight decay belongs on the layers, not the optimizer
                                    # constructor (passing kernel_regularizer to an optimizer
                                    # is a TypeError).
                                    reg = tf.keras.regularizers.l2(weight_reg)

                                    model = Sequential()
                                    # Use the_units (an int); the original passed the whole
                                    # `units` list, which Keras rejects.
                                    model.add(LSTM(the_units, input_shape=X.shape[1:], activation='relu',
                                                   return_sequences=True, kernel_regularizer=reg))
                                    model.add(Dropout(0.1))

                                    model.add(LSTM(the_units, activation='relu', return_sequences=True,
                                                   kernel_regularizer=reg))
                                    model.add(Dropout(0.1))

                                    model.add(LSTM(the_units, activation='relu', kernel_regularizer=reg))
                                    model.add(Dropout(0.1))

                                    model.add(Dense(the_units, activation='relu', kernel_regularizer=reg))
                                    model.add(Dropout(0.1))

                                    model.add(Dense(1))
                                    model.compile(optimizer=optimizer_classes[optimizer_name](learning_rate=learning_rate),
                                                  loss='mse',
                                                  metrics=['mape'])

                                    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=50,
                                                        validation_data=(X_test, y_test),
                                                        callbacks=[early_stopping, checkpoint])

                                    # Plot training/validation loss to judge convergence per run.
                                    plt.plot(history.history['loss'], label='Training Loss')
                                    plt.plot(history.history['val_loss'], label='Validation Loss')
                                    plt.xlabel("Epochs")
                                    plt.ylabel("Loss")
                                    plt.legend()
                                    plt.show()

    # Analyze the collected histories here to pick the best model and predict.
    

    请注意,这只是一个基础的优化方案,实际应用中可能需要更多的调整和试验。同时,由于代码的运行时间和计算资源需求可能会显著增加,因此可能需要在具有足够计算能力的环境中执行此代码。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 5月14日
  • 已采纳回答 5月6日
  • 创建了问题 5月6日