m0_74420115 2024-05-07 20:19 采纳率: 71.9%
浏览 18

python求决定系数

这是用LSTM预测股票价格,怎么用预测的数据和实际的数据求决定系数来判断模型的好坏

import pandas as pd
def parse_date(date_string):
    """Convert an underscore-separated date string into a ``pd.Timestamp``.

    Every ``_`` in *date_string* is swapped for ``-`` and the resulting text
    is handed to ``pd.Timestamp`` for parsing.
    """
    dashed = '-'.join(date_string.split('_'))
    return pd.Timestamp(dashed)
# Load the price history with the 'Date' column as the index.  The dates are
# converted after reading instead of through read_csv's `date_parser`
# argument, which pandas has deprecated since version 2.0.
df = pd.read_csv('D:/LSTMdata.csv', index_col='Date')
df.index = pd.DatetimeIndex(df.index.astype(str).map(parse_date), name=df.index.name)
# Put the rows in chronological order; the code below builds its sliding
# windows by walking the rows in index order.
df.sort_index(inplace=True)
def Stock_Price_LSTM_Data_Precesing(df, mem_his_days, pre_days):
    """Turn a price table into sliding-window samples for an LSTM.

    The target of each row is the ``'Close'`` value ``pre_days`` rows later.
    All columns (except a pre-existing ``'label'`` column, which is ignored)
    are standardized and stacked into windows of ``mem_his_days`` consecutive
    rows.

    The caller's ``df`` is left untouched: the function works on a cleaned,
    sorted copy.  Mutating ``df`` in place would add a ``'label'`` column
    whose trailing NaNs make a later call's ``dropna`` delete the newest
    ``pre_days`` rows, so repeated calls would keep shrinking the data.

    Args:
        df: DataFrame containing a ``'Close'`` column; rows with missing
            values are dropped and rows are sorted by index before use.
        mem_his_days: Number of consecutive rows in each input window.
        pre_days: How many rows ahead the target lies.

    Returns:
        A tuple ``(X, y, X_lately)``:
        ``X`` is an array of windows with shape
        ``(samples, mem_his_days, n_features)``;
        ``y`` is an array with the matching future ``'Close'`` values;
        ``X_lately`` is a list holding the last ``pre_days`` windows, for
        which no target exists yet.

    Raises:
        ValueError: If ``mem_his_days`` is smaller than 1 or ``pre_days`` is
            negative.
        KeyError: If ``df`` has no ``'Close'`` column.
    """
    from collections import deque

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    if mem_his_days < 1:
        raise ValueError("mem_his_days must be at least 1")
    if pre_days < 0:
        raise ValueError("pre_days must not be negative")

    # Private, cleaned copy -- see the docstring for why `df` is not mutated.
    frame = df.drop(columns='label', errors='ignore').dropna().sort_index()
    # Target for row i is Close[i + pre_days]; the last pre_days entries are NaN.
    labels = frame['Close'].shift(-pre_days).to_numpy()

    # NOTE(review): the scaler is fitted on the whole series, including rows
    # that callers later split off as test data, and it is not returned, so
    # new data cannot be scaled consistently -- confirm this is intended.
    scaled = StandardScaler().fit_transform(frame)

    # A bounded deque drops the oldest row automatically, so once it is full
    # every appended row yields the next window.
    window = deque(maxlen=mem_his_days)
    windows = []
    for row in scaled:
        window.append(list(row))
        if len(window) == mem_his_days:
            windows.append(list(window))

    # Explicit split positions instead of negative slices, so pre_days == 0
    # keeps every window as a training sample rather than none of them.
    split = max(len(windows) - pre_days, 0)
    X_lately = windows[split:]
    X = np.array(windows[:split])
    # Window k ends at row k + mem_his_days - 1, hence the label offset.
    y = np.array(labels[mem_his_days - 1:max(len(labels) - pre_days, 0)])
    return X, y, X_lately
# Smoke run: build one dataset and report how many entries each part holds.
X, y, X_lately = Stock_Price_LSTM_Data_Precesing(df, 5, 10)
for part in (X, y, X_lately):
    print(len(part))

# Number of rows ahead that is predicted, shared by every configuration.
pre_days = 10

# Hyper-parameter grid explored by the search loop further down.
mem_days = [5, 10, 15]
lstm_layers = [1, 2, 3]
dense_layers = [1, 2, 3]
units = [16, 32]
# Single-configuration alternative for a quick run:
# mem_days=[10]
# lstm_layers=[1]
# dense_layers=[1]
# units = [32]
 
from itertools import product

import tensorflow as tf  # not referenced in this section; kept so `tf` stays bound
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential


def _build_lstm_model(input_shape, n_extra_lstm, n_dense, n_units):
    """Assemble and compile one LSTM regressor for the grid search.

    The network is: one sequence-returning LSTM that receives `input_shape`,
    `n_extra_lstm` further sequence-returning LSTMs, one final LSTM,
    `n_dense` ReLU dense layers and a single-unit output layer.  Every
    LSTM and hidden dense layer has `n_units` units and is followed by
    `Dropout(0.1)`.  The model is compiled with Adam, MSE loss and the MAPE
    metric.
    """
    model = Sequential()
    model.add(LSTM(n_units, input_shape=input_shape, activation='relu', return_sequences=True))
    model.add(Dropout(0.1))
    for _ in range(n_extra_lstm):
        model.add(LSTM(n_units, activation='relu', return_sequences=True))
        model.add(Dropout(0.1))

    model.add(LSTM(n_units, activation='relu'))
    model.add(Dropout(0.1))
    for _ in range(n_dense):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(0.1))

    model.add(Dense(1))
    model.compile(optimizer='adam',
                  loss='mse',
                  metrics=['mape'])
    return model


# product() walks the grid in the same order as four nested loops would.
for the_mem_days, the_lstm_layers, the_dense_layers, the_units in product(
        mem_days, lstm_layers, dense_layers, units):
    # The doubled braces survive the f-string as literal {val_mape:.2f} and
    # {epoch:02d} placeholders, which ModelCheckpoint fills in when it saves.
    filepath = f"./theLSTMbestmodel1/{{val_mape:.2f}}{{epoch:02d}}men{the_mem_days}lstm{the_lstm_layers}dense{the_dense_layers}unit{the_units}.keras"
    checkpoint = ModelCheckpoint(
        filepath=filepath,
        save_weights_only=False,
        monitor='val_mape',
        mode='min',
        save_best_only=True)

    X, y, X_lately = Stock_Price_LSTM_Data_Precesing(df, the_mem_days, pre_days)
    # shuffle=False keeps the chronological order: the last 10% is held out.
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.1)

    model = _build_lstm_model(X.shape[1:], the_lstm_layers, the_dense_layers, the_units)
    # NOTE(review): the held-out split doubles as validation data, so the
    # checkpoint chosen by val_mape has already seen the "test" set.
    model.fit(X_train, y_train, batch_size=32, epochs=50,
              validation_data=(X_test, y_test), callbacks=[checkpoint])
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt

# Reload one checkpoint written by the grid search and score it on the
# held-out split.
best_model = load_model('./theLSTMbestmodel1/2.4436men10lstm1dense1unit32.keras')
best_model.summary()
# NOTE(review): X_test / y_test are whatever the last grid-search iteration
# left behind; their window length must match the loaded model -- confirm.
best_model.evaluate(X_test, y_test)
pre = best_model.predict(X_test)
print(len(pre))

# Plot actual against predicted prices over the last len(y_test) dates.
df_time = df.index[-len(y_test):]
plt.plot(df_time, y_test, color='red', label='price')
plt.plot(df_time, pre, color='green', label='predict')
# Without legend() the label= texts above are never drawn.
plt.legend()
plt.show()
 

  • 写回答

3条回答 默认 最新

  • 专家-赤兔[在线] 优质创作者: 编程框架技术领域 2024-05-07 20:19
    关注

    引自免费微信小程序:皆我百晓生

    为了计算决定系数(R^2),我们首先需要从给定的预测值和真实值中构建一个评估函数。在Python中,可以使用sklearn库中的r2_score函数来计算决定系数。让我们将这段代码添加到您提供的示例中。

    首先安装所需的库,如果您还没有安装的话:

    pip install scikit-learn
    

    接下来,在您的代码末尾添加以下部分来计算决定系数:

    from sklearn.metrics import r2_score
    # Coefficient of determination on the test split, reusing the predictions
    # already stored in `pre`.
    test_r2 = r2_score(y_true=y_test, y_pred=pre)
    # Coefficient of determination on the training split, which needs fresh
    # predictions from the loaded model.
    y_train_pred = best_model.predict(X_train)
    train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)
    print(f"Training R^2 score: {train_r2}")
    print(f"Testing R^2 score: {test_r2}")
    

    这将会输出训练集和测试集上的决定系数。决定系数的取值范围是 (-∞, 1]:1 表示完美拟合;0 表示模型的效果等同于始终预测真实值的均值;负值表示模型比这种均值基线还要差。通常来说,决定系数越高,模型的预测效果越好。在这里,我们更关注测试集上的决定系数,因为它反映了模型在未见过的数据上的泛化性能。

    完整的代码如下:

    # ... (the earlier code stays unchanged)
    from sklearn.metrics import r2_score
    # Score the training split with fresh predictions from the loaded model.
    y_train_pred = best_model.predict(X_train)
    train_r2 = r2_score(y_train, y_train_pred)
    # Score the test split with the predictions already stored in `pre`.
    test_r2 = r2_score(y_test, pre)
    print(f"Training R^2 score: {train_r2}")
    print(f"Testing R^2 score: {test_r2}")
    # Draw actual and predicted prices over the last len(y_test) dates.
    fig, ax = plt.subplots(figsize=(12, 6))
    df_time = df.index[-len(y_test):]
    ax.plot(df_time, y_test, color='red', label='price')
    ax.plot(df_time, pre, color='green', label='prediction')
    ax.legend()
    ax.set_xlabel("Time")
    ax.set_ylabel("Price")
    ax.set_title("Actual vs Predicted Prices - Test Set")
    plt.show()
    

    运行这段代码后,你会得到训练集和测试集的决定系数,以及一个显示实际价格与预测价格的图形。根据这些结果,您可以评估模型的性能。

    评论 编辑记录

报告相同问题?

问题事件

  • 创建了问题 5月7日