以下为原代码,这种情况该如何解决啊?
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
def predict_heart_disease(file_path, new_data=None, target_column='是否患有心血管疾病'):
    """Train a small binary classifier on one CSV table and classify a sample.

    The table is read with GBK encoding. The column named by *target_column*
    is used as the label and the remaining numeric/boolean columns are used
    as features. Progress, evaluation metrics and the predicted labels are
    printed; nothing is returned.

    Tables that cannot be used are skipped with a message instead of raising:
    those lacking the target column, those with no numeric feature columns,
    and those with fewer than two rows.

    Args:
        file_path: Path of the CSV file to load.
        new_data: Optional sample(s) to classify after training, as a 1-D or
            2-D array-like with one value per feature column. When omitted,
            a built-in 13-feature placeholder sample is used.
        target_column: Name of the label column.

    Returns:
        None.
    """
    # Load the table.
    data = pd.read_csv(file_path, encoding='gbk')
    print(f'Data shape: {data.shape}')

    # Without the label column there is nothing to train on; bail out early
    # rather than failing later on undefined X / y.
    if target_column not in data.columns:
        print(f'Skipping {file_path}: column {target_column!r} not found')
        return

    # Separate the label from the features. StandardScaler only accepts
    # numeric input, so free-text columns are left out of the feature matrix.
    y = data[target_column].values
    features = data.drop(target_column, axis=1).select_dtypes(include=['number', 'bool'])
    X = features.values.astype(float)
    print(f'Feature count: {X.shape[1]}')

    # train_test_split needs at least one row on each side of the split.
    if X.shape[1] == 0 or X.shape[0] < 2:
        print(f'Skipping {file_path}: not enough numeric data to train on')
        return

    # Split first, then fit the scaler on the training part only, so that no
    # statistics of the held-out rows leak into training.
    # NOTE(review): missing values are not imputed here - confirm the tables
    # contain none, or add an imputation step.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Build the network: two hidden ReLU layers and a sigmoid output unit.
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile and train.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, batch_size=32)

    # Evaluate on the held-out rows.
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Classify the new sample(s).
    if new_data is None:
        # Placeholder sample; it must be adapted to the table's real features.
        new_data = [[65, 1, 3, 140, 260, 0, 2, 140, 1, 2.0, 3, 0, 6]]
    sample = np.atleast_2d(np.asarray(new_data, dtype=float))
    if sample.shape[1] != X_train.shape[1]:
        # The scaler and the model were fitted for a fixed feature count; a
        # sample of a different width cannot be transformed or classified.
        print(
            f'Skipping prediction: sample has {sample.shape[1]} features, '
            f'but the model expects {X_train.shape[1]}'
        )
        return
    prediction = model.predict(scaler.transform(sample))
    for probability in prediction[:, 0]:
        if probability >= 0.5:
            print('有心血管疾病')
        else:
            print('无心血管疾病')
# CSV tables to process, one path per entry.
files = [
    r"C:\Users\1\Desktop\heart_disease\1-首次病程记录.csv",
    r"C:\Users\1\Desktop\heart_disease\2-日常病程记录.csv",
    r"C:\Users\1\Desktop\heart_disease\3-出院记录.csv",
    r"C:\Users\1\Desktop\heart_disease\4-检验记录表.csv",
    r"C:\Users\1\Desktop\heart_disease\5-检验明细表.csv",
    r"C:\Users\1\Desktop\heart_disease\6-细菌结果表.csv",
    r"C:\Users\1\Desktop\heart_disease\7-影像检查报告表.csv",
    r"C:\Users\1\Desktop\heart_disease\8-输出结果.csv",
]
# Run the training/prediction routine on every listed table, announcing
# each file before it is processed.
for file in files:
    print("Processing", file)
    predict_heart_disease(file)