import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split #数据拆分
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
# 1. Load the data
data_path = "data.csv"
# Feature columns are listed explicitly and the target is named separately,
# so the feature/target split never depends on the column order of the CSV.
feature_columns = ['speed_ave', 'Direction', 'day', 'hour', 'minute']
target_column = "TransTime_ave"
data = pd.read_csv(
    filepath_or_buffer=data_path,
    usecols=feature_columns + [target_column],
)
# 2. Select features and target
# `usecols` keeps the file's own column order, not the order of the list, so
# a positional slice such as `iloc[:, :-1]` would leak the target into the
# features whenever the target is not the last column in the file.
x = data[feature_columns]
y = data[target_column]
# 3. Split the dataset (80% train / 20% test)
# The split is seeded so that repeated runs train on the same rows; the
# estimator below is already seeded with random_state=0.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
# 4. Feature engineering: standardization
# The scaler is fitted on the training split only and then reused for the
# test split, so no test-set statistics leak into training.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)
# Model: random forest classifier.
# NOTE(review): the target is cast to int and treated as class labels; if
# TransTime_ave is a continuous quantity a regressor may be a better fit,
# and min_impurity_decrease=0.2 looks large for a classifier - confirm.
estimator = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
    min_samples_split=10,
    min_samples_leaf=4,
    max_features=None,
    min_impurity_decrease=0.2,
    oob_score=True,
)
estimator.fit(x_train, y_train.astype('int'))
# Predictions for the held-out test split.
y_predict = estimator.predict(x_test)
# 5. Prediction for a single hand-written sample
# presumably ordered like the feature columns (speed_ave, Direction, day,
# hour, minute) - verify against the training data.
Text_set = [100,1,17,4,30]
# The model was trained on standardized features, so the raw sample must go
# through the same fitted scaler before prediction. Building a one-row frame
# with the training column names keeps the feature order explicit.
sample = pd.DataFrame([Text_set], columns=x.columns)
pre = estimator.predict(transfer.transform(sample))
print(pre)