slxbvs7951 2019-10-06 11:49 采纳率: 0%
浏览 1750

偏最小二乘法回归的Python代码看不懂,有大佬可以帮忙解释一下吗?

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#basic module\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"from sklearn import preprocessing\n",
"from sklearn import metrics\n",
"from sklearn.decomposition import PCA \n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
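"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Q41</th>\n",
"      <th>Q42</th>\n",
"      <th>Q43</th>\n",
"      <th>Q44</th>\n",
"      <th>Q45</th>\n",
"      <th>Q46</th>\n",
"      <th>Q47</th>\n",
"      <th>Q48</th>\n",
"      <th>Q49</th>\n",
"      <th>Q410</th>\n",
"      <th>Q411</th>\n",
"      <th>Q412</th>\n",
"      <th>Q413</th>\n",
"      <th>Q414</th>\n",
"      <th>A1</th>\n",
"      <th>A2</th>\n",
"      <th>A3</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>4</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>-1.61</td>\n",
"      <td>1.91</td>\n",
"      <td>-1.06</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>2</td>\n",
"      <td>2</td>\n",
"      <td>2</td>\n",
"      <td>2</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>2</td>\n",
"      <td>2</td>\n",
"      <td>2</td>\n",
"      <td>1.29</td>\n",
"      <td>-2.49</td>\n",
"      <td>-0.99</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>-0.04</td>\n",
"      <td>1.89</td>\n",
"      <td>-1.29</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>-0.04</td>\n",
"      <td>1.89</td>\n",
"      <td>-1.29</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>3</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>5</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>3</td>\n",
"      <td>4</td>\n",
"      <td>4</td>\n",
"      <td>3</td>\n",
"      <td>-0.23</td>\n",
"      <td>0.77</td>\n",
"      <td>-0.60</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"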
],
"text/plain": [
" Q41 Q42 Q43 Q44 Q45 Q46 Q47 Q48 Q49 Q410 Q411 Q412 Q413 Q414 \\\n",
"0 3 3 3 3 3 4 5 5 5 5 3 3 3 3 \n",
"1 4 4 4 4 4 2 2 2 2 5 5 2 2 2 \n",
"2 4 4 4 4 4 5 5 5 5 5 3 3 3 3 \n",
"3 4 4 4 4 4 5 5 5 5 5 3 3 3 3 \n",
"4 4 4 4 4 3 5 5 5 3 3 3 4 4 3 \n",
"\n",
" A1 A2 A3 \n",
"0 -1.61 1.91 -1.06 \n",
"1 1.29 -2.49 -0.99 \n",
"2 -0.04 1.89 -1.29 \n",
"3 -0.04 1.89 -1.29 \n",
"4 -0.23 0.77 -0.60 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data = pd.read_csv(\"257 928.csv\").loc[:,['Q41','Q42','Q43','Q44','Q45',\n",
" 'Q46','Q47','Q48','Q49','Q410',\n",
" 'Q411','Q412','Q413','Q414',\n",
" 'A1','A2','A3']]\n",
"raw_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Q41', 'Q42', 'Q43', 'Q44', 'Q45', 'Q46', 'Q47', 'Q48', 'Q49', 'Q410',\n",
" 'Q411', 'Q412', 'Q413', 'Q414', 'A1', 'A2', 'A3'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def linear_model(X,w):\n",
" _,loop = np.shape(X)\n",
" sum = 0\n",
" for i in range(loop):\n",
" sum += w[i] * X[:,i]\n",
" return sum"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def loss_function(x_train, y_train, w):\n",
" X = x_train\n",
" Y_pred = linear_model(X,w)\n",
" J = metrics.mean_squared_error(y_train, Y_pred)\n",
" return J"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def model(x_train, y_train, mini=1e-10, alpha=0.001, delta = 0.005, max_iter = 100, max_step = 5000): \n",
" step = 0\n",
" _,loop = np.shape(x_train)\n",
" w = np.random.rand(loop)\n",
" while(True):\n",
" weights = np.array(w)\n",
" for i in range(0, len(w)):\n",
" step_ = 0\n",
" while (True):\n",
" # simplified gradient descent\n",
" w_backup = w[i] \n",
" loss_present = loss_function(x_train, y_train, w)\n",
" w[i] = w_backup + delta\n",
" loss_plus = loss_function(x_train, y_train, w)\n",
" w[i] = w_backup - delta\n",
" loss_sub = loss_function(x_train, y_train, w)\n",
" if (loss_present < loss_plus and loss_present < loss_sub) or step_ >= max_step:\n",
" break\n",
" # update weights\n",
" w[i] = w_backup - alpha*(loss_plus - loss_sub)/(2*delta)\n",
" #w[i] = w_backup + alpha if loss_plus < loss_sub else w_backup - alpha\n",
" step_ += 1\n",
" offset = np.sum(np.square(np.array(w) - weights))\n",
" # end condition\n",
" step += 1\n",
" if (offset < mini) or (step >= max_iter):\n",
" break\n",
" return w, loss_present"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def obtain_elements(x_data, y_data, num):\n",
" comp_list = []\n",
" metric_list = []\n",
" w_list = []\n",
" pca_list = []\n",
" for comp in range(num):\n",
" n_comp = comp + 1\n",
" # pca\n",
" comp_list.append(n_comp)\n",
" pca = PCA(n_components=n_comp,svd_solver='auto')\n",
" X_ = x_data - np.mean(x_data)\n",
" pca.fit(X_, y_data)\n",
" X = pca.transform(X_)\n",
" # linear reg\n",
" pca_list.append(pca)\n",
" w, metric = model(X, y_data )\n",
" w_list.append(w)\n",
" metric_list.append(metric)\n",
" ind = metric_list.index(min(metric_list))\n",
" print(\"metrics :\" ,metric_list)\n",
" return comp_list[ind], pca_list[ind], w_list[ind]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# 第一组\n",
"x1_data = raw_data.loc[:,['Q41','Q42','Q43','Q44','Q45']]\n",
"y1_data = raw_data.loc[:,['A2']]\n",
"len1 = 5\n",
"# 第二组\n",
"x2_data = raw_data.loc[:,['Q46','Q47','Q48','Q49','Q410']]\n",
"y2_data = raw_data.loc[:,['A1']]\n",
"len2 = 5\n",
"# 第三组\n",
"x3_data = raw_data.loc[:,['Q411','Q412','Q413','Q414']]\n",
"y3_data = raw_data.loc[:,['A3']]\n",
"len3 = 4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 计算PLSR(注:下方代码实际是 PCA 降维后用梯度下降做线性回归,即主成分回归 PCR,并非严格意义上的偏最小二乘回归)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" 第一组"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics : [0.937338177736278, 0.90366935147199, 0.9036905658620544, 0.9028029958313659, 0.9022534567720741]\n",
"第一组的主成分保留5个\n",
"回归系数为: [-0.17477811 0.25885742 -0.01157531 -0.07843881 0.06077944]\n"
]
}
],
"source": [
"comp_1, pca_1, w1 = obtain_elements(x1_data, y1_data, len1)\n",
"print(\"第一组的主成分保留%d个\"%comp_1)\n",
"print(\"回归系数为:\", w1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" 第二组"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics : [0.9216669179495057, 0.9126959565507735, 0.900633447790304, 0.8930927646735571, 0.8824868647839249]\n",
"第二组的主成分保留5个\n",
"回归系数为: [-0.19525742 -0.14575206 -0.20305816 -0.17987887 -0.22941539]\n"
]
}
],
"source": [
"comp_2, pca_2, w2 = obtain_elements(x2_data, y2_data, len2)\n",
"print(\"第二组的主成分保留%d个\"%comp_2)\n",
"print(\"回归系数为:\", w2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" 第三组"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics : [0.07356179930507709, 0.06803873635782488, 0.06747567528450515, 0.06722983639980304]\n",
"第三组的主成分保留4个\n",
"回归系数为: [-0.67029064 -0.10083604 0.0447786 -0.04806739]\n"
]
}
],
"source": [
"comp_3, pca_3, w3 = obtain_elements(x3_data, y3_data, len3)\n",
"print(\"第三组的主成分保留%d个\"%comp_3)\n",
"print(\"回归系数为:\", w3)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

  • 写回答

1条回答 默认 最新

  • 关注
    评论

报告相同问题?

悬赏问题

  • ¥30 python代码,帮调试
  • ¥15 #MATLAB仿真#车辆换道路径规划
  • ¥15 java 操作 elasticsearch 8.1 实现 索引的重建
  • ¥15 数据可视化Python
  • ¥15 要给毕业设计添加扫码登录的功能!!有偿
  • ¥15 kafka 分区副本增加会导致消息丢失或者不可用吗?
  • ¥15 微信公众号自制会员卡没有收款渠道啊
  • ¥100 Jenkins自动化部署—悬赏100元
  • ¥15 关于#python#的问题:求帮写python代码
  • ¥20 MATLAB画图图形出现上下震荡的线条