问题遇到的现象和发生背景
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels import *
month_return = pd.read_excel("/Users/Desktop/实证/收益率.xlsx", sheet_name = 0, header = 0)
trade_data = pd.read_excel("/Users/Desktop/实证/交易数据.xlsx", sheet_name = 0, header = 0)
finance_data = pd.read_excel("/Users/Desktop/实证/融资融券.xlsx", sheet_name = 0, header = 0)
control_data = pd.read_excel("/Users/tDesktop/实证/控制变量.xlsx", sheet_name = 0, header = 0)
Three_factor = pd.read_excel("/Users/Desktop/实证/三因子.xlsx", sheet_name = 0, header = 0)
carhart_factor = pd.read_excel("/Users/Desktop/实证/四因子.xlsx", sheet_name = 0, header = 0)
month_return['emrwd'] = month_return.groupby(['Stkcd'])['Mretwd'].shift()
Three_factor['RiskPremium'] = Three_factor.groupby(['Date'])['RiskPremium1'].shift()
Three_factor['SMB'] = Three_factor.groupby(['Date'])['SMB1'].shift()
Three_factor['HML'] = Three_factor.groupby(['Date'])['HML1'].shift()
Three_factor['Date_merge'] = pd.to_datetime(Three_factor['Date'])
carhart_factor['Date_merge'] = pd.to_datetime(carhart_factor['Date'])
from pandas.tseries.offsets import *
month_return['Stkcd_merge'] = month_return['Stkcd'].astype(dtype='string')
month_return['Date_merge'] = pd.to_datetime(month_return['Trdmnt'])
trade_data['Stkcd_merge'] = trade_data['Stock'].dropna().astype(dtype='int').astype(dtype='string')
trade_data['Date_merge'] = pd.to_datetime(trade_data['Date'])
trade_data['Date_merge'] += MonthBegin()
finance_data['Stkcd_merge'] = finance_data['Stock'].dropna().astype(dtype='int').astype(dtype='string')
finance_data['Date_merge'] = pd.to_datetime(finance_data['Date'])
finance_data['Date_merge'] += MonthBegin()
control_data['Stkcd_merge'] = control_data['Stkcd'].dropna().astype(dtype='int').astype(dtype='string')
control_data['Date_merge'] = pd.to_datetime(control_data['Date'])
control_data['Date_merge'] += MonthBegin()
month_return = month_return[month_return['Date_merge'] >= '2010-01']
trade_data = trade_data[trade_data['Date_merge'] >= '2010-01']
finance_data = finance_data[finance_data['Date_merge'] >= '2010-01']
control_data = control_data[control_data['Date_merge'] >= '2010-01']
return_company = pd.merge(trade_data, month_return, on=['Stkcd_merge', 'Date_merge'])
return_company = pd.merge(return_company, finance_data, on=['Stkcd_merge', 'Date_merge'])
return_company = pd.merge(return_company, control_data, on=['Stkcd_merge', 'Date_merge'])
return_company = pd.merge(return_company, Three_factor, on=['Date_merge'])
return_company = pd.merge(return_company, carhart_factor, on=['Date_merge'])
return_company = return_company.set_index(['Stkcd_merge','Date_merge'])
test_data_1 = return_company[(return_company['Ndaytrd']>=10)]
test_data_1 = test_data_1[['emrwd', 'IV(FF-3)', 'RiskPremium', 'SMB', 'HML']].dropna()
fm = FamaMacBeth(dependent = test_data_1['emrwd'],exog = sm.add_constant(test_data_1[['IV(FF-3)', ]]),check_rank=False)
res_fm = fm.fit(debiased=False)
res_fm
/Users/tianwenxin/PycharmProjects/fama-macbeth/venv/bin/python /Users/tianwenxin/PycharmProjects/fama-macbeth/main.py
/Users/tianwenxin/PycharmProjects/fama-macbeth/main.py:43: FutureWarning: Passing 'suffixes' which cause duplicate columns {'Date_x'} in the result is deprecated and will raise a MergeError in a future version.
return_company = pd.merge(return_company, Three_factor, on=['Date_merge'])
Traceback (most recent call last):
File "/Users/tianwenxin/PycharmProjects/fama-macbeth/main.py", line 52, in <module>
fm = FamaMacBeth(dependent = test_data_1['emrwd'],exog = sm.add_constant(test_data_1[['IV(FF-3)', ]]),check_rank=False)
File "/Users/tianwenxin/PycharmProjects/fama-macbeth/venv/lib/python3.10/site-packages/linearmodels/panel/model.py", line 2861, in __init__
super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
File "/Users/tianwenxin/PycharmProjects/fama-macbeth/venv/lib/python3.10/site-packages/linearmodels/panel/model.py", line 309, in __init__
self._validate_data()
File "/Users/tianwenxin/PycharmProjects/fama-macbeth/venv/lib/python3.10/site-packages/linearmodels/panel/model.py", line 459, in _validate_data
self._constant, self._constant_index = has_constant(x, rank_of_x)
File "/Users/tianwenxin/PycharmProjects/fama-macbeth/venv/lib/python3.10/site-packages/linearmodels/shared/linalg.py", line 31, in has_constant
loc: Optional[int] = int(np.argwhere(np.all(x == 1, axis=0)))
TypeError: only size-1 arrays can be converted to Python scalars