weixin_40185491 2018-11-30 06:55 采纳率: 0%
浏览 3229

求助 Python报错 TypeError: rank() got an unexpected keyword argument 'numeric_only'

import numpy
import pandas
import matplotlib
#http://www.lfd.uci.edu/~gohlke/pythonlibs/#python-levenshtein
import Levenshtein
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from mpl_toolkits.basemap import Basemap
from matplotlib.collections import PatchCollection

font = {
'family' : 'SimHei'
};
matplotlib.rc('font', **font);

fig = plt.figure()
ax = fig.add_subplot(111)

basemap = Basemap(
llcrnrlon=73.55770111084013,
llcrnrlat=18.159305572509766,
urcrnrlon=134.7739257812502,
urcrnrlat=53.56085968017586
)

chinaAdm1 = basemap.readshapefile(
'D:\PDA\6.6\china\CHN_adm1',
'china'
)

data = pandas.read_csv(
'D:\PDA\6.6\province.csv',
sep="\t"
)

data['总人口'] = data.总人口.str.replace(",", "").astype(int)

data['scala'] = (
data.总人口-data.总人口.min()
)/(
data.总人口.max()-data.总人口.min()
)

#数据处理:如何把JSON格式的数据,转换称为Data Frame的格式化数据
mapData = pandas.DataFrame(basemap.china_info)

#字段匹配第二大招:模糊匹配,从列表中,匹配出最大匹配度的项作为匹配项
def fuzzyMerge(df1, df2, left_on, right_on):
suitSource=[]
suitTarget=[]
suitRatio=[]

df2 = df2.groupby(
    right_on
)[right_on].agg({
    right_on: numpy.size
})

df2[right_on] = df2.index

for df1Index, df1Row in df1.iterrows():
    for df2Index, df2Row in df2.iterrows():
        if Levenshtein.ratio(df2Row[right_on], df1Row[left_on])!=0:
            suitSource.append(df1Row[left_on])
            suitTarget.append(df2Row[right_on])
            suitRatio.append(Levenshtein.ratio(df2Row[right_on], df1Row[left_on]))

suitDataFrame = pandas.DataFrame({
    right_on: suitTarget,
    'suitRatio':suitRatio,
    'suitSource':suitSource
})

suitDataFrame = suitDataFrame.drop_duplicates();

suitDataFrame = suitDataFrame.sort_values(
    ['suitSource', 'suitRatio'], 
    ascending=[1, 0]
)

rnColumn = suitDataFrame.groupby(
    'suitSource'
).rank(
    method='first', 
    numeric_only=True, # TypeError 报错
    ascending=False
)
suitDataFrame['rn'] = rnColumn;

suitDataFrame = suitDataFrame[suitDataFrame.rn==1]

data = df1.merge(
    suitDataFrame, 
    left_on=left_on, 
    right_on="suitSource"
)

del data['rn'];
del data['suitRatio'];
del data['suitSource'];

return data;

fData = fuzzyMerge(data, mapData, '地区', 'NL_NAME_1')

def plotProvince(row):
mainColor = (42/256, 87/256, 141/256, row['scala']);
patches = []
for info, shape in zip(basemap.china_info, basemap.china):

if info['NL_NAME_1']==row['NL_NAME_1']:
patches.append(Polygon(numpy.array(shape), True))
ax.add_collection(
PatchCollection(
patches, facecolor=mainColor,
edgecolor=mainColor, linewidths=1., zorder=2
)
)

fData.apply(lambda row: plotProvince(row), axis=1)

dataLoc = pandas.read_csv('D:\PDA\6.6\provinceLoc.csv');

def plotText(row):
plt.text(row.jd, row.wd, row.city, fontsize=14, fontweight='bold', ha='center',va='center',color='r')

dataLoc.apply(lambda row: plotText(row), axis=1)

plt.show()

图片说明
图片说明

  • 写回答

3条回答 默认 最新

  • 王 不留行 2022-02-10 12:57
    关注

    你用的是groupby对象里的rank方法,这个方法里没有numeric_only参数,所以会报错。Dataframe对象里的rank方法是有numeric_only这个参数。你可以df.groupby()[].apply(lambda x: x.rank())这样写

    评论

报告相同问题?

悬赏问题

  • ¥15 outlook无法配置成功
  • ¥15 Pwm双极模式H桥驱动控制电机
  • ¥30 这是哪个作者做的宝宝起名网站
  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换