import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def answer_one():
energy = pd.read_excel('Energy Indicators.xls', skiprows=17,skip_footer= 38) # 读数据,下载下来的表中前面17行和后面38行都没用,读取时跳过
energy = energy[['Unnamed: 1','Petajoules','Gigajoules','%']]
energy.columns = ['Country', 'Energy Supply', 'Energy Supply per Capita', '% Renewable'] # 根据题目要求重命名
energy[['Energy Supply', 'Energy Supply per Capita', '% Renewable']] = energy[['Energy Supply', 'Energy Supply per Capita', '% Renewable']].replace('...',np.NaN).apply(pd.to_numeric) # 根据题目要求将没有数据的值转化为NaN
energy['Energy Supply'] = energy['Energy Supply']*1000000 # 根据题目要求转换单位
energy['Country'] = energy['Country'].replace({'China, Hong Kong Special Administrative Region':'Hong Kong','United Kingdom of Great Britain and Northern Ireland':'United Kingdom','Republic of Korea':'South Korea','United States of America':'United States','Iran (Islamic Republic of)':'Iran'}) # 根据题目要求替换相应国家名字,替换写在字典中,replace函数替换
energy['Country'] = energy['Country'].str.replace(" \(.*\)","") # 根据题目要求替换相应国家名字,去除一些特殊符号
GDP = pd.read_csv('world_bank.csv', skiprows=4)
GDP['Country Name'] = GDP['Country Name'].replace({"Korea, Rep.": "South Korea", "Iran, Islamic Rep.": "Iran", "Hong Kong SAR, China": "Hong Kong"}) # 同样,替换相应国家名字
GDP = GDP[['Country Name','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015']] # 题目说只要10年的,就取出这些年的
ScimEn = pd.read_excel('scimagojr country rank 1996-2019.xlsx')
ScimEn = ScimEn[0:15] # 读数据,取出前15个
df = pd.merge(ScimEn, energy, how = 'inner', left_on = 'Country', right_on='Country')
dff = pd.merge(df,GDP, how = 'inner', left_on = 'Country', right_on='Country Name').set_index('Country') # 合并数据,inner方法取交集,最后只有15个国家
dff = dff[['Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations', 'Citations per document', 'H index', 'Energy Supply', 'Energy Supply per Capita', '% Renewable', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']]
return dff
answer_one.to_excel('data.xlsx')
answer_one()
print(answer_one())
最后总会出来这一行:
answer_one.to_excel('data.xlsx')
AttributeError: 'function' object has no attribute 'to_excel'
小白求助 刚学不久o(╥﹏╥)o