想要根据不同城市的数量分布画出一个柱状图。
import xlrd
import matplotlib.pyplot as plt
if __name__ == "__main__":
    # Open the workbook and work on its first sheet.
    data = xlrd.open_workbook("../data_spark/city.xls")
    table = data.sheets()[0]

    # Report the sheet dimensions.
    rows = table.nrows
    print("xls行数:", rows)
    cols = table.ncols
    print("xls列数:", cols)

    # Row 0 is treated as the header row; echo it for inspection.
    row1data = table.row_values(0)
    print(row1data)

    # Field name stored for each of the first six columns, in column order.
    # The original code labelled these fields as: job title, work city,
    # education requirement, and minimum / maximum / average annual salary.
    field_names = (
        "job_id",
        "work_city",
        "stu",
        "salary_min",
        "salary_max",
        "salary_avg",
    )

    # Build one record per data row (row 0, the header, is skipped).
    # Each record holds only the six fields above; the header-label
    # entries that used to be seeded into every record are gone, and the
    # loop index no longer overwrites the row count kept in `rows`.
    tables = []
    for row_idx in range(1, rows):
        record = {
            field: table.cell_value(row_idx, col_idx)
            for col_idx, field in enumerate(field_names)
        }
        tables.append(record)
def statistical_data(tables):
    """Count how many records belong to each work city.

    Args:
        tables: Iterable of dict records. Each record's ``'work_city'``
            value is used as the grouping key; a record without that key
            is counted under ``None``.

    Returns:
        A plain ``dict`` mapping each city to its number of records, with
        keys in first-seen order. Empty input yields an empty dict.
    """
    res_dict = {}
    for record in tables:
        city = record.get('work_city')
        # Start from 0 for a city not seen before, then add this record.
        res_dict[city] = res_dict.get(city, 0) + 1
    return res_dict
# Run the aggregation and plotting only when executed as a script.
if __name__ == "__main__":
    # Count the records per city and echo the mapping.
    result = statistical_data(tables)
    print(result)

    # Axis data comes straight from the count mapping: keys are the city
    # labels and values are the bar heights. Both are materialised as
    # lists so they stay aligned and can be measured with len().
    _x = list(result.keys())
    _y = list(result.values())

    # Draw one bar per city, positioned by index and labelled by city.
    # NOTE(review): the city labels are presumably Chinese text; if they
    # render as empty boxes, configure a CJK-capable font through
    # matplotlib's rcParams -- confirm on the target machine.
    plt.figure(figsize=(20, 8), dpi=80)
    plt.bar(range(len(_x)), _y)
    plt.xticks(range(len(_x)), _x)
    plt.show()