在预备国赛 处理大量的数据 打算用multiprocessing
题目为 2017B 处理两个excel保存的数据信息
代码
```python
import pandas as pd
import xlrd as xl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
import multiprocessing as mp
import math
def final_fun(name,param):
global loc1
global loc2
global col1
global col2
result = np.zeros(len(param))
distance = 0.001
k = 1
for i in param:
count = 0
if k == 1:
start = time.time()
for j in range(len(col1)):
if (loc1[i]-col1[j])**2 + (loc2[i]-col2[j])**2 <= distance ** 2:
count += 1
result[param.index(i)] = count
if k == 1:
end = time.time()
print((end-start)*len(param))
k =10
return restlt
dataframe = pd.read_excel("附件三:新项目任务数据.xls","t_tasklaunch",header=0)
col1 = dataframe["任务GPS纬度"][:10]
col2 = dataframe["任务GPS经度"][:10]
dataframe = pd.read_excel("附件二:会员信息数据.xlsx","Sheet1",header=0)
loc = dataframe["会员位置(GPS)"]
loc = [i.split(" ") for i in loc]
loc1 = [float(i[0]) for i in loc][:10]
loc2 = [float(i[1]) for i in loc][:10]
#plt.scatter(loc1,loc2,s=5)
#plt.scatter(col1,col2,s=5,c="red")
#plt.show()
len_loc = len(loc)
n_len_loc = math.floor(len_loc/16)
num_cores = int(mp.cpu_count())
pool = mp.Pool(num_cores)
param_dict = {"task"+str(i):[j for j in range(i*n_len_loc,(i+1)*n_len_loc)] for i in range(15)}
param_dict["taks15"] = [j for j in range(15*n_len_loc,len_loc-15 * n_len_loc)]
results = [pool.apply_async(final_fun,args=(name,param)) for name,param in param_dict.items()]
results = [p.get() for p in results]
f = open("information.txt","w")
f.writelines(results)
f.close()
报错:
