背景:利用xlwings库读取EXCEL文件,逐个获取sheet名和sheet数据,对应组成字典返回;当Excel内sheet较多并且数据量较大时,返回字典效率太低,现思考通过多线程同时获取sheet数据.
问题:当加入threading多线程时代码报错,error:pywintypes.com_error: (-2147221008, '尚未调用 CoInitialize。', None, None)
Python代码如下:
import time
import pandas as pd
import os
import xlwings as xw
import threading
def read_sht(sht):
if sht.range('a1').value:
print('Sheet名:', sht.name)
last_cell = sht.used_range.last_cell
last_row = last_cell.row
last_col = last_cell.column
header = sht.range((1, 1), (1, last_col)).value
if sht.range('a3').value:
data = sht.range((2, 1), (last_row, last_col)).value
else:
data = [sht.range((2, 1), (last_row, last_col)).value]
df = pd.DataFrame(data, columns=header)
return df
def multi_read_sht(path):
thread_list = []
dic = {}
with xw.App(visible=False, add_book=False) as app:
for root, dirs, files in os.walk(path):
for file in files:
excel = os.path.join(root, file)
if excel.endswith(('.xls', '.xlsx')):
print('\n%s 正在打开 %s 读取Sheet名和数据......' % (time.strftime('%Y-%m-%d %H:%M:%S'), file))
wb = app.books.open(excel)
for sht in wb.sheets:
t = threading.Thread(target=lambda x : dic.update({sht.name:read_sht(x)}),args=(sht,))
t.start()
thread_list.append(t)
for thread in thread_list:
thread.join()
return dic
if __name__ == "__main__":
path = os.getcwd()
multi_read_sht(path)