问题遇到的现象和发生背景
有一个excel表里有1000项数据,要在十几个word文档里找这些数据,excel中数据能在word里找到就标记一下。问题:把遍历表格那段注释掉以后运行速度还行,加上遍历表格就巨慢,怎么能提升效率啊?
问题相关代码,请勿粘贴截图
from openpyxl import workbook
from openpyxl import load_workbook
from docx import Document
import os
def search(docname):
document = Document(docname)
for i in range(1038):
a=i+1
#print(ws.cell(a,4).value)
searchtarget = ws.cell(a,4).value
for paragraph in document.paragraphs:
#print(paragraph.text)
p=str(searchtarget)
pp=str(paragraph.text)
if p in pp:
print(ws.cell(a,4).value)
ws.cell(a,13).value=docname
tables=document.tables #遍历表格
for tb in document.tables:
ii=0
jj=0
for i in tb.rows:
jj=0
for j in tb.columns:
p=str(searchtarget)
pp=str(tb.cell(ii,jj).text)
try:
if p in pp:
print(ws.cell(a,4).value)
ws.cell(a,13).value=docname
except:
print("error")
break
if jj<len(tb.columns)-1:
jj=jj+1
if ii<len(tb.rows)-1:
ii=ii+1
if __name__ == "__main__":
excel=load_workbook('新建 Microsoft Excel 工作表.xlsx')
ws=excel['Sheet1']
search('卷).docx')
search('卷).docx')
search('卷).docx')
search('册二).docx')
search('册三).docx')
search('册一).docx')
search('册).docx')
search('明书.docx')
search('书.docx')
search('册.docx')
search('册.docx')
search('书.docx')