import openpyxl
from docx import Document
from docx.shared import Inches
from docxtpl import DocxTemplate, InlineImage
from openpyxl import load_workbook
import os
import re
def load_excel_data(location='C:\\Users\\caoxiangting\\Desktop\\Test.xlsx',
                    sheet_name='Sheet1'):
    """Read the rows of an Excel sheet that are to be filled into the Word file.

    Args:
        location: Path of the Excel workbook to read. Defaults to the path
            the script was originally written for.
        sheet_name: Name of the worksheet inside the workbook.

    Returns:
        A list with one dict per row, starting at row 2 (row 1 is presumably
        a header row -- confirm against the workbook). Each dict has the keys
        ``no``, ``e_content``, ``f_content`` and ``g_content`` holding the
        values of columns A, F, H and I respectively. Reading stops at the
        first row in which all four cells are empty (falsy).
    """
    # Key -> worksheet column letter. The letters must line up with the
    # columns of the sheet that hold the corresponding content.
    columns = {"no": "A", "e_content": "F", "f_content": "H", "g_content": "I"}

    wb = openpyxl.load_workbook(location)
    ws = wb[sheet_name]

    contexts = []
    for row in range(2, ws.max_row + 1):
        context = {key: ws[letter + str(row)].value
                   for key, letter in columns.items()}
        # A row with nothing in any of the four columns marks the end of data.
        if not any(context.values()):
            break
        contexts.append(context)
    return contexts
# Leading section number of a heading, e.g. "1.2.3" in "1.2.3 Scope".
# Equivalent to the original r'\d+[.\d]*[.\d]*' (the second class was redundant).
_HEADING_NUMBER = re.compile(r'\d+[.\d]*')
# A value made of digits only, i.e. an item counter rather than a heading number.
_PLAIN_INTEGER = re.compile(r'^\d+$')


def _numeric_prefix(title):
    """Return the leading section number of *title*, or '' if it has none."""
    if not title:
        return ''
    # str() because a heading cell may hold a number (e.g. the float 1.1).
    found = _HEADING_NUMBER.match(str(title))
    # re.match returns None when the title does not start with a digit;
    # calling .group() on that None was the source of the AttributeError.
    return found.group().rstrip('.') if found else ''


def _compose_entry_title(parent_title, no_str, e_content_str):
    """Build '<parent number>.<no> <text>', leaving out any missing part."""
    number = '.'.join(
        part for part in (_numeric_prefix(parent_title), no_str) if part)
    return ' '.join(part for part in (number, e_content_str) if part)


def package_doc(handle_data):
    """Turn the raw Excel rows into the item list rendered by the template.

    Each row is a dict with the keys ``no``, ``e_content``, ``f_content`` and
    ``g_content``. Rows are classified as follows:

    * ``no`` starts with a digit but is not a plain integer (e.g. "1.2"):
      a numbered heading. It becomes ``title_first`` if no first-level
      heading is active, otherwise ``title_second``.
    * ``no`` is empty and ``f_content`` is set: a heading taken from
      ``f_content``. It becomes ``title_second`` if none is active,
      otherwise ``title_third``.
    * anything else: a data row. Its title is the number of the closest
      active heading joined with ``no`` and followed by ``e_content``; it is
      stored under ``title_fourth`` when a third-level heading is active and
      under ``title_third`` otherwise, together with ``c1`` (``f_content``)
      and ``c2`` (``g_content``).

    The first numbered heading after one or more data rows resets all active
    headings. A heading that was already emitted at the same level is tracked
    but not emitted again. Processing stops at the first row whose four
    fields are all empty.

    Args:
        handle_data: Iterable of row dicts as described above.

    Returns:
        A list of dicts, one per emitted heading or data row.
    """
    title_first = ''
    title_second = ''
    title_third = ''
    seen_first = set()
    seen_second = set()
    seen_third = set()
    # False while the most recent rows were data rows; the next numbered
    # heading then starts a fresh heading hierarchy.
    end = True
    doc_item_list = []

    for v in handle_data:
        if not (v['no'] or v['e_content'] or v['f_content'] or v['g_content']):
            break

        no_str = str(v['no']) if v['no'] else None
        e_content_str = str(v['e_content']) if v['e_content'] else None
        doc_item = {}

        if (no_str and _HEADING_NUMBER.match(no_str)
                and not _PLAIN_INTEGER.match(no_str)):
            if not end:
                end = True
                title_first = ''
                title_second = ''
                title_third = ''
            # Distinguish a first-level heading from a second-level one.
            if title_first:
                title_second = no_str
                doc_item['title_second'] = no_str
                if no_str in seen_second:
                    continue
                seen_second.add(no_str)
            else:
                title_first = no_str
                doc_item['title_first'] = no_str
                if no_str in seen_first:
                    continue
                seen_first.add(no_str)
        elif not no_str and v['f_content']:
            # Second- or third-level heading taken from f_content.
            heading = v['f_content']
            if not title_second:
                title_second = heading
                doc_item['title_second'] = heading
                # The duplicate check must use the heading text; no_str is
                # always None in this branch, so testing it never matched.
                if heading in seen_second:
                    continue
                seen_second.add(heading)
            else:
                title_third = heading
                doc_item['title_third'] = heading
                if heading in seen_third:
                    continue
                seen_third.add(heading)
        else:
            # Actual data row. Number it under the closest active heading;
            # fall back to the first-level heading when no deeper one exists.
            parent_title = title_third or title_second or title_first
            title_key = 'title_fourth' if title_third else 'title_third'
            doc_item[title_key] = _compose_entry_title(
                parent_title, no_str, e_content_str)
            doc_item['c1'] = v['f_content']
            doc_item['c2'] = v['g_content']
            end = False

        doc_item_list.append(doc_item)
    return doc_item_list
def print_doc(resource,
              template_path="C:\\Users\\caoxiangting\\Desktop\\Test.docx",
              output_dir='C:\\Program Files\\test',
              output_name='1002.docx'):
    """Render *resource* into the Word template and save the result.

    Args:
        resource: The item list made available to the template as ``data``.
        template_path: Path of the Word template that gets filled in.
        output_dir: Directory the rendered document is written to; it is
            created if it does not exist yet.
        output_name: File name of the rendered document inside *output_dir*.
    """
    tpl = DocxTemplate(template_path)
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    # and makedirs also creates missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    tpl.render({'data': resource}, autoescape=True)
    # Join with os.path.join instead of appending '.\\1002.docx', which
    # produced a '...\\test.\\1002.docx' path with a stray dot in it.
    tpl.save(os.path.join(output_dir, output_name))
if __name__ == '__main__':
    # Pipeline: read the Excel rows, shape them into template items,
    # then render and save the Word document.
    print_doc(package_doc(load_excel_data()))
在该代码的第94行这里,我运行时提示对象没有 group 属性,没太明白。代码是朋友发给我的,我自己研究了一下,目前还没太懂。
代码的目的是把Excel中部分列的内容自动转移到Word文档中。