# 用字典记录关键字出现的次数
# 读关键字
def get_key_list(txt_file):
    """Read keywords from a UTF-8 text file, one keyword per line.

    Args:
        txt_file: Path of the keyword file to read.

    Returns:
        A list with one string per line of the file, with newline
        characters stripped from both ends. Blank lines are kept as empty
        strings. An empty list is returned when the file cannot be opened,
        read, or decoded as UTF-8.
    """
    try:
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(txt_file, 'r', encoding='utf8') as file_data:
            return [line.strip('\n') for line in file_data]
    except (OSError, ValueError):
        # Best-effort read: a missing/unreadable file (OSError) or content
        # that is not valid UTF-8 (UnicodeDecodeError, a ValueError
        # subclass) yields "no keywords" instead of aborting the caller.
        return []
# 按关键字列表遍历EXCEL , 写入统计表
def _count_keywords(texts, keywords):
    """Return the total number of occurrences of each keyword in *texts*.

    Only keywords that occur at least once get an entry; entries appear in
    the order in which the keywords are first matched.
    """
    totals = {}
    for text in texts:
        for word in keywords:
            hits = text.count(word)
            if hits:
                totals[word] = totals.get(word, 0) + hits
    return totals


def xls_select_proc(xls_sr, xls_tg, key_list):
    """Count keyword occurrences in an Excel column and save a summary sheet.

    Only the first worksheet of the source workbook is processed, and only
    the first column of each row is searched. The result workbook contains
    a single sheet titled "关键词统计" with a header row followed by one
    row per keyword that occurred at least once.

    Args:
        xls_sr: Path of the source workbook to read.
        xls_tg: Path the result workbook is saved to.
        key_list: Sequence of pairs whose second item is the keyword to
            search for (the first item is not used here).

    Raises:
        Whatever openpyxl raises when the source cannot be loaded or the
        result cannot be saved; errors are not swallowed here.
    """
    from openpyxl import Workbook
    from openpyxl import load_workbook

    # Drop empty keywords (str.count('') returns len + 1, which would
    # inflate the statistics) and de-duplicate while preserving order so a
    # keyword listed twice is not counted twice.
    keywords = list(dict.fromkeys(k[1] for k in key_list if k[1]))

    wb = load_workbook(xls_sr)
    try:
        ws = wb[wb.sheetnames[0]]
        # Skip empty cells: str(None) would otherwise be searched as the
        # literal text "None".
        texts = (
            str(row[0].value)
            for row in ws.rows
            if row and row[0].value is not None
        )
        key_dict = _count_keywords(texts, keywords)
    finally:
        wb.close()

    wb_tg = Workbook()
    try:
        sheet_tg = wb_tg.active
        sheet_tg.title = "关键词统计"
        # append() writes to the next free row, starting at row 1 on a
        # fresh sheet, so the header lands in A1:B1.
        sheet_tg.append(['关键词', '出现次数'])
        for word, total in key_dict.items():
            sheet_tg.append([word, total])
        wb_tg.save(xls_tg)
    finally:
        wb_tg.close()
def xls_select3(xls_sr, xls_tg, key_file_data):
    """Flatten per-file keyword lists and run the keyword statistics.

    Args:
        xls_sr: Path of the source workbook, passed through unchanged.
        xls_tg: Base name of the result file; ".xlsx" is appended to it.
        key_file_data: Sequence of ``[file_name, keywords]`` pairs.

    Each keyword is paired with its file name minus the last four
    characters (presumably the ".txt" extension - confirm with callers)
    before being handed to ``xls_select_proc``.
    """
    out_path = f"{xls_tg}.xlsx"
    tagged_keys = [
        [entry[0][:-4], word]
        for entry in key_file_data
        for word in entry[1]
    ]
    xls_select_proc(xls_sr, out_path, tagged_keys)
# Script entry: load every keyword file, then build the statistics workbook.
key_file = ['关键词.txt']
key_file_data = []
print('开始处理')
try:
    # Pair each keyword file name with the keywords read from it.
    key_file_data.extend([name, get_key_list(name)] for name in key_file)
    xls_select3('文本.xlsx', '结果', key_file_data)
except Exception as err:
    # Report the failure and fall through to the completion message.
    print('处理出错:\n', repr(err))
print('处理完成')