我尝试使用Python搜索一特定目录下所有文件中的关键词。代码如下:
#!/usr/bin/python
#encoding:UTF-8
import os
import docx
from docx import *
#判断文件中是否包含关键字,是则将文件路径打印出来
def is_file_contain_word(file_list, query_word):
for _file in file_list:
if query_word in open(_file).read():
print (_file)
print("Finish searching.")
#返回指定目录的所有文件(包含子目录的文件)
def get_all_file(floder_path):
file_list = []
if floder_path is None:
raise Exception("floder_path is None")
for dirpath, dirnames, filenames in os.walk(floder_path):
for name in filenames:
file_list.append(dirpath + '\' + name)
return file_list
query_word = input("Please input the key word that you want to search:")
basedir = input("Please input the directory:")
is_file_contain_word(get_all_file(basedir), query_word)
input("Press Enter to quit.")
测试的目录为D:\test。内含一个word文档和一个子文件夹,子文件夹下有一个word文档。
输入关键词和目录后,得到如下信息:
Please input the key word that you want to search:'Shengaiwei'
Please input the directory:D:\test
Traceback (most recent call last):
File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 29, in
is_file_contain_word(get_all_file(basedir), query_word)
File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 11, in is_file_contain_word
if query_word in open(_file).read():
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa2 in position 50: illegal multibyte sequence
烦请各位大侠帮助指导,谢谢!