# Read a novel, normalize glued sentence boundaries, and tokenize it with NLTK
# (in preparation for later punctuation/stop-word removal and lemmatization).
import re

from nltk import word_tokenize, pos_tag  # tokenization, POS tagging
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer  # lemmatization

book = "Harry Potter and the Sorcerer's Stone.txt"

# Read the whole file in one call: accumulating with `book_datas += line`
# inside a loop is worst-case quadratic and needlessly verbose.
with open(book, encoding='utf-8') as fd:
    book_datas = fd.read()

# The source text has sentences glued together with no space after the
# period ("that.When", "nonsense.Mr"), which word_tokenize cannot split.
# Insert a space after sentence-ending punctuation that is immediately
# followed by a capitalized letter.
book_datas = re.sub(r'([.!?])([A-Z])', r'\1 \2', book_datas)

words = word_tokenize(book_datas)
我准备用 nltk 对文本进行分词,然后去掉标点符号和停用词;但在交互窗口试运行后,分词结果里出现了 'that.When'、'nonsense.Mr'、'anywhere.The' 这类明显错误(句号后没有空格导致两个词粘在一起),请问该怎么处理?