def load_data(self, path='tuniu.csv', column='content'):
    """Load the review texts from a CSV file.

    Args:
        path: CSV file to read. Defaults to ``'tuniu.csv'``, the file this
            method originally hard-coded.
        column: Name of the column holding the review text. Defaults to
            ``'content'``.

    Returns:
        pandas.Series: the selected column, one entry per CSV row. Each
        entry is the review text itself (a ``str``), not a record/dict.
        Empty cells are returned by pandas as NaN (a float).

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        KeyError: if the CSV has no column named ``column``.
    """
    reviews = pd.read_csv(path)
    return reviews[column]
def fenci_data(self):
    """Segment every review into Chinese words with stop words removed.

    Loads the reviews through ``self.load_data()``, keeps only the
    characters in the range U+4E00-U+9FA5 of each review, segments the
    result with ``jieba.lcut`` and drops every token listed in
    ``stoplist.txt``.

    Returns:
        list[list[str]]: one token list per review, in input order. A
        review without usable text (missing value, or no Chinese
        characters) yields an empty list, so positions stay aligned with
        the input rows.

    Raises:
        FileNotFoundError: if ``stoplist.txt`` cannot be opened.
    """
    # Build a set of whole stop words (whitespace/newline separated).
    # Testing membership against the raw file text would be a *substring*
    # test and would also discard any token that merely occurs inside a
    # longer stop word.
    with open("stoplist.txt", 'r', encoding='UTF-8') as file:
        stop_words = set(file.read().split())

    # Compiled once; the pattern is invariant across reviews.
    chinese_run = re.compile(r'[\u4e00-\u9fa5]+')

    text = []
    for weibo_item in tqdm(self.load_data()):
        # load_data() yields the review strings themselves, so indexing an
        # item with 'content' raised
        # "TypeError: string indices must be integers".
        # Dict records (e.g. parsed JSON) are still accepted.
        if isinstance(weibo_item, dict):
            weibo_item = weibo_item.get('content')
        if not isinstance(weibo_item, str):
            # Empty CSV cells arrive as float NaN, which `re` cannot scan.
            text.append([])
            continue
        sentence = ''.join(chinese_run.findall(weibo_item))
        text.append(
            [word for word in jieba.lcut(sentence) if word not in stop_words]
        )
    return text
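# Reported error (报错):
#     sentence=''.join(re.findall(r'[\u4e00-\u9fa5]+',weibo_item['content']))
# TypeError: string indices must be integers
# Cause: load_data() returns the 'content' column, so every weibo_item is
# already a str; indexing a str with the key 'content' raises this TypeError.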