在抓取新闻标题时，用 article.h2 可以显示出新闻标题，但改成 article.h2.text 后，就会出现标题中所说的错误。
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
from newspaper import Article
# Scrape the headline text of every news item on a thepaper.cn channel page
# and print the result as a list of {'Title': <str>} dicts.
URL = "https://www.thepaper.cn/channel_25950"

# Bound the request so a stalled connection cannot hang the script, and stop
# early on an HTTP error instead of parsing an error page.
r = requests.get(URL, timeout=10)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html5lib')

contents = []  # one {'Title': <headline text>} dict per item that has an <h2>

# find() returns None when the page has no <div id="mainContent">; fall back to
# an empty item list rather than failing on None.find_all(...).
table = soup.find('div', {'id': 'mainContent'})
if table is not None:
    news = table.find_all('div', attrs={'class': 'news_li'})
else:
    news = []

for article in news:
    # `article.h2` is None for a news_li block that contains no <h2>.
    # Reading `.text` from that None is what raised AttributeError, and the
    # former `except IndexError` could never catch it. Skip such items
    # explicitly instead.
    heading = article.h2
    if heading is None:
        continue
    # Store the visible title text (whitespace-trimmed), not the Tag object.
    contents.append({'Title': heading.get_text(strip=True)})

print(contents)