import requests
import re
# Maoyan "Top 100" board page that the script scrapes.
url = "https://maoyan.com/board/4"

# A browser-like User-Agent is sent with the request instead of the default
# one used by the requests library.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}

# One match per <dd>...</dd> entry. Capture groups, in order:
#   1. ranking index       2. poster URL (data-src)   3. film title
#   4. "star" line         5. "releasetime" line
#   6. integer part of the score   7. fractional part of the score
#
# The closing tags must be written exactly as they appear in the markup
# ("</a>", "</p>"). The earlier version had "</ a>" and "</p >" with a stray
# space, so the pattern could never match and findall() always returned [].
# re.S lets "." cross newlines, because every entry spans several lines.
pattern = re.compile(
    r'<dd>.*?board-index.*?>(.*?)</i>'
    r'.*?data-src="(.*?)"'
    r'.*?name.*?a.*?>(.*?)</a>'
    r'.*?star.*?>(.*?)</p>'
    r'.*?releasetime.*?>(.*?)</p>'
    r'.*?integer.*?>(.*?)</i>'
    r'.*?fraction.*?>(.*?)</i>'
    r'.*?</dd>',
    re.S,
)


def parse_board(html):
    """Return every board entry found in *html*.

    Args:
        html: Page source as a ``str``.

    Returns:
        A list of 7-tuples ``(index, poster_url, title, stars, release_time,
        score_integer, score_fraction)``, one per ``<dd>`` entry matched by
        ``pattern``. The captured text is returned as-is (surrounding
        whitespace is not stripped). The list is empty when nothing matches.
    """
    return pattern.findall(html)


if __name__ == "__main__":
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.content.decode("utf-8")
    print(html)

    # NOTE(review): if this still prints [], check the HTML printed above --
    # the server may have answered with a different page than the board
    # (for example a verification page), in which case there is nothing
    # for the pattern to match.
    items = parse_board(html)
    print(items)
得到了网页的源代码，但是用正则表达式解析的时候为什么返回的是空列表？刚开始学爬虫，请多多指教。