
怎么爬取!
"""抓取 Top 小吃"""
top_xiaochi_url = 'http://www.mafengwo.cn/cy/{}/tese.html'.format(city_code)
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Content-Type': 'application/x-www-form-urlencoded',
'Accept-Encoding': 'gzip, deflate, compress',
'Accept-Language': 'en-us;q=0.5,en;q=0.3',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Host': 'www.mafengwo.cn',
'Cookie': Bs%3A11%3A%22cn.bing.com%22%3Bs%3A6%3A%22f_host%22%3Bs%3A3%3A%22www%22%3B%7D; __mfwuuid=660662ca-c6e3-d405-6c90-46376d2426ca; Hm_lvt_8288b2ed37e5bc9b4c9f7008798d2de0=1711694541; bottom_ad_status=0; __jsluid_h=e43cb7c52f99bcbaafcd83180cdcc680; __jsl_clearance_s=1711697831.314|0|p0FZSEozJNa5UJtKbTc53cvqEcE%3D; __mfwb=1873ff55f9bb.35.direct; __mfwlt=1711698446; Hm_lpvt_8288b2ed37e5bc9b4c9f7008798d2de0=1711698446',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
}
response = requests.get(top_xiaochi_url, headers=headers)
response.encoding = 'utf8'
soup = BeautifulSoup(response.text, 'lxml')
list = soup.select('ol.list-rank')[0]
items = list.select('li')
top_xiaochi = []
for item in items:
img = item.img['src']
name = item.h3.text.strip()
top_xiaochi.append({'小吃': name, '图片': img})
return top_xiaochi