from typing import Text
import requests
from lxml import etree
import time
# Browser-like request headers; sent with every request to the search page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
}
# Search-result page that is fetched and scraped.
url = 'https://www.baidu.com/s?ie=UTF-8&wd=%E7%99%BE%E5%BA%A6%E7%83%AD%E6%90%9C'

# XPath selecting one <div> per entry of the hot-search list.
HOT_ITEM_XPATH = '//*[@id="1"]/div/div[2]/div/div[2]/div[1]/div/div/div[1]/div/div[2]/div[@class="hot-item_1473U"]'
# File the scraped entries are written to (overwritten on each run).
OUTPUT_PATH = './text.text'
# Visual divider printed after each entry on stdout.
SEPARATOR = "-------------------------------------------------------------"
# Responses whose body is not longer than this are treated as unusable.
MIN_BODY_LENGTH = 100


def format_entry(titles, links):
    """Return one entry as text: a title line followed by a link line.

    XPath queries return *lists* of strings, which ``file.write`` rejects
    ("write() argument must be str, not list"). The fragments of each list
    are joined into a single string here instead of writing the list repr.

    Args:
        titles: String fragments of the entry's link text (may be empty).
        links: String fragments of the entry's href (may be empty).

    Returns:
        ``"<title>\\n<link>\\n"`` with surrounding whitespace stripped from
        both parts.
    """
    title = "".join(titles).strip()
    link = "".join(links).strip()
    return f"{title}\n{link}\n"


def fetch_hot_items(timeout=10):
    """Download the search page and return the matched hot-item elements.

    Args:
        timeout: Seconds to wait for the server before giving up, so the
            script cannot hang indefinitely on a stalled connection.

    Returns:
        A list of lxml elements matching ``HOT_ITEM_XPATH``; empty when the
        response body is too short or nothing matches.
    """
    page = requests.get(url, headers=headers, timeout=timeout)
    html = page.content
    # A Response object has no len(); the size check must be on the body.
    if len(html) <= MIN_BODY_LENGTH:
        return []
    print("解析成功")
    document = etree.HTML(html)
    # Guard in case the parser produced no document for this body.
    if document is None:
        return []
    return document.xpath(HOT_ITEM_XPATH)


def save_entries(entries, path=OUTPUT_PATH):
    """Write all formatted entries to *path* in a single pass.

    The file is opened once: opening it in 'w' mode for every item would
    truncate it each time and leave only the last entry on disk.

    Args:
        entries: Iterable of already formatted entry strings.
        path: Destination file; created or overwritten.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(entries)


def main():
    """Scrape the hot-search list, print each entry and save them all."""
    entries = []
    for item in fetch_hot_items():
        entry = format_entry(item.xpath("./a/text()"), item.xpath("./a/@href"))
        print(entry + SEPARATOR)
        entries.append(entry)
    # Skip the write when nothing was scraped so an earlier result file is
    # not replaced by an empty one.
    if entries:
        save_entries(entries)


if __name__ == "__main__":
    main()
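# Note from the original author: passing the XPath results straight to f.write() fails with
# "TypeError: write() argument must be str, not list", so the values have to be written as str.
# Even then, the output file ends up blank or contains only a single entry.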