import json
import re

import requests
from lxml import etree
# Pattern for the JSON blob assigned to ``window.__INITIAL_STATE__`` in an
# inline <script>. NOTE(review): the trailing marker reflects the page layout
# at the time this was written — confirm against the live page if it stops
# matching.
_INITIAL_STATE_RE = re.compile(
    r'window\.__INITIAL_STATE__=(.+?);\(function\(\){var s;', re.S)


def _extract_state_pic_urls(html):
    """Return the ``picUrl`` values found in the page's embedded JSON state.

    Args:
        html: Full text of the search-result page.

    Returns:
        A list of image URL strings; empty when the state blob is missing,
        is not valid JSON, or does not have the expected
        ``searchList.searchList`` structure.
    """
    match = _INITIAL_STATE_RE.search(html)
    if match is None:
        return []
    try:
        state = json.loads(match.group(1))
    except ValueError:
        # The captured text was not valid JSON; treat as "no results".
        return []
    if not isinstance(state, dict):
        return []
    results = state.get('searchList')
    items = results.get('searchList') if isinstance(results, dict) else None
    if not isinstance(items, list):
        return []
    return [
        item['picUrl']
        for item in items
        if isinstance(item, dict) and item.get('picUrl')
    ]


def main():
    """Fetch the image search-result page and print the image URLs.

    The static HTML is queried with XPath first. If that yields no ``src``
    attribute (the result images appear to be rendered client-side by
    JavaScript), the URLs are read from the JSON state embedded in the page.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3883.400 QQBrowser/10.8.4559.400'}
    url = "https://pic.sogou.com/pics?query=%E4%BA%94%E6%98%9F%E7%BA%A2%E6%97%97%E5%9B%BE%E7%89%87&rawQuery=%E4%BA%94%E6%98%9F%E7%BA%A2%E6%97%97%E5%9B%BE%E7%89%87&st=255&mood=5&dm=0&mode=1"
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    resp.raise_for_status()
    resp.encoding = "utf-8"

    child_tree = etree.HTML(resp.text)
    # etree.HTML returns None for an empty document; treat that as no matches.
    imgs = (
        child_tree.xpath('//div[@class="figure-result"]//a/img')
        if child_tree is not None else []
    )

    found_src = False
    for img in imgs:
        src = img.xpath('./@src')
        if src:
            print(src)
            found_src = True

    if not found_src:
        # Nothing usable in the static markup: fall back to the JSON state.
        for pic_url in _extract_state_pic_urls(resp.text):
            print(pic_url)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
用 XPath 爬取图片时得不到 src,求 Python 解决方法
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
4条回答 默认 最新
- CSDN专家-showbo 2022-01-27 13:55关注
图片是由 JS 渲染出来的,所以 XPath 取不到;数据保存在页面的 JS 变量里,可以先用正则把它提取出来,再用 json.loads 加载后获取。
代码如下:

import requests
import re
import json

def main():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3883.400 QQBrowser/10.8.4559.400'}
    url = "https://pic.sogou.com/pics?query=%E4%BA%94%E6%98%9F%E7%BA%A2%E6%97%97%E5%9B%BE%E7%89%87&rawQuery=%E4%BA%94%E6%98%9F%E7%BA%A2%E6%97%97%E5%9B%BE%E7%89%87&st=255&mood=5&dm=0&mode=1"
    resp = requests.get(url, headers=headers)
    resp.encoding = "utf-8"
    jsonstr=re.findall(r'window\.__INITIAL_STATE__=(.+?);\(function\(\){var s;',resp.text,re.S)[0]
    data=json.loads(jsonstr)
    for item in data['searchList']['searchList']:
        print(item['title'])
        print(item['picUrl'])
    '''
    child_tree = etree.HTML(resp.text)
    #imgs = child_tree.xpath('//div[@class="figure-result"]//a/img')
    #for img in imgs:
    #    src = img.xpath('./@src')
    print(src)
    '''

if __name__ == '__main__':
    main()
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 2无用 1
悬赏问题
- ¥15 SQLServer怎么录入下标
- ¥100 无网格伽辽金方法研究裂纹扩展的程序
- ¥15 错误于library(org.Hs.eg.db): 不存在叫‘org.Hs.eg.db’这个名称的程序包,如何解决?
- ¥60 求一个图片处理程序,要求将图像大小跟现实生活中的大小按比例联系起来的
- ¥50 求一位精通京东相关开发的专家
- ¥100 求懂行的大ge给小di解答下!
- ¥15 pcl运行在qt msvc2019环境运行效率低于visual studio 2019
- ¥15 MAUI,Zxing扫码,华为手机没反应。可提高悬赏
- ¥15 python运行报错 ModuleNotFoundError: No module named 'torch'
- ¥100 华为手机私有App后台保活