import json
import requests
from lxml import etree
# Target URL: a 51job search-results page (the path segment contains the
# keyword "python").
url = "https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,1.html?"

# Headers sent with the request: browser User-Agent, session Cookie, Referer.
# NOTE(review): the Cookie value looks like a captured login session (it
# carries cusername/cemail fields) -- consider loading it from the
# environment instead of keeping it in source; confirm before sharing.
headers_ = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
    "Cookie": "guid=a241d07465f8008b2848758fbf9b5374; adv=ad_logid_url%3Dhttps%253A%252F%252Ftrace.51job.com%252Ftrace.php%253Fpartner%253Dsem_pcbaidu5_153412%2526ajp%253DaHR0cHM6Ly9ta3QuNTFqb2IuY29tL3RnL3NlbS9MUF8yMDIwXzEuaHRtbD9mcm9tPWJhaWR1YWQ%253D%2526k%253Dd946ba049bfb67b64f408966cbda3ee9%2526bd_vid%253D10363334084442851705%26%7C%26; partner=www_baidu_com; privacy=1638699948; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; search=jobarea%7E%60000000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FApython%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21; ssxmod_itna=WqUhDK4IxRx+hxl4iwdSD9ADBGt75DOtCjQxBuOY4iNDnD8x7YDv+Gv9t+Kq5tSCSv+dqWGgnKQTbOeohiht17SpYDU4i8DCTrAdsxee=D5xGoDPxDeDADYE6DAqiOD7w=DEDm+8DaxDoDYb=RDitD4qDBCodDKqGgWTwqWd0j3CUKwThDdr=DjMrD/8DrEQ2rdZDZthQDzuaDtLUgWqUKx0Pz73mo+7vi9GvW77++57G=W0yot7Git84ijYeQC98AF0f1jBDxD=; _ujz=MTU0NDQ2MTU1MA%3D%3D; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20211205%26%7C%26; ps=needv%3D0; 51job=cuid%3D154446155%26%7C%26cusername%3DFK1zjcttpZ0l1NOtYGOGUBYjhBM3MvUoyohCJxQN5ao%253D%26%7C%26cpassword%3D%26%7C%26cname%3DsS8XAQ03ZcxGDBPlr5yJig%253D%253D%26%7C%26cemail%3DjCM6qDMmoXxlxqn9MaMYw3ugw3hFJMgWlnc46URfQyo%253D%26%7C%26cemailstatus%3D0%26%7C%26cnickname%3D%26%7C%26ccry%3D.0%252FpAZtbnU3bo%26%7C%26cconfirmkey%3D31XXih4sh4iJQ%26%7C%26cautologin%3D1%26%7C%26cenglish%3D0%26%7C%26sex%3D0%26%7C%26cnamekey%3D31PipFyU6.dSc%26%7C%26to%3Df4d5298ec4fcfa6086dbba0847f28a6061acada7%26%7C%26",
    "Referer": "https://www.51job.com/",
}
# Send the request. The timeout keeps the script from hanging forever on a
# stalled connection; raise_for_status() surfaces HTTP errors instead of
# silently parsing an error page.
response = requests.get(url, headers=headers_, timeout=10)
response.raise_for_status()
# Decoded response body as a string.
str_data = response.text
# Parse the HTML so it can be queried with XPath.
html_ = etree.HTML(str_data)
# NOTE: requests only receives the static page source. If the job listings
# are injected by JavaScript after load, both XPath queries below return
# empty lists and the output file will contain "{}".
# Job titles. The trailing /text() makes XPath return plain strings rather
# than Element objects (Elements are not valid JSON keys), and the
# un-indexed `div` step matches every listing instead of only the first.
name_ = html_.xpath("/html/body/div[2]/div[3]/div/div[2]/div[4]/div[1]/div/a/p[1]/span[1]/text()")
print(name_)
# Salaries, extracted the same way.
salary = html_.xpath("/html/body/div[2]/div[3]/div/div[2]/div[4]/div[1]/div/a/p[2]/span[1]/text()")
# Pair each title with its salary. zip() stops at the shorter list, so a
# length mismatch cannot raise IndexError. Listings sharing a title
# overwrite one another because the title is the dict key.
dict_ = dict(zip(name_, salary))
print(dict_)
# ensure_ascii=False writes the Chinese text as-is; the file is GBK-encoded.
with open("前程无忧.json", 'w', encoding="gbk") as f:
    json.dump(dict_, f, ensure_ascii=False)
前程无忧网址数据练习抓取,为什么会返回空值(语言-python)
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
1条回答 默认 最新
- CSDN专家-天际的海浪 2021-12-05 21:41关注
你先输出一下 str_data,看看源代码中是否有你需要爬取的内容。
你检查下这个网页中的内容是不是通过js代码读取外部json数据来动态更新的。
requests只能获取网页的静态源代码,动态更新的内容取不到。
对于动态更新的内容要用selenium 来爬取。或者是通过F12控制台分析页面数据加载的链接,找到真正json数据的地址进行爬取。
在页面上点击右键,右键菜单中选 "查看网页源代码"。
这样看到的才是网页的静态源代码。
如果这个网页的静态源代码中有你需要爬取的内容,就说明该页面没有动态内容,可以用requests爬取。
否则就说明该页面的内容是动态更新的,要用 selenium 来爬取。
你题目的解答代码如下:
from selenium import webdriver
import time
import json
from lxml import etree

# Drive a real browser so that content rendered by JavaScript is present
# in the page source.
driver = webdriver.Chrome()
try:
    # Target URL
    url = "https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,1.html?"
    driver.get(url)
    # Give the page's scripts time to load the listings.
    time.sleep(5)
    str_data = driver.page_source
finally:
    # Always close the browser, even if loading the page fails; otherwise
    # the Chrome/chromedriver processes are left running.
    driver.quit()

# Parse the rendered HTML for XPath extraction.
html_ = etree.HTML(str_data)
# Job titles
name_ = html_.xpath("/html/body/div[2]/div[3]/div/div[2]/div[4]/div[1]/div/a/p[1]/span[1]/text()")
print(name_)
# Salaries
salary = html_.xpath("/html/body/div[2]/div[3]/div/div[2]/div[4]/div[1]/div/a/p[2]/span[1]/text()")
print(salary)
# Pair each title with its salary; zip() stops at the shorter list, so a
# length mismatch cannot raise IndexError.
dict_ = dict(zip(name_, salary))
print(dict_)
with open("前程无忧.json", 'w', encoding="gbk") as f:
    json.dump(dict_, f, ensure_ascii=False)
如有帮助,望采纳!谢谢!
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 1无用
悬赏问题
- ¥15 输入的char字符转为int类型,不是对应的 ASCII 码,如何才能使之转换为对应 ASCII 码?或者使输入的char字符可以正常与其他字符比较?
- ¥15 解决websocket跟c#客户端通信
- ¥30 Python调用dll文件输出Nan重置dll状态
- ¥15 浮动div的高度控制问题。
- ¥66 换电脑后应用程序报错
- ¥50 array数据同步问题
- ¥15 pic16F877a单片机的外部触发中断程序仿真失效
- ¥15 Matlab插值拟合差分微分规划图论
- ¥15 keil5 target not created
- ¥15 C/C++数据与算法请教