job_name= []
company_name =[]
workarea =[]
providesalary =[]
attribute =[]
def get_job_list(keyword=None,page=1):
key = urllib.parse.quote(urllib.parse.quote(keyword))
url='https://search.51job.com/list/000000,000000,0000,00,9,99,' +key+',2,'+str(page)+'.html'
headers = {
'Host': 'search.51job.com',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400',
}
response = requests.get(url,headers=headers)
if response.status_code == 200:
html = response.text
with open('/home/aistudio/work/index.html', mode='w',encoding='gbk') as f:
f.write(html)
pattern ='window\._SEARCH_RESULT_ =({.+?}) </script>'
data =re.findall(patten,html)[0]
job_list = json.loads(data)
for job in job_list["engine_jds"]:
job_name.append(job["job_name"])
company_name.append(job["company_name"])
workarea.append(job["workarea_text"])
salary.append(job["providesalary_text"]
attribute.append(",".join(job["attribute_text"]))
爬虫代码出现问题,不知道改哪里了
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
1条回答 默认 最新
- chuifengde 2021-10-26 21:16关注
import requests import json import urllib import re job_name= [] company_name =[] workarea =[] providesalary =[] attribute =[] def get_job_list(keyword=None,page=1): key = urllib.parse.quote(urllib.parse.quote(keyword)) url='https://search.51job.com/list/000000,000000,0000,00,9,99,' +key+',2,'+str(page)+'.html' headers = { 'Host': 'search.51job.com', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3877.400 QQBrowser/10.8.4506.400', } response = requests.get(url,headers=headers) if response.status_code == 200: html = response.text with open('/index.html', mode='w',encoding='gbk') as f: f.write(html) pattern ='window\.__SEARCH_RESULT__ = ({.*?})</script>' data =re.findall(pattern,html, re.DOTALL) job_list = json.loads(data[0]) for job in job_list["engine_jds"]: job_name.append(job["job_name"]) company_name.append(job["company_name"]) workarea.append(job["workarea_text"]) # salary.append(job["providesalary_text"]) attribute.append(",".join(job["attribute_text"])) get_job_list("计算机") print(job_name)
解决 无用评论 打赏 举报
悬赏问题
- ¥15 java面向对象程序设计
- ¥20 Keil uVision5创建project没反应
- ¥15 mmseqs内存报错
- ¥15 vika文档如何与obsidian同步
- ¥15 华为手机相册里面的照片能够替换成自己想要的照片吗?
- ¥15 陆空双模式无人机飞控设置
- ¥15 sentaurus lithography
- ¥100 求抖音ck号 或者提ck教程
- ¥15 关于#linux#的问题:子进程1等待子进程A、B退出后退出(语言-c语言)
- ¥20 web页面如何打开Outlook 365的全球离线通讯簿功能