需要采取数据
而且为了后面的做题,采集的数据必须包含这些城市和这些职位
import requests
from bs4 import BeautifulSoup
import csv
import os
# 模拟请求头
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# Boss直聘搜索URL (示例)
url = 'https://www.zhipin.com/c101010100/?query=python'
proxies = {
"http": None,
"https": None,
}
response = requests.get(url, headers=headers, proxies=proxies)
soup = BeautifulSoup(response.text, 'html.parser')
# 获取职位信息
jobs = soup.find_all('div', class_='job-primary')
job_list = []
for job in jobs:
title = job.find('div', class_='job-title').text
company = job.find('div', class_='company-text').text.strip()
salary = job.find('span', class_='red').text
location = job.find('span', class_='job-area').text
job_list.append([title, company, salary, location])
# 保存到本地CSV文件
file_path = 'jobs.csv'
with open(file_path, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['Title', 'Company', 'Salary', 'Location'])
writer.writerows(job_list)
print(f"Data saved to {file_path}")
这段代码始终报错requests.exceptions.ProxyError: HTTPSConnectionPool(host='www.zhipin.com', port=443): Max retries exceeded with url: /web/common/security-check.html?seed=fXtv3Wx5EcKOfCsStJTXhP%2BsoJGhSDCga0c5cJsZ3XM%3D&name=c71cff0c&ts=1718454925401&callbackUrl=%2Fc101010100%2F%3Fquery%3Dpython&srcReferer= (Caused by ProxyError('Unable to connect to proxy', OSError(0, 'Error')))
无论我如何修改