这个运行出来什么文件都没有,直接就结束了,能看下是哪里出现了问题吗?
# 需求:下载简历模板(分页)https://sc.chinaz.com/jianli/
import requests
from lxml import etree
import os
# 爬取首页源码
if not os.path.exists('./all_resume_jar'):
os.mkdir('./all_resume_jar')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0'
}
count = 1
for count in range(1, 3):
url = 'https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page={}'.format(count)
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@class="main_list jl_main masonry"]/div')
for div in div_list:
detail_url = 'https:'+div.xpath('./a/@href')[0]
title = div.xpath('./p/a/text()')[0]+'.rar'
title = title.encode('iso-8859-1').decode('gbk') # 解决中文乱码问题
detail_page_text = requests.get(url=detail_url, headers=headers).text
detail_tree = etree.HTML(detail_page_text)
rar_download_href = detail_tree.xpath('//ul[@class="clearfix"]/li[9]/a/@href')[0]
rar_download_data = requests.get(url=rar_download_href, headers=headers).content
rar_path = './all_resume_jar/'+title
f1 = open(rar_path, 'wb')
f1.write(rar_download_data)
print(title, '下载完成第'+str(count)+'页')
f1.close()