# Scrape job listings from liepin.com (Liepin recruitment site)
# Requires Python 3.x (written against 3.8)
import requests,re
from lxml import etree
def gethtml(url):
    """Fetch *url* and return the decoded response body as text.

    Raises:
        requests.HTTPError: on a non-2xx status (via raise_for_status).
        requests.Timeout: if the request exceeds 30 seconds.
    """
    # Present a desktop-Chrome User-Agent; sites like Liepin often block
    # the default python-requests UA.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.69 Safari/537.36'}
    r = requests.get(url, headers=header, timeout=30)
    r.raise_for_status()
    # Trust the encoding sniffed from the body over the HTTP header,
    # which is frequently wrong for Chinese sites.
    r.encoding = r.apparent_encoding
    return r.text
    # NOTE(review): removed an unreachable print(r.text) after the return,
    # and a stray pasted URL fragment ("&d_headId=...") that was a syntax error.
def paserPage(result, html):
    """Parse one Liepin search-result page and append rows to *result*.

    Args:
        result: list to append to (mutated in place and also returned).
        html: raw HTML text of a search-result page.

    Returns:
        *result*, with one ``[title, experience, company, detail_url]``
        row appended per job card found on the page.
    """
    net = etree.HTML(html)
    urls = net.xpath('//div[@class="job-info"]/h3/a/@href')
    job = net.xpath('//div[@class="job-info"]/h3/@title')
    # span[3] of the condition row holds the experience requirement
    # (presumably — TODO confirm against a live page).
    year = net.xpath('//div[@class="job-info"]/p[@class="condition clearfix"]/p/span[3]/text()')
    company = net.xpath('//div[@class="company-info nohover"]/p[@class="company-name"]/a/@title')
    # zip stops at the shortest list, so a card missing one field cannot
    # raise IndexError the way the old range(len(job)) parallel indexing could.
    for title, exp, comp, link in zip(job, year, company, urls):
        result.append([title, exp, comp, link])
    return result
def main(n):
    """Scrape the first *n* Liepin search-result pages and print the rows.

    Args:
        n: number of result pages to fetch (curPage 0 .. n-1).
    """
    key = '有色'
    results = []
    for page in range(n):
        # Bug fix: the original concatenated the literal string 'key'
        # instead of the search keyword variable, so the keyword was ignored.
        url = ('https://www.liepin.com/zhaopin/?isAnalysis=&dqs=&pubTime='
               '&salary=&subIndustry=&industryType=&compscale=&key='
               + key + '&curPage=' + str(page))
        html = gethtml(url)
        # results accumulates across pages; print the running list each page.
        lit = paserPage(results, html)
        print(lit)


if __name__ == '__main__':
    main(10)