疑问:我想爬取智联招聘某个二级页面上的招聘信息。直接用 XPath 匹配该页面的 HTML 时能够匹配到这条信息,但在代码中运行同样的 XPath 得到的却是空列表。
匹配的xpath: '//button[@class="company__industry"]/text()'
import requests
from lxml import etree
import time
import csv
import random
# Accumulates one entry per fetched page; each entry is the list of strings
# matched by the industry XPath inside get_page().
job_list = []


def get_page(url, timeout=10):
    """Fetch a job-detail page and record the text of its industry button.

    The page is downloaded with a browser-like User-Agent, parsed with lxml,
    and every text node under ``<button class="company__industry">`` is
    collected. The resulting list (possibly empty) is appended to the
    module-level ``job_list``, which is then printed together with the
    elapsed time.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response (via ``raise_for_status``).
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    begin = time.perf_counter()
    ua_list = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    ]
    headers = {'User-Agent': random.choice(ua_list)}

    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error page instead of silently parsing it and
    # ending up with an unexplained empty match list.
    response.raise_for_status()
    html = response.content.decode('utf-8')

    # Short random pause so consecutive calls do not hit the site back-to-back.
    time.sleep(random.randint(1, 2))

    parse_html = etree.HTML(html)
    # NOTE(review): if this XPath matches in the browser's dev tools but
    # returns [] here, the element is presumably inserted by JavaScript after
    # the initial load and is absent from the raw HTML that requests
    # receives -- confirm by inspecting `html` directly.
    if parse_html is None:
        # lxml may return None for an empty document; treat it as "no match".
        industry = []
    else:
        industry = parse_html.xpath('//button[@class="company__industry"]/text()')
    job_list.append(industry)
    print(job_list)

    finish = time.perf_counter()
    # '%.2f' (two decimals) rather than '%2f', which only sets a minimum width.
    print('执行时间为:%.2f' % (finish - begin))
# Script driver: scrape the job-detail page located at
# https://jobs.zhaopin.com/CC196911813J00181501315.html
get_page(url='https://jobs.zhaopin.com/CC196911813J00181501315.html')