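# Requirements:
# 1. Save the scraped data to a CSV file in a correct CSV format.
# 2. Save it to D://新闻列表.csv, with the benefit items of each posting
#    separated by "#".
# Columns: job posting, region, job title, experience, education, benefits.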
import requests
def gethtml(url):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: If the request fails or does not complete
            within the timeout.
    """
    header = {
        # The header name must be spelled exactly "User-Agent". A key written
        # with spaces ('User - Agent') is a different header name, so the
        # browser string would not replace the default requests User-Agent.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
    }
    # requests applies no timeout by default, so a stalled server would block
    # the script forever; fail after 10 seconds instead.
    res = requests.get(url, headers=header, timeout=10)
    return res.text
from lxml import etree
def _join_text(nodes, sep=' '):
    """Join the stripped, non-empty strings in *nodes* using *sep*."""
    return sep.join(text.strip() for text in nodes if text.strip())


def getParse(url):
    """Scrape the listing page at *url* and append its rows to a CSV file.

    The page is fetched with ``gethtml`` and parsed with ``etree.HTML``.
    Every ``div`` below ``//*[@id="area"]/div[4]`` is inspected; for each
    image ``alt`` text found under ``./div[1]/a`` one row is collected with
    the columns '岗位信息', '地区', '工作', '经验,学历' and '福利'. The rows
    are appended to ``./新闻列表.csv`` (UTF-8).

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. Rows are printed to stdout and written to the CSV file.
    """
    # Function-scope imports keep this block self-contained; the original
    # also imported csv inside the function body.
    import csv
    import os

    csv_path = "./新闻列表.csv"
    fieldnames = ['岗位信息', '地区', '工作', '经验,学历', '福利']

    html = etree.HTML(gethtml(url))
    rows = []
    for card in html.xpath('//*[@id="area"]/div[4]//div'):
        for title in card.xpath('./div[1]/a/img/@alt'):
            # normalize-space() makes XPath return one trimmed string
            # instead of a list of text nodes.
            region = card.xpath("normalize-space(./div[2]/div[1]/span/text())")
            # The remaining XPath queries return lists of text nodes; they
            # are flattened to plain strings so the CSV cells do not contain
            # Python list reprs such as "['a', 'b']".
            job = _join_text(card.xpath("./div[2]/div[1]/a/text()"))
            salary = _join_text(card.xpath("./div[2]/p/span[1]/text()"))
            experience = _join_text(card.xpath("./div[2]/p/span[2]/text()"))
            # NOTE(review): the original also read
            # ./div[2]/p/span[2]/span/text() into an unused variable,
            # presumably the education level. Confirm whether it should be
            # merged into the '经验,学历' column.
            # Requirement: benefit items are separated by "#" within the cell.
            benefits = _join_text(card.xpath("./div[2]/ul//li/text()"), sep='#')
            print(title, region, job, salary, experience, benefits)
            rows.append({
                '岗位信息': title,
                '地区': region,
                '工作': job,
                '经验,学历': experience,
                '福利': benefits,
            })

    if not rows:
        # Nothing matched: do not create or touch the output file.
        return

    # The file is opened in append mode so rows from earlier runs are kept;
    # the header is therefore written only when the file is new or empty,
    # never once per row.
    write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    # Open the file once and let the context manager close it.
    with open(csv_path, 'a', encoding='utf-8', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
        if write_header:
            csv_writer.writeheader()
        csv_writer.writerows(rows)
# Script entry: scrape the pjob.net nationwide listing page.
getParse(url="http://www.pjob.net/china.htm")