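# Problem: the scraper's output is not what the tutorial shows -- every record seems to repeat the results of the whole page. Goal: reproduce the output of the tutorial below (the Turing / 图灵 tutorial on Bilibili).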
import requests
import time
from lxml import etree
# Request headers passed to the HTTP calls in this script. The values mimic a
# Chrome 108 browser on Windows (see User-Agent / sec-ch-ua below).
headers = {
    # Content negotiation and caching.
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ru;q=0.7',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # NOTE: Origin and Referer are fixed to the changsha subdomain.
    'Origin': 'https://changsha.taoche.com',
    'Pragma': 'no-cache',
    'Referer': 'https://changsha.taoche.com/',
    # Fetch-metadata headers.
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    # Browser identification.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
def get_city(group='26', timeout=10):
    """Fetch the cities offered by the taoche.com city-filter endpoint.

    The endpoint returns the cities in groups; this flattens them into a
    single list and keeps only the two fields the crawler needs.

    Args:
        group: Value sent as the ``group`` query parameter. Defaults to
            ``'26'``, the value this script has always used.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A list of ``{'cityName': ..., 'citySpell': ...}`` dicts in the order
        the API returned them. Empty when the response carries no ``data``.

    Raises:
        requests.RequestException: On a network error, a timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        'https://proconsumer.taoche.com/c-city-consumer/city/get-filter-city-by-group',
        params={'group': group},
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()

    results = []
    # ``or []`` tolerates a missing or null ``data`` / ``cityList`` entry.
    for city_group in payload.get('data') or []:
        for city in city_group.get('cityList') or []:
            results.append({
                'cityName': city['cityName'],
                'citySpell': city['citySpell'],
            })
    return results
def _joined_text(nodes):
    """Concatenate a list of XPath text results into one stripped string."""
    return ''.join(nodes).strip()


def _parse_car(div, city_name):
    """Build the result record for one listing card (a ``gongge_main`` div).

    Every path starts with ``./`` so it is evaluated relative to ``div``.
    A path starting with ``//`` is evaluated from the document root even when
    called on an element, which is why each record used to contain the
    values of *all* cards on the page.
    """
    price_nodes = (
        div.xpath('./div/i[@class="Total brand_col"]/text()')
        + div.xpath('./div/i[@class="Total brand_col"]/em/text()')
    )
    return {
        '名称': _joined_text(div.xpath('./a/span/text()')),
        '时间': _joined_text(div.xpath('./p/i[1]/text()')),
        '公里': _joined_text(div.xpath('./p/i[2]/text()')),
        '价格': _joined_text(price_nodes),
        '城市': city_name,
    }


def _fetch_page(city, page, timeout=10):
    """Download one listing page for ``city`` and return its parsed records.

    Returns an empty list when the request fails or the page has no cards,
    so one bad page does not abort the whole crawl.
    """
    try:
        response = requests.get(
            f'https://{city["citySpell"]}.taoche.com/all/',
            params={'page': str(page)},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'request failed for {city["citySpell"]} page {page}: {exc}')
        return []

    html = etree.HTML(response.text)
    if html is None:
        # etree.HTML returns None when the body is empty.
        return []
    divs = html.xpath('//*[@id="container_base"]/ul/li/div[@class="gongge_main"]')
    return [_parse_car(div, city['cityName']) for div in divs]


def main():
    """Crawl the first two listing pages of every city and print each record.

    Returns:
        A list with every record collected during the run (one dict per
        listing card, with the keys 名称 / 时间 / 公里 / 价格 / 城市).
    """
    cities = get_city()
    print(cities)

    results = []
    for city in cities:
        for page in range(1, 3):
            print(city, page)
            for result in _fetch_page(city, page):
                print(result)
                results.append(result)
            # Pause between page requests to avoid hammering the site.
            time.sleep(3)
    return results
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()