from lxml import etree
import requests
import csv
import time
def spider():
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
}
pre_url = 'https://hefei.qfang.com/rent/f'
for x in range(1, 13):
html = requests.get(pre_url + str(x), headers=headers)
time.sleep(2) # 在每一次GET后,等待2秒
selector = etree.HTML(html.text)
# 先获取房源列表
house_list = selector.xpath("//*[@id='cycleListings']/ul/li")
for house in house_list:
xiaoqu = house.xpath("div[2]/div[3]/div/a/text()")[0]
huxing = house.xpath("div[2]/div[2]/p[1]/text()")[0]
area = house.xpath("div[2]/div[2]/p[2]/text()")[0]
month_price = house.xpath("div[3]/p/span[1]/text()")[0]
item = [xiaoqu, huxing, area, month_price]
data_writer(item)
print('正在抓取', xiaoqu)
def data_writer(item):
with open('qfang_chuzufang.csv','a+',encoding='utf-8',newline='')as csvfile:
writer = csv.writer(csvfile)
writer.writerow(item)
if __name__ == '__main__':
spider()
python爬取房源,可以运行,但是每次只爬出一条是什么原因?
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
3条回答 默认 最新
关注 已修改,爬到不止一页,就是你信息提取包括了一些其他符号,自己再处理一下就好了
from lxml import etree import requests import csv import time def spider(): print("stater") headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0' } pre_url = 'https://hefei.qfang.com/rent/f' for x in range(1, 13): html = requests.get(pre_url + str(x), headers=headers) time.sleep(2) # 在每一次GET后,等待2秒 selector = etree.HTML(html.text) # 先获取房源列表 house_list = selector.xpath("//*[@id='cycleListings']/ul/li") for house in house_list: xiaoqu = house.xpath("div[2]/div[3]/div/a/text()")[0] huxing = house.xpath("div[2]/div[2]/p[1]/text()")[0] area = house.xpath("div[2]/div[2]/p[2]/text()")[0] month_price = house.xpath("div[3]/p/span[1]/text()")[0] item = [xiaoqu, huxing, area, month_price] print(item) # data_writer(item) print('正在抓取', xiaoqu) # def data_writer(item): # with open('qfang_chuzufang.csv', 'a+', encoding='utf-8', newline='') as csvfile: # writer = csv.writer(csvfile) # writer.writerow(item) if __name__ == '__main__': spider()
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 1无用
悬赏问题
- ¥15 微信公众平台自制会员卡可以通过收款码收款码收款进行自动积分吗
- ¥15 随身WiFi网络灯亮但是没有网络,如何解决?
- ¥15 gdf格式的脑电数据如何处理matlab
- ¥20 重新写的代码替换了之后运行hbuliderx就这样了
- ¥100 监控抖音用户作品更新可以微信公众号提醒
- ¥15 UE5 如何可以不渲染HDRIBackdrop背景
- ¥70 2048小游戏毕设项目
- ¥20 mysql架构,按照姓名分表
- ¥15 MATLAB实现区间[a,b]上的Gauss-Legendre积分
- ¥15 delphi webbrowser组件网页下拉菜单自动选择问题