from lxml import etree
import requests
import csv
import time
def _first_text(node, path):
    """Return the stripped first text hit of *path* under *node*, or None if there is no hit."""
    hits = node.xpath(path)
    if not hits:
        return None
    return hits[0].strip()


def spider():
    """Scrape rental listings from hefei.qfang.com and pass each one to ``data_writer``.

    Requests result pages 1 through 12 (``range(1, 13)``) and sleeps two seconds
    after every request. On each page, every ``<li>`` under the element with id
    ``cycleListings`` is treated as one listing; four text fields are extracted
    from it by relative XPath and written as one row.

    Listings that lack any of the four fields are skipped rather than aborting
    the whole run with an ``IndexError``. Extracted text is stripped of
    surrounding whitespace before it is written.

    Raises:
        requests.exceptions.RequestException: if a page request fails or does
            not answer within the timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    pre_url = 'https://hefei.qfang.com/rent/f'
    for x in range(1, 13):
        # A timeout keeps one stalled connection from hanging the scraper forever.
        html = requests.get(pre_url + str(x), headers=headers, timeout=10)
        time.sleep(2)  # wait 2 seconds after every GET
        selector = etree.HTML(html.text)
        # Fetch the list of listings first.
        house_list = selector.xpath("//*[@id='cycleListings']/ul/li")
        for house in house_list:
            # Field meanings are inferred from the pinyin variable names
            # (community, layout, area, monthly price) -- confirm against the page.
            xiaoqu = _first_text(house, "div[2]/div[3]/div/a/text()")
            huxing = _first_text(house, "div[2]/div[2]/p[1]/text()")
            area = _first_text(house, "div[2]/div[2]/p[2]/text()")
            month_price = _first_text(house, "div[3]/p/span[1]/text()")
            item = [xiaoqu, huxing, area, month_price]
            if None in item:
                # The <li> does not have the expected layout; skip it.
                continue
            # Writing must happen inside this loop: if it runs after the loop,
            # only the last listing is ever saved.
            data_writer(item)
            print('正在抓取', xiaoqu)
def data_writer(item):
    """Append *item* as one CSV row to ``qfang_chuzufang.csv`` in the working directory.

    The file is opened in append mode with UTF-8 encoding on every call, so it
    is created on first use and earlier rows are preserved.

    Args:
        item: an iterable of field values forming a single row.
    """
    # newline='' lets the csv module control line endings itself.
    with open('qfang_chuzufang.csv', mode='a+', encoding='utf-8', newline='') as out:
        csv.writer(out).writerow(item)
# Start the scraper only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    spider()
用 Python 爬取房源信息,程序可以正常运行,但每次只抓取到一条记录,是什么原因?
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
3条回答 默认 最新
关注 已修改,现在可以抓取多页数据;不过提取到的字段里还夹带了一些多余的符号,需要你自己再清洗一下。
from lxml import etree
import requests
import csv
import time


def spider():
    """Print the rental listings found on pages 1-12 of hefei.qfang.com.

    Each page is fetched with a browser-like User-Agent, followed by a
    two-second pause. Every ``<li>`` under the ``cycleListings`` element yields
    four text fields, which are printed as a list. Nothing is written to disk
    in this version.
    """
    print("stater")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    pre_url = 'https://hefei.qfang.com/rent/f'
    for page in range(1, 13):
        response = requests.get(pre_url + str(page), headers=headers)
        time.sleep(2)  # wait 2 seconds after every GET
        tree = etree.HTML(response.text)
        # Fetch the list of listings first.
        listings = tree.xpath("//*[@id='cycleListings']/ul/li")
        for listing in listings:
            xiaoqu = listing.xpath("div[2]/div[3]/div/a/text()")[0]
            huxing = listing.xpath("div[2]/div[2]/p[1]/text()")[0]
            area = listing.xpath("div[2]/div[2]/p[2]/text()")[0]
            month_price = listing.xpath("div[3]/p/span[1]/text()")[0]
            item = [xiaoqu, huxing, area, month_price]
            print(item)
            # data_writer(item)
            print('正在抓取', xiaoqu)


# The CSV writer is disabled in this version; rows are only printed.
# def data_writer(item):
#     with open('qfang_chuzufang.csv', 'a+', encoding='utf-8', newline='') as csvfile:
#         writer = csv.writer(csvfile)
#         writer.writerow(item)


if __name__ == '__main__':
    spider()
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 1无用
悬赏问题
- ¥20 西门子S7-Graph,S7-300,梯形图
- ¥50 用易语言http 访问不了网页
- ¥50 safari浏览器fetch提交数据后数据丢失问题
- ¥15 matlab不知道怎么改,求解答!!
- ¥15 永磁直线电机的电流环pi调不出来
- ¥15 用stata实现聚类的代码
- ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
- ¥20 docker里部署springboot项目,访问不到扬声器
- ¥15 netty整合springboot之后自动重连失效
- ¥15 悬赏!微信开发者工具报错,求帮改