hyggest
hyggest
2020-01-11 23:17

请问为什么当我试图储存爬到的多个页面的数据时,只能保存最后一页的数据

  • python

import pandas as pd
import re
import requests
from requests import RequestException
from bs4 import BeautifulSoup

def getHTMLText(url):
try:
r = requests.get(url, timeout=30)
r.raise_for_status()
r.encoding = r.apparent_encoding
return r.text
except:
return ""
for i in range(2,5):
url = "https://bj.lianjia.com/xiaoqu/pg" + str(i) +"/?from=rec"
print(url)
html = getHTMLText(url)
pattern = re.compile('

.*?(.*?).*?
(.*?)', re.S)
items = re.findall(pattern, html)
print(items)
name = []
price = []
info = []
for item in items:
print(item)
name.append(item[0])
name
price.append(item[1])
info = list(zip(name,price))
headers = ['小区', '价格']
filen_name = 'C:\Users\86157\Desktop\1.csv'
data3 = pd.DataFrame(columns = headers,data = info)
data3.to_csv(file_name, encoding='utf_8_sig')
pd.read_csv(file_name)
这是我写的代码

  • 点赞
  • 回答
  • 收藏
  • 复制链接分享

1条回答