源码:
# -*- coding:utf-8 -*-
from urllib import request
from bs4 import BeautifulSoup
import bs4
def get_data():
url = 'https://nj.lianjia.com/ershoufang/gulou/'
headers = {
'User_agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}
req = request.Request(url, headers=headers)
response = request.urlopen(req)
if response.getcode() == 200:
data = response.read()
data = str(data, encoding='utf-8')
print(data)
with open('index.html', mode='w', encoding='utf-8') as f:
f.write(data)
def parse_data():
with open('index.html', mode='r', encoding='utf-8') as f:
html = f.read()
bs = BeautifulSoup(html, 'html.parser') # 析HTML文件的时候就是用parser的
divs = bs.find_all(class_='clear LOGCLICKDATA')[0].get_text()
for div in divs[0:]:
div1 = div.select('.address')[0]
print(div1)
if __name__ == '__main__':
# get_data()
parse_data()
报错显示是
div1 = div.select('.address')[0]
这一行出了问题
还请各位帮忙看看是怎么了