import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
    """Callback holder that collects character data into a caller-owned list.

    ``get_provinces_entry`` assigns the three methods below to an expat
    parser's ``StartElementHandler``, ``EndElementHandler`` and
    ``CharacterDataHandler`` slots.
    """

    def __init__(self, provinces):
        """Keep a reference to *provinces*, the list ``char_data`` appends to.

        The list is stored as-is (not copied), so the caller observes every
        appended item through its own variable.
        """
        self.provinces = provinces

    def start_element(self, name, attrs):
        """Ignore opening tags; only text content is collected."""

    def end_element(self, name):
        """Ignore closing tags; only text content is collected."""

    def char_data(self, text):
        """Append *text* to ``self.provinces`` with surrounding whitespace removed.

        Chunks that are empty or whitespace-only (for example the newlines and
        indentation between tags) are skipped so they do not show up as
        entries in the collected list.
        """
        stripped = text.strip()
        if stripped:
            self.provinces.append(stripped)
def get_provinces_entry(url):
    """Download *url* and return the text found in its ``<div class="area">`` part.

    The response body is decoded as GB2312, the markup starting at
    ``<div class="area">`` and ending just before
    ``<div id="head-login" class="right login">`` is cut out, and that
    fragment is fed to an expat parser whose character data is collected by a
    ``DefaultSaxHandler``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The list that ``DefaultSaxHandler.char_data`` filled while the
        fragment was parsed.

    Raises:
        ValueError: If either marker is missing from the decoded page.
        UnicodeDecodeError: If the response body is not valid GB2312.
        xml.parsers.expat.ExpatError: If the fragment is not well-formed XML.
        requests.RequestException: If the request fails or times out.
    """
    start_marker = '<div class="area">'
    end_marker = '<div id="head-login" class="right login">'

    # Without an explicit timeout a stalled server would block this call
    # indefinitely.
    response = requests.get(url, timeout=10)
    # NOTE(review): the encoding is hard-coded; confirm the page really is
    # served as GB2312, otherwise this line raises UnicodeDecodeError.
    content = response.content.decode('gb2312')

    start = content.find(start_marker)
    if start == -1:
        raise ValueError(
            'start marker {!r} not found in {}'.format(start_marker, url)
        )
    # Search from ``start`` so an earlier occurrence of the end marker cannot
    # produce an empty or reversed slice.
    end = content.find(end_marker, start)
    if end == -1:
        raise ValueError(
            'end marker {!r} not found in {}'.format(end_marker, url)
        )
    fragment = content[start:end].strip()

    provinces = []
    handler = DefaultSaxHandler(provinces)
    parser = ParserCreate()
    # Ask expat to hand over each run of text in one callback where possible
    # instead of splitting it into several pieces.
    parser.buffer_text = True
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.char_data
    # The pyexpat method is spelled ``Parse``; ``parser.parse`` does not exist
    # and raised AttributeError. ``True`` marks this as the final chunk.
    # NOTE(review): expat only accepts well-formed XML with a single root
    # element; if the page fragment is ordinary HTML this may raise
    # ExpatError -- consider html.parser.HTMLParser in that case.
    parser.Parse(fragment, True)
    return provinces
# Script entry: scrape the Sohu article below and print the collected text.
provinces = get_provinces_entry(
    'https://www.sohu.com/a/134099257_654351'
)
print(provinces)
# 新手，刚写爬虫，但是一直显示错误，不知道哪里错了，怎么改正？