import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
def __init__(self, provinces):
self.provinces = provinces
def start_element(self, name, attrs):
pass
def end_element(self, name):
pass
def char_data(self, text):
self.provinces.append((text))
def get_province_entry(url):
content = requests.get(url).content.decode('gb2312')
start = content.find('<table height="22" cellSpacing="0" cellPadding="0" width="710" border="0">')
end = content.find('<tr align="middle">')
content = content[start:end ].strip()
print(content)
provinces = []
handler = DefaultSaxHandler(provinces)
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
parser.Parse(content)
return provinces
provinces = get_province_entry('http://www.ip138.com/post/')
print(provinces)
我想问下为什么打印出空值,我觉得是这部分错了,但是说不出哪里错
def char_data(self, text):
self.provinces.append((text))