问题相关代码
import requests
import re
from lxml import etree
from parsel import Selector
# 获取svg css文件
url = 'http://www.porters.vip/confusion/food.html'
svg_url = 'http://www.porters.vip/confusion/font/food.svg'
css_url = 'http://www.porters.vip/confusion/css/food.css'
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
}
svg_text = requests.get(svg_url).text
css_text = requests.get(css_url).text
# print(svg_text)
# print(css_text)
# 清除转行和空格,方便匹配
css_text = css_text.replace('\n','').replace(' ','')
# css坐标提取
css_class_name = 'vhkbvu'
pattern = re.compile('.%s{background:-(\d+)px-(\d+)px;}'%css_class_name)
coord = pattern.findall(css_text)
print(coord)
if coord:
x_ = coord[0][0]
y_ = coord[0][1]
x = int(x_)
y = int(y_)
print(x,y)
find_xy = ((x),(y))
# svg解析
text = re.findall('y="(.*?)">(.*?)</text>',svg_text,re.S)
list_y = []
list_text = []
for i in range(len(text)):
list_y.append(text[i][0])
list_text.append(text[i][1])
print(list_y)
print(list_text)
# 寻找svg文件中定义的字符大小
find_font_size = re.compile(r'font-size:(\d+)px')
size = find_font_size.findall(svg_text)
print(size)
soup = Selector(svg_text).xpath('//text')
print(soup)
list_y = [i.get('y_') for i in soup]
list_text = [i.text for i in soup]
svg_font_size = find_font_size.findall(svg_text)
find_xy = ((x),(y))
real_y = [i for i in text if y <= i][-1]
real_text = list_text[list_y.index(real_y)]
print(real_text)
Traceback (most recent call last):
list_y = [i.get('y_') for i in soup]
TypeError: get() takes 1 positional argument but 2 were given
我想要达到的结果
需求:
通过映射的方式来获取其中的服务评分以及电话号码,并能够进行完整的结果打印即可