import urllib.request
import re
def getHtml(url):
response = urllib.request.urlopen(url)
html = response.read()
return html
# (2)下面代码获取帖子内所有图片地址
def getImg(html):
reg = r'src="([.*\s]*\.jpg)" pic_ext="jpeg"'
imgre = re.compile(reg)
imglist = re.findall(imgre.html)
return imglist
# (3)使用getHtml()输入任意帖子的URL地址
html = getHtml("http://tieba.baidu.com/p/3205263090")
# (4)修改html对象内的字符编码为UTF-8
html = html.decode('UTF-8')
# (5)使用下面代码循环保存图片
imgList = getImg(html)
print(imgList)
报错:D:\python\项目储存\untitled\venv\Scripts\python.exe D:/python/项目储存/untitled/dome.py
Traceback (most recent call last):
File "D:/python/项目储存/untitled/dome.py", line 21, in
imgList = getImg(html)
File "D:/python/项目储存/untitled/dome.py", line 12, in getImg
imglist = re.findall(imgre.html)
AttributeError: 're.Pattern' object has no attribute 'html'
Process finished with exit code 1