#爬取京东上鞋子的图片,代码如下:
import urllib.request
import re
def craw(url,page):
html1=urllib.request.urlopen(url).read().decode('utf-8','ignore')
html1=str(html1)
pat1='<div id="J_goodsList"(.*?)<ul class="clearfix" data-x="ab">'
result1=re.compile(pat1).findall(html1)
result1=result1[0]
pat2='<img class="err-product" data-img="1" data-img="1" src="//(.*?).jpg"'
imagelist=re.compile(pat2).findall(result1)
x=1
for imageurl in imagelist:
imagename='E:/PyCharm/python_pycharm/爬取数据/img/'+str(page)+str(x)+'.jpg'
imageurl='http://'+imageurl
try:
urllib.request.urlretrieve(imageurl,filename=imagename)
except urllib.error.URLError as e:
if hasattr(e,'code'):
x+=1
if hasattr(e,'reason'):
x+=1
x+=1
for i in range(1,32):
url='http://coll.jd.com/list.html?sub=51044&page='+str(i)
craw(url,i)
运行后,报错:
========== RESTART: E:\PyCharm\python_pycharm\爬取数据\IDLE代码\shoes.py ==========
Traceback (most recent call last):
File "E:\PyCharm\python_pycharm\爬取数据\IDLE代码\shoes.py", line 25, in <module>
craw(url,i)
File "E:\PyCharm\python_pycharm\爬取数据\IDLE代码\shoes.py", line 8, in craw
result1=result1[0]
IndexError: list index out of range
>>>
求大神解答,谢谢!