代码如下,问题在最后:
import urllib.request
import socket
import re
import sys
import os
targetDir = r"D:\test"  # directory where downloaded files are saved


def destFile(path: str) -> str:
    """Return the local file path under ``targetDir`` for a remote *path*.

    The file name is the text after the last ``/`` in *path*; a *path*
    without any ``/`` is used as the file name unchanged. ``targetDir`` is
    created first if it does not exist yet.

    Args:
        path: URL or slash-separated path of the remote resource.

    Returns:
        ``targetDir`` joined with the extracted file name.

    Raises:
        OSError: if ``targetDir`` cannot be created (for example, a regular
            file with that name already exists).
    """
    # makedirs(exist_ok=True) also creates missing parents and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(targetDir, exist_ok=True)
    # rsplit never raises, unlike rindex('/') on a path without a slash.
    filename = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, filename)
if __name__ == "__main__":  # script entry point
    # Baidu image-search results page for a fixed, URL-encoded keyword.
    weburl = "http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E8%8C%83%E5%86%B0%E5%86%B0"
    # Send a Firefox User-Agent string with the request.
    webheaders = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib.request.Request(url=weburl, headers=webheaders)  # build the request with its headers

    # Send the request; the context manager closes the HTTP response.
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()

    # Decode instead of calling str() on the bytes: the bytes repr turns real
    # line breaks into a literal backslash-n, so [^\s] in the pattern below
    # would no longer stop a match at the end of a line.
    page_text = contentBytes.decode('utf-8', errors='replace')

    # Make sure the download directory exists before saving anything.
    os.makedirs(targetDir, exist_ok=True)

    # The pattern has two groups, so re.findall returns one
    # (whole_url, extension) tuple per match; the for statement unpacks each
    # tuple into `link` and `ext`. set() drops URLs that occur more than once
    # in the page, and sorted() makes the file numbering repeatable.
    matches = sorted(set(re.findall(r'(http:[^\s]*?(jpg|png|gif))', page_text)))

    x = 0  # number of images saved so far; also used as the next file name
    for link, ext in matches:
        print(link)
        # Keep the matched extension instead of naming every file ".jpg".
        dest = os.path.join(targetDir, '%s.%s' % (x, ext))
        try:
            urllib.request.urlretrieve(link, dest)
        except Exception as exc:
            # Best effort: report the failed download and move on to the next.
            print('失败', exc)
        else:
            x = x + 1
问题是:`for link, t in set(re.findall(r'(http:[^\s]*?(jpg|png|gif))', str(contentBytes))):`
这句为什么要用 set 处理?`for link, t` 是什么意思?