python爬虫执行报错!! 5C

写了个爬虫,从百度想从百度爬照片,爬10页时没问题,爬100页就报错了!求大神解惑!!
下面是完整代码:
#!/usr/bin/env python
#coding:utf8
import requests
import os
import sys
reload(sys)
sys.setdefaultencoding('utf8')

def getManyPages(keyword,pages):
params=[]
for i in range(30,30*pages+30,30):
params.append({
'tn':'resultjson_com',
'ipn':'rj',
'ct':201326592,
'is':'',
'fp':'result',
'queryWord':keyword,
'cl':2,
'lm':-1,
'ie':'utf-8',
'oe':'utf-8',
'adpicid':'',
'st':-1,
'z':'',
'ic': 0,
'word': keyword,
's': '',
'se': '',
'tab': '',
'width': '',

'height': '',

'face': 0,
'istype': 2,
'qc': '',
'nc': 1,
'fr': '',
'pn': i,
'rn': 30,
'gsm': '1e',
'1507548959747':''

})
url = 'https://image.baidu.com/search/acjson'
print url
urls = []
for i in params:
#print i
urls.append(requests.get(url,params=i).json().get('data'))
return urls
print urls
def getImg(dataList,localPath):
if not os.path.exists(localPath):
os.mkdir(localPath)

x = 0
for list in dataList:
    for i in list:
        if i.get('thumbURL') != None:
            print ('正在下载:%s' % i.get('thumbURL'))
            ir = requests.get(i.get('thumbURL'))
            open(localPath + '%d.jpg' % x, 'wb').write(ir.content)
            x += 1
        else:
            print ('图片链接不存在')

if name == "__main__":
dataList = getManyPages('美女',1000)
getImg(dataList,'/home/ubuntu/photo/')

执行报错:
ValueError: Invalid \escape: line 29 column 218 (char 27827)

1个回答

#!/usr/bin/env python
#coding:utf8
import requests
import os
import sys
reload(sys)
sys.setdefaultencoding('utf8')

def getManyPages(keyword,pages):
params=[]
for i in range(30,30*pages+30,30):
params.append({
'tn':'resultjson_com',
'ipn':'rj',
'ct':201326592,
'is':'',
'fp':'result',
'queryWord':keyword,
'cl':2,
'lm':-1,
'ie':'utf-8',
'oe':'utf-8',
'adpicid':'',
'st':-1,
'z':'',
'ic': 0,
'word': keyword,
's': '',
'se': '',
'tab': '',
'width': '',

'height': '',

'face': 0,
'istype': 2,
'qc': '',
'nc': 1,
'fr': '',
'pn': i,
'rn': 30,
'gsm': '1e',
'1507548959747':''

})
url = 'https://image.baidu.com/search/acjson'
print url
urls = []
for i in params:
#print i
urls.append(requests.get(url,params=i).json().get('data'))
return urls
print urls
def getImg(dataList,localPath):
if not os.path.exists(localPath):
os.mkdir(localPath)

x = 0
for list in dataList:
    for i in list:
        if i.get('thumbURL') != None:
            print ('正在下载:%s' % i.get('thumbURL'))
            ir = requests.get(i.get('thumbURL'))
            open(localPath + '%d.jpg' % x, 'wb').write(ir.content)
            x += 1
        else:
            print ('图片链接不存在')

if name == "__main__":
dataList = getManyPages('美女',1000)
getImg(dataList,'/home/ubuntu/photo/')

Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问