Operation_sharing 2017-10-10 07:15 采纳率: 0%
浏览 1283
已结题

python爬虫执行报错!!

写了个爬虫,想从百度爬照片,爬10页时没问题,爬100页就报错了!求大神解惑!!
下面是完整代码:
#!/usr/bin/env python
#coding:utf8
import requests
import os
import sys
# Python 2 only: make UTF-8 the implicit str<->unicode codec so the Chinese
# keyword and log messages do not raise UnicodeDecodeError.  `reload` is not
# a builtin on Python 3 (and text is already Unicode there), so the hack is
# skipped instead of crashing with NameError.
if sys.version_info[0] == 2:
    # site.py removes sys.setdefaultencoding at startup; reloading restores it.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

def getManyPages(keyword,pages):
params=[]
for i in range(30,30*pages+30,30):
params.append({
'tn':'resultjson_com',
'ipn':'rj',
'ct':201326592,
'is':'',
'fp':'result',
'queryWord':keyword,
'cl':2,
'lm':-1,
'ie':'utf-8',
'oe':'utf-8',
'adpicid':'',
'st':-1,
'z':'',
'ic': 0,
'word': keyword,
's': '',
'se': '',
'tab': '',
'width': '',

'height': '',

'face': 0,
'istype': 2,
'qc': '',
'nc': 1,
'fr': '',
'pn': i,
'rn': 30,
'gsm': '1e',
'1507548959747':''

})
url = 'https://image.baidu.com/search/acjson'
print url
urls = []
for i in params:
#print i
urls.append(requests.get(url,params=i).json().get('data'))
return urls
print urls
def getImg(dataList, localPath):
    """Download every thumbnail referenced in *dataList* into *localPath*.

    Args:
        dataList: iterable of pages; each page is an iterable of dicts that
            may carry a ``thumbURL`` key.  ``None`` pages and empty/``None``
            items are tolerated and skipped.
        localPath: target directory; created (with parents) if missing.

    Returns:
        The number of images written.  Files are named ``0.jpg``, ``1.jpg``,
        ... in download order.
    """
    if not os.path.exists(localPath):
        # makedirs also creates missing parent directories.
        os.makedirs(localPath)

    x = 0
    for page in dataList:
        for item in page or []:
            thumb_url = (item or {}).get('thumbURL')
            if thumb_url:
                print('正在下载:%s' % thumb_url)
                ir = requests.get(thumb_url)
                # os.path.join keeps the file inside localPath even when the
                # caller omits the trailing slash; `with` closes the handle.
                with open(os.path.join(localPath, '%d.jpg' % x), 'wb') as fh:
                    fh.write(ir.content)
                x += 1
            else:
                print('图片链接不存在')
    return x

# Run the crawl only when executed as a script.  The guard must test the
# module attribute `__name__`; a bare `name` is undefined and raises NameError.
if __name__ == "__main__":
    dataList = getManyPages('美女', 1000)
    getImg(dataList, '/home/ubuntu/photo/')

执行报错:
ValueError: Invalid \escape: line 29 column 218 (char 27827)

  • 写回答

1条回答 默认 最新

  • Operation_sharing 2017-10-10 07:31
    关注

    #!/usr/bin/env python
    #coding:utf8
    import requests
    import os
    import sys
    # Python 2 only: make UTF-8 the implicit str<->unicode codec so the Chinese
    # keyword and log messages do not raise UnicodeDecodeError.  `reload` is not
    # a builtin on Python 3 (and text is already Unicode there), so the hack is
    # skipped instead of crashing with NameError.
    if sys.version_info[0] == 2:
        # site.py removes sys.setdefaultencoding at startup; reloading restores it.
        reload(sys)  # noqa: F821
        sys.setdefaultencoding('utf8')

    def getManyPages(keyword,pages):
    params=[]
    for i in range(30,30*pages+30,30):
    params.append({
    'tn':'resultjson_com',
    'ipn':'rj',
    'ct':201326592,
    'is':'',
    'fp':'result',
    'queryWord':keyword,
    'cl':2,
    'lm':-1,
    'ie':'utf-8',
    'oe':'utf-8',
    'adpicid':'',
    'st':-1,
    'z':'',
    'ic': 0,
    'word': keyword,
    's': '',
    'se': '',
    'tab': '',
    'width': '',

    'height': '',

    'face': 0,
    'istype': 2,
    'qc': '',
    'nc': 1,
    'fr': '',
    'pn': i,
    'rn': 30,
    'gsm': '1e',
    '1507548959747':''

    })
    url = 'https://image.baidu.com/search/acjson'
    print url
    urls = []
    for i in params:
    #print i
    urls.append(requests.get(url,params=i).json().get('data'))
    return urls
    print urls
    def getImg(dataList, localPath):
        """Download every thumbnail referenced in *dataList* into *localPath*.

        Args:
            dataList: iterable of pages; each page is an iterable of dicts that
                may carry a ``thumbURL`` key.  ``None`` pages and empty/``None``
                items are tolerated and skipped.
            localPath: target directory; created (with parents) if missing.

        Returns:
            The number of images written.  Files are named ``0.jpg``, ``1.jpg``,
            ... in download order.
        """
        if not os.path.exists(localPath):
            # makedirs also creates missing parent directories.
            os.makedirs(localPath)

        x = 0
        for page in dataList:
            for item in page or []:
                thumb_url = (item or {}).get('thumbURL')
                if thumb_url:
                    print('正在下载:%s' % thumb_url)
                    ir = requests.get(thumb_url)
                    # os.path.join keeps the file inside localPath even when the
                    # caller omits the trailing slash; `with` closes the handle.
                    with open(os.path.join(localPath, '%d.jpg' % x), 'wb') as fh:
                        fh.write(ir.content)
                    x += 1
                else:
                    print('图片链接不存在')
        return x
    

    # Run the crawl only when executed as a script.  The guard must test the
    # module attribute `__name__`; a bare `name` is undefined and raises NameError.
    if __name__ == "__main__":
        dataList = getManyPages('美女', 1000)
        getImg(dataList, '/home/ubuntu/photo/')

    评论

报告相同问题?

悬赏问题

  • ¥15 如何在scanpy上做差异基因和通路富集?
  • ¥20 关于#硬件工程#的问题,请各位专家解答!
  • ¥15 关于#matlab#的问题:期望的系统闭环传递函数为G(s)=wn^2/s^2+2¢wn+wn^2阻尼系数¢=0.707,使系统具有较小的超调量
  • ¥15 FLUENT如何实现在堆积颗粒的上表面加载高斯热源
  • ¥30 截图中的mathematics程序转换成matlab
  • ¥15 动力学代码报错,维度不匹配
  • ¥15 Power query添加列问题
  • ¥50 Kubernetes&Fission&Elasticsearch
  • ¥15 報錯:Person is not mapped,如何解決?
  • ¥15 c++头文件不能识别CDialog