陵听 2019-06-25 10:54 采纳率: 0%
浏览 3086

Python爬取出现Internal Server Error问题

import re
import urllib.request
fh=open('C:\\Users\\Hear-H\\Desktop\\汽车企业数据\\新建文件夹\\298.txt','w',encoding='utf-8')
area='<li><span>公司地区</span>(.*?)</li>'
area1=area.encode('utf-8')
time='<span>成立时间</span>(.*?)</li>'
time1=time.encode('utf-8')
address='<span>地址</span>(.*?)</li>'
address1=address.encode('utf-8') 
client='<p id=\"maintypicClient\">(.*?)</p>'
product='<p id=\"product\">(.*?)</p>'
i=0
pat='<a target=\"_blank\" href=\"(http://i.gasgoo.com/supplier/.*?)\">'
headers=('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36')
opener=urllib.request.build_opener()
opener.addheaders=[headers]
while i<100:
    i+=1
    url="http://i.gasgoo.com/supplier/c-298/index-"+str(i)+".html"
    web=opener.open(url).read().decode('utf-8')
    rst=re.compile(pat).findall(web)
    rst1=list()
    for a in rst:
        if a not in rst1:
            rst1.append(a)
    rst1.pop(0)
    for b in rst1:
        pat1=b+'\">(.*?)</a>'
        name=re.compile(pat1).findall(web)
        name_d=''.join(name)
        url1=b
        website1=opener.open(url1).read().decode('utf-8').encode('utf-8')
        website2=opener.open(url1).read().decode('utf-8')
        result1=re.compile(area1).findall(website1)
        for c in result1:
            result1_d=c.decode('utf-8')
        result2=re.compile(time1).findall(website1)
        for d in result2:
            result2_d=d.decode('utf-8')
        result3=re.compile(address1).findall(website1)
        for e in result3:
            result3_d=e.decode('utf-8')
        result4=re.compile(client).findall(str(website2))
        result4_d=''.join(result4)
        result5=re.compile(product).findall(str(website2))
        result5_d=''.join(result5)
        print(name_d+'?'+result1_d+'?'+result2_d+'?'+result3_d+'?'+result4_d+'?'+result5_d+'\n')
        fh1=fh.write(name_d+'?'+result1_d+'?'+result2_d+'?'+result3_d+'?'+result4_d+'?'+result5_d+'\n')
fh.close

就是我在爬取汽车企业数据网站的时候出现了HTTPError: Internal Server Error的问题,但是我上网查的时候一般说Internal Server Error出现的时候都会有500之类的数字提示,这里也没有,所以请问各位大佬一下,出现这种情况是不是只能用代理了呢?或者还有其他的方法

Traceback (most recent call last):

  File "<ipython-input-1-7c05d0a2c578>", line 1, in <module>
    runfile('C:/Users/Hear-H/Desktop/汽车企业数据/汽车企业数据挖掘.py', wdir='C:/Users/Hear-H/Desktop/汽车企业数据')

  File "D:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
    execfile(filename, namespace)

  File "D:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/Hear-H/Desktop/汽车企业数据/汽车企业数据挖掘.py", line 39, in <module>
    website1=opener.open(url1).read().decode('utf-8').encode('utf-8')

  File "D:\Anaconda\lib\urllib\request.py", line 531, in open
    response = meth(req, response)

  File "D:\Anaconda\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)

  File "D:\Anaconda\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)

  File "D:\Anaconda\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)

  File "D:\Anaconda\lib\urllib\request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: Internal Server Error
  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥20 iqoo11 如何下载安装工程模式
    • ¥15 flask项目,怎么使用AJAX传数据库数据到echarts图表的data里,实现异步加载数据。
    • ¥15 本题的答案是不是有问题
    • ¥15 关于#r语言#的问题:(svydesign)为什么在一个大的数据集中抽取了一个小数据集
    • ¥15 C++使用Gunplot
    • ¥15 这个电路是如何实现路灯控制器的,原理是什么,怎么求解灯亮起后熄灭的时间如图?
    • ¥15 matlab数字图像处理频率域滤波
    • ¥15 在abaqus做了二维正交切削模型,给刀具添加了超声振动条件后输出切削力为什么比普通切削增大这么多
    • ¥15 ELGamal和paillier计算效率谁快?
    • ¥15 蓝桥杯单片机第十三届第一场,整点继电器吸合,5s后断开出现了问题