江jh 2020-03-02 12:28 采纳率: 87.5%
浏览 534
已采纳

python多线下载为什么下载不了?starting failed

代码:

from downloader import Downloader #, cStringIO, cPickle
from threading import Thread
from time import sleep
import log2 as log
from os.path import basename
import requests as req

import pickle
from os.path import exists
db='E:/tmp/download.data'

def append(obj):
  try:
    if exists(db):
      with open(db,'rb') as f:
        data=pickle.load(f)
    else: data={}
  except:
    data={}
  data[obj['url']]=obj
  with open(db,'wb') as f:
    pickle.dump(data,f)

def load(url):
  if not exists(db): return None
  try:
    with open(db,'rb') as f:
      data=pickle.load(f)
    return data.get(url)
  except:
    return None
def out(msg):
  print(msg)
import time
from os.path import basename, exists, getsize
from queue import Queue
from threading import Lock, Thread, current_thread

import requests as req
import random as rand

import conf



class Downloader:
  KB=1024
  MB=KB*KB
  GB=KB*MB
  range_size=MB
  max_workers=10
  spd_refresh_interval=1
  user_agents=[
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
    'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36'
    'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0'
  ]
  chunk_size=KB
  max_error=0.1 #单线程允许最大出错率
  max_error_one_worker=0.5 #仅剩一个线程时允许的最大出错率
  home='E:/tmp/' #下载目录
  def __init__(self,c):
    self.__locks={i:Lock() for i in ('file','worker_info','itr_job','download_info')}
    self.__config=c
    self.__alive=False
    self.__fails=Queue()
    self.__conf=c
    c=conf.load(c['url'])
    if c:
      self.__conf=c
      self.__init_from_conf()
    else: self.__init_task()

  def __init_from_conf(self):
    self.__download_offset=self.__conf['offset']
    for i in self.__conf['fails']: self.__fails.put(i)

  def __get_agent(self):
    return self.user_agents[rand.randint(0,len(self.user_agents)-1)]

  def __init_task(self):
    headers={'Range':'bytes=0-0'}
    headers = {'Host': 'https://files.pythonhosted.org/packages/','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://pypi.org/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch, br',
        'Accept-Language': 'zh-CN,zh;q=0.8'} 
    headers['User-Agent']=self.__get_agent()
    print(headers)
    try:
      r=req.get(self.__conf['url'],headers=headers,stream=True)
      self.__conf['name'] = basename(self.__conf['url']) or str(int(round(time.time()*1000)))
      self.__conf['206'] = r.status_code == 206 or r.headers.get('Accept-Ranges')=='bytes'
      if self.__conf['206']:
        self.__conf['len']=int(r.headers['Content-Range'].split('/')[-1])
      elif r.status_code!=200:
        out('init task err')
        return
      else:
        self.__conf['len']=int(r.headers['Content-Length'])
      r.close()
      self.__download_offset=0
      self.__conf['init']=True
    except Exception as e:
      out(e)

  def __itr_job(self):
    if self.__locks['itr_job'].acquire():
      if not self.__fails.empty():
        ans=self.__fails.get()
      elif self.__download_offset<self.__conf['len']:
        o=self.__download_offset
        ans=(o,min(self.__conf['len']-1,o+self.range_size-1))
        self.__download_offset+=self.range_size
      else:
        ans=(-1,-1)
      self.__locks['itr_job'].release()
    return ans

  def __has_job(self):
    if self.__locks['itr_job'].acquire():
      ans=self.__download_offset<self.__conf['len'] or  not self.__fails.empty()
      self.__locks['itr_job'].release()
    return ans

  def __download_no_206(self):
    headers={'User-Agent':self.__get_agent()}
    r=req.get(self.__conf['url'],headers=headers,stream=True)
    self.__download_offset=0
    if r.status_code != 200:
      r.close()
      self.__stopped()
      return
    try:
      for con in r.iter_content(chunk_size=self.chunk_size):
        if self.__kill_signal: break
        self.__file.write(con)
        l=len(con)
        self.__down_bytes+=l
        self.__download_offset+=l
        t0=time.time()
        t=t0-self.__last_time
        if t>=self.spd_refresh_interval:
          self.__down_spd=self.__down_bytes/t
          out('downspd: %d KB/s'%(self.__down_spd/self.KB))
          self.__last_time=t0
          self.__down_bytes=0
    except:
      pass
    r.close()
    self.__stopped()

  def __download_206(self):
    file_len=self.__conf['len']
    total=0
    error=0
    kill=False
    with req.session() as sess:
      while True:
        s,e=self.__itr_job()
        if s==-1:
          out('no job stop')
          break
        headers={'Range':'bytes=%d-%d'%(s,e)}
        headers['User-Agent']=self.__get_agent()
        try:
          r=sess.get(self.__conf['url'],headers=headers,stream=True)
          total+=1
          if r.status_code!=206:
            self.__fails.put((s,e))
            error+=1
            if error>self.max_error*total:
              if self.__locks['worker_info'].acquire():
                num=self.__current_workers
                self.__locks['worker_info'].release() 
                if error>self.max_error_one_worker*total or num>1:
                  break           
            continue
          for con in r.iter_content(chunk_size=self.chunk_size):
            if self.__locks['worker_info'].acquire():
              if self.__kill_signal:
                self.__locks['worker_info'].release()
                kill=True
                break
              self.__locks['worker_info'].release()

            if self.__locks['file'].acquire():
              self.__file.seek(s)
              self.__file.write(con)
              l=len(con)
              s+=l
              self.__locks['file'].release()

              if self.__locks['download_info'].acquire():
                self.__down_bytes+=l
                t0=time.time()
                t=t0-self.__last_time
                if t>=self.spd_refresh_interval:
                  out('downspd: %d KB/s'%(self.__down_spd/self.KB))
                  self.__down_spd=self.__down_bytes/t
                  self.__down_bytes=0
                  self.__last_time=t0
                self.__locks['download_info'].release()

          if s<=e and s<file_len:
            self.__fails.put((s,e))
          if kill:
            break
        except  :
          self.__fails.put((s,e))
          error+=1
          if error>self.max_error*total:
            if self.__locks['worker_info'].acquire():
              num=self.__current_workers
              self.__locks['worker_info'].release() 
              if error>self.max_error_one_worker*total or num>1:
                break 

      self.__stopped()

  def __start_worker(self,target):
    if self.__locks['worker_info'].acquire():
      if self.__kill_signal: 
        self.__locks['worker_info'].release()
        return False
      if self.__current_workers<self.max_workers:
        Thread(target=target).start()
        self.__current_workers+=1
        out('new worker started,current workers %d'%self.__current_workers)
      self.__locks['worker_info'].release()
    return True

  def __start_workers(self):
    for _ in range(self.max_workers):
      if not self.__start_worker(self.__download_206): break
      time.sleep(0.8)

  def start(self):
    if self.__alive:
      out('already started!')
      return
    if self.__conf.get('status')=='done':
      out('already done')
      return
    self.__alive=True
    self.__kill_signal=False
    self.__conf['status']='working'
    self.__down_bytes=0
    self.__down_spd=0
    self.__last_time=0
    self.__current_workers=0
    self.__start_time=time.time()

    try:
      path=self.home+self.__conf['name']
      self.__file=open(path,(exists(path) and 'rb+') or 'wb' )
      if not self.__conf['206']:
        Thread(target=self.__start_workers).start()
      else: self.__start_worker(self.__download_no_206)
      out('starting done!')
    except: out('starting failed')

  def stop(self):
    if self.__kill_signal:
      return
    out('stopping')
    if self.__locks['worker_info'].acquire():
      self.__kill_signal=True
      if self.__conf['status']=='working':
        self.__conf['status']='stopped'
      self.__locks['worker_info'].release()

  def __after_stopped(self):
    if not self.__kill_signal:
      self.__kill_signal=True
    __alive=False
    self.__file.close()
    out('total time: %.2f'%(time.time()-self.__start_time))
    self.__conf['offset']=self.__download_offset
    if not self.__has_job():
      self.__conf['status']='done'
    elif self.__conf.get('status')!='stopped': self.__conf['status']='error'
    leak=0
    ls=[]
    while not self.__fails.empty():
      i=self.__fails.get()
      leak+=i[1]-i[0]+1
      ls.append(i)
    self.__conf['fails']=ls
    leak+=max(self.__conf['len']-self.__download_offset,0)
    out('total leak:  %d'%leak)
    conf.append(self.__conf)

  def __stopped(self):
    if self.__locks['worker_info'].acquire():
      self.__current_workers-=1
      out('%s stopped'%current_thread().name)
      if self.__current_workers==0:
        self.__after_stopped()
      self.__locks['worker_info'].release()
#!/usr/bin/env python
# coding=utf-8
#import importlib,sys
#import sys
#sys.setdefaultencoding('gbk')

'''import sys
import imp
import sys
reload(sys)
sys.setdefaultencoding('utf8')
'''
'''
import sys
sys.setdefaultencoding('utf-8')
import jieba
import json'''

def main():
    from bs4 import BeautifulSoup
    import urllib.request

    import urllib.parse as parse
    import ssl
    import re
    import os,os.path
    import codecs
    import requests 

    def getHtml(url):
        global html
        page = urllib.request.urlopen(url)
        html = page.read()
        return html

    def file(url1,file_name,name):
        print(url1)

        #file(name,save_path,filename)

        #url1= +'/' + filename
        url1=url1.encode()  
        #file = open(name ,'wb+')
        #file.write(url1 )  
        #file.close()
        #print(file_name)

        headers = {'Host': 'https://files.pythonhosted.org/packages/','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://pypi.org/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch, br',
        'Accept-Language': 'zh-CN,zh;q=0.8'} 
        #req = urllib.urlretrieve(download_url,headers=headers)
        #urllib.request.urlopen('https://www.lfd.uci.edu/~gohlke/pythonlibs/')
        #req = urllib.request.Request(url=url,headers=header)
        #request =   urllib.request.urlopen(url1)
        #response = urllib.request.urlopen(request)
        import socket
        import urllib.request
        #设置超时时间为30s
        socket.setdefaulttimeout(5)
        #解决下载不完全问题且避免陷入死循环
        '''try:

            urllib.request.urlretrieve(url1.decode(),name)
        except socket.timeout:'''
        count = 1
        while count <= 1:
            import time

            # 格式化成2016-03-20 11:45:39形式
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) 

            # 格式化成Sat Mar 28 22:24:24 2016形式
            print(time.strftime("%a %b %d %H:%M:%S %Y", time.localtime())) 

            # 将格式字符串转换为时间戳
            a = "Sat Mar 28 22:24:24 2016"
            print(time.mktime(time.strptime(a,"%a %b %d %H:%M:%S %Y")))
            try:
                urllib.request.urlretrieve(url1.decode(),name)
                print('\nchangshi'+str(count)+'over\n')
                break
            except socket.timeout:
                err_info = 'Reloading for %d time'%count if count == 1 else 'Reloading for %d times'%count
                print(err_info)
                count += 1
            except urllib.error.HTTPError:
                print('urllib.error.HTTPError')
            except urllib.error.URLError:
                print('urllib.error.URLError')
            except ssl.SSLWantReadError:
                print('ssl.SSLWantReadError')

        if count > 1:
            print("downloading picture fialed!")

        #urllib.request.urlretrieve(url1.decode(),name)

        global i     
        i += 1 

        print(url1.decode())
        #file = open(name ,'wt+')
        #file.write(str(req.content())) 
        #file.close()
        print(file_name)
        global x
        print("Completed : .... %d  ..." % x)
        '''for i in range(len(name_list)):

            j=0
            if name_list[i-24:i+1]=='https://pypi.org/project/':

                 name_list1.append(name_list[i+1:i+60])'''

        print('\n........'+name+'..........complete\n')

        '''headers = {'Host': 'download.lfd.uci.edu','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://www.lfd.uci.edu/~gohlke/pythonlibs/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch, br',
        'Accept-Language': 'zh-CN,zh;q=0.8'} 
        #req = urllib.urlretrieve(download_url,headers=headers)     








        #urllib.request.urlopen('https://www.lfd.uci.edu/~gohlke/pythonlibs/')
        #req = urllib.request.Request(url=url,headers=header)
        request =   requests.get(url=url1,headers=headers)


        #response = urllib.request.urlopen(request)
        global i
        i += 1 
        file = open(name ,'wb+')
        file.write(request.content)


        file.close()
        print(file_name)
        print("Completed : .... %d  ..." % x)'''

    save_path = os.getcwd()
    url = 'https://www.lfd.uci.edu/'
    html = getHtml(url)
    html='''

    </li>
    <li><a id="kwant"></a><strong><a href="http://kwant-project.org/">Kwant</a></strong>: quantum transport simulations made easy.<br>
     Requires <a href="https://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy">numpy+mkl</a> and <a href="https://www.lfd.uci.edu/~gohlke/pythonlibs/#tinyarray">tinyarray</a>.
    <ul>
    <li><a href="javascript:;" onclick=" javascript:dl([101,116,54,104,51,56,113,108,46,99,118,106,49,119,109,45,50,110,115,95,112,107,47,105,97,53,52,100], &quot;A?:5C9H0ED&lt;G@0&gt;;7I7;&gt;8C34&gt;8C34&gt;&lt;F@BG=J1I7&lt;26&quot;); &quot;javascript: dl(&quot;" title="[2.5 MB] [Jul 06, 2019]">kwant‑1.4.1‑cp38‑cp38‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,45,107,108,97,47,116,113,110,99,56,49,118,46,104,50,115,105,53,112,106,119,52,51], &quot;?&gt;C6B;A541D3750:&lt;E&lt;:08BF908BF90D@7F&gt;&lt;D=2&quot;); &quot;javascript: dl(&quot;" title="[2.1 MB] [Jul 06, 2019]">kwant‑1.4.1‑cp38‑cp38‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,46,48,105,104,110,51,107,108,99,115,118,109,113,55,100,53,47,54,50,49,119,45,116,112,97,95,52,106], &quot;9BK&lt;G:?F@6DH4FEC0J01E8G5=E8G5=;ED24IH;&gt;AJ0D37&quot;); &quot;javascript: dl(&quot;" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp37‑cp37m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,52,112,106,45,109,51,99,108,48,104,107,46,53,118,97,105,116,113,119,47,55,50,110,49,115], &quot;HE2A1=&lt;@C:B&gt;F@3G;0;83615D3615D43B?F5E;B97&quot;); &quot;javascript: dl(&quot;" title="[2.1 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp37‑cp37m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,112,99,97,49,105,54,113,115,108,109,53,52,116,51,118,106,107,110,104,50,95,47,48,45,119,100,46], &quot;7C?60&gt;:&lt;E@H2A&lt;G3J;JFG10=5G10=59GH4AD29I5;JHB8&quot;); &quot;javascript: dl(&quot;" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp36‑cp36m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,47,48,107,53,108,49,119,52,105,110,115,50,104,112,106,116,45,51,113,99,97,46,118,109,54], &quot;:;&gt;B=F3?026D9?@5E7E1@C=AH@C=AHG@689A;E6&lt;4&quot;); &quot;javascript: dl(&quot;" title="[2.1 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp36‑cp36m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,97,51,53,50,107,46,105,54,49,47,104,52,109,100,115,118,119,108,48,112,116,45,99,113,110,106,95], &quot;&gt;3IGC?2D9FC1294@0HDE85;5BEFC12EFC12&lt;E@6HJ0&lt;=7;5@:A&quot;); &quot;javascript: dl(&quot;" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp35‑cp35m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,46,52,50,104,49,118,119,99,48,107,113,97,115,47,51,45,105,110,112,53,108,106,116,109], &quot;&lt;2E:B5CF=7B&gt;C=96;AF?40108?7B&gt;C?7B&gt;CG?6@A&gt;2063D&quot;); &quot;javascript: dl(&quot;" title="[2.0 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp35‑cp35m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,49,108,109,119,46,113,107,97,95,99,105,110,53,51,104,100,116,112,54,50,52,115,45,118,47,106], &quot;ECI5AG&lt;@H9A=DH637;@F04=4CF9A=DF9A=D2F3:;872?BD43&gt;1&quot;); &quot;javascript: dl(&quot;" title="[2.1 MB] [Jan 06, 2018]">kwant‑1.3.2‑cp34‑cp34m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,106,99,112,47,113,97,116,52,51,49,107,53,109,104,105,110,108,119,45,115,46,50,118], &quot;CE042F;6312873:A5?6B9D8DEB1287B1287&lt;BA&gt;?8EDA=@&quot;); &quot;javascript: dl(&quot;" title="[1.8 MB] [Jan 06, 2018]">kwant‑1.3.2‑cp34‑cp34m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,51,110,97,105,113,45,47,104,108,99,112,46,100,109,55,53,95,119,49,106,50,107,54,118,116,52,115], &quot;JDC4:G?H69:D&gt;6EA21H5B;B;059:D&gt;59:D&gt;=5A31@2=&lt;FI;A78&quot;); &quot;javascript: dl(&quot;" title="[13.5 MB] [May 15, 2017]">kwant‑1.1.3‑cp27‑cp27m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,55,113,108,46,104,107,106,49,115,118,99,50,119,47,45,51,97,116,110,53,109,112,105], &quot;8;61E9CA=:E;0=5&lt;@BA&gt;7373?&gt;:E;0&gt;:E;0D&gt;&lt;FB?;3&lt;42&quot;); &quot;javascript: dl(&quot;" title="[6.7 MB] [May 15, 2017]">kwant‑1.1.3‑cp27‑cp27m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,113,99,45,52,119,95,49,111,100,116,48,46,47,118,55,97,112,108,107,115,54,110,105,50,101,104,53,106,109], &quot;CGK0@=J9&lt;1@G&gt;&lt;B4?E926;:;J21@G&gt;2E7EH24FE5?L8D3;4IA&quot;); &quot;javascript: dl(&quot;" title="[13.4 MB] [Sep 11, 2015]">kwant‑1.0.5‑cp27‑none‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,106,47,105,101,113,111,97,108,50,107,45,112,104,118,48,49,110,119,99,115,53,51,46,55,116], &quot;C804;=DH1B;8G19A6@H:?F&gt;FD:B;8G:@5@3:A2@E8FA&lt;7&quot;); &quot;javascript: dl(&quot;" title="[6.7 MB] [Sep 11, 2015]">kwant‑1.0.5‑cp27‑none‑win32.whl</a></li>
    </ul>
    </li>
    <li><a id="la"></a><strong><a href="https://github.com/kwgoodman/la">La</a></strong>: aka larry, the labeled numpy array.
    <ul>
    <li><a href="javascript:;" onclick=" javascript:dl([101,97,109,99,108,48,51,46,110,54,50,105,47,95,53,104,113,55,45,100,112,118,52,101,115,106,116,119], &quot;G9H?CD=I;2C5=;30A46@646BFD4A2C5=A2C5=1AJ:7&lt;01B8E6J&gt;3&quot;); &quot;javascript: dl(&quot;" title="[139 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp35‑cp35m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,108,55,110,99,112,48,47,116,106,109,115,119,118,100,53,51,104,45,105,113,46,101,50,97], &quot;:F8C4&lt;&gt;7634?&gt;60GA5D1D5D=E&lt;5A34?&gt;A34?&gt;9A;B2?FD;@0&quot;); &quot;javascript: dl(&quot;" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp35‑cp35m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,109,55,118,110,115,50,106,104,113,116,108,53,97,100,54,101,47,52,105,112,46,95,51,45,99,119,48], &quot;4568C2;9@HCFA@:&lt;GJD1DJD=?2JGHCFAGHCFA0GIB3E&lt;0=&gt;ADI7:&quot;); &quot;javascript: dl(&quot;" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp34‑cp34m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,105,48,55,112,101,52,53,99,51,115,50,109,97,47,110,118,113,106,116,108,46,119,104,100,45], &quot;9:A@3?6B=7385=C&lt;H1D2D1DG4?1H7385H7385;HE0&gt;8:DEFC&quot;); &quot;javascript: dl(&quot;" title="[136 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp34‑cp34m‑win32.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,45,105,55,100,116,97,99,106,113,101,95,54,50,110,104,47,115,119,118,109,108,52,46,53,48,112], &quot;@&lt;78IBG4?6I&lt;2?D50HF2FHF39BH06I&lt;206I&lt;2C0A1=:5C3;EFA&gt;D&quot;); &quot;javascript: dl(&quot;" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp27‑cp27m‑win_amd64.whl</a></li>
    <li><a href="javascript:;" onclick=" javascript:dl([101,119,55,53,48,105,118,112,47,115,45,108,116,97,113,104,99,100,106,51,110,109,46,50,101], &quot;8FA=652;7?6F17:&lt;93E1E3E@G539?6F19?6F1D904CBFE0&gt;:&quot;); &quot;javascript: dl(&quot;" title="[136 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp27‑cp27m‑win32.whl</a></li>
    </ul>
    </li>


    '''
    print('html done')
    #html.decode('utf-8')
    #print(html)
    '''headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'} 
    r = requests.get(url, headers = headers)

    r.encoding = "utf-8"

    soup = BeautifulSoup(r.text, "html.parser")
    #html_mod=re.sub(pattern=".",repl=".",string=html.decode('utf-8'))
    for link in soup.find_all('a'): #soup.find_all返回的为列表
        print(link.get('href'))
        #name_list+=link

    '''

    name_list = html#soup.find_all('a')#re.findall(r']">*-cp38-win_amd64.whl',html.decode('utf-8'))
    x=1

    files=os.listdir(save_path)
    print(files)
    print(type(name_list))
    name_list=str(name_list)
    name_list1=[]
    #print(name_list)
    #for name in name_list:


    k=0
       # name[k]=str(name1[k])
    for i in range(len(name_list)):
        j=0
        if name_list[i-2:i+1]==']">':

            name_list1.append(name_list[i+1:i+60])

            global m

            if k<len(name_list1):
                for l in range(len(name_list1[k])):
                    if l-9>=0:
                        if name_list1[k][l-4:l]=='.whl' or name_list1[k][l-3:l]=='.gz' or name_list1[k][l-4:l]=='.zip':

                            j=1

                            m=l
            if j==1:
                name_list1[k]=name_list1[k][0:m]
                k+=1



        '''if j==0:
        name_list.remove(name)'''


     #file_name = os.path.join(save_path ,name)

        i=0

    #print(name)
    print(name_list1)
    for name in name_list1:
        j=0
        for l in range(len(name)):
            if l-9>=0:
                 if name[l-4:l]=='.whl' or name[l-3:l]=='.gz' or name[l-4:l]=='.zip':

                     j=1

                     m=l
        if j==1:
            name=name[0:m]
            k+=1


        if name in files:
            continue
        '''if name=='Delny‑0.4.1‑cp27‑none‑win_amd64.whl</a></li>\n<li>' or name==Delny‑0.4.1‑cp27‑none‑win32.whl</a></li>
    </ul>
    </:
            continue

        '''

        print('no:'+str(x))
        print('\ndownload '+name)
       # importlib.reload(sys)
        #imp.reload(sys)
        for l in range(len(name)):
            if l-9>=0:
                 if name[l-4:l]=='.whl' or name[l-3:l]=='.gz' or name[l-4:l]=='.zip':

                     j=1

                     m=l
        if j==1:
            name=name[0:m]
            k+=1


        string='https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/' + name#[0:4+name.find('.whl')]#https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/
        print('00'+save_path)
        count=0
        v=0
        for p in range(len(string)):
            if string[p]=='\\':
                if v==0:
                    string=string[:6]+'//'+string[7:]
                else:
                    string=string[:p]+'/'+string[p+1:]
                v+=1

            if string[p-3:p]=='win':
                string=string[:p-4]+'-'+string[p-3:]
            if p<len(string):

                if (string[p]=='\u2011')==True:
                    if p+1<len(string):
                        string=string[:p]+'-'+string[p+1:]

            '''if string[p-2]>='0' and string[p-2]<='9' and string[p-1]>='0' and string[p-1]<='9':
                if (string[p]>='a'and string[p]<='z') or (string[p]>='A'and string[p]<='Z'):
                    string=string[:p]+string[p+1:]'''
            if p>=len(string):
                break
        '''if name[:9]=='ad3‑2.2.1':
            print('aaa')
            continue'''

        conf={'url':string}
        d=Downloader(conf)
        d.start()

        #file(string,save_path,name)

        x=x+1
    print('09'+name_list)
    print('finished')


if __name__ == '__main__':
  main()

求高手解决

  • 写回答

1条回答

  • threenewbee 2020-03-02 13:15
    关注

    看上去代码
    try:
    path=self.home+self.__conf['name']
    self.__file=open(path,(exists(path) and 'rb+') or 'wb' )
    if not self.__conf['206']:
    Thread(target=self.__start_workers).start()
    else: self.__start_worker(self.__download_no_206)
    out('starting done!')
    except: out('starting failed')
    丢出了异常
    把except修改为
    except Exception, e:
    print ('str(e):\t\t', str(e))
    print ('repr(e):\t', repr(e))
    print ('e.message:\t', e.message)
    print ('traceback.print_exc():'; traceback.print_exc())
    print ('traceback.format_exc():\n%s' % traceback.format_exc())
    先输出下具体的错误信息,把错误信息贴出来看看

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 树莓派与pix飞控通信
  • ¥15 自动转发微信群信息到另外一个微信群
  • ¥15 outlook无法配置成功
  • ¥30 这是哪个作者做的宝宝起名网站
  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题