# Code (forum paste):
from downloader import Downloader #, cStringIO, cPickle
from threading import Thread
from time import sleep
import log2 as log
from os.path import basename
import requests as req
import pickle
from os.path import exists
# Path of the pickled download-state database (Windows-style path).
db='E:/tmp/download.data'
def append(obj, db_path=None):
    """Persist *obj* (a dict with at least a 'url' key) in the pickle DB.

    The database is a dict keyed by URL; an existing record for the same
    URL is overwritten.  A corrupt or unreadable database file is treated
    as empty rather than crashing.

    obj: record to store; obj['url'] is the key.
    db_path: optional database file path; defaults to the module-level `db`.
    """
    path = db_path if db_path is not None else db
    try:
        if exists(path):
            with open(path, 'rb') as f:
                data = pickle.load(f)
        else:
            data = {}
    except Exception:
        # Unreadable/corrupt DB: start over with an empty one.
        data = {}
    data[obj['url']] = obj
    with open(path, 'wb') as f:
        pickle.dump(data, f)
def load(url, db_path=None):
    """Return the stored record for *url*, or None when it is absent.

    Any failure to read or unpickle the database also yields None, so
    callers can always treat the result as "no saved state".

    url: key to look up.
    db_path: optional database file path; defaults to the module-level `db`.
    """
    path = db_path if db_path is not None else db
    if not exists(path):
        return None
    try:
        with open(path, 'rb') as f:
            data = pickle.load(f)
        return data.get(url)
    except Exception:
        return None
def out(msg):
    """Emit *msg* on stdout; single hook to swap in real logging later."""
    print(msg)
import time
from os.path import basename, exists, getsize
from queue import Queue
from threading import Lock, Thread, current_thread
import requests as req
import random as rand
import conf
class Downloader:
    """Multi-threaded HTTP downloader with resume support.

    When the server honours byte ranges (HTTP 206 / ``Accept-Ranges:
    bytes``) the file is fetched by several worker threads, each pulling
    ``range_size`` slices from a shared job iterator; otherwise a single
    sequential stream is used.  Progress (offset + failed ranges) is
    persisted through the sibling ``conf`` module so an interrupted
    download can be resumed later.
    """

    KB = 1024
    MB = KB * KB
    GB = KB * MB
    range_size = MB              # bytes per range job handed to a worker
    max_workers = 10             # upper bound on concurrent range workers
    spd_refresh_interval = 1     # seconds between speed reports
    user_agents = [
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
        # NOTE(fix): the original list was missing the comma after the next
        # entry, which silently concatenated two user-agent strings into one.
        'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
    ]
    chunk_size = KB              # bytes read from the socket per iteration
    max_error = 0.1              # max error ratio tolerated per worker
    max_error_one_worker = 0.5   # max error ratio tolerated for the last worker
    home = 'E:/tmp/'             # download directory

    def __init__(self, c):
        """c: dict with at least {'url': ...}; saved state for the same URL
        (via conf.load) takes precedence over a fresh probe."""
        self.__locks = {name: Lock() for name in
                        ('file', 'worker_info', 'itr_job', 'download_info')}
        self.__config = c
        self.__alive = False
        self.__fails = Queue()        # failed / unfinished (start, end) ranges
        self.__conf = c
        saved = conf.load(c['url'])
        if saved:
            self.__conf = saved
            self.__init_from_conf()
        else:
            self.__init_task()

    def __init_from_conf(self):
        """Restore the offset and pending failed ranges of a saved session."""
        self.__download_offset = self.__conf['offset']
        for job in self.__conf['fails']:
            self.__fails.put(job)

    def __get_agent(self):
        """Return a random User-Agent string."""
        return rand.choice(self.user_agents)

    def __init_task(self):
        """Probe the URL to detect range support and total content length.

        NOTE(fix): the original overwrote the ``Range: bytes=0-0`` probe
        with an unrelated pypi Host/Referer header set and requested gzip
        encoding, which breaks Content-Length/Content-Range based sizing.
        """
        headers = {
            'Range': 'bytes=0-0',   # 1-byte probe to detect range support
            'User-Agent': self.__get_agent(),
            'Connection': 'keep-alive',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8',
        }
        try:
            r = req.get(self.__conf['url'], headers=headers, stream=True)
            self.__conf['name'] = (basename(self.__conf['url'])
                                   or str(int(round(time.time() * 1000))))
            self.__conf['206'] = (r.status_code == 206
                                  or r.headers.get('Accept-Ranges') == 'bytes')
            if self.__conf['206']:
                # Content-Range is "bytes 0-0/<total>"; fall back to
                # Content-Length when the server omitted it.
                content_range = r.headers.get('Content-Range')
                if content_range:
                    self.__conf['len'] = int(content_range.split('/')[-1])
                else:
                    self.__conf['len'] = int(r.headers['Content-Length'])
            elif r.status_code != 200:
                out('init task err')
                return
            else:
                self.__conf['len'] = int(r.headers['Content-Length'])
            r.close()
            self.__download_offset = 0
            self.__conf['init'] = True
        except Exception as e:
            out(e)

    def __itr_job(self):
        """Return the next (start, end) byte range, or (-1, -1) when done.

        Previously failed ranges are retried before new ranges are issued.
        """
        with self.__locks['itr_job']:
            if not self.__fails.empty():
                return self.__fails.get()
            if self.__download_offset < self.__conf['len']:
                start = self.__download_offset
                end = min(self.__conf['len'] - 1, start + self.range_size - 1)
                self.__download_offset += self.range_size
                return (start, end)
            return (-1, -1)

    def __has_job(self):
        """True while unassigned or failed ranges remain."""
        with self.__locks['itr_job']:
            return (self.__download_offset < self.__conf['len']
                    or not self.__fails.empty())

    def __download_no_206(self):
        """Single-stream download for servers without range support."""
        headers = {'User-Agent': self.__get_agent()}
        r = req.get(self.__conf['url'], headers=headers, stream=True)
        self.__download_offset = 0
        if r.status_code != 200:
            r.close()
            self.__stopped()
            return
        try:
            for chunk in r.iter_content(chunk_size=self.chunk_size):
                if self.__kill_signal:
                    break
                self.__file.write(chunk)
                n = len(chunk)
                self.__down_bytes += n
                self.__download_offset += n
                now = time.time()
                elapsed = now - self.__last_time
                if elapsed >= self.spd_refresh_interval:
                    self.__down_spd = self.__down_bytes / elapsed
                    out('downspd: %d KB/s' % (self.__down_spd / self.KB))
                    self.__last_time = now
                    self.__down_bytes = 0
        except Exception:
            pass  # network error ends the stream; bookkeeping happens below
        r.close()
        self.__stopped()

    def __too_many_errors(self, error, total):
        """Decide whether a range worker should give up.

        A worker quits once its error ratio exceeds max_error, unless it is
        the last worker alive, which holds on until max_error_one_worker.
        """
        if error <= self.max_error * total:
            return False
        with self.__locks['worker_info']:
            workers = self.__current_workers
        return error > self.max_error_one_worker * total or workers > 1

    def __download_206(self):
        """Range-download worker loop (one thread per worker)."""
        file_len = self.__conf['len']
        total = 0
        error = 0
        kill = False
        with req.session() as sess:
            while True:
                s, e = self.__itr_job()
                if s == -1:
                    out('no job stop')
                    break
                headers = {'Range': 'bytes=%d-%d' % (s, e),
                           'User-Agent': self.__get_agent()}
                try:
                    r = sess.get(self.__conf['url'], headers=headers, stream=True)
                    total += 1
                    if r.status_code != 206:
                        self.__fails.put((s, e))
                        error += 1
                        if self.__too_many_errors(error, total):
                            break
                        continue
                    for chunk in r.iter_content(chunk_size=self.chunk_size):
                        with self.__locks['worker_info']:
                            if self.__kill_signal:
                                kill = True
                        if kill:
                            break
                        with self.__locks['file']:
                            self.__file.seek(s)
                            self.__file.write(chunk)
                            n = len(chunk)
                            s += n
                        with self.__locks['download_info']:
                            self.__down_bytes += n
                            now = time.time()
                            elapsed = now - self.__last_time
                            if elapsed >= self.spd_refresh_interval:
                                # NOTE(fix): compute the speed BEFORE printing;
                                # the original printed the stale previous value.
                                self.__down_spd = self.__down_bytes / elapsed
                                out('downspd: %d KB/s' % (self.__down_spd / self.KB))
                                self.__down_bytes = 0
                                self.__last_time = now
                    if s <= e and s < file_len:
                        # Partially read range: requeue the remainder.
                        self.__fails.put((s, e))
                    if kill:
                        break
                except Exception:
                    self.__fails.put((s, e))
                    error += 1
                    if self.__too_many_errors(error, total):
                        break
        self.__stopped()

    def __start_worker(self, target):
        """Start one worker thread if allowed; False once stopping."""
        with self.__locks['worker_info']:
            if self.__kill_signal:
                return False
            if self.__current_workers < self.max_workers:
                Thread(target=target).start()
                self.__current_workers += 1
                out('new worker started,current workers %d' % self.__current_workers)
            return True

    def __start_workers(self):
        """Ramp range workers up gradually to avoid a connection burst."""
        for _ in range(self.max_workers):
            if not self.__start_worker(self.__download_206):
                break
            time.sleep(0.8)

    def start(self):
        """Begin (or resume) the download; returns immediately."""
        if self.__alive:
            out('already started!')
            return
        if self.__conf.get('status') == 'done':
            out('already done')
            return
        self.__alive = True
        self.__kill_signal = False
        self.__conf['status'] = 'working'
        self.__down_bytes = 0
        self.__down_spd = 0
        self.__last_time = 0
        self.__current_workers = 0
        self.__start_time = time.time()
        try:
            path = self.home + self.__conf['name']
            # Reopen without truncating when resuming a partial file.
            self.__file = open(path, 'rb+' if exists(path) else 'wb')
            # NOTE(fix): the original branch was inverted — it launched the
            # multi-range workers when the server did NOT support ranges and
            # the sequential stream when it did.
            if self.__conf['206']:
                Thread(target=self.__start_workers).start()
            else:
                self.__start_worker(self.__download_no_206)
            out('starting done!')
        except Exception:
            out('starting failed')

    def stop(self):
        """Ask all workers to stop; state is persisted once they exit."""
        if self.__kill_signal:
            return
        out('stopping')
        with self.__locks['worker_info']:
            self.__kill_signal = True
            if self.__conf['status'] == 'working':
                self.__conf['status'] = 'stopped'

    def __after_stopped(self):
        """Final bookkeeping once the last worker exits.

        Called with the worker_info lock held (from __stopped).
        """
        if not self.__kill_signal:
            self.__kill_signal = True
        # NOTE(fix): the original assigned a LOCAL variable `__alive` here,
        # so the instance attribute stayed True and start() could never run
        # again.
        self.__alive = False
        self.__file.close()
        out('total time: %.2f' % (time.time() - self.__start_time))
        self.__conf['offset'] = self.__download_offset
        if not self.__has_job():
            self.__conf['status'] = 'done'
        elif self.__conf.get('status') != 'stopped':
            self.__conf['status'] = 'error'
        leak = 0
        pending = []
        while not self.__fails.empty():
            job = self.__fails.get()
            leak += job[1] - job[0] + 1
            pending.append(job)
        self.__conf['fails'] = pending
        leak += max(self.__conf['len'] - self.__download_offset, 0)
        out('total leak: %d' % leak)
        conf.append(self.__conf)

    def __stopped(self):
        """Called by each worker on exit; the last one out runs cleanup."""
        with self.__locks['worker_info']:
            self.__current_workers -= 1
            out('%s stopped' % current_thread().name)
            if self.__current_workers == 0:
                self.__after_stopped()
#!/usr/bin/env python
# coding=utf-8
#import importlib,sys
#import sys
#sys.setdefaultencoding('gbk')
'''import sys
import imp
import sys
reload(sys)
sys.setdefaultencoding('utf8')
'''
'''
import sys
sys.setdefaultencoding('utf-8')
import jieba
import json'''
def main():
    """Scan a saved snippet of the lfd.uci.edu package listing for wheel /
    archive file names and download each missing one with Downloader.

    The listing's link text uses U+2011 (non-breaking hyphen); it is
    normalised to ASCII '-' BEFORE checking for already-downloaded files
    and before building the download URL — the original normalised only
    the URL, so the duplicate check never matched files on disk.  Dead
    code (unused BeautifulSoup/urllib experiments, a never-called `file()`
    helper, a network fetch whose result was immediately discarded) was
    removed.
    """
    import os
    import socket

    # Fail fast on stalled sockets instead of hanging forever.
    socket.setdefaulttimeout(5)

    save_path = os.getcwd()

    # Hard-coded snapshot of the relevant part of the
    # https://www.lfd.uci.edu/~gohlke/pythonlibs/ listing.
    html = '''
</li>
<li><a id="kwant"></a><strong><a href="http://kwant-project.org/">Kwant</a></strong>: quantum transport simulations made easy.<br>
Requires <a href="https://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy">numpy+mkl</a> and <a href="https://www.lfd.uci.edu/~gohlke/pythonlibs/#tinyarray">tinyarray</a>.
<ul>
<li><a href="javascript:;" onclick=" javascript:dl([101,116,54,104,51,56,113,108,46,99,118,106,49,119,109,45,50,110,115,95,112,107,47,105,97,53,52,100], "A?:5C9H0ED<G@0>;7I7;>8C34>8C34><F@BG=J1I7<26"); "javascript: dl("" title="[2.5 MB] [Jul 06, 2019]">kwant‑1.4.1‑cp38‑cp38‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,45,107,108,97,47,116,113,110,99,56,49,118,46,104,50,115,105,53,112,106,119,52,51], "?>C6B;A541D3750:<E<:08BF908BF90D@7F><D=2"); "javascript: dl("" title="[2.1 MB] [Jul 06, 2019]">kwant‑1.4.1‑cp38‑cp38‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,46,48,105,104,110,51,107,108,99,115,118,109,113,55,100,53,47,54,50,49,119,45,116,112,97,95,52,106], "9BK<G:?F@6DH4FEC0J01E8G5=E8G5=;ED24IH;>AJ0D37"); "javascript: dl("" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp37‑cp37m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,52,112,106,45,109,51,99,108,48,104,107,46,53,118,97,105,116,113,119,47,55,50,110,49,115], "HE2A1=<@C:B>F@3G;0;83615D3615D43B?F5E;B97"); "javascript: dl("" title="[2.1 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp37‑cp37m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,112,99,97,49,105,54,113,115,108,109,53,52,116,51,118,106,107,110,104,50,95,47,48,45,119,100,46], "7C?60>:<E@H2A<G3J;JFG10=5G10=59GH4AD29I5;JHB8"); "javascript: dl("" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp36‑cp36m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,47,48,107,53,108,49,119,52,105,110,115,50,104,112,106,116,45,51,113,99,97,46,118,109,54], ":;>B=F3?026D9?@5E7E1@C=AH@C=AHG@689A;E6<4"); "javascript: dl("" title="[2.1 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp36‑cp36m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,97,51,53,50,107,46,105,54,49,47,104,52,109,100,115,118,119,108,48,112,116,45,99,113,110,106,95], ">3IGC?2D9FC1294@0HDE85;5BEFC12EFC12<E@6HJ0<=7;5@:A"); "javascript: dl("" title="[2.4 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp35‑cp35m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,46,52,50,104,49,118,119,99,48,107,113,97,115,47,51,45,105,110,112,53,108,106,116,109], "<2E:B5CF=7B>C=96;AF?40108?7B>C?7B>CG?6@A>2063D"); "javascript: dl("" title="[2.0 MB] [Feb 28, 2019]">kwant‑1.4.0‑cp35‑cp35m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,49,108,109,119,46,113,107,97,95,99,105,110,53,51,104,100,116,112,54,50,52,115,45,118,47,106], "ECI5AG<@H9A=DH637;@F04=4CF9A=DF9A=D2F3:;872?BD43>1"); "javascript: dl("" title="[2.1 MB] [Jan 06, 2018]">kwant‑1.3.2‑cp34‑cp34m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,106,99,112,47,113,97,116,52,51,49,107,53,109,104,105,110,108,119,45,115,46,50,118], "CE042F;6312873:A5?6B9D8DEB1287B1287<BA>?8EDA=@"); "javascript: dl("" title="[1.8 MB] [Jan 06, 2018]">kwant‑1.3.2‑cp34‑cp34m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,51,110,97,105,113,45,47,104,108,99,112,46,100,109,55,53,95,119,49,106,50,107,54,118,116,52,115], "JDC4:G?H69:D>6EA21H5B;B;059:D>59:D>=5A31@2=<FI;A78"); "javascript: dl("" title="[13.5 MB] [May 15, 2017]">kwant‑1.1.3‑cp27‑cp27m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,55,113,108,46,104,107,106,49,115,118,99,50,119,47,45,51,97,116,110,53,109,112,105], "8;61E9CA=:E;0=5<@BA>7373?>:E;0>:E;0D><FB?;3<42"); "javascript: dl("" title="[6.7 MB] [May 15, 2017]">kwant‑1.1.3‑cp27‑cp27m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,113,99,45,52,119,95,49,111,100,116,48,46,47,118,55,97,112,108,107,115,54,110,105,50,101,104,53,106,109], "CGK0@=J9<1@G><B4?E926;:;J21@G>2E7EH24FE5?L8D3;4IA"); "javascript: dl("" title="[13.4 MB] [Sep 11, 2015]">kwant‑1.0.5‑cp27‑none‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,106,47,105,101,113,111,97,108,50,107,45,112,104,118,48,49,110,119,99,115,53,51,46,55,116], "C804;=DH1B;8G19A6@H:?F>FD:B;8G:@5@3:A2@E8FA<7"); "javascript: dl("" title="[6.7 MB] [Sep 11, 2015]">kwant‑1.0.5‑cp27‑none‑win32.whl</a></li>
</ul>
</li>
<li><a id="la"></a><strong><a href="https://github.com/kwgoodman/la">La</a></strong>: aka larry, the labeled numpy array.
<ul>
<li><a href="javascript:;" onclick=" javascript:dl([101,97,109,99,108,48,51,46,110,54,50,105,47,95,53,104,113,55,45,100,112,118,52,101,115,106,116,119], "G9H?CD=I;2C5=;30A46@646BFD4A2C5=A2C5=1AJ:7<01B8E6J>3"); "javascript: dl("" title="[139 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp35‑cp35m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,108,55,110,99,112,48,47,116,106,109,115,119,118,100,53,51,104,45,105,113,46,101,50,97], ":F8C4<>7634?>60GA5D1D5D=E<5A34?>A34?>9A;B2?FD;@0"); "javascript: dl("" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp35‑cp35m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,109,55,118,110,115,50,106,104,113,116,108,53,97,100,54,101,47,52,105,112,46,95,51,45,99,119,48], "4568C2;9@HCFA@:<GJD1DJD=?2JGHCFAGHCFA0GIB3E<0=>ADI7:"); "javascript: dl("" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp34‑cp34m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,105,48,55,112,101,52,53,99,51,115,50,109,97,47,110,118,113,106,116,108,46,119,104,100,45], "9:A@3?6B=7385=C<H1D2D1DG4?1H7385H7385;HE0>8:DEFC"); "javascript: dl("" title="[136 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp34‑cp34m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,45,105,55,100,116,97,99,106,113,101,95,54,50,110,104,47,115,119,118,109,108,52,46,53,48,112], "@<78IBG4?6I<2?D50HF2FHF39BH06I<206I<2C0A1=:5C3;EFA>D"); "javascript: dl("" title="[137 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp27‑cp27m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,119,55,53,48,105,118,112,47,115,45,108,116,97,113,104,99,100,106,51,110,109,46,50,101], "8FA=652;7?6F17:<93E1E3E@G539?6F19?6F1D904CBFE0>:"); "javascript: dl("" title="[136 KB] [Apr 11, 2016]">la‑0.7.0.dev0‑cp27‑cp27m‑win32.whl</a></li>
</ul>
</li>
'''
    print('html done')

    def _candidate_names(listing):
        """Return the 59 characters following each ']">' marker in *listing*."""
        found = []
        for i in range(len(listing)):
            if listing[i - 2:i + 1] == ']">':
                found.append(listing[i + 1:i + 60])
        return found

    def _trim(name):
        """Cut *name* just after the LAST '.whl'/'.gz'/'.zip' occurrence;
        return it unchanged when no archive extension is found."""
        cut = 0
        for l in range(9, len(name)):
            if (name[l - 4:l] == '.whl' or name[l - 3:l] == '.gz'
                    or name[l - 4:l] == '.zip'):
                cut = l
        return name[:cut] if cut else name

    existing = os.listdir(save_path)
    print(existing)

    names = [_trim(n) for n in _candidate_names(str(html))]
    print(names)

    x = 1
    for name in names:
        # Normalise the non-breaking hyphen before BOTH the duplicate check
        # and the URL, so the check matches files Downloader wrote earlier.
        name = name.replace('\u2011', '-')
        if name in existing:
            continue
        print('no:' + str(x))
        print('\ndownload ' + name)
        url = 'https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/' + name
        print('00' + save_path)
        d = Downloader({'url': url})
        d.start()
        x = x + 1
    print('finished')
# Run the scraper/downloader only when executed as a script.
if __name__ == '__main__':
    main()
# Original post footer: "求高手解决" (asking for expert help to fix this script).