代码如下:
#!/usr/bin/env python
# coding=utf-8
#import importlib,sys
#import sys
#sys.setdefaultencoding('gbk')
from urllib.parse import quote
'''import sys
import imp
import sys
reload(sys)
sys.setdefaultencoding('utf8')
'''
'''
import urllib
import urllib2
import requests
import sys
sys.setdefaultencoding('utf-8')
import jieba
import json'''
#from bs4 import BeautifulSoup
import urllib.request
import urllib.parse as parse
import ssl
import re
import os,os.path
import codecs
import requests
def getText(html):
'''headers = {'Host': 'https://pypi.org','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://pypi.org/search/?c=Programming+Language+%3A%3A+Python+%3A%3A+3',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, sdch, br',
'Accept-Language': 'zh-CN,zh;q=0.8'}
#req = urllib.urlretrieve(download_url,headers=headers)
'''
#urllib.request.urlopen('https://www.lfd.uci.edu/~gohlke/pythonlibs/')
#req = urllib.request.Request(url=url,headers=header)
#headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
#import requests
res = requests.get(html)
res.encoding = 'utf-8'
#print(res.text)
words=res.text
'''
soup = BeautifulSoup(res.text, "html.parser")
words = ""
for a1 in soup.find_all("a"):
words = words + str(a1.string)
'''
return words
def file(url1,file_name,name):
print(url1)
headers = {'Host': 'https://files.pythonhosted.org/packages/','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://pypi.org/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, sdch, br',
'Accept-Language': 'zh-CN,zh;q=0.8'}
#req = urllib.urlretrieve(download_url,headers=headers)
#urllib.request.urlopen('https://www.lfd.uci.edu/~gohlke/pythonlibs/')
#req = urllib.request.Request(url=url,headers=header)
request = requests.get(url=url1,headers=headers)
#response = urllib.request.urlopen(request)
global i
i += 1
print(request.content)
file = open(name ,'wb+')
file.write(request.content)
file.close()
print(file_name)
print("Completed : .... %d ..." % x)
'''for i in range(len(name_list)):
j=0
if name_list[i-24:i+1]=='https://pypi.org/project/':
name_list1.append(name_list[i+1:i+60])'''
def get(url):
global name_list1
res=getText(url)
#print('\n\n\n\n\n\nok\n\n\n\n\n\n\n\n\n\n')
#name_list = getText(url)
#print(res)
print('html done,page:'+str(count)+'\n')
for i in range(len(res)):
#j=0
if (res[i-8:i+1]=='/project/')==True:
name_list1.append('https://pypi.org'+res[i-8:i+20])
#print(name_list1)
def trim(list1):
k=0
list2=[]
for i in list1:
j=25
while j<len(list1[k]):
if list1[k][j]=='/':
list2.append(list1[k][0:j])
break
j+=1
k+=1
return list2
def get1(url):
"""o=0
for n in len(url):
if url[n]=='"':
url=url[0:n-1]+'#files'
"""
global namelist
url=url+'#files'
#import requests
res = requests.get(url)
res.encoding = 'utf-8'
#print(res.text)
html=res.text
for p in range(len(html)):
stri='https://files'
if html[p-len(stri):p]==stri:
namelist.append(html[p-len(stri):p+170])
import httplib2 as httplib
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
'''
#-*- coding:utf-8 -*-
import time
import hmac
import hashlib
import requests
import json
import mysql.connector
import requests
import httplib2 as httplib
import urllib
from urllib import unquote
import json
def query_total_flow():
header = {"Content-Type": "application/json", 'Connection': 'close', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
post_data = {
"operator": "xxxxxx", # 调用系统的名字
"type": "set",
"set_id": [1],
"set_name": [],
"feature_type": ["入流量"],
"date": "2019-06-15",
"begintime": "23:55",
"endtime": "23:59",
}
url = "http://xxx.xxx.xxx.xxx:80/xxxxx/xxxxx/xxxxx.cgi/json?"
post_data = json.dumps(post_data, ensure_ascii=False, separators=(',',':'))
print (post_data)
# url = url + post_data
url = url + urllib.urlencode({'data': post_data})
# data = urllib.urlencode({'data': post_data})
# print post_data
# print data
data = unquote(url)
try:
# print data
print (data)
response = requests.get(url, json=data, timeout=60, headers=header)
print (response.headers)
print (response.url)
print (response.text.encode('utf-8'))
if response['code'] != 0:
result_dict = json.loads(response.text)
data = result_dict["data"]
print(data)
print(data)
set_info = []
return response
raise exp_with_err_instance(err.RESULT_ERROR, 'can not find inst info')
print ("none!")
return []
except Exception as e:
print ("Exception")
raise
if __name__ == "__main__":
query_total_flow()
'''
save_path = os.getcwd()
'''
url = 'https://pypi.org/search/?c=Programming+Language+%3A%3A+Python+%3A%3A+3'
name_list = getText(url)
print(name_list)
print('html done')
#html.decode('utf-8')
#print(name_list)'''
x=1
files=os.listdir(save_path)
#print(files)
#print(type(name_list))
name_list1=[]
#print(name_list)
#for name in name_list:
k=0
# name[k]=str(name1[k])
'''for i in range(len(name_list)):
j=0
if name_list[i-25:i+1]=='https://pypi.org/project/':
name_list1.append(name_list[i-25:i+20])
for u in range(len(name_list1[len(name_list1)])):
if name_list1[len(name_list1)][u]==' ':
name_list1[len(name_list1)]=name_list1[len(name_list1)][0:u-1]
'''
global count
count=2
name_list1=[]
for count in range(51):
get('https://pypi.org/search/?c=Programming+Language+%3A%3A+Python+%3A%3A+3&page='+str(count))
""" global m
if k<len(name_list1):
for l in range(len(name_list1[k])):
if l-9>=0:
if name_list1[k][l-4:l]=='.whl' or name_list1[k][l-3:l]=='.gz' or name_list1[k][l-4:l]=='.zip':
j=1
m=l
if j==1:
name_list1[k]=name_list1[k][0:m]
k+=1"""
'''if j==0:
name_list.remove(name)'''
#file_name = os.path.join(save_path ,name)
#i=0
#print(name)
#print(name_list1)
namelist=[]
h=0
for y in trim(name_list1):
get1(y)
#print(namelist)
'''if h==3:
break'''
h+=1
i=0
for name in namelist:
j=0
for l in range(len(name)):
if l-9>=0:
if name[l-4:l]=='.whl' or name[l-3:l]=='.gz' or name[l-4:l]=='.zip':
j=1
m=l
break
if j==1:
name=name[0:m]
k+=1
while m>0:
if m<len(name):
if name[m]=='/':
filename=name[m+1:len(name)]#p]
break
m-=1
if filename in files:
continue
'''if name=='Delny‑0.4.1‑cp27‑none‑win_amd64.whl</a></li>\n<li>' or name==Delny‑0.4.1‑cp27‑none‑win32.whl</a></li>
</ul>
</:
continue
'''
print('no:'+str(x))
print('\ndownload '+name)
# importlib.reload(sys)
#imp.reload(sys)
for l in range(len(name)):
if l-9>=0:
if name[l-4:l]=='.whl' or name[l-3:l]=='.gz' or name[l-4:l]=='.zip':
j=1
m=l
break
if j==1:
name=name[0:m]
k+=1
p=m
#string='https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/' + name#[0:4+name.find('.whl')]#https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/
print('00'+save_path)
#file(name,save_path,filename)
url1=name +'/' + filename
url1=url1.encode()
name=filename
file_name=save_path
#file = open(name ,'wb+')
#file.write(url1 )
#file.close()
#print(file_name)
headers = {'Host': 'https://files.pythonhosted.org/packages/','User-Agent':'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER','Referer': 'https://pypi.org/',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, sdch, br',
'Accept-Language': 'zh-CN,zh;q=0.8'}
#req = urllib.urlretrieve(download_url,headers=headers)
#urllib.request.urlopen('https://www.lfd.uci.edu/~gohlke/pythonlibs/')
#req = urllib.request.Request(url=url,headers=header)
#request = urllib.request.urlopen(url1)
#response = urllib.request.urlopen(request)
urllib.request.urlretrieve(url1.decode(),name)
i += 1
print(url1.decode())
#file = open(name ,'wt+')
#file.write(str(req.content()))
#file.close()
print(file_name)
print("Completed : .... %d ..." % x)
'''for i in range(len(name_list)):
j=0
if name_list[i-24:i+1]=='https://pypi.org/project/':
name_list1.append(name_list[i+1:i+60])'''
print('\n........'+filename+'..........complete\n')
x=x+1
print('09')
print('finished')
报错:
Python 3.8.0 (tags/v3.8.0:fa919fd, Oct 14 2019, 19:37:50) [MSC v.1916 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
===================== RESTART: E:\2345Downloads\版本2下载whl.py ====================
Traceback (most recent call last):
File "E:\2345Downloads\版本2下载whl.py", line 154, in
httplib.HTTPConnection._http_vsn = 10
AttributeError: module 'httplib2' has no attribute 'HTTPConnection'如果不加
import httplib2 as httplib
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
就会
Traceback (most recent call last):
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 665, in urlopen
httplib_response = self._make_request(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "", line 3, in raise_from
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1322, in getresponse
response.begin()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 303, in begin
version, status, reason = self._read_status()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\socket.py", line 669, in readinto
return self._sock.recv_into(b)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
raise SocketError(str(e))
OSError: (10060, 'WSAETIMEDOUT')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\adapters.py", line 439, in send
resp = conn.urlopen(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 719, in urlopen
retries = retries.increment(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\retry.py", line 400, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
raise value.with_traceback(tb)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 665, in urlopen
httplib_response = self._make_request(
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "", line 3, in raise_from
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 1322, in getresponse
response.begin()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 303, in begin
version, status, reason = self._read_status()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\socket.py", line 669, in readinto
return self._sock.recv_into(b)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
raise SocketError(str(e))
urllib3.exceptions.ProtocolError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:\2345Downloads\版本2下载whl.py", line 282, in
get1(y)
File "E:\2345Downloads\版本2下载whl.py", line 141, in get1
res = requests.get(url)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))
求高手解决