python代码:
#!/usr/bin/env python
# coding=utf-8
# NOTE(review): the body of this script uses Python 3 APIs (urllib.request,
# print() as a function), so the Python 2-only lines
#     reload(sys)
#     sys.setdefaultencoding('utf8')
# were removed -- reload() is not a builtin in Python 3 and
# sys.setdefaultencoding() no longer exists, so both crash on import.
# The duplicate second `import sys` was folded into the one below.
#import importlib,sys
#import sys
#sys.setdefaultencoding('gbk')
import sys
import imp
import jieba
import json
from bs4 import BeautifulSoup
import urllib.request
import urllib3
import re
import os,os.path
import codecs
import requests
def getHtml(url):
    """Fetch *url* and return the raw response body as bytes.

    FIX(review): the original kept the response open and also mutated a
    module-level `global html`; the context manager guarantees the socket
    is closed, and the caller already assigns the return value to `html`.
    """
    with urllib.request.urlopen(url) as page:
        return page.read()
def download_file(download_url, file_name):
    """Download *download_url* and write the body to *file_name*.

    Sends a browser-like User-Agent (the original built this dict but never
    used it because the Request line was commented out; some mirrors reject
    the default Python UA).  Both the response and the output file are
    closed via context managers.
    """
    print(download_url)
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'}
    req = urllib.request.Request(download_url, headers=headers)
    with urllib.request.urlopen(req) as response, open(file_name, 'wb') as out:
        out.write(response.read())
    print(file_name)
    # FIX(review): the original printed "Completed : .... %d ..." % x,
    # silently depending on a module-level counter `x` (NameError when the
    # function is called on its own); report the finished file instead.
    print("Completed : .... %s ..." % file_name)
# Directory the downloaded wheels are written to and checked against.
save_path = 'E:\\2345Downloads'
url = 'https://www.lfd.uci.edu/'
# Fetch the live site root once...
html = getHtml(url)
# NOTE(review): ...and immediately discard it -- the literal below overwrites
# `html` with a saved snippet of the pythonlibs aiohttp section, so the
# getHtml() call above is effectively dead code.  The snippet is the data the
# rest of the script parses; do not edit it.
html='''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!-- saved from url=(0048)https://www.lfd.uci.edu/~gohlke/pythonlibs/#lxml -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<li><a id="aiohttp"></a><strong><a href="https://github.com/KeepSafe/aiohttp/">Aiohttp</a></strong>: a http client/server for asyncio.
<ul>
<li><a href="javascript:;" onclick=" javascript:dl([101,111,51,46,118,106,97,54,53,100,95,119,52,110,56,45,99,104,112,116,47,113,108,109,105,50,115], "IH4DA37BC5G0@BBA>1262H>?A1=>?A1=>:G<95F86;2:@E"); "javascript: dl("" title="[631 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp38‑cp38‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,118,53,99,112,47,113,51,106,105,45,54,116,110,50,56,111,104,119,46,108,115,97], "D=75301;4E8?@;;396B:B=9236>9236>9A8<6=BA@C"); "javascript: dl("" title="[600 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp38‑cp38‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,118,47,100,45,97,109,52,99,115,53,104,106,108,54,116,51,55,95,105,46,110,112,50,111,119,113], "8F;IE09>14BG:>>E3?C=CF37E?@37E?@53HBDA452=6CH:<"); "javascript: dl("" title="[624 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp37‑cp37m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,118,99,45,104,112,116,119,47,113,109,97,110,50,106,111,105,115,108,46,53,55,51,54], "@<=840C57:?>35542EBFB<214ED214ED926?;E<B63A"); "javascript: dl("" title="[596 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp37‑cp37m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,50,109,54,104,47,112,52,115,51,99,110,108,118,119,97,46,95,113,53,100,45,106,116,111,105], "70EA5<BF4>HG3FF5D8?2?0D9582D95821D=H:@>1C26?=3;"); "javascript: dl("" title="[617 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp36‑cp36m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,113,111,54,115,47,109,46,45,104,118,119,50,105,99,108,97,106,110,53,116,112,51], "3;@0D9BC4?<18CCD7E626;7=DE27=DE257:<AE;6:8>"); "javascript: dl("" title="[590 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp36‑cp36m‑win32.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,115,51,118,52,113,50,110,97,95,54,112,100,104,47,119,53,106,109,46,105,108,111,116,45,99], "05@4:2?F=H:1?=7CE<FF:G1B9B5GH:1?GH:1?AG>C687A;93B><D"); "javascript: dl("" title="[612 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp35‑cp35m‑win_amd64.whl</a></li>
<li><a href="javascript:;" onclick=" javascript:dl([101,106,54,118,108,104,116,110,111,105,97,99,113,115,50,47,112,51,46,119,45,109,53], "<=0;?2E5>:?@E>987455?C@A1A=C:?@EC:?@EDCB86@=AB43"); "javascript: dl("" title="[584 KB] [Oct 10, 2019]">aiohttp‑3.6.2‑cp35‑cp35m‑win32.whl</a></li>
'''
print('html done')
#html.decode('utf-8')
#print(html)
# Re-fetch the same URL with requests (browser-like UA) so BeautifulSoup can
# parse decoded text; this is independent of the `html` snippet above.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'}
r = requests.get(url, headers = headers)
r.encoding = "utf-8"
soup = BeautifulSoup(r.text, "html.parser")
#html_mod=re.sub(pattern=".",repl=".",string=html.decode('utf-8'))
# Debug dump of every link found on the fetched page.
for link in soup.find_all('a'): # soup.find_all returns a list of matching tags
    print(link.get('href'))
#name_list+=link
def _extract_wheel_names(page_html):
    """Return every wheel file name embedded in *page_html*.

    On the pythonlibs page each download link is rendered as
    ...title="[size] [date]">NAME.whl</a>, so each wheel name sits between
    the ']">' marker and the closing tag; the non-greedy match stops at the
    first '.whl'.
    """
    return re.findall(r'\]">([^<]*?\.whl)', page_html)

# The parsing below works on the saved snippet stored in `html`.
name_list = html
# Running download counter, read by the download loop further below.
x = 1
# Files already present in the target directory (used to skip re-downloads).
files = os.listdir(save_path)
print(files)
print(type(name_list))
name_list = str(name_list)
# FIX(review): the original scanned character by character for ']">', kept a
# fixed 49-character window after each match, and trimmed only names ending
# in 'amd64.whl' -- so win32 wheel names kept trailing '</a></li>' HTML
# garbage.  The regex helper trims every name at its '.whl' suffix instead.
name_list1 = _extract_wheel_names(name_list)
print(name_list1)
# Download every wheel that is not already present in save_path.
for name in name_list1:
    if name in files:
        # Already downloaded -- skip.
        continue
    print('no:'+str(x))
    print('\ndownload'+name)
    # FIX(review): the original passed save_path (a directory) as the target
    # file name, so open(file_name, 'wb') inside download_file fails with
    # IsADirectoryError/PermissionError.  Save each wheel under its own name
    # inside save_path instead.
    download_file('https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/'+name,
                  os.path.join(save_path, name))
    x = x + 1
print(name_list)
print('finished')
运行之后就像这样（注意：下面启动的是 Python 2.7.17 的 IDLE，而上面的代码用到了 urllib.request、print() 函数等 Python 3 写法，在 Python 2 下无法运行；而 reload(sys)/sys.setdefaultencoding 又只存在于 Python 2，所以无论用哪个版本的解释器这段代码都会在启动时报错）：
Python 2.7.17 (v2.7.17:c2f86d86e6, Oct 19 2019, 21:01:17) [MSC v.1500 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license()" for more information.
================== RESTART: E:\2345Downloads\111 - 副本.py ==================
运行helloworld没问题:
代码:
print('helloworld')
运行结果:
======================= RESTART: E:/2345Downloads/2.py =======================
helloworld