2条回答 默认 最新
素影·流年 2023-01-13 20:53 关注 参考一下,望采纳
"""Download track files for one satellite from mmt.favor2.info.

Workflow implemented below:

1. ``LoadDownSatelites`` posts the search form (``post_dict``) to
   ``http://mmt.favor2.info/satellites`` and reads the site-side id out of
   the "Log in" link of the returned page.
2. For every result page, ``LoadPage`` collects the track ids found in the
   "download track" links.
3. ``checkStatus`` keeps only the tracks whose detail page contains the
   "Periodic ... Lightcurve period" marker.
4. ``Download`` saves every remaining track as
   ``<savedir>/<catalogue id>/track_<track id>.txt`` using ``wget``.

NOTE: the script still starts a download run for catalogue id 163 as soon
as the module is executed (see the last statement).
"""
import os
import random
import re
import threading
import time

import requests
import wget

# Guards every append to ``download`` made by the checkStatus worker threads.
Lock = threading.Lock()

# Track ids that passed checkStatus().  LoadPage() rebinds this to a fresh
# list for every page; it is defined here as well so that checkStatus() can
# be called on its own without raising NameError.
download = []

# Seconds to wait for the server before giving up on an HTML request.
# Without a timeout ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 60

# Form fields posted to the satellite search page.  ``catalogue_id`` is
# filled in by SetDict(); the remaining values are sent exactly as written.
post_dict = {
    'catalogue_id': '',
    'name': '',
    'comments': '',
    'var_nonvariable': 'on',
    'var_variable': 'on',
    'var_periodic': 'on',
    'var_pmin': '',
    'var_pmax': '',
    'action': 'search',
    'type_0': 'on',
    'type_1': 'on',
    'type_2': 'on',
    'type_3': 'on',
    'type_4': 'on',
    'type_5': 'on',
    'type_6': 'on',
    'type_7': 'on',
    'orb_incl_min': '',
    'orb_incl_max': '',
    'orb_period_min': '',
    'orb_period_max': '',
}

# Browser-like request headers.
# NOTE(review): no function in this script passes this dict to a request;
# it is kept because it is an existing module-level name.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': 'csrftoken=9jhoS62afWsXqv1DisuneTFkmWySczcN',
    'Host': 'mmt.favor2.info',
    'Referer': 'http://mmt.favor2.info/satellites',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76',
}

# Headers sent with the GET requests issued by checkStatus() and LoadPage().
_GET_HEADERS = {
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; Tablet PC 2.0; wbx 1.0.0; wbxapp 1.0.0; Zoom 3.6.0)"
}

# Text that must appear in a track page for the track to be downloaded.
_PERIODIC_TAG = '<span class="text-default">Periodic</span></td></tr><tr><td>Lightcurve period'

# Link to a track download; group 1 is the track id.  The capture excludes
# '/' and '"' so that several links on one line cannot be merged into one
# match.  The "Downoad" spelling is kept exactly as in the original pattern
# (presumably it mirrors the site's markup -- verify before "fixing" it).
_TRACK_LINK_RE = re.compile(
    r'<a href="/satellites/track/([^/"]+)/download" title="Downoad track">T</a>'
)

# "Log in" link of a result page; group 1 is whatever follows
# ``next=/satellites/`` inside the href (used as the site-side id).
_LOGIN_LINK_RE = re.compile(
    r'<a href="/accounts/login/\?next=/satellites/([^"]*)">Log in</a></li>'
)


def _decode_body(response):
    """Return the body of ``response`` decoded with its detected encoding.

    Setting ``response.encoding`` to ``apparent_encoding`` is the documented
    way to override the header-declared charset.  Unlike re-encoding
    ``response.text`` by hand, it does not fail when ``response.encoding``
    is ``None`` and it does not raise on undecodable bytes.
    """
    response.encoding = response.apparent_encoding
    return response.text


def _run_staggered(target, arg_tuples):
    """Run ``target(*args)`` in one thread per tuple and wait for all of them.

    A random pause of 0.1-1.0 s precedes each thread start so the requests
    are not fired at the server all at once.
    """
    threads = []
    for args in arg_tuples:
        time.sleep(random.randint(1, 10) / 10)
        thd = threading.Thread(target=target, args=args)
        thd.start()
        threads.append(thd)
    for thd in threads:
        thd.join()


def SetDict(id):
    """Store ``id`` (converted to ``str``) as the form's ``catalogue_id``."""
    post_dict['catalogue_id'] = str(id)


def Download(url, savedir):
    """Download ``url`` with ``wget`` and store it at ``savedir``.

    Args:
        url: Address to fetch; surrounding whitespace is ignored.
        savedir: Value handed to ``wget.download`` as ``out`` (LoadPage
            passes a full file path here).
    """
    url = url.strip()
    # Each message carries its own newline and is printed with end='' so a
    # line is emitted by a single print argument (presumably to limit
    # interleaving between worker threads).
    print('%s 正在下载 将保存至 %s\n' % (url, savedir), end='')
    wget.download(url, out=savedir)
    print('%s 已下载完毕 已保存至 %s\n' % (url, savedir), end='')


def checkStatus(track_id):
    """Append ``track_id`` to ``download`` if its page has the periodic marker.

    Fetches ``http://mmt.favor2.info/satellites/track/<track_id>`` and looks
    for ``_PERIODIC_TAG`` in the decoded page.  Designed to run in a worker
    thread: the shared list is only modified while ``Lock`` is held.
    """
    print('正在检查: %s\n' % (track_id), end='')
    response = requests.get(
        'http://mmt.favor2.info/satellites/track/%s' % (str(track_id)),
        headers=_GET_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    text = _decode_body(response)
    if _PERIODIC_TAG in text:
        print('%s 已通过检查\n' % (track_id), end='')
        # ``with`` releases the lock even if the append raises.
        with Lock:
            download.append(track_id)
    else:
        print('%s 未通过检查\n' % (track_id), end='')


def LoadPage(url, savedir):
    """Download every periodic track that is linked from one result page.

    Args:
        url: Address of the result page to scan for track download links.
        savedir: Directory that receives the ``track_<id>.txt`` files.
    """
    global download
    response = requests.get(url, headers=_GET_HEADERS, timeout=REQUEST_TIMEOUT)
    text = _decode_body(response)
    result = _TRACK_LINK_RE.findall(text)
    print('%s 中的下载地址已获取未筛选id(共%d项)' % (url, len(result)))

    # Phase 1: filter the ids; checkStatus() fills the fresh ``download``.
    download = []
    _run_staggered(checkStatus, [(sid,) for sid in result])
    print('%s 中的下载地址已获取以筛选的id(共%d项)' % (url, len(download)))

    # Phase 2: fetch the tracks that passed the filter.
    _run_staggered(
        Download,
        [
            (
                'http://mmt.favor2.info/satellites/track/%s/download' % (sid),
                os.path.join(savedir, 'track_%s.txt' % (sid)),
            )
            for sid in download
        ],
    )


def LoadDownSatelites(id, savedir):
    """Search for catalogue id ``id`` and download its periodic tracks.

    Args:
        id: Catalogue id to search for; it is formatted with ``%d`` in the
            progress message, so it must be an integer.
        savedir: Base directory; files are written to ``<savedir>/<id>``,
            which is created when missing.

    Raises:
        IndexError: If the returned page has no "Log in" link from which
            the site-side id can be read.
    """
    SetDict(id)
    response = requests.post(
        "http://mmt.favor2.info/satellites", post_dict, timeout=REQUEST_TIMEOUT
    )
    text = _decode_body(response)

    matches = _LOGIN_LINK_RE.findall(text)
    if not matches:
        # Same exception type as the original ``findall(...)[0]``, but with
        # a message that says what was actually missing.
        raise IndexError(
            'no "Log in" link with a /satellites/ target found in the '
            'search result for catalogue id %r' % (id,)
        )
    sid = matches[0]

    # The page count is derived from the number of ``?page=`` links plus one.
    # NOTE(review): this relies on how the site renders its pagination --
    # confirm for result sets with many pages.
    page = len(re.findall(r'/satellites/%s\?page=.' % re.escape(sid), text)) + 1
    print('查找到%d对应的编号%s,共%d页准备下载' % (id, sid, page))

    target_dir = os.path.join(savedir, str(id))
    if not os.path.exists(target_dir):
        print('%s不存在,程序已自动创建' % (target_dir))
        os.makedirs(target_dir)

    for pg in range(1, page + 1):
        LoadPage('http://mmt.favor2.info/satellites/%s?page=%d' % (sid, pg), target_dir)


# Runs on execution/import exactly as in the original script.
LoadDownSatelites(163, './Data')

# Forum footer that followed the code in the original post (not part of the
# script): "This answer was chosen by the asker as the best answer."