import requests
import re
url = "https://www.dytt8.net/index2.htm"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62"
}
resp = requests.get(url, headers=headers)
resp.encoding = "gb2312"
# print(resp.text)
obj1 = re.compile(r"最新影片推荐(?P<ul>.*?)</ul>", re.S)
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)
obj3 = re.compile(r'"◎译 名(?P<title>.*?)<br />.*?<font color=red>下载地址2:<a href="(?P<download>.*?)" '
r'target="_blank" title="迅雷电影">', re.S)
result1 = obj1.finditer(resp.text)
child_href_list = []
for it in result1:
ul = it.group("ul")
# print(ul)
result2 = obj2.finditer(ul)
for itt in result2:
child_href = url.strip("index2.htm") + itt.group('href').strip("/")
child_href_list.append(child_href)
# print(itt.group("href"))
for href in child_href_list:
child_resp = requests.get(href)
child_resp.encoding = "gb2312"
result3 = obj3.finditer(child_resp.text)
print(result3.group("download"))
报错信息如下
Traceback (most recent call last):
File "C:/Users/15348/PycharmProjects/untitled5/爬虫/ziji.py", line 27, in <module>
child_resp = requests.get(href)
File "C:\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Python\Python37\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 529, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 639, in send
adapter = self.get_adapter(url=request.url)
File "C:\Python\Python37\lib\site-packages\requests\sessions.py", line 732, in get_adapter
raise InvalidSchema("No connection adapters were found for {!r}".format(url))
requests.exceptions.InvalidSchema: No connection adapters were found for 'ps://www.dytt8.net/html/gndy/dyzz/20211225/62153.html'
求哥们看看是啥问题,报错的网站点击打不开,但是复制到浏览器却能打开。。不知道咋回事,求解