from bs4 import BeautifulSoup #网页解析,获取数据
import re #正则表达式,进行文字匹配
import urllib.request,urllib.error #制定URL,获取网页数据
import xlwt #进行excel操作
import sqlite3 #进行SQLite数据库操作
def main():
    """Crawl the Douban Top250 list and print each movie's detail-page URL."""
    baserl = 'https://movie.douban.com/top250?start='
    # BUG FIX: the original compiled `fike` as a LOCAL of main(), and only
    # AFTER calling getat() — but getat() resolves `fike` in the global
    # scope, so it crashed with NameError before the pattern ever existed.
    # Publish the compiled pattern globally, before getat() runs.
    global fike
    fike = re.compile(r'<a href="(.*?)">')
    # getat() prints as it goes and returns None; the original bound that
    # None to an unused `url1`, dropped here.
    getat(baserl)
def getat(baserl):
    """Fetch all 10 Top250 pages and print the detail link of every movie.

    baserl: base URL; the page offsets 0, 25, ..., 225 are appended to it.
    Returns None (results are printed, not collected).
    """
    # BUG FIX: the original referenced a global `fike` that was actually a
    # local of main() (and defined too late) — a guaranteed NameError.
    # Compile the pattern here, once, hoisted out of the loops.
    link_pat = re.compile(r'<a href="(.*?)">')
    for page in range(10):
        url = baserl + str(25 * page)
        html = gat(url)
        soup = BeautifulSoup(html, 'html.parser')
        for item in soup('div', class_='item'):
            item = str(item)
            links = link_pat.findall(item)
            # Guard the empty-match case: the original indexed [0]
            # unconditionally and would raise IndexError on a non-matching
            # item (e.g. when gat() returned '' after a failed request).
            if links:
                print(links[0])
def gat(url):
    """Download *url* and return its body decoded as UTF-8, or '' on failure.

    A browser User-Agent header is sent; douban.com rejects the default
    urllib agent.  Network errors are reported and swallowed so the caller
    can keep crawling the remaining pages.
    """
    head = {
        # NOTE: the original dict had a stray '' literal glued onto this
        # string (adjacent-literal concatenation, no effect) — removed.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 Edg/91.0.864.48"
    }
    req = urllib.request.Request(url, headers=head)
    html = ''
    try:
        response = urllib.request.urlopen(req)
        # BUG FIX: was `.docode('utf-8')` — bytes has no attribute `docode`;
        # the typo was hidden by the bare `except:` below.
        html = response.read().decode('utf-8')
    except urllib.error.URLError as e:
        # Narrow except with a real diagnostic; the original bare `except:`
        # caught everything (including the AttributeError from the typo)
        # and printed the meaningless marker '14'.
        print(f'request failed for {url}: {e}')
    return html
# Script entry point: run the crawl, then report completion.
if __name__ == '__main__':
    main()
    print('爬完')
简单的except报错问题,此代码一直报错,无法正常获取网址
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
2条回答 默认 最新
- 一只爱编程的书虫 2021-09-20 19:15关注
分析过程:
使用以下代码,可以追踪错误信息。from bs4 import BeautifulSoup #网页解析,获取数据 import re #正则表达式,进行文字匹配 import urllib.request,urllib.error #制定URL,获取网页数据 import xlwt #进行excel操作 import sqlite3 #进行SQLite数据库操作 def main(): baserl = 'https://movie.douban.com/top250?start=' url1 = getat(baserl) fike = re.compile(r'<a href="(.*?)">') def getat(baserl): for i in range(0,10): url = baserl+str(25*i) html = gat(url) soup = BeautifulSoup(html,'html.parser') for item in soup('div',class_='item'): item = str(item) save = [] like = re.findall(fike,item)[0] print(like) def gat(url): # global html head = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 Edg/91.0.864.48" ''} a =urllib.request.Request(url,headers=head) html = '' try: response = urllib.request.urlopen(a) html = response.read().docode('utf-8') except Exception as e: print(e) return html if __name__ == '__main__': main() print('爬完')
输出:
'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 'bytes' object has no attribute 'docode' 爬完
一看就知道是打错了。
改正后代码:from bs4 import BeautifulSoup #网页解析,获取数据 import re #正则表达式,进行文字匹配 import urllib.request,urllib.error #制定URL,获取网页数据 import xlwt #进行excel操作 import sqlite3 #进行SQLite数据库操作 def main(): baserl = 'https://movie.douban.com/top250?start=' url1 = getat(baserl) fike = re.compile(r'<a href="(.*?)">') def getat(baserl): for i in range(0,10): url = baserl+str(25*i) html = gat(url) soup = BeautifulSoup(html,'html.parser') for item in soup('div',class_='item'): item = str(item) save = [] like = re.findall(fike,item)[0] print(like) def gat(url): # global html head = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 Edg/91.0.864.48" ''} a =urllib.request.Request(url,headers=head) html = '' try: response = urllib.request.urlopen(a) html = response.read().decode('utf-8') except Exception as e: print(e) return html if __name__ == '__main__': main() print('爬完')
本人实测可正常执行。
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报
悬赏问题
- ¥15 lammps拉伸应力应变曲线分析
- ¥15 C++ 头文件/宏冲突问题解决
- ¥15 用comsol模拟大气湍流通过底部加热(温度不同)的腔体
- ¥50 安卓adb backup备份子用户应用数据失败
- ¥20 有人能用聚类分析帮我分析一下文本内容嘛
- ¥15 请问Lammps做复合材料拉伸模拟,应力应变曲线问题
- ¥30 python代码,帮调试
- ¥15 #MATLAB仿真#车辆换道路径规划
- ¥15 java 操作 elasticsearch 8.1 实现 索引的重建
- ¥15 数据可视化Python