m0_60607535 2021-08-06 11:29 采纳率: 100%
浏览 41
已结题

Python爬取需要登录的网站时,目标url应该是哪个?

我想要用cookie爬取一个网站,在输入cookie后,我的目标url应该是登录界面(./login/)还是登录后的界面(./htm-vessel/)?(后者是我想要的,但是爬取后得到的内容是这样的:)

<!DOCTYPE html><html><head><meta charset=utf-8><meta http-equiv=X-UA-Compatible content="IE=edge,chrome=1"><meta name=viewport content="width=device-width,initial-scale=1"><meta name=description content=æºå­¦ç½æ以学ç¨çº¿å­¦ä¹ åäºé
å·æå¡ä¸ºä¸»ä½ï¼ä¸ºå¹¿å¤§å¸ç¾
µ·é颺ãé««æé
å·ãå¨çº¿è¯
          æµãé颧£æ学縪æ§å­¦ä¹ 中å¿ç­å½ç
                                           å¨çº¿æè²å¹³å°ä¸åºç¨å·¥å
·ã><link 
type=image/x-icon href=//static.zhixue.com/zhixue.ico rel="shortcut icon"><title></title><link href=//static.zhixue.com/static-vessel/1.0.1044/static/css/app.33179a9cf98ae8e6e8f0b1916bfe5766.css rel=stylesheet></head><body><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><style>a,body,dd,div,dl,dt,h1,i,img,li,span,ul{margin:0;padding:0}body,html{height:100%}body{background:#f2f2f2!important}.clearfix:after,.clearfix:before{content:"";display:table}.clearfix:after{clear:both;overflow:hidden}a,a:hover{text-decoration:none}dl,ul{list-style:none}img{border:0 none}img[src=""]{display:none}div[zxfebs=mainApp]{display:block;box-sizing:border-box;min-height:600px!important;min-height:calc(100% - 162px)!important}.header-app-container{width:100%;min-width:1200px;height:62px;font-family:"Microsoft YaHei",arial,SimSun,sans-serif,tahoma;font-size:14px;color:#333;background:#fff;border-bottom:3px solid #0dc2b3;box-sizing:border-box}.header-app-container .header-main{width:1200px;height:100%;margin:0 auto}.header-app-container .header-logo{float:left;margin-right:25px}.header-app-container .header-logo a{display:block;height:60px;overflow:hidden}.header-app-container .header-logo a span{display:table-cell;height:60px;vertical-align:middle}.header-app-container .header-logo a span img{width:auto;height:auto;vertical-align:middle;max-width:200px;max-height:50px}.header-app-container .header-nav{float:left;max-width:930px;height:100%;padding-right:32px;box-sizing:border-box;overflow:hidden}.header-app-container .header-nav li{position:relative;float:left;margin-right:25px}.header-app-container .header-nav li:last-child{margin-right:0}.header-app-container .header-nav li>a{display:inline-block;line-height:60px;font-size:16px;color:#333}.header-app-container .header-nav li>a.on,.header-app-container .header-nav li>a:hover{color:#0fc3b4}.header-app-container .header-nav 
li>i.hot{display:inline-block;position:absolute;top:3px;min-width:15px;height:18px;line-height:18px;padding:0 2px;font-size:12px;font-style:normal;color:#fff;background:#ff5e34;border-radius:3px;white-space:nowrap;text-align:center;z-index:2;margin-left:-10px}.header-app-container .header-nav li>i.hot:before{content:"";display:inline-block;position:absolute;top:18px;left:6px;width:0;height:0;border-top:3px solid #ff5e34;border-right:5px dashed transparent}.header-app-container .header-info{float:right;height:60px;position:relative;z-index:99}.header-app-container .header-info .user-info{display:block;font-size:14px;color:#333;padding:15px 0}.header-app-container .header-info .user-info>img{width:30px;height:30px;border-radius:15px;vertical-align:middle}.header-app-container .header-info .user-info>span{display:inline-block;max-width:70px;line-height:30px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;vertical-align:middle;margin-left:5px;margin-right:5px}.header-app-container .header-info .user-info>i{display:inline-block;width:0;height:0;vertical-align:middle;border-top:6px solid #999;border-left:6px solid transparent;border-right:6px solid transparent}.header-app-container .header-info .user-info>img[src=""]~i,.header-app-container .header-info .user-info>span:empty{display:none}.header-app-container .header-info .msg-tip{display:none;position:absolute;top:11px;left:24px;width:10px;height:10px;box-shadow:0 1px 2px #eee;background:#ff673e;border-radius:100%}.header-app-container .header-drop-more{display:none;font-size:14px;position:absolute;width:130px;background:#fff;border:1px solid #0dc2b3;border-radius:3px;text-align:center;left:50%;top:54px;margin-left:-66px;white-space:nowrap}.header-app-container .header-drop-more dt{display:none;text-align:center;margin-top:5px;background:#f5f8f9;white-space:normal}.header-app-container .header-drop-more dt a{display:inline-block;width:42px;line-height:24px;color:#222;border-radius:3px;margin:0 
2px;vertical-align:middle}.header-app-container .header-drop-more dt a.on,.header-app-container .header-drop-more dt a:hover{color:#fff;background:#0dc2b3}.header-app-container .header-drop-more dd{position:relative;border-bottom:1px solid #eee}.header-app-container .header-drop-more dd:last-child{border-bottom:none}.header-app-container .header-drop-more dd>a{display:block;line-height:38px;color:#333}.header-app-container .header-drop-more dd>a:hover{color:#fff;background:#0dc2b3}.header-app-container .header-drop-more dd>a.msg.on::after{content:'';display:inline-block;position:absolute;width:5px;height:5px;background:#ff673e;border-radius:100%;top:5px;right:20px}.header-app-container .header-package{display:none;float:right;font-size:14px;color:#fff;padding:0 5px;line-height:22px;background:#fdcb65;overflow:hidden;border-radius:3px;margin-top:19px;margin-right:15px}.header-app-container .header-info:hover .header-drop-more{display:block}.header-app-container .header-info:hover .drop-arrow{border-top:0;border-bottom:6px solid #999}</style><div id="headerApp" class="header-app-container" style="display:none"><div class="header-main clearfix"><h1 class="header-logo"><a id="headerLogo" href="javascript:"><span><img id="headerLogoImg" src></span></a></h1><ul class="header-nav" id="headerNav"></ul><div class="header-info"><a class="user-info" href="javascript:;"><img id="userFace" src> <span id="userName"></span> <i 
class="drop-arrow"></i> </a><span class="msg-tip" id="headerSysMsg"></span><dl class="header-drop-more" id="headerDropMore"><dt id="roleSwitch"></dt><dd data-type="account"><a class="account" target="_blank">è´¦å·è®¾ç½®</a></dd><dd data-type="school"><a class="school" target="_blank">å­
¦æ ¡ç®¡ç</a></dd><dd data-type="person"><a class="person" href="/portalcenter/home/index/?from=web-container_top" target="_blank">å
¶å®è®¾ç½®
</a></dd><dd data-type="msg"><a class="msg" href="/htm-msg-center/#/message-center" target="_blank">æ¶æ¯ä¸­å¿</a></dd><dd data-type="central"
><a href="/container/container/identifyIndex" target="_blank">å®è®¤è¯</a></dd><dd><a class="out" href="javascript:;">éåºç»å½</a></dd><
/dl></div><div class="header-package" id="headerPackage"></div></div></div><script src="https://www.jyyun.com/ebgnavigation/ebgnavigation.min.js"></script><script>!function(){"use strict";function t(){return-1<["localhost","zhixue.com","zxct.zhixue.com","test.zhixue.com","test02.zhixue.com","onpre.zhixue.com","www.zhixue.com","edc.zhixue.com","exam.zhixue.com","examtest.zhixue.com"].indexOf(window.location.hostname)}function u(t){if("string"!=typeof t||!t)return t;try{return JSON.parse(t)}catch(e){return console.error("JSON.parse error: ",e),t}}function i(e){var t=(e||"").toUpperCase();return 0==t.indexOf("ZX_")?t:"ZX_COMMON_HEADER_"+t}function r(e,t){try{window.sessionStorage.setItem(i(e),JSON.stringify(t))}catch(e){console.error("set sessionStorage error: ",e)}}function a(e){try{return u(window.sessionStorage.getItem(i(e)))}catch(e){return console.error("get sessionStorage error: ",e),null}}function c(e){try{window.sessionStorage.removeItem(i(e))}catch(e){console.error("remove sessionStorage error: ",e)}}function n(e){for(var t=e+"=",n=document.cookie.split(";"),o=0;o<n.length;o++){for(var i=n[o];" "===i.charAt(0);)i=i.substring(1);if(-1!==i.indexOf(t))return i.substring(t.length,i.length)}return""}function l(e,t,n,o){document.cookie=e+"="+t+(n?";domain="+n:"")+(o?";path="+o:"")}function s(e){if(e&&0!==e.length)if(e.length)for(var t=0;t<e.length;t++)e[t].style.display="none";else e.style.display="none"}function d(i){(i=i||{}).data=i.data||{};var r=null,a=i.timeout||3e3;function c(e){var t=[];for(var n in e)e.hasOwnProperty(n)&&t.push(encodeURIComponent(n)+"="+encodeURIComponent(e[n]));return t.push("t="+(new Date).getTime()),t.join("&")}function l(t){try{delete window[t]}catch(e){window[t]=void 0}}(i.jsonp?function(){var t="jsonp"+Math.ceil(1e6*Math.random()),e=i.data;e.callback=t;var 
n=document.getElementsByTagName("head")[0],o=document.createElement("script");o.setAttribute("src",i.url+"?"+c(e)),window[t]=function(e){r&&clearTimeout(r),i.success&&i.success(e),n.removeChild(o),l(t)},n.appendChild(o),r=setTimeout(function(){i.error&&i.error("time out"),l(t),n.removeChild(o)},a)}:function(){i.type=(i.type||"GET").toUpperCase(),i.dataType=i.dataType||"json";var e=c(i.data),n=new XMLHttpRequest;if(!n)throw new Error("you browser do not suppot XMLHttpRequest");"GET"===i.type?(n.open("GET",i.url+"?"+e,!0),n.send(null)):(n.open("POST",i.url,!0),n.setRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=UTF-8"),n.send(e)),n.onreadystatechange=function(){var e,t;4===n.readyState&&(r&&clearTimeout(r),200<=n.status&&n.status<300?("",t=-1!==(e=n.getResponseHeader("Content-ty


用的库是requests

  • 写回答

1条回答 默认 最新

  • 喜欢摸鱼的程序员 2021-08-06 11:37
    关注

    cookie的目的就是保存登录状态,
    即基于cookie模拟登录:headers中带上cookie,就相当于在登录状态下请求页面,
    所以直接请求登录后要访问的页面即可。

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

    报告相同问题?

    问题事件

    • 系统已结题 8月14日
    • 已采纳回答 8月6日
    • 创建了问题 8月6日

    悬赏问题

    • ¥15 利用加权最小二乘法求亚马逊各类商品的价格指标?怎么求?
    • ¥15 c++ word自动化,为什么可用接口是空的?
    • ¥15 Matlab计算100000*100000的矩阵运算问题:
    • ¥50 VB6.0如何识别粘连的不规则的数字图片验证码
    • ¥16 需要完整的这份订单所有的代码,可以加钱
    • ¥30 写一个带界面控制的机房电脑一键开机关机并且实时监控的软件
    • ¥15 Stata数据分析请教
    • ¥15 请教如何为VS2022搭建 Debug|win32的openCV环境?
    • ¥15 关于#c++#的问题:c++如何使用websocketpp实现websocket接口调用,求示例代码和相关资料
    • ¥15 51单片机的外部中断,按下按键后不能切换到另一个模式