2 zz45509 zz45509 于 2015.07.06 17:18 提问

python模拟浏览器登录,登录成功后的操作无效,代码贴出,请帮我看看. 50C
#coding=utf-8

import urllib
import urllib2
import cookielib
import re
import sys

# Python 2 idiom: reload(sys) is called so that sys.setdefaultencoding can be
# used to switch the interpreter's default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Step 1: open the login page to collect the cookies required for logging in.
print(u'打开登录页面')
url = "https://www.immigration.govt.nz/secure/Login+Working+holiday.htm"

# Every request below goes through this opener, so cookies set by the server
# are stored in `cookiejar`.
cookiejar = cookielib.CookieJar()
urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

# Browser-like request headers shared by the following requests.
# 'Accept-Encoding' is deliberately NOT sent: urllib2 does not decompress
# gzip/deflate response bodies, so advertising them can make the regex/find
# calls later in this script run on compressed bytes and silently find nothing.
# 'Content-Type' and 'Content-Length' are left for urllib2 to fill in.
headers = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    'X-HttpWatch-RID': '9765-10012',
    'Referer': 'https://www.immigration.govt.nz/',
    'Accept-Language': 'zh-CN',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
    'Host': 'www.immigration.govt.nz',
    'Connection': 'Keep-Alive',
    'Cache-Control': 'no-cache',
}

request = urllib2.Request(url, headers=headers)
# `opener` holds the HTTP response object (not the opener itself); the name is
# kept because the following steps of the script read from it.
opener = urlOpener.open(request)

# Step 2: scrape the <input> fields of the login page so they can be posted
# back with the credentials. Author's note: the cookies contain an
# "immigrationAuth" entry that must be carried for the login to succeed.
print(u"获取认证字符")
login_html = opener.read()

# First pattern isolates each <input ...> tag; second pulls (name, value) out
# of a single tag. It only matches tags where name appears before value.
input_tag_re = re.compile(r"<[ ]*input[ ]+.*?>")
name_value_re = re.compile(r"<[ ]*input[ ]+.*name[ ]*=[ ]*[\"|\'](.*?)[\"|\'][ ]+.*value[ ]*=[ ]*[\"|\'](.*?)[\"|\'].*?>")

data = {}
for input_tag in input_tag_re.findall(login_html):
    found = name_value_re.search(input_tag)
    if found is None:
        continue
    field_name, field_value = found.groups()
    data[field_name] = field_value

# Fixed form values (credentials, selected visa page, image-button click
# coordinates, empty search box). They override any scraped value of the same name.
fixed_fields = (
    ('OnlineServicesLoginStealth:VisaLoginControl:passwordTextBox', 'Zz45509'),
    ('OnlineServicesLoginStealth:VisaLoginControl:userNameTextBox', 'testing___9'),
    ('VisaDropDown', '/secure/Login+Working+Holiday.htm'),
    ('OnlineServicesLoginStealth:VisaLoginControl:loginImageButton.x', '21'),
    ('OnlineServicesLoginStealth:VisaLoginControl:loginImageButton.y', '15'),
    ('HeaderCommunityHomepage:SearchControl:txtSearchString', ''),
)
for form_key, form_value in fixed_fields:
    data[form_key] = form_value

# URL-encoded request body used by the POSTs that follow.
params = urllib.urlencode(data)

# Step 3: POST the assembled form to the login handler. Passing `params` as the
# second argument to open() makes this a POST request.
url = "https://www.immigration.govt.nz/Templates/Secure/Login.aspx?NRMODE=Published&NRNODEGUID=%7bB9707666-55BB-49F9-BA1E-7341EA3B877C%7d&NRORIGINALURL=%2fsecure%2fLogin%2bWorking%2bholiday%2ehtm&NRCACHEHINT=Guest"
request = urllib2.Request(url, headers=headers)
opener = urlOpener.open(request, params)

# Collect the selected cookies from the jar into a "name=value;name=value"
# string (no trailing separator), printing each pair as it is found.
wanted_names = ('ASP.NET_SessionId', 'ImmigrationAuth', 'TS0120d49b')
pairs = []
for jar_entry in cookiejar:
    if jar_entry.name not in wanted_names:
        continue
    pair = jar_entry.name + "=" + jar_entry.value
    print(pair)
    pairs.append(pair)
cookie = ";".join(pairs)


# Step 4: request the migrant landing page carrying the authentication cookies.
# The manually built cookie string is attached as an explicit header, and the
# login form body is sent again with this request.
print(u"携带认证登录")
url = 'http://www.immigration.govt.nz/migrant/default.htm'
request = urllib2.Request(url, headers=headers)
request.add_header("cookie", cookie)
opener = urlOpener.open(request, params)

# The login is treated as successful when the page body mentions "username".
if "username" in opener.read():
    print(u"登录成功")

# Rebuild the "name=value;name=value" cookie string from the jar, since the
# request above may have updated the stored cookies. Each pair is printed.
wanted_names = ('ASP.NET_SessionId', 'ImmigrationAuth', 'TS0120d49b')
pairs = []
for jar_entry in cookiejar:
    if jar_entry.name not in wanted_names:
        continue
    pair = jar_entry.name + "=" + jar_entry.value
    print(pair)
    pairs.append(pair)
cookie = ";".join(pairs)

# Step 5: open the application submit page.
# Author's note: this is the step reported as failing after a successful login,
# although opening the page by hand, or from a separate script with the cookie
# pasted in, works. Others suggested that JavaScript generates a csrf_token at
# login that must be computed before further requests succeed (unverified).
print(u"打开提交页面")
url = 'https://www.immigration.govt.nz/WorkingHoliday/Application/Submit.aspx?ApplicationId=1302903'
request = urllib2.Request(url)
request.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0")
request.add_header("cookie", cookie)
request.add_header('Host', 'www.immigration.govt.nz')
request.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
request.add_header('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3')
# No 'Accept-Encoding: gzip, deflate' header: urllib2 does not decompress the
# body, so a compressed response would never contain the marker text below.
request.add_header('Connection', 'keep-alive')
opener = urlOpener.open(request)

# Read the body exactly once. A second read() on the same response returns an
# empty string, which previously made the failure message print every time,
# even when the page had loaded correctly.
page_body = opener.read()
marker_index = page_body.find("IMPORTANT NOTE")
print(marker_index)
if marker_index == -1:
    print(u"打开提交页面失败")



打印信息
图片说明

5个回答

oyljerry
oyljerry   Ds   Rxr 2015.07.06 17:27

你看到csrf_token就知道它就是用来防止进行跨站攻击的。它页面做了安全检测,应该是用Javascript等做了动态计算,从而会生成一个额外的token,防止你直接使用cookie

因为 “**但是手动打开网页,单独写一个脚本,再把cookie复制进去,却可以成功打开这个页面**”

那么可能你可以用selenium等来直接用浏览器控件加载页面,然后再设置cookie等。

zz45509
zz45509 那就是始终要涉及到GUI的操作,我想完全后台实现
2 年多之前 回复
devmiao
devmiao   Ds   Rxr 2015.07.06 21:40

这个你用fiddler仔细比照下你的程序和浏览器的有什么不同就可以了。

zz45509
zz45509 不知道你所说的不同指哪里不同,程序里面的请求头,post参数都是抓包得到的
2 年多之前 回复
save4me
save4me   Ds   Rxr 2015.07.07 12:11

检查一下向服务器发送请求的时候,看看cookie里是否包含asp.net_Sessionid,可能是因为cookie的httponly设置导致的

zz45509
zz45509 肯定是包含这个sessionId的,所有cookie都不是httponly
2 年多之前 回复
panda620
panda620   2015.07.29 07:32

我以前也做过类似的登录没有成功 现在直接用selenium来模拟

frank_20080215
frank_20080215   2015.07.06 18:06

居然访问ASP.NET,令牌安全属性没有设置。

Csdn user default icon
上传中...
上传图片
插入图片