zz45509 2015-07-06 09:18 采纳率: 0%
浏览 3037
已结题

Python模拟浏览器登录,登录成功后的操作无效,代码贴出,请帮我看看.

#coding=utf-8

import urllib
import urllib2
import cookielib
import re
import sys

# Python 2 only: re-expose sys.setdefaultencoding and switch the process-wide
# default codec for implicit str/unicode conversions from ASCII to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Open the login page so the cookies needed for logging in land in the jar.
print(u'打开登录页面')
url = "https://www.immigration.govt.nz/secure/Login+Working+holiday.htm"

# Every request made through urlOpener shares this jar, so cookies set by one
# response are available to the following requests.
cookiejar = cookielib.CookieJar()
urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

# Browser-like request headers reused by the later requests.
# 'Accept-Encoding: gzip, deflate' is deliberately NOT sent: urllib2 returns
# the body exactly as received and does not undo Content-Encoding, so a
# compressed reply would defeat the regex and substring checks that the rest
# of the script runs on the page text.
headers = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    # NOTE(review): looks like a leftover from an HttpWatch capture - confirm
    # whether the server needs it.
    'X-HttpWatch-RID': '9765-10012',
    'Referer': 'https://www.immigration.govt.nz/',
    'Accept-Language': 'zh-CN',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
    'Host': 'www.immigration.govt.nz',
    'Connection': 'Keep-Alive',
    'Cache-Control': 'no-cache',
}

request = urllib2.Request(url, headers=headers)
# `opener` holds the response object; the next step reads the page from it.
opener = urlOpener.open(request)

# Parse the <input> fields of the login page to obtain the authentication
# values. Per the author's note, the cookie also contains an ImmigrationAuth
# entry that must be carried for the login to succeed.
print(u"获取认证字符")
htmlCode = opener.read()

# Step 1: isolate every <input ...> tag (single-line tags only, since '.'
# does not match a newline).
inputTagRe = re.compile(r"<[ ]*input[ ]+.*?>")
# Step 2: pull name/value out of a tag (name must precede value). Each value
# has to be closed by the same quote character that opened it (\1 / \3); the
# previous character class ["|'] also accepted a literal '|' and any closing
# quote, so a value containing '|' or the other quote kind was cut short.
nameValueRe = re.compile(
    r"<[ ]*input[ ]+.*name[ ]*=[ ]*([\"'])(.*?)\1[ ]+.*value[ ]*=[ ]*([\"'])(.*?)\3.*?>")

data = {}
for tag in inputTagRe.findall(htmlCode):
    match = nameValueRe.search(tag)
    if match:
        data[match.group(2)] = match.group(4)

# Overlay the fields a user would fill in or trigger on the page.
# NOTE(review): the credentials are hard-coded in the script; consider
# reading them from the environment or a prompt instead.
data['OnlineServicesLoginStealth:VisaLoginControl:passwordTextBox'] = 'Zz45509'
data['OnlineServicesLoginStealth:VisaLoginControl:userNameTextBox'] = 'testing___9'
data['VisaDropDown'] = '/secure/Login+Working+Holiday.htm'
# Click coordinates submitted for the image button.
data['OnlineServicesLoginStealth:VisaLoginControl:loginImageButton.x'] = '21'
data['OnlineServicesLoginStealth:VisaLoginControl:loginImageButton.y'] = '15'
data['HeaderCommunityHomepage:SearchControl:txtSearchString'] = ''
params = urllib.urlencode(data)

# Submit the form; passing `params` as the data argument makes this a POST.
url = "https://www.immigration.govt.nz/Templates/Secure/Login.aspx?NRMODE=Published&NRNODEGUID=%7bB9707666-55BB-49F9-BA1E-7341EA3B877C%7d&NRORIGINALURL=%2fsecure%2fLogin%2bWorking%2bholiday%2ehtm&NRCACHEHINT=Guest"
request = urllib2.Request(url, headers=headers)
opener = urlOpener.open(request, params)

# Build a Cookie header value from the jar entries whose names are listed in
# cookieList, echoing each selected "name=value" pair as it is found.
cookieList = ['ASP.NET_SessionId', 'ImmigrationAuth', 'TS0120d49b']
selectedPairs = []
for jarEntry in cookiejar:
    if jarEntry.name not in cookieList:
        continue
    pair = jarEntry.name + "=" + jarEntry.value
    selectedPairs.append(pair)
    print(pair)
# `cookies` keeps the trailing ';' after every pair; `cookie` is the same
# text without the final separator.
cookies = "".join(pair + ";" for pair in selectedPairs)
cookie = ";".join(selectedPairs)


# Log in carrying the authentication cookies: request the migrant landing
# page with the cookie string attached and the login form data as the body.
# NOTE(review): this POSTs the login form (password included) to an http://
# URL - confirm that both the POST and the plain-HTTP scheme are intended.
print(u"携带认证登录")
url = 'http://www.immigration.govt.nz/migrant/default.htm'
request = urllib2.Request(url, data=params, headers=headers)
request.add_header("cookie", cookie)
opener = urlOpener.open(request)

# The login is treated as successful when the page text contains "username".
landingHtml = opener.read()
value = landingHtml.find("username")
if value != -1:
    print(u"登录成功")

# Rebuild the Cookie header value from the jar as it stands after this
# request, echoing each selected "name=value" pair.
cookieList = ['ASP.NET_SessionId', 'ImmigrationAuth', 'TS0120d49b']
selectedPairs = []
for jarEntry in cookiejar:
    if jarEntry.name not in cookieList:
        continue
    pair = jarEntry.name + "=" + jarEntry.value
    selectedPairs.append(pair)
    print(pair)
# `cookies` keeps the trailing ';' after every pair; `cookie` drops the last one.
cookies = "".join(pair + ";" for pair in selectedPairs)
cookie = ";".join(selectedPairs)

# Open the application submit page. Author's note: this is the step that
# fails after a successful login, even though opening the page by hand - or
# from a separate script with the cookie pasted in - works. Others suggested
# that JavaScript generates a csrf_token at login which has to be computed
# before any post-login request is accepted.
print(u"打开提交页面")
url = 'https://www.immigration.govt.nz/WorkingHoliday/Application/Submit.aspx?ApplicationId=1302903'
request = urllib2.Request(url)
request.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0")
# NOTE(review): setting Cookie explicitly appears to stop HTTPCookieProcessor
# from adding the jar's own cookies to this request, so only the cookies
# selected into `cookie` are sent - confirm against the cookielib docs.
request.add_header("cookie", cookie)
request.add_header('Host', 'www.immigration.govt.nz')
request.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
request.add_header('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3')
# No 'Accept-Encoding: gzip, deflate' here: urllib2 does not decompress the
# body, so a compressed reply could never contain the plain-text marker.
request.add_header('Connection', 'keep-alive')
opener = urlOpener.open(request)

# Read the body exactly once. A second read() on the same response returns an
# empty string, which previously made the check below report failure on
# every run regardless of what the server sent.
submitPage = opener.read()
noteIndex = submitPage.find("IMPORTANT NOTE")
print(noteIndex)
if noteIndex == -1:
    print(u"打开提交页面失败")



打印信息
图片说明

  • 写回答

5条回答

  • oyljerry 2015-07-06 09:27
    关注

    你看到csrf_token就知道它就是用来防止进行跨站攻击的。它页面做了安全检测,应该是用Javascript等做了动态计算,从而会生成一个额外的token,防止你直接使用cookie

    因为 “**但是手动打开网页,单独写一个脚本,再把cookie复制进去,却可以成功打开这个页面**”

    那么可能你可以用selenium等来直接用浏览器控件加载页面,然后再设置cookie等。

    评论

报告相同问题?

悬赏问题

  • ¥15 链接问题 C++LNK2001 无法解析的外部符号
  • ¥50 安装pyaudiokits失败
  • ¥15 计组这些题应该咋做呀
  • ¥60 更换迈创SOL6M4AE卡的时候,驱动要重新装才能使用,怎么解决?
  • ¥15 让node服务器有自动加载文件的功能
  • ¥15 jmeter脚本回放有的是对的有的是错的
  • ¥15 r语言蛋白组学相关问题
  • ¥15 Python时间序列如何拟合疏系数模型
  • ¥15 求学软件的前人们指明方向🥺
  • ¥50 如何增强飞上天的树莓派的热点信号强度,以使得笔记本可以在地面实现远程桌面连接