我写了一个爬虫,用来查看自己在某个论坛的金币数量。我有两个账号(大号和小号):当我在自己的浏览器上登录大号之后,再用爬虫分别查询大号和小号的金币数量时,查询出来的都是大号的金币数量。请问这种情况该怎么修改我的代码,才能让大号和小号分别查询到各自的金币数量?
(“小号到底有没有登录上去”这方面应该没有问题:我另有一个用来在这个论坛上签到的爬虫,现在这个爬虫是在那个爬虫的基础上修改的,所以小号在爬虫内应该已经登录。不过我比较菜,也可能确实是没有登录上去的问题,麻烦大家了。)
代码如下:
import requests
import ddddocr
import re
from lxml import etree
def getFormhash(url, headers):
    """Fetch the login page and return its ``formhash`` value.

    The page is requested through the module-level ``session`` so that any
    cookies set by the server stay attached to that session.

    Args:
        url: Address of the login page.
        headers: Request headers to send with the GET request.

    Returns:
        The first ``value`` attribute matched by the fixed XPath below.

    Raises:
        IndexError: If the XPath matches nothing in the returned page.
    """
    login_page = session.get(url, headers=headers)
    document = etree.HTML(login_page.text)
    # Absolute path to the hidden input inside the login form.
    hash_values = document.xpath(
        '/html/body/div[5]/div[1]/div/div[3]/div/form/input[2]/@value'
    )
    return hash_values[0]
def GetImageName(url, headers):
    """Fetch the login page and return the captcha id hash.

    The page is requested through the module-level ``session``. The hash is
    whatever follows the ``seccode_`` prefix in the ``id`` attribute of the
    span matched by the fixed XPath below.

    Args:
        url: Address of the login page.
        headers: Request headers to send with the GET request.

    Returns:
        The text captured after ``seccode_`` in the span's ``id``.

    Raises:
        IndexError: If the XPath matches nothing, or the ``id`` does not
            contain ``seccode_``.
    """
    page_html = session.get(url, headers=headers).text
    span_ids = etree.HTML(page_html).xpath(
        '/html/body/div[7]/div/div[2]/div/div[2]/div[1]/div[1]/form/div/span/@id'
    )
    span_id = span_ids[0]
    captured = re.findall('seccode_(.*)', span_id)
    return captured[0]
def GetImageResult(jpgName, headers1):
    """Download the captcha image and return the OCR result for it.

    The image is requested through the module-level ``session``. As a side
    effect the raw image bytes are written to ``seccode.jpg`` in the current
    working directory.

    Args:
        jpgName: Captcha id hash appended to the captcha URL.
        headers1: Request headers to send with the image request.

    Returns:
        Whatever ``ddddocr``'s ``classification`` returns for the image bytes.
    """
    captcha_url = (
        'https://www.tangguo2.com/misc.php?mod=seccode&update=76180&idhash='
        + jpgName
    )
    captcha_bytes = session.get(captcha_url, headers=headers1).content
    recognizer = ddddocr.DdddOcr(old=True)
    # Keep a copy of the captcha on disk alongside the in-memory recognition.
    with open('seccode.jpg', 'wb') as out_file:
        out_file.write(captcha_bytes)
    return recognizer.classification(captcha_bytes)
if __name__ == "__main__":
    # Log in to each configured account in turn and print that account's own
    # coin count.
    #
    # Accounts to query as (username, password) pairs. Replace the
    # placeholders with real credentials (main account first, then the
    # secondary one); add or remove entries as needed.
    accounts = [
        ('MAIN_ACCOUNT_USERNAME', 'MAIN_ACCOUNT_PASSWORD'),
        ('ALT_ACCOUNT_USERNAME', 'ALT_ACCOUNT_PASSWORD'),
    ]

    url = 'https://www.tangguo2.com/member.php?mod=logging&action=login'  # login page
    # Login submission endpoint.
    loginUrl = 'https://www.tangguo2.com/member.php?mod=logging&action=login&loginsubmit=yes&loginhash=LgS81&inajax=1'
    # Profile page WITHOUT a ``uid`` parameter. The previous URL hard-coded
    # ``uid=144701``, so it always displayed that one user's public profile no
    # matter which account was logged in. Omitting ``uid`` is expected to make
    # the forum show the profile of the currently logged-in user.
    # NOTE(review): relies on Discuz! resolving a missing uid to the session's
    # own user - confirm against the live site.
    cxUrl = 'https://www.tangguo2.com/home.php?mod=space&do=profile&from=space'

    # Basic request headers (also used for the login POST and the query).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0',
    }
    # Request headers used when downloading the captcha image.
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0',
        'referer': 'https://www.tangguo2.com/member.php?mod=logging&action=login',
        'sec-fetch-dest': 'image',
        'sec-fetch-mode': 'no-cors',
        'sec-fetch-site': 'same-origin',
        'authority': 'www.tangguo2.com',
        'method': 'GET',
        'scheme': 'https',
        'accept': 'image/webp,image/apng,image/*,*/*;q = 0.8',
        'accept-encoding': 'gzip,deflate,br',
        'accept-language': 'zh-CN,zh;q=0.9',
    }

    for User, password in accounts:
        # A fresh session per account keeps each account's cookies separate.
        # The helper functions above read this module-level ``session`` name,
        # so it must be rebound before they are called.
        session = requests.Session()

        formHash = getFormhash(url, headers)
        jpgName = GetImageName(url, headers)
        image_result = GetImageResult(jpgName, headers1)
        data = {
            'formhash': formHash,
            'referer': 'https://www.tangguo2.com/forum.php',
            'loginfield': 'username',
            'username': User,
            'password': password,
            'questionid': '0',
            'answer': '',
            'seccodehash': jpgName,
            'seccodemodid': 'member::logging',
            'seccodeverify': image_result,
        }

        # Log in.
        loginResponce = session.post(loginUrl, headers=headers, data=data).text
        # Heuristic kept from the original script: a response longer than 300
        # characters is treated as a successful login. Checked BEFORE querying
        # so a failed login (e.g. a misread captcha) is reported instead of
        # crashing while parsing the profile page.
        if len(loginResponce) <= 300:
            print(User + '登陆失败')
            continue

        # Query the logged-in account's own profile page.
        cxResponce = session.get(cxUrl, headers=headers).text
        tree = etree.HTML(cxResponce)
        texts = tree.xpath('//*[@id="psts"]/ul/li[4]//text()')
        # The label and the value are the first two text nodes of the item.
        if len(texts) < 2:
            print(User + '查询失败')
            continue
        print(User + texts[0] + ':' + texts[1])