希望用 Python 做一个小程序来爬取京东信息,无奈总卡在验证码环节。已确认京东发送验证码的地址应该没错,但是每次收到的验证码却都是几个固定的伪码,导致登录不上去。
不知道哪位大神可以提供点思路给小弟。
class JDWrapper(object):
    '''
    Simulate a browser login to JD (passport.jd.com).

    One ``requests`` session is shared by every HTTP call so cookies persist
    between the auth-code check, the auth-code image download and the login
    request.  A PhantomJS webdriver handle is kept in ``self.browser``.
    '''

    def __init__(self, usr_name, usr_pwd):
        '''
        Store the credentials and endpoint URLs and create the HTTP session.

        usr_name -- account name used to log in
        usr_pwd  -- account password

        Exits the process with status 1 when PhantomJS cannot be started.
        '''
        # cookie info
        self.trackid = ''
        self.uuid = ''
        self.eid = ''
        self.fp = ''
        self.usr_name = usr_name
        self.usr_pwd = usr_pwd
        self.interval = 0
        # init url related
        self.home = 'https://passport.jd.com/new/login.aspx'
        self.login = 'https://passport.jd.com/uc/loginService'
        self.imag = 'https://authcode.jd.com/verify/image'
        self.auth = 'https://passport.jd.com/uc/showAuthCode'
        self.sess = requests.Session()
        # requests only reads default headers from ``Session.headers``; an
        # attribute called ``header`` is silently ignored, which meant the
        # User-Agent below was never sent.  Update the real mapping instead.
        self.sess.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            'Connection': 'keep-alive',
        })
        try:
            self.browser = webdriver.PhantomJS('phantomjs.exe')
        except Exception as e:
            print('Phantomjs initialize failed : %s' % (e,))
            # SystemExit is a builtin; ``exit`` is only injected by ``site``.
            raise SystemExit(1)

    @staticmethod
    def _parse_jsonp(resp_text):
        '''
        Decode a JSON body that may be wrapped in one pair of parentheses.

        Surrounding whitespace is ignored.  Raises ValueError when the
        remaining text is not valid JSON (including an empty body).
        '''
        body = resp_text.strip()
        if body.startswith('(') and body.endswith(')'):
            body = body[1:-1]
        return json.loads(body)

    @staticmethod
    def print_json(resp_text):
        '''
        Print every ``key : value`` pair of a (possibly parenthesised)
        JSON object, one pair per line.
        '''
        for k, v in JDWrapper._parse_jsonp(resp_text).items():
            print(u'%s : %s' % (k, v))

    @staticmethod
    def response_status(resp):
        '''
        Return True when the response status is 200 OK; otherwise print the
        status code and URL and return False.
        '''
        if resp.status_code != requests.codes.OK:
            print('Status: %u, Url: %s' % (resp.status_code, resp.url))
            return False
        return True

    def need_auth_code(self, usr_name):
        '''
        Ask the showAuthCode endpoint whether an auth code is required.

        usr_name -- login name sent as the ``loginName`` form field

        Returns the ``verifycode`` field of the reply, or False when the
        HTTP request did not succeed.
        '''
        auth_dat = {
            'loginName': usr_name,
        }
        payload = {
            'r': random.random(),
            'version': 2015,
        }
        resp = self.sess.post(self.auth, data=auth_dat, params=payload)
        if self.response_status(resp):
            js = self._parse_jsonp(resp.text)
            return js['verifycode']
        print(u'获取是否需要验证码失败')
        return False

    def get_auth_code(self, uuid):
        '''
        Download the auth-code image, show it and read the code from stdin.

        uuid -- value sent as both the ``acid`` and ``uid`` query parameters

        The image is written to ``authcode.jfif`` in the current working
        directory.  Returns the text typed by the user as ``str``, or False
        when the image could not be downloaded.
        '''
        # image save path
        image_file = os.path.join(os.getcwd(), 'authcode.jfif')
        payload = {
            'a': 1,
            'acid': uuid,
            'uid': uuid,
            # current time in milliseconds
            'yys': str(int(time.time() * 1000)),
        }
        # get auth code
        r = self.sess.get(self.imag, params=payload)
        if not self.response_status(r):
            print(u'获取验证码失败')
            return False
        # the ``with`` block closes the file; no explicit close() is needed
        with open(image_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
        # Open the image with its associated viewer.  os.startfile takes the
        # path directly instead of going through a shell command line, so
        # paths containing spaces or '&' work.  It exists only on Windows.
        try:
            os.startfile(image_file)
        except (AttributeError, OSError):
            # Not on Windows, or no viewer is associated with the file:
            # tell the user where the image is rather than failing.
            print('Auth code image saved to: %s' % image_file)
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        return str(read_line('Auth Code: '))

    def login_once(self, login_data):
        '''
        Post the login form once.

        login_data -- form fields for the login request; its ``uuid`` entry
                      is also sent as a query parameter

        Returns True when the reply carries a truthy ``success`` field.
        Otherwise every field of the reply is printed so the failure reason
        is visible, and False is returned.  Also returns False when the HTTP
        request itself did not succeed.
        '''
        # url parameter
        payload = {
            'r': random.random(),
            'uuid': login_data['uuid'],
            'version': 2015,
        }
        resp = self.sess.post(self.login, data=login_data, params=payload)
        if not self.response_status(resp):
            return False
        js = self._parse_jsonp(resp.text)
        if js.get('success'):
            return True
        # Printing only 'emptyAuthcode' showed None for any other failure
        # reason, so print the whole reply.
        for key, value in js.items():
            print(u'%s : %s' % (key, value))
        return False