需要爬取的内容是 http://drugs.dxy.cn/drug/89790/detail.htm 页面中“药品成份”一栏下被三角按钮隐藏的内容（需要点击该按钮才能获取）。
报错内容是：dwr.engine._remoteHandleException('2','0',{javaClassName:"java.lang.Throwable",message:"Error"});
也就是说，返回结果中只有 {javaClassName:"java.lang.Throwable",message:"Error"}，它代替了我要爬取的内容。
from urllib.parse import urlencode
from urllib import request
import urllib
# DWR (Direct Web Remoting) "plaincall" endpoint for DrugUtils.showDetail.
base_url = 'http://drugs.dxy.cn/dwr/call/plaincall/DrugUtils.showDetail.dwr'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47',
    # The body built below is newline-separated plain text, not an HTML form,
    # so it must not be labelled application/x-www-form-urlencoded (the
    # default urllib would otherwise apply to POST data).
    'Content-Type': 'text/plain',
}


def build_dwr_params(drug_id=89790, section=2, batch_id=2):
    """Return the ordered DWR batch fields for one DrugUtils.showDetail call.

    DWR encodes every call argument as ``<type>:<value>`` in the *value* of a
    ``c0-paramN`` field.  The type marker must not be part of the key: a key
    such as ``'c0-param0=number'`` reaches the server as
    ``c0-param0=number=89790``, which is not a valid typed argument.

    Args:
        drug_id: Numeric drug id; also used to build the ``page`` field.
        section: Second numeric argument of showDetail.
            NOTE(review): presumably selects which collapsed section of the
            detail page is returned -- confirm against the site's own request.
        batch_id: DWR batch counter echoed back in the reply.

    Returns:
        dict mapping field name to string value, in transmission order.
    """
    return {
        'callCount': '1',
        'page': '/drug/{}/detail.htm'.format(drug_id),
        'httpSessionId': '',
        # NOTE(review): copied from a captured browser request; some DWR
        # versions validate this id, so it may need to be refreshed.
        'scriptSessionId': 'D8B63E5C6C13BEC549EB1F56C5D7D79B627',
        'c0-scriptName': 'DrugUtils',
        'c0-methodName': 'showDetail',
        'c0-id': '0',
        'c0-param0': 'number:{}'.format(drug_id),
        'c0-param1': 'number:{}'.format(section),
        'batchId': str(batch_id),
    }


def encode_dwr_body(fields):
    """Serialize *fields* as a DWR plaincall body: one ``key=value`` per line.

    Values are written verbatim (no percent-encoding), each followed by a
    newline, and the result is UTF-8 encoded.  Values must therefore not
    contain line breaks.
    """
    return ''.join(
        '{}={}\n'.format(key, value) for key, value in fields.items()
    ).encode('utf-8')


def fetch_detail(drug_id=89790, section=2, timeout=30):
    """POST one showDetail call and return the stripped response text.

    Args:
        drug_id: Numeric drug id passed as the first call argument.
        section: Numeric value passed as the second call argument.
        timeout: Socket timeout in seconds so a stalled server cannot hang
            the script forever.

    Returns:
        The response body decoded as UTF-8 with surrounding whitespace
        removed.

    Raises:
        urllib.error.URLError: on network or HTTP failure.
    """
    body = encode_dwr_body(build_dwr_params(drug_id, section))
    req = urllib.request.Request(base_url, data=body, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req, timeout=timeout) as res:
        return res.read().decode(encoding="utf-8").strip()


if __name__ == '__main__':
    print(fetch_detail())