// Entry URL of the product filter page; it is the only scan URL.
var url = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";

// Crawler configuration: proxy and JS rendering switched on, one scan URL,
// and a single repeated "items" field with two child fields.
var configs = {
    enableProxy: true,
    enableJS: true,
    domains: ["jnlc.com"],
    scanUrls: [url],
    // The content and helper patterns are the same expression, so the
    // product filter page matches both lists.
    contentUrlRegexes: [
        /http:\/\/bankdata\.jnlc\.com\/sitepages\/ProductFilter\.aspx/
    ],
    helperUrlRegexes: [
        /http:\/\/bankdata\.jnlc\.com\/sitepages\/ProductFilter\.aspx/
    ],
    fields: [
        {
            name: "items",
            selector: "//*[@id='gview_listNew']/div[3]",
            repeated: true,
            children: [
                {
                    // Product name (the alias is the display label).
                    // NOTE(review): the XPath is pinned to the element with
                    // id '46' - presumably a single grid row; confirm this is
                    // intended for a repeated field.
                    name: "name",
                    alias: "理财产品名称",
                    selector: "//*[@id='46']/td[3]",
                    required: true
                },
                {
                    // Annualized yield; pinned to id '46' like "name" above.
                    name: "rate",
                    alias: "年化收益率",
                    selector: "//*[@id='46']/td[10]"
                }
            ]
        }
    ]
};
/**
 * Scan-page hook: queues page 1 of the product search as a POST request.
 *
 * @param {Object} page    Page object supplied by the crawler (unused here).
 * @param {string} content Page content supplied by the crawler (unused here).
 * @param {Object} site    Crawler site handle; only `site.addUrl` is called.
 * @returns {boolean} Always false.
 *   NOTE(review): presumably this tells the framework to skip its own link
 *   discovery on the scan page - confirm against the crawler documentation.
 */
configs.onProcessScanPage = function (page, content, site) {
    var helperUrl = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";

    // Search filter: (sale end >= date OR sale start >= date)
    // AND product type equals the given value AND strSaleTo contains "".
    var filters = {
        "groupOp": "AND",
        "groups": [{
            "groupOp": "OR",
            "rules": [
                { "field": "sdtSaleEnd", "op": "ge", "data": "'2017-05-12'" },
                { "field": "sdtSaleStart", "op": "ge", "data": "'2017-05-12'" }
            ]
        }],
        "rules": [
            { "field": "strFinaType", "op": "eq", "data": "'非结构性产品'" },
            { "field": "strSaleTo", "op": "cn", "data": "" }
        ]
    };

    var options = {
        method: "POST",
        data: {
            _search: "true",
            nd: "1494551033210",
            rows: 30,
            page: 1,
            sidx: "dYqnhsylsx",
            sord: "desc",
            // Post the filter as JSON text: a nested object has no single
            // form-field representation, and jqGrid-style search endpoints
            // read `filters` as a JSON string (assumed for this server -
            // TODO confirm).
            filters: JSON.stringify(filters)
        }
    };

    site.addUrl(helperUrl, options);
    console.log("debug 1");
    return false;
};
/**
 * Helper-page hook: reads the page number of the request that produced this
 * page and, while it is below the total page count found in the content,
 * queues the next page as a POST request.
 *
 * @param {Object} page    Page object; `page.request.data.page` holds the
 *                         page number that was posted.
 * @param {string} content Page content; the total page count is extracted
 *                         from it with an XPath.
 * @param {Object} site    Crawler site handle; only `site.addUrl` is called.
 * @returns {boolean} Always false.
 */
configs.onProcessHelperPage = function (page, content, site) {
    // The posted page number may come back as a string; normalise it once so
    // that "+ 1" below is arithmetic and not string concatenation
    // ("1" + 1 === "11").
    var currentPage = parseInt(page.request.data.page, 10);
    console.log("debug 2 currentPage=" + currentPage);

    var totalPage = parseInt(extract(content, "//span[contains(@class,'pages')]/b"), 10);

    // If either number is NaN the comparison is false and nothing is queued.
    if (currentPage < totalPage) {
        var helperUrl = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";

        // Search filter: (sale end >= date OR sale start >= date)
        // AND product type equals the given value AND strSaleTo contains "".
        var filters = {
            "groupOp": "AND",
            "groups": [{
                "groupOp": "OR",
                "rules": [
                    { "field": "sdtSaleEnd", "op": "ge", "data": "'2017-05-12'" },
                    { "field": "sdtSaleStart", "op": "ge", "data": "'2017-05-12'" }
                ]
            }],
            "rules": [
                { "field": "strFinaType", "op": "eq", "data": "'非结构性产品'" },
                { "field": "strSaleTo", "op": "cn", "data": "" }
            ]
        };

        var options = {
            method: "POST",
            data: {
                _search: "true",
                nd: "1494551033210",
                rows: 30,
                page: currentPage + 1,
                sidx: "dYqnhsylsx",
                sord: "desc",
                // Post the filter as JSON text: jqGrid-style search endpoints
                // read `filters` as a JSON string (assumed for this server -
                // TODO confirm).
                filters: JSON.stringify(filters)
            },
            // NOTE(review): presumably keeps the same URL from being dropped
            // as a duplicate - confirm against the crawler documentation.
            reserve: true
        };
        site.addUrl(helperUrl, options);
    }
    return false;
};
/**
 * Content-page hook. It does no work of its own and always returns false.
 *
 * @param {Object} page    Page object supplied by the crawler (unused).
 * @param {string} content Page content supplied by the crawler (unused).
 * @param {Object} site    Crawler site handle (unused).
 * @returns {boolean} Always false.
 */
configs.onProcessContentPage = function (page, content, site) {
    // NOTE(review): presumably false stops the framework from discovering
    // further links on content pages - confirm against the crawler docs.
    return false;
};
/**
 * Post-extraction hook: strips <b> and </b> tags from the "items.money" and
 * "items.duration" fields; every other field is passed through untouched.
 *
 * @param {string} fieldName Dotted name of the field that was extracted.
 * @param {*}      data      Extracted value.
 * @param {Object} page      Page object supplied by the crawler (unused).
 * @param {Object} site      Crawler site handle (unused).
 * @returns {*} The cleaned string for the two handled fields, otherwise
 *              `data` unchanged.
 */
configs.afterExtractField = function (fieldName, data, page, site) {
    var stripsBoldTags = fieldName == "items.money" || fieldName == "items.duration";
    if (!stripsBoldTags) {
        return data;
    }
    // A falsy value is replaced by the literal text 'NaN' before cleaning.
    var text = data || 'NaN';
    return text.replace(/<\/?b>/g, "");
};
// Build the crawler from `configs` (with the hooks attached to it) and start it.
var crawler = new Crawler(configs);
crawler.start();
神箭手爬虫新手问题2:抓取不到数据(全部代码如下)
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
2条回答 默认 最新
- Go 旅城通票 2017-05-13 14:37关注
用下面这段代码就可以了:直接去掉"产品类型"这个查询条件,把两种类型的产品都查询出来即可。原因是爬虫在提交该类型的值时,数据出现了乱码。
// Crawler script from the answer: it scans the product filter page, then
// pages through the JSON search endpoint and extracts fields via JsonPath.
var url = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";

// JSON search endpoint that every paging request is posted to.
var searchUrl = "http://bankdata.jnlc.com/SitePages/Layouts/JNPJFeature/search.ashx?qt=complex&qn=BankFinacleAllProducts&model=YHLC";

/**
 * Builds the POST options for one page of the product search.
 *
 * @param {number}  pageNumber 1-based page number to request.
 * @param {boolean} reserve    When true, `reserve: true` is added to the
 *                             options (used for the follow-up pages).
 * @returns {Object} Options object for `site.addUrl`.
 */
function buildSearchOptions(pageNumber, reserve) {
    var options = {
        method: "POST",
        data: {
            _search: true,
            rows: 30,
            page: pageNumber,
            sidx: "dYqnhsylsx",
            sord: "desc",
            // The filter is posted as JSON text:
            // sale end >= date OR sale start >= date.
            filters: JSON.stringify({
                "groupOp": "AND",
                "groups": [{
                    "groupOp": "OR",
                    "rules": [
                        { "field": "sdtSaleEnd", "op": "ge", "data": "'2017-05-13'" },
                        { "field": "sdtSaleStart", "op": "ge", "data": "'2017-05-13'" }
                    ]
                }]
            })
        }
    };
    if (reserve) {
        options.reserve = true;
    }
    return options;
}

var configs = {
    enableProxy: true,
    // enableJS is left switched off in this version of the script.
    timeout: 200000,
    domains: ["bankdata.jnlc.com"],
    scanUrls: [url],
    fields: [
        {
            name: "items",
            selectorType: SelectorType.JsonPath,
            selector: "$.rows[*]",
            repeated: true,
            children: [
                {
                    name: "name",
                    alias: "理财产品名称",
                    selector: "strFinaName",
                    selectorType: SelectorType.JsonPath
                },
                {
                    name: "money",
                    alias: "起购金额",
                    selector: "mInvestStart",
                    selectorType: SelectorType.JsonPath
                }
            ]
        }
    ]
};

/**
 * Post-download hook: removes every `,"id":<digits>` fragment from the raw
 * response before extraction.
 *
 * @param {Object} page Page object; `page.raw` is rewritten in place.
 * @param {Object} site Crawler site handle (unused).
 * @returns {Object} The same page object.
 */
configs.afterDownloadPage = function (page, site) {
    // Strip the repeated "id" entries from the returned data; otherwise
    // extraction fails with an error.
    page.raw = page.raw.replace(/,"id":\d+/g, '');
    return page;
};

/**
 * Scan-page hook: queues page 1 of the JSON search.
 *
 * @returns {boolean} Always false.
 */
configs.onProcessScanPage = function (page, content, site) {
    site.addUrl(searchUrl, buildSearchOptions(1, false));
    return false;
};

/**
 * Helper-page hook: queues the next page while the current page number is
 * below the `total` value of the JSON response.
 *
 * @returns {boolean} Always false.
 */
configs.onProcessHelperPage = function (page, content, site) {
    // The posted page number may come back as a string; normalise it so that
    // "+ 1" below is arithmetic and not string concatenation.
    var currentPage = parseInt(page.request.data.page, 10);
    var totalPage = JSON.parse(page.raw).total;
    console.log(" currentPage=" + currentPage + "--Total:" + totalPage);
    if (currentPage < totalPage) {
        site.addUrl(searchUrl, buildSearchOptions(currentPage + 1, true));
    }
    return false;
};

/**
 * Content-page hook: does nothing and always returns false.
 */
configs.onProcessContentPage = function (page, content, site) {
    return false;
};

var crawler = new Crawler(configs);
crawler.start();
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报
悬赏问题
- ¥15 stm32开发clion时遇到的编译问题
- ¥15 lna设计 源简并电感型共源放大器
- ¥15 如何用Labview在myRIO上做LCD显示?(语言-开发语言)
- ¥15 Vue3地图和异步函数使用
- ¥15 C++ yoloV5改写遇到的问题
- ¥20 win11修改中文用户名路径
- ¥15 win2012磁盘空间不足,c盘正常,d盘无法写入
- ¥15 用土力学知识进行土坡稳定性分析与挡土墙设计
- ¥70 PlayWright在Java上连接CDP关联本地Chrome启动失败,貌似是Windows端口转发问题
- ¥15 帮我写一个c++工程