微风载着阳光在流浪 2017-05-13 07:05 采纳率: 100%
浏览 1489
已采纳

神箭手爬虫新手问题2:抓取不到数据(全部代码如下)

 // Page that the crawl starts from; it is the only entry in scanUrls below.
 var url = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";

// Crawler configuration object handed to `new Crawler(configs)`.
// NOTE(review): the meaning of each option is defined by the crawler platform,
// not by this file - confirm against the platform documentation.
var configs = {
    enableProxy: true,
    enableJS : true,

    domains: ["jnlc.com"],
    scanUrls: [url],

    // Both lists hold the same pattern, so the ProductFilter.aspx URL matches
    // as a content page and as a helper page alike.
    contentUrlRegexes:  [/http:\/\/bankdata\.jnlc\.com\/sitepages\/ProductFilter\.aspx/],
    helperUrlRegexes:  [/http:\/\/bankdata\.jnlc\.com\/sitepages\/ProductFilter\.aspx/],

    fields: [
        {
            // Repeated container field; the child selectors below are XPath expressions.
            name: "items",
            //selector: "//*[@id='list1']/table/tbody",
            selector : "//*[@id='gview_listNew']/div[3]",
            repeated : true,
            children : [
                {
                    name: "name",
                    alias: "理财产品名称",
                    //selector: "//td[contains(@class,'list_title')]text()",
                    //selector:"//*[@id='list1']/table/tbody/tr[2]/td[1]/div/div/a",
                    // NOTE(review): the selector is pinned to the element with id '46',
                    // so it looks like it can only ever address that one element - confirm.
                    selector : "//*[@id='46']/td[3]",
                    required : true
                },
                {
                    name: "rate",
                    alias: "年化收益率",
                    //selector: "//*[@id='list1']/table/tbody/tr[2]/td[2]"
                    // Same hard-coded id '46' as the "name" field above.
                    selector:"//*[@id='46']/td[10]"
                }
            ]
        }
    ]
};

/**
 * Scan-page hook: queues a single POST request for page 1 of the product
 * search, logs a debug marker and returns false.
 *
 * NOTE(review): returning false presumably stops the crawler from discovering
 * links on the scan page by itself - confirm against the platform docs.
 *
 * @param {Object} page    Page object supplied by the crawler (unused here).
 * @param {string} content Page content supplied by the crawler (unused here).
 * @param {Object} site    Crawler site object; only site.addUrl is called.
 * @returns {boolean} Always false.
 */
configs.onProcessScanPage = function (page, content, site) {
    // Inner group: a product matches when either sale date is on/after 2017-05-12.
    var saleDateGroup = {
        "groupOp": "OR",
        "rules": [
            { "field": "sdtSaleEnd", "op": "ge", "data": "'2017-05-12'" },
            { "field": "sdtSaleStart", "op": "ge", "data": "'2017-05-12'" }
        ]
    };

    // Outer filter: the date group AND the two top-level rules.
    var searchFilters = {
        "groupOp": "AND",
        "groups": [saleDateGroup],
        "rules": [
            { "field": "strFinaType", "op": "eq", "data": "'非结构性产品'" },
            { "field": "strSaleTo", "op": "cn", "data": "" }
        ]
    };

    // Form fields of the search request; the filter is passed as a nested object.
    var formData = {
        _search: "true",
        nd: "1494551033210",
        rows: 30,
        page: 1,
        sidx: "dYqnhsylsx",
        sord: "desc",
        filters: searchFilters
    };

    site.addUrl("http://bankdata.jnlc.com/sitepages/ProductFilter.aspx", {
        method: "POST",
        data: formData
    });
    console.log("debug 1");
    return false;
};

/**
 * Helper-page hook: reads the page number of the request that produced this
 * page, extracts the total page count from the HTML, and queues a POST request
 * for the next page while more pages remain.
 *
 * The current page number is parsed to an integer once, up front. Previously
 * it was only parsed inside the log statement, so a string value such as "1"
 * would have turned `currentPage+1` into the string "11" instead of 2.
 *
 * NOTE(review): returning false presumably disables automatic link discovery
 * on the helper page - confirm against the platform docs.
 *
 * @param {Object} page    Page object; page.request.data.page is read.
 * @param {string} content Page content, passed to the platform's extract().
 * @param {Object} site    Crawler site object; only site.addUrl is called.
 * @returns {boolean} Always false.
 */
configs.onProcessHelperPage = function (page, content, site) {
    // Explicit radix; yields NaN when the request carried no usable page number.
    var currentPage = parseInt(page.request.data.page, 10);
    console.log("debug 2 currentPage="+currentPage);

    var totalPage = parseInt(extract(content, "//span[contains(@class,'pages')]/b"), 10);

    // A NaN on either side makes the comparison false, so nothing is queued
    // when the page number or the page count could not be determined.
    if (currentPage < totalPage) {
        var helperUrl = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";
        var options = {
            method : "POST",
            data: {
                _search:"true",
                nd:"1494551033210",
                rows:30,
                page:currentPage + 1,
                sidx:"dYqnhsylsx",
                sord:"desc",
                filters:{
                    "groupOp":"AND",
                    "groups":[{
                        "groupOp":"OR",
                        "rules":[
                            {
                                "field":"sdtSaleEnd",
                                "op":"ge",
                                "data":"'2017-05-12'"
                            },
                            {
                                "field":"sdtSaleStart",
                                "op":"ge",
                                "data":"'2017-05-12'"
                            }
                        ]
                    }],
                    "rules":[
                        {
                            "field":"strFinaType",
                            "op":"eq",
                            "data":"'非结构性产品'"
                        },
                        {
                            "field":"strSaleTo",
                            "op":"cn",
                            "data":""
                        }
                    ]
                }
            },
            // NOTE(review): presumably keeps this URL from being dropped as a
            // duplicate of the identical URL queued earlier - confirm.
            reserve : true
        };
        site.addUrl(helperUrl, options);
    }
    return false;
};

// Content-page hook: does no work of its own and always returns false.
// NOTE(review): false presumably disables automatic link discovery on content
// pages - confirm against the platform docs.
configs.onProcessContentPage = function (page, content, site) {
    return false;
};

/**
 * Post-extraction hook: strips <b> and </b> tags from the values of the
 * "items.money" and "items.duration" fields and passes every other field
 * through untouched.
 *
 * NOTE(review): the `fields` list visible in this script defines only
 * "items.name" and "items.rate", so the two names checked here never match
 * as written - confirm which fields were intended.
 *
 * @param {string} fieldName Dotted name of the field that was just extracted.
 * @param {*}      data      Extracted value.
 * @param {Object} page      Page object (unused here).
 * @param {Object} site      Crawler site object (unused here).
 * @returns {*} The cleaned string, the string 'NaN' for an empty value of a
 *              targeted field, or `data` unchanged otherwise.
 */
configs.afterExtractField = function (fieldName, data, page, site) {
    if (fieldName !== "items.money" && fieldName !== "items.duration") {
        return data;
    }
    // Empty / missing values are reported as the literal string 'NaN'.
    if (!data) {
        return 'NaN';
    }
    // Only strings have tags to strip; other truthy values used to throw on
    // the missing .replace method and are now returned as they are.
    if (typeof data !== "string") {
        return data;
    }
    return data.replace(/<\/?b>/g, "");
};

// Build the crawler from the configuration object (including the hooks
// attached to it) and start the crawl.
var crawler = new Crawler(configs);
crawler.start();
  • 写回答

2条回答 默认 最新

  • Go 旅城通票 2017-05-13 14:37
    关注

    用下面的代码就可以了:直接去掉"产品类型"这个查询条件,两种类型一起查询即可——爬虫在提交该类型参数时,数据会变成乱码。

     // Page that the crawl starts from; it is the only entry in scanUrls below.
     var url = "http://bankdata.jnlc.com/sitepages/ProductFilter.aspx";
    
    
     // Crawler configuration object handed to `new Crawler(configs)`.
     // NOTE(review): option semantics are defined by the crawler platform -
     // confirm against the platform documentation.
     var configs = {
        enableProxy: true,
         //enableJS : true,
     timeout: 200000,
        domains: ["bankdata.jnlc.com"],
        scanUrls: [url],
        fields: [
            {
                // Repeated container field selected with a JsonPath expression
                // over the `rows` array of the response.
                name: "items",selectorType: SelectorType.JsonPath,
                selector : "$.rows[*]",
                repeated : true,
                children : [
                    {
                        // Child selectors are JsonPath keys as well.
                        name: "name",
                        alias: "理财产品名称",
                        selector : "strFinaName",selectorType: SelectorType.JsonPath
                    },
                    {
                        name: "money",
                        alias: "起购金额",
                        selector:"mInvestStart",selectorType: SelectorType.JsonPath
                    }
                ]
            }
        ]
    };
    /**
     * Post-download hook: removes every `,"id":<digits>` fragment from the raw
     * response text before extraction runs.
     *
     * The answer's author notes that the returned data contains repeated "id"
     * entries which otherwise cause an error and prevent collection.
     *
     * @param {Object} page Downloaded page; page.raw is rewritten in place.
     * @param {Object} site Crawler site object (unused here).
     * @returns {Object} The same page object.
     */
    configs.afterDownloadPage = function(page, site) {
        var duplicateIdPattern = /,"id":\d+/g;
        var cleanedRaw = page.raw.replace(duplicateIdPattern, '');
        page.raw = cleanedRaw;
        return page;
    };
    /**
     * Scan-page hook: queues a single POST request for page 1 of the JSON
     * search endpoint and returns false.
     *
     * NOTE(review): returning false presumably stops the crawler from
     * discovering links on the scan page by itself - confirm.
     *
     * @param {Object} page    Page object supplied by the crawler (unused here).
     * @param {string} content Page content supplied by the crawler (unused here).
     * @param {Object} site    Crawler site object; only site.addUrl is called.
     * @returns {boolean} Always false.
     */
    configs.onProcessScanPage = function (page, content, site) {
        // A product matches when either sale date is on/after 2017-05-13.
        var saleDateRules = [
            {"field":"sdtSaleEnd","op":"ge","data":"'2017-05-13'"},
            {"field":"sdtSaleStart","op":"ge","data":"'2017-05-13'"}
        ];
        var searchFilters = {
            "groupOp":"AND",
            "groups":[{"groupOp":"OR","rules":saleDateRules}]
        };

        // The filter travels as a JSON string inside the form data.
        var formData = {
            _search: true,
            rows: 30,
            page: 1,
            sidx: "dYqnhsylsx",
            sord: "desc",
            filters: JSON.stringify(searchFilters)
        };

        site.addUrl(
            "http://bankdata.jnlc.com/SitePages/Layouts/JNPJFeature/search.ashx?qt=complex&qn=BankFinacleAllProducts&model=YHLC",
            { method: "POST", data: formData }
        );
        return false;
    };
    
    /**
     * Helper-page hook: reads the page number of the request that produced
     * this response and the `total` value of the JSON body, then queues a POST
     * request for the next page while more pages remain.
     *
     * The page number is parsed to an integer before use. If the stored
     * request data ever comes back as the string "1", the previous
     * `currentPage+1` would have produced "11" instead of 2.
     *
     * NOTE(review): returning false presumably disables automatic link
     * discovery on the helper page - confirm against the platform docs.
     *
     * @param {Object} page    Page object; page.request.data.page and page.raw are read.
     * @param {string} content Page content (unused here).
     * @param {Object} site    Crawler site object; only site.addUrl is called.
     * @returns {boolean} Always false.
     */
    configs.onProcessHelperPage = function (page, content, site) {
        // Explicit radix; NaN (no usable page number) makes the comparison
        // below false, so nothing further is queued in that case.
        var currentPage = parseInt(page.request.data.page, 10);
        var totalPage = JSON.parse(page.raw).total;

        console.log(" currentPage="+currentPage+"--Total:"+totalPage);
        if (currentPage < totalPage) {
            var helperUrl = "http://bankdata.jnlc.com/SitePages/Layouts/JNPJFeature/search.ashx?qt=complex&qn=BankFinacleAllProducts&model=YHLC";
            var options = {
                method: "POST",
                data: {
                    _search: true,
                    rows: 30,
                    page: currentPage + 1,
                    sidx: "dYqnhsylsx",
                    sord: "desc",
                    // The filter travels as a JSON string inside the form data.
                    filters: JSON.stringify({"groupOp":"AND","groups":[{"groupOp":"OR","rules":[{"field":"sdtSaleEnd","op":"ge","data":"'2017-05-13'"},{"field":"sdtSaleStart","op":"ge","data":"'2017-05-13'"}]}]})
                },
                // NOTE(review): presumably keeps this URL from being dropped as
                // a duplicate of the identical URL queued earlier - confirm.
                reserve : true
            };
            site.addUrl(helperUrl, options);
        }
        return false;
    };
    
    // Content-page hook: does no work of its own and always returns false.
    // NOTE(review): false presumably disables automatic link discovery on
    // content pages - confirm against the platform docs.
    configs.onProcessContentPage = function (page, content, site) {
        return false;
    };
    
    
    // Build the crawler from the configuration object (including the hooks
    // attached to it) and start the crawl.
    var crawler = new Crawler(configs);
    crawler.start();
    
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

悬赏问题

  • ¥15 求chat4.0解答一道线性规划题,用lingo编程运行,第一问要求写出数学模型和lingo语言编程模型,第二问第三问解答就行,我的ddl要到了谁来求了
  • ¥15 Ubuntu在安装序列比对软件STAR时出现报错如何解决
  • ¥50 树莓派安卓APK系统签名
  • ¥15 maple软件,用solve求反函数出现rootof,怎么办?
  • ¥65 汇编语言除法溢出问题
  • ¥15 Visual Studio问题
  • ¥20 求一个html代码,有偿
  • ¥100 关于使用MATLAB中copularnd函数的问题
  • ¥20 在虚拟机的pycharm上
  • ¥15 jupyterthemes 设置完毕后没有效果