douchen5971 2013-09-15 13:30
浏览 46

为什么我无法用 cURL 抓取这个网站?(PHP)

This is my curl function. It has worked well for every site I have tried except this one: http://www.finalpazarlama.com/kategoriler

 //Curl
/**
 * Fetches $site with cURL and returns whatever curl_exec() returns.
 *
 * The redirect budget is the local $maxredirect, hard-coded to 2. When the
 * ini check below passes, redirects are delegated to libcurl through
 * CURLOPT_FOLLOWLOCATION / CURLOPT_MAXREDIRS. Otherwise redirects are walked
 * manually: header-only requests are sent on a copy of the handle until a
 * response is not a 301/302, and the last URL is then fetched on the
 * original handle.
 *
 * @param string $site URL to fetch
 *
 * @return mixed result of curl_exec() on the final URL, or false when the
 *               manual redirect walk uses up its budget
 */
function curl($site){
    $ch=curl_init();
    $maxredirect = 2;
    curl_setopt($ch, CURLOPT_URL, $site);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // $maxredirect is always 2 here, so the null fallback of 5 is never used.
    $mr = $maxredirect === null ? 5 : intval($maxredirect);
    // NOTE(review): the closing parenthesis looks misplaced. As written this
    // calls ini_get() with the result of ('safe_mode' == 'Off') instead of
    // comparing ini_get('safe_mode') with 'Off', so this branch appears to be
    // unreachable and the manual walk below always runs - confirm.
    if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')){
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
        curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
    }else{
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        if ($mr > 0){
            // NOTE(review): read before any transfer has run on $ch - verify
            // that this really yields $site on the libcurl version in use.
            $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
            // Probe handle: same options as $ch, but headers only, no body.
            $rch = curl_copy_handle($ch);
            curl_setopt($rch, CURLOPT_HEADER, true);
            curl_setopt($rch, CURLOPT_NOBODY, true);
            curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
            curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);
            do {
                curl_setopt($rch, CURLOPT_URL, $newurl);
                $header = curl_exec($rch);
                if (curl_errno($rch)){
                    // A transfer error ends the walk.
                    $code = 0;
                }else{
                    $code = curl_getinfo($rch, CURLINFO_HTTP_CODE);
                    if ($code == 301 || $code == 302){
                        // The Location value is used exactly as sent.
                        // NOTE(review): a relative value such as "/path" is
                        // not resolved against the current URL before it is
                        // handed back to cURL; the match is also
                        // case-sensitive and its result is not checked.
                        preg_match('/Location:(.*?)
/', $header, $matches);
                        $newurl = trim(array_pop($matches));
                    }else{
                        // Not a redirect: $newurl is the final URL.
                        $code = 0;
                    }
                }
            }
            // Continue only while the last response was a redirect and the
            // budget is not used up.
            while ($code && --$mr);
            curl_close($rch);
            if (!$mr){
                if ($maxredirect === null){
                    trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.',E_USER_WARNING);
                }else{
                    // No effect: $maxredirect is a local that is not read again.
                    $maxredirect = 0;
                }
                return false;
            }
            curl_setopt($ch, CURLOPT_URL, $newurl);
        }
    }
    return curl_exec($ch);
}

When I try to work with http://www.finalpazarlama.com/kategoriler , it returns empty. What could be the problem? Why can't I get it?

  • 写回答

1条回答 默认 最新

  • dongxi7609 2013-09-15 14:38
    关注

    HTTP/1.1 302 Found Cache-Control: private Content-Length: 157 Content-Type: text/html; charset=utf-8 Location: /PageNotFound?aspxerrorpath=/kategoriler Server: Microsoft-IIS/7.5 X-AspNetMvc-Version: 4.0 X-AspNet-Version: 4.0.30319 X-Powered-By: ASP.NET X-Powered-By-Plesk: PleskWin Date: Sun, 15 Sep 2013 14:25:06 GMT

    That's the response. The Location header holds a relative URL (/PageNotFound?aspxerrorpath=/kategoriler), and your function hands it to curl unchanged; curl does not accept it as a valid URL because it has no scheme or host.

    You need to resolve it into an absolute URL first. I already have functions for that: https://code.google.com/p/add-mvc-framework/source/browse/project/trunk/functions/url.functions.php

    feel free to copy those two to end up with something like this:

    <?php
    /**
     * Fetches a URL with cURL and returns the response body.
     *
     * Redirects are followed by libcurl itself when PHP allows
     * CURLOPT_FOLLOWLOCATION (open_basedir empty and safe_mode off or absent).
     * Otherwise they are walked manually: header-only requests are sent on a
     * copy of the handle, every Location header is resolved with build_url()
     * against the URL that produced it, and the last URL is fetched normally.
     *
     * @param string   $site        URL to fetch
     * @param int|null $maxredirect value for CURLOPT_MAXREDIRS in the native
     *                              branch and number of header probes allowed
     *                              in the manual branch; null means 5 and
     *                              raises an E_USER_WARNING when the manual
     *                              walk runs out of probes
     *
     * @return string|false response body, or false on a cURL error or when the
     *                      manual walk runs out of probes
     */
    function curl($site, $maxredirect = 2){
        $mr = $maxredirect === null ? 5 : intval($maxredirect);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $site);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        // The setting must be read first and compared afterwards; passing the
        // comparison itself to ini_get() made this test always fail.
        // ini_get() gives false when the directive does not exist.
        $safe_mode = strtolower((string) ini_get('safe_mode'));
        $native_follow = ini_get('open_basedir') == ''
            && in_array($safe_mode, array('', '0', 'off'), true);

        if ($native_follow){
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0);
            curl_setopt($ch, CURLOPT_MAXREDIRS, $mr);
        }else{
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
            if ($mr > 0){
                // Start from the requested URL directly rather than asking the
                // handle for an effective URL before any transfer has run.
                $newurl = $site;
                // Probe handle: same options as $ch, but headers only.
                $rch = curl_copy_handle($ch);
                curl_setopt($rch, CURLOPT_HEADER, true);
                curl_setopt($rch, CURLOPT_NOBODY, true);
                curl_setopt($rch, CURLOPT_FORBID_REUSE, false);
                curl_setopt($rch, CURLOPT_RETURNTRANSFER, true);
                do {
                    curl_setopt($rch, CURLOPT_URL, $newurl);
                    $header = curl_exec($rch);
                    $code = curl_errno($rch) ? 0 : curl_getinfo($rch, CURLINFO_HTTP_CODE);
                    // Header names are case-insensitive; a redirect status
                    // without a usable Location ends the walk instead of
                    // feeding an empty string to build_url().
                    if (in_array($code, array(301, 302, 303, 307, 308))
                        && preg_match('/^Location:[ \t]*(\S[^\r\n]*)/mi', $header, $matches)){
                        // Relative targets are resolved against the URL that
                        // issued the redirect, not against the first URL.
                        $newurl = build_url($newurl, trim($matches[1]));
                    }else{
                        $code = 0;
                    }
                }
                while ($code && --$mr);
                curl_close($rch);
                if (!$mr){
                    curl_close($ch);
                    if ($maxredirect === null){
                        trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.',E_USER_WARNING);
                    }
                    return false;
                }
                curl_setopt($ch, CURLOPT_URL, $newurl);
            }
        }

        $result = curl_exec($ch);
        // Release the handle on the normal path as well.
        curl_close($ch);
        return $result;
    }
    
    /**
     * URL functions
     *
     * @package ADD MVC\Functions
     *
     */
    
    /**
     * Returns the complete url according to $base
     *
     * $url is resolved the way a link found on page $base would be:
     *  - empty string          -> $base without its fragment
     *  - javascript: / data:   -> returned unchanged
     *  - http(s)://...         -> returned unchanged
     *  - //host/path           -> scheme of $base is prepended
     *  - /path                 -> scheme, host and port of $base are prepended
     *  - ?query                -> replaces the query of $base
     *  - #fragment             -> appended to $base (path and query kept)
     *  - anything else         -> appended to the directory of $base's path
     *
     * Dot segments ("./", "../") are not collapsed.
     *
     * @param string $base absolute http(s) URL the reference was found on
     * @param string $url  reference to resolve
     *
     * @return string
     *
     * @throws Exception when $base is not an absolute http(s) URL
     *
     * @since ADD MVC 0.5
     *
     * @version 0.1
     */
    function build_url($base,$url) {
       $url=trim((string) $url);

       $base_parts=parse_url($base);
       if ($base_parts===false
          || !isset($base_parts['scheme'],$base_parts['host'])
          || !preg_match('/^https?$/i',$base_parts['scheme'])) {
          throw new Exception("Invalid url: $base");
       }

       # scheme://host[:port] - the port must survive the resolution
       $protocol_domain=$base_parts['scheme'].'://'.$base_parts['host'];
       if (isset($base_parts['port'])) {
          $protocol_domain.=':'.$base_parts['port'];
       }
       # a base without any path ("http://host") behaves like "http://host/"
       $pathname=isset($base_parts['path']) && $base_parts['path']!=='' ? $base_parts['path'] : '/';
       $query_string=isset($base_parts['query']) ? '?'.$base_parts['query'] : '';

       # nothing to resolve; also keeps the $url[0] reads below in bounds
       if ($url==='') {
          return $protocol_domain.$pathname.$query_string;
       }

       # https://code.google.com/p/add-mvc-framework/issues/detail?id=81
       if (preg_match('/^(javascript|data)\:/',$url)) {
          return $url;
       }
       if (preg_match('/^https?\:\/+/i',$url)) {
          return $url;
       }

       # protocol-relative reference: must be tested before the single "/"
       if (substr($url,0,2)==='//') {
          return $base_parts['scheme'].':'.$url;
       }
       if ($url[0]==='/') {
          return $protocol_domain.$url;
       }
       if ($url[0]==='?') {
          return $protocol_domain.$pathname.$url;
       }
       if ($url[0]==='#') {
          return $protocol_domain.$pathname.$query_string.$url;
       }

       # relative path: replace everything after the last "/" of the base path
       $path=substr($pathname,0,strrpos($pathname,'/')+1);
       return $protocol_domain.$path.$url;
    }
    
    /**
     * Returns the URL parts of the url
     *
     * The result is the preg_match() match array. These named keys are always
     * present and hold an empty string when the URL lacks that part:
     * protocol_domain, protocol, domain, request_uri, pathname, path, file,
     * query_string (with its leading "?") and hash (without its leading "#").
     *
     * @param string $url absolute http(s) URL
     *
     * @return array
     *
     * @throws Exception when $url is not an http(s) URL
     *
     * @since ADD MVC 0.5
     */
    function url_parts($url) {
       # The directory part must stop at "?" or "#"; a greedy ".+" would pull a
       # query string containing "/" into the path.
       if (!preg_match('/^(?P<protocol_domain>(?P<protocol>https?\:\/+)(?P<domain>([^\/\W]|[\.\-])+))(?P<request_uri>(?P<pathname>(?P<path>\/([^\?\#]+\/)?)?(?P<file>[^\?\#]+?)?)?(?P<query_string>\?[^\#]*)?)(\#(?P<hash>.*))?$/',$url,$url_parts)) {
          # The exception already carries the stack trace; echoing an array
          # only printed "Array" together with a conversion notice.
          throw new Exception("Invalid url: $url");
       }

       # preg_match() leaves out trailing groups that did not take part in the
       # match; fill them in so callers can read every key without notices.
       $defaults=array(
          'protocol_domain'=>'',
          'protocol'=>'',
          'domain'=>'',
          'request_uri'=>'',
          'pathname'=>'',
          'path'=>'',
          'file'=>'',
          'query_string'=>'',
          'hash'=>'',
       );
       return $url_parts+$defaults;
    }
    // Demo: dump the raw result for the URL from the question, unrendered.
    echo "<xmp>";
    $response = curl('http://www.finalpazarlama.com/kategoriler');
    var_dump($response);
    
    评论

报告相同问题?

悬赏问题

  • ¥15 数学建模招标中位数问题
  • ¥15 phython路径名过长报错 不知道什么问题
  • ¥15 深度学习中模型转换该怎么实现
  • ¥15 HLs设计手写数字识别程序编译通不过
  • ¥15 Stata外部命令安装问题求帮助!
  • ¥15 从键盘随机输入A-H中的一串字符串,用七段数码管方法进行绘制。提交代码及运行截图。
  • ¥15 TYPCE母转母,插入认方向
  • ¥15 如何用python向钉钉机器人发送可以放大的图片?
  • ¥15 matlab(相关搜索:紧聚焦)
  • ¥15 基于51单片机的厨房煤气泄露检测报警系统设计