dtbi27903
2012-05-19 19:16 阅读 85
已采纳

使用 PHP cURL 伪造 POST 请求 - 被拒绝

I am trying to build a script that posts information into the RoyalMail tracking system and extracts the output.

What I currently have is getting an error from their server - see the link, somehow it is detecting that I am not using their website as per normal and throwing me an error.

Things I think I have taken into account:

  • Using an exact copy of their form by parsing it beforehand (the post parameters)
  • Saving the cookies between each request
  • Accepting redirect headers
  • Providing a Referer header that is actually valid (the previously visited page)

Does anyone know anything else I need to check or can figure out what I am doing wrong?

EDIT: a full copy of the source is now in my answer below.

  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 复制链接分享

3条回答 默认 最新

  • 已采纳
    dongwuxie7976 dongwuxie7976 2012-05-20 13:24

    I have now fixed it. The problem was with PHP cURL and following redirects: when cURL follows a redirect it doesn't always re-send the POST data, and issues a GET request instead.

    To deal with this I disabled curl follow location with curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); and then built a follow location system myself that works recursively. Essentially it extracts the location header from the response, checks for a 301 or a 302 and then runs the method again as required.

    This means the information will definitely be POSTED again.

    I also improved the user agent string, simply copying my current one on the basis it won't be blocked for a long while as in 2012 it is in active use!

    Here is a final copy of the curl class (in case the link dies - been down voted for that in the past) which is working:

    /**
     * Make a curl request respecting redirects.
     * Also supports posts.
     *
     * Redirects are followed by this class rather than by curl
     * (CURLOPT_FOLLOWLOCATION is off) so that the POST body is sent again on
     * every hop. HTTP 301/302 "Location" redirects and simple window.location
     * JavaScript redirects found in the body are followed, up to MAX_REDIRECTS
     * hops. Every instance shares one cookie file (self::$cookie), which is both
     * read and written on each transfer.
     *
     * Usage:
     *   $request = new pegCurlRequest($url, $postFields, $referer);
     *   $request->makeRequest();
     *   echo $request; // body of the final page
     */
    class pegCurlRequest {
      /** Maximum number of redirects makeRequest() follows before giving up. */
      const MAX_REDIRECTS = 10;

      private $url, $postFields = array(), $referer = NULL, $timeout = 3;
      private $debug = false, $postString = "";
      private $curlInfo = array();
      private $content = "";
      private $response_meta_info = array();
      private $error = "";

      /** Path of the cookie file shared by every instance during this PHP run. */
      static $cookie;

      /**
       * @param string      $url        URL to request.
       * @param array       $postFields Form fields; when non-empty the request is a POST.
       * @param string|null $referer    Referer to send; when empty, dirname($url) is used.
       * @param int         $timeout    Connect and total timeout, in seconds.
       */
      function __construct($url, $postFields = array(), $referer = NULL, $timeout = 3) {
        $this->setUrl($url);
        $this->setPost($postFields);
        $this->setReferer($referer);
        $this->setTimeout($timeout);
        //One cookie file per PHP run, created in the platform's temp directory
        if(empty(self::$cookie)) self::$cookie = tempnam(sys_get_temp_dir(), "pegCurlRequest");
      }

      function setUrl($url) {
        $this->url = $url;
      }

      function setTimeout($timeout) {
        $this->timeout = $timeout;
      }

      /**
       * Replace the POST fields. Non-array input leaves the current fields
       * untouched; the encoded post string is rebuilt either way.
       */
      function setPost($postFields) {
        if(is_array($postFields)) {
          $this->postFields = $postFields;
        }
        $this->updatePostString();
      }

      /**
       * Rebuild the urlencoded request body from the POST fields.
       */
      function updatePostString() {
        //Keys and values must be URL-encoded, otherwise a value containing
        //"&" or "=" corrupts the body. The separator is passed explicitly so
        //the arg_separator.output ini setting cannot change it.
        $this->postString = empty($this->postFields)
          ? ""
          : http_build_query($this->postFields, '', '&');
      }

      function setReferer($referer) {
        //Stored as given; an empty value makes the request fall back to dirname(url)
        $this->referer = $referer;
      }

      /**
       * Switch the HTML debug output of debugInfo()/debugFurtherInfo() and
       * curl's verbose logging on or off.
       */
      function setDebug($debug) {
        $this->debug = (bool) $debug;
      }

      /**
       * Echo the request about to be sent (debug mode only).
       */
      function debugInfo() {
        if(!$this->debug) return;

        $rows = array("URL" => $this->url);
        if(!empty(self::$cookie)) $rows["Cookie String"] = self::$cookie;
        if(!empty($this->referer)) $rows["Referer"] = $this->referer;
        if(!empty($this->postString)) $rows["Post String"] = $this->postString;
        if(!empty($this->postFields)) $rows["Post Values"] = $this->postFields;

        echo $this->renderDebugTable("Pre Curl Request", $rows);
      }

      /**
       * Echo what curl reported about the transfer just made (debug mode only).
       */
      function debugFurtherInfo() {
        if(!$this->debug) return;

        $rows = array("URL" => $this->url);
        if(!empty($this->referer)) $rows["Referer"] = $this->referer;
        if(!empty($this->curlInfo)) $rows["Curl Info"] = $this->curlInfo;

        echo $this->renderDebugTable("Post Curl Request", $rows);
      }

      /**
       * Make the actual request, following redirects until a final page is
       * reached, the transfer fails, or MAX_REDIRECTS hops have been made.
       * The final body is then available through __toString(); after a failed
       * transfer the body is "" and getError() holds curl's message.
       *
       * @param string|null $url Optional shorthand for setUrl() before requesting.
       */
      function makeRequest($url=NULL) {
        //Shorthand request
        if(!is_null($url))
          $this->setUrl($url);

        $hops = 0;
        while(true) {
          $content = $this->performTransfer();
          if($content === false || $hops >= self::MAX_REDIRECTS) break;

          $target = $this->findRedirectTarget($content);
          if($target === NULL) break; //final page

          $this->setReferer($this->getTrueUrl()); //the page we leave becomes the referer
          $this->setUrl($target);
          $hops++;
        }

        $this->content = ($content === false) ? "" : $content;
      }

      /**
       * Get the url after any redirection
       */
      function getTrueUrl() {
        //Before the first transfer curl has reported nothing yet
        return isset($this->curlInfo['url']) ? $this->curlInfo['url'] : $this->url;
      }

      /**
       * Message from curl for the last transfer, or "" when it succeeded.
       */
      function getError() {
        return $this->error;
      }

      function __toString() {
        return (string) $this->content;
      }

      /**
       * curl_getinfo() data of the last transfer, plus a 'location' entry when
       * that response carried a Location header.
       */
      function getHeaders() {
          return $this->response_meta_info;
      }

      /**
       * Location header of the last response, or NULL when there was none.
       */
      function getHeaderLocation() {
          return isset($this->response_meta_info['location'])
            ? $this->response_meta_info['location']
            : NULL;
      }

      /**
       * Run one curl transfer for the current url/referer/post state.
       *
       * @return string|false Response body, or false when curl failed.
       */
      private function performTransfer() {
        $this->debugInfo();

        //Start clean so data from a previous hop cannot leak into this one
        $this->response_meta_info = array();
        $this->error = "";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 Safari/536.11" );
        curl_setopt($ch, CURLOPT_URL, $this->url);
        //COOKIEFILE loads the cookies saved by earlier requests, COOKIEJAR
        //saves them again when the handle is closed; both are needed for a
        //session to survive from one handle to the next.
        curl_setopt($ch, CURLOPT_COOKIEFILE, self::$cookie);
        curl_setopt($ch, CURLOPT_COOKIEJAR, self::$cookie);
        //Redirects are handled by makeRequest() so the POST is repeated
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_ENCODING, "gzip");

        //Collect the Location header through readHeader()
        curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeader'));

        //Some servers (like Lighttpd) will not process the curl request without this header and will return error code 417 instead.
        curl_setopt($ch, CURLOPT_HTTPHEADER, array("Expect:"));

        //Referer
        curl_setopt($ch, CURLOPT_REFERER, empty($this->referer) ? dirname($this->url) : $this->referer);

        //Posts
        if(!empty($this->postFields)) {
          curl_setopt($ch, CURLOPT_POST, true);
          curl_setopt($ch, CURLOPT_POSTFIELDS, $this->postString);
        }

        //Transfers and timeouts
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);

        //Debug
        if($this->debug) {
          curl_setopt($ch, CURLOPT_VERBOSE, true); // logging stuffs
          curl_setopt($ch, CURLINFO_HEADER_OUT, true); // enable tracking
        }

        $content = curl_exec($ch);
        $info = curl_getinfo($ch);
        if($content === false) $this->error = curl_error($ch);

        //On PHP 8+ curl_close() does nothing; the handle (and with it the
        //cookie jar) is released when $ch goes out of scope as this method returns.
        curl_close($ch);

        $this->curlInfo = is_array($info) ? $info : array();
        //Headers captured by readHeader() are added on top of curl's own info
        $this->response_meta_info = array_merge($this->curlInfo, $this->response_meta_info);

        $this->debugFurtherInfo();

        return $content;
      }

      /**
       * Work out where the last response wants to send us.
       *
       * @param string $content Body of the last response.
       * @return string|null Absolute URL to request next, or NULL for a final page.
       */
      private function findRedirectTarget($content) {
        $code = isset($this->curlInfo['http_code']) ? $this->curlInfo['http_code'] : 0;

        //HTTP redirect: a 301/302 without a usable Location is treated as final
        if($code == 301 || $code == 302) {
          $location = $this->getHeaderLocation();
          return empty($location) ? NULL : $this->resolveUrl($location);
        }

        //Javascript redirect on the page; the capture stops at the closing quote
        if(preg_match("/window\.location\.replace\('([^']*)'\)/i", $content, $value) ||
          preg_match("/window\.location\=\"([^\"]*)\"/i", $content, $value)) {
          return ($value[1] === '') ? NULL : $this->resolveUrl($value[1]);
        }

        return NULL;
      }

      /**
       * Turn a redirect target into an absolute URL, using the url just
       * requested as the base. "../" segments are not collapsed.
       */
      private function resolveUrl($location) {
        if(preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) return $location; //already absolute

        $base = parse_url($this->url);
        if($base === false || empty($base['scheme']) || empty($base['host'])) return $location;

        if(substr($location, 0, 2) === '//') return $base['scheme'].':'.$location; //scheme-relative

        $root = $base['scheme'].'://'.$base['host'].(isset($base['port']) ? ':'.$base['port'] : '');
        if(substr($location, 0, 1) === '/') return $root.$location; //host-relative

        $path = isset($base['path']) ? $base['path'] : '/';
        if(substr($location, 0, 1) === '?') return $root.$path.$location; //query only

        //Relative to the directory of the current path
        return $root.substr($path, 0, strrpos($path, '/') + 1).$location;
      }

      /**
       * CURL callback function for reading and processing headers
       * Override this for your needs
       *
       * @param mixed  $ch     curl handle the header belongs to (unused)
       * @param string $header one raw header line, line ending included
       * @return int number of bytes handled; curl requires the full length
       */
      private function readHeader($ch, $header) {
          //This is run for every header, use ifs to grab and add
          $location = $this->extractCustomHeader('Location:', "\n", $header);
          if ($location !== false && trim($location) !== '') {
              $this->response_meta_info['location'] = trim($location);
          }
          return strlen($header);
      }

      /**
       * Return the text between $start (at the very beginning of the header
       * line, matched case-insensitively) and $end, or false when the line is
       * a different header.
       */
      private function extractCustomHeader($start,$end,$header) {
          $pattern = '/^'. preg_quote($start, '/') .'(.*?)'. preg_quote($end, '/') .'/i';
          if (preg_match($pattern, $header, $result)) {
              return $result[1];
          } else {
              return false;
          }
      }

      /**
       * Build one HTML debug table: a title row followed by label/value rows.
       */
      private function renderDebugTable($title, $rows) {
        $html = "<table><tr><td colspan='2'><b><u>".$this->escapeHtml($title)."</u></b></td></tr>";
        foreach($rows as $label=>$value) {
          $html .= "<tr><td><b>".$this->escapeHtml($label).": </b></td><td>".$this->renderDebugValue($value)."</td></tr>";
        }
        return $html."</table><br />\n";
      }

      /**
       * Render a scalar as escaped text and an array as a nested key/value table.
       */
      private function renderDebugValue($value) {
        if(!is_array($value)) return $this->escapeHtml($value);

        $html = "<table>";
        foreach($value as $key=>$item) {
          $html .= "<tr><td>".$this->escapeHtml($key)."</td><td>".$this->renderDebugValue($item)."</td></tr>";
        }
        return $html."</table>";
      }

      private function escapeHtml($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
      }
    }
    
    点赞 评论 复制链接分享
  • duanqun7761 duanqun7761 2012-05-19 19:24

    Well first of all, you are talking about the Royal Mail. So I'm not sure if this simple little trick would trip them up...

    But what you could try is spoofing your user agent with a quick ini_set() -

    // Set the "user_agent" ini option to a desktop browser string. This option
    // is the user agent PHP itself sends (e.g. file_get_contents() on a URL);
    // the curl extension takes its user agent from CURLOPT_USERAGENT instead.
    ini_set('user_agent', 'Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19');
    

    That's a Chrome user agent string (the "CrOS" token identifies Chrome OS rather than Ubuntu).

    The cURL user agent string would look quite different. For example:

    curl/7.15.5 (i686-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8b zlib/1.2.3 libidn/0.6.5
    

    It's a long shot - but they might be rejecting requests that are not originating from recognized browsers.

    点赞 评论 复制链接分享
  • douhao7677 douhao7677 2012-05-19 20:28

    Websites usually use two things to detect whether you are a human or a bot: the HTTP Referer and the User-Agent. I suggest you use cURL with a specified user agent and referer (replace 'http://something/' with the real URL of a page you would normally visit before navigating to the URL you want to download with PHP):

    <?php

    // Page holding the tracking form; it is also sent as the Referer of the form post.
    $url = 'http://track2.royalmail.com/portal/rm/track';
    // Request the form page first; the markup it returns is not used afterwards.
    $html = file_get_contents2($url, '');

    // Every field of the tracking form, in submission order. Each "_D:" key is
    // posted with an empty value next to the field it is named after.
    $post = array(
        '_dyncharset' => 'ISO-8859-1',
        'trackConsigniaPage' => 'track',

        '/rmg/track/RMTrackFormHandler.value.searchCompleteUrl' => '/portal/rm/trackresults?catId=22700601&pageId=trt_rmresultspage',
        '_D:/rmg/track/RMTrackFormHandler.value.searchCompleteUrl' => '',
        '/rmg/track/RMTrackFormHandler.value.invalidInputUrl' => '/portal/rm/trackresults?catId=22700601&pageId=trt_rmresultspage&keyname=track_blank',
        '_D:/rmg/track/RMTrackFormHandler.value.invalidInputUrl' => '',
        '/rmg/track/RMTrackFormHandler.value.searchBusyUrl' => '/portal/rm/trackresults?catId=22700601&pageId=trt_busypage&keyname=3E_track',
        '_D:/rmg/track/RMTrackFormHandler.value.searchBusyUrl' => '',
        '/rmg/track/RMTrackFormHandler.value.searchWaitUrl' => '/portal/rm/trackresults?catId=22700601&timeout=true&pageId=trt_timeoutpage&keyname=3E_track',
        '_D:/rmg/track/RMTrackFormHandler.value.searchWaitUrl' => '',
        '/rmg/track/RMTrackFormHandler.value.keyname' => '3E_track',
        '_D:/rmg/track/RMTrackFormHandler.value.keyname' => '',
        '/rmg/track/RMTrackFormHandler.value.previousTrackingNumber' => '',
        '_D:/rmg/track/RMTrackFormHandler.value.previousTrackingNumber' => '',

        // The item being looked up
        '/rmg/track/RMTrackFormHandler.value.trackingNumber' => 'ZW791944749GB',
        '_D:/rmg/track/RMTrackFormHandler.value.trackingNumber' => '',

        // Click coordinates of the submit control
        '/rmg/track/RMTrackFormHandler.track.x' => '50',
        '/rmg/track/RMTrackFormHandler.track.y' => '14',
        '_D:/rmg/track/RMTrackFormHandler.track' => '',

        // Date fields posted with the form
        '/rmg/track/RMTrackFormHandler.value.day' => '19',
        '_D:/rmg/track/RMTrackFormHandler.value.day' => '',
        '/rmg/track/RMTrackFormHandler.value.month' => '5',
        '_D:/rmg/track/RMTrackFormHandler.value.month' => '',
        '/rmg/track/RMTrackFormHandler.value.year' => '2012',
        '_D:/rmg/track/RMTrackFormHandler.value.year' => '',

        '_DARGS' => '/portal/rmgroup/apps/templates/html/rm/rmTrackResultPage.jsp',
    );

    // Submit the form to its handler, presenting the form page as the referer.
    $url2 = 'http://track2.royalmail.com/portal/rm?_DARGS=/portal/rmgroup/apps/templates/html/rm/rmTrackAndTraceForm.jsp';
    $html2 = file_get_contents2($url2, $url, $post);

    // Print whatever the handler answered.
    echo $html2;
    
    /**
     * Fetch a URL with curl while presenting a browser user agent and a
     * caller-supplied referer. Redirects are followed by curl, and cookies are
     * loaded from and saved to "cookie.txt" in the current working directory.
     *
     * @param string      $address URL to request.
     * @param string      $referer Value for the Referer header.
     * @param array|false $post    Form fields to send as a POST; any falsy value makes a GET.
     * @return string|false Response body (possibly ""), or false when the transfer failed.
     */
    function file_get_contents2($address, $referer, $post = false)
    {
        $useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1";

        $c = curl_init();
        curl_setopt($c, CURLOPT_URL, $address);
        curl_setopt($c, CURLOPT_USERAGENT, $useragent);
        curl_setopt($c, CURLOPT_HEADER, 0);
        curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);

        if ($post)
        {
            // Explicit "&" so the arg_separator.output ini setting cannot alter the body
            $postF = http_build_query($post, '', '&');
            curl_setopt($c, CURLOPT_POST, true);
            curl_setopt($c, CURLOPT_POSTFIELDS, $postF);
        }

        // COOKIEJAR only saves cookies; COOKIEFILE is what sends the ones saved
        // by an earlier call, so both are needed to keep a session between calls.
        curl_setopt($c, CURLOPT_COOKIEFILE, 'cookie.txt');
        curl_setopt($c, CURLOPT_COOKIEJAR, 'cookie.txt');
        curl_setopt($c, CURLOPT_REFERER, $referer);
        curl_setopt($c, CURLOPT_FOLLOWLOCATION, 1);

        // With CURLOPT_RETURNTRANSFER curl_exec() yields the body, or false on
        // failure. It is returned untouched so that an empty body or "0" is not
        // mistaken for a failed transfer.
        return curl_exec($c);
    }
    

    The above updated code returned me:

    Item ZW791944749GB was posted at 1 High Street RG17 9TJ on 19/05/12 and is being progressed through our network for delivery. 
    

    So it seems it works.

    点赞 评论 复制链接分享

相关推荐