dprh34164 2010-09-27 06:36
浏览 34
已采纳

too long

I am trying to log in to a site and then call numerous URLs to get the source and scrape for images. It works fine using regular curl, but when I try to use multi_curl I get back the exact same response for every URL. So that I only have to log in once, I am reusing the curl resource (this works fine with regular curl), and I think this may be the reason why it is returning the same response.

Does anyone know how to use multi_curl but authenticate first?

Here is the code I am using:

<?php
    // LICENSE: PUBLIC DOMAIN
    // The author disclaims copyright to this source code.
    // AUTHOR: Shailesh N. Humbad
    // SOURCE: http://www.somacon.com/p539.php
    // DATE: 6/4/2008

    // index.php
    // Entry point: request every URL in parallel and report the elapsed wall-clock time
    $startTime = microtime(true);

    // URLs that will be requested concurrently
    $urls = array(
      "http://localhost/r.php?echo=request1",
      "http://localhost/r.php?echo=request2",
      "http://localhost/r.php?echo=request3"
    );

    // All of the work (and the printing of the responses) happens in the constructor
    $pg = new ParallelGet($urls);

    // Elapsed time, rounded to four decimal places
    $elapsed = round(microtime(true) - $startTime, 4);
    echo "<br />total time: " . $elapsed . " seconds";

    // Class to run parallel GET requests and return the transfer.
    //
    // Constructing an instance logs in once with a form POST, stores the session
    // cookies in a cookie file, fetches every URL concurrently through
    // curl_multi while sending those cookies, and finally prints the collected
    // response bodies with print_r().
    class ParallelGet
    {
      /**
       * Log in, fetch all URLs in parallel and print the responses.
       *
       * Failures are reported (warning / printed message) but never abort the
       * run, matching the best-effort behaviour callers already rely on.
       *
       * @param array $urls URLs to fetch; the keys are reused as keys of the printed result
       */
      public function __construct($urls)
      {
        $res = array();

        // Nothing to fetch: skip the login and all network work
        if (empty($urls)) {
          print_r($res);
          return;
        }

        // NOTE(review): a cookie file inside the document root may be downloadable
        // by web clients and would expose the session - consider moving it elsewhere.
        $cookieFile = realpath($_SERVER['DOCUMENT_ROOT']) . '/cookie.txt';

        // Authenticate once, BEFORE any parallel handle is created, so the
        // cookie file already contains the session when the handles read it
        $this->login($cookieFile);

        // Create one GET handle per URL and register it with the multi handle
        $mh = curl_multi_init();
        $ch_array = array();
        foreach ($urls as $i => $url) {
          $ch = curl_init($url);
          // Send the cookies that the login request stored
          curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
          // Return the body as a string instead of printing it
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
          curl_multi_add_handle($mh, $ch);
          $ch_array[$i] = $ch;
        }

        // Drive the transfers and check for errors of the multi stack itself
        $execReturnValue = $this->runTransfers($mh);
        if ($execReturnValue != CURLM_OK) {
          trigger_error("Curl multi read error $execReturnValue\n", E_USER_WARNING);
        }

        // Drain the message queue: this yields the result code of every finished
        // transfer and lets curl_error() report failures of the easy handles
        $resultCodes = array();
        while (($info = curl_multi_info_read($mh)) !== false) {
          $index = array_search($info['handle'], $ch_array, true);
          if ($index !== false) {
            $resultCodes[$index] = $info['result'];
          }
        }

        // Extract the content
        foreach ($ch_array as $i => $handle) {
          // A handle without a recorded result is judged by curl_error() alone
          $code = isset($resultCodes[$i]) ? $resultCodes[$i] : CURLE_OK;
          $curlError = curl_error($handle);

          if ($code == CURLE_OK && $curlError == "") {
            $res[$i] = curl_multi_getcontent($handle);
          } else {
            if ($curlError == "") {
              $curlError = "cURL error code $code";
            }
            print "Curl error on handle $i: $curlError\n";
          }

          // Remove and close the handle
          curl_multi_remove_handle($mh, $handle);
          curl_close($handle);
        }

        // Clean up the curl_multi handle
        curl_multi_close($mh);

        // Print the response data
        print_r($res);
      }

      /**
       * Perform the form login and persist the session cookies to $cookieFile.
       *
       * @param string $cookieFile Path of the cookie jar to write
       * @return bool True when the login request itself completed without a cURL error
       */
      private function login($cookieFile)
      {
        $ch = curl_init();

        // Login form target, sent as an HTTP POST with the form fields
        curl_setopt($ch, CURLOPT_URL, 'https://www.example.com/login.php');
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, 'user=myuser&password=mypassword');

        // Store the cookies set by the server so later requests can send them
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);

        // Keep the login page out of the output
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

        $succeeded = curl_exec($ch) !== false;
        if (!$succeeded) {
          trigger_error("Login request failed: " . curl_error($ch), E_USER_WARNING);
        }

        // The cookie jar is only written when the handle is destroyed, so release
        // it explicitly; unset() covers PHP 8+, where curl_close() no longer
        // destroys the handle.
        curl_close($ch);
        unset($ch);

        return $succeeded;
      }

      /**
       * Run all transfers registered on the multi handle until they finish or
       * the multi stack reports an error.
       *
       * @param mixed $mh Multi handle returned by curl_multi_init()
       * @return int Last status code returned by curl_multi_exec()
       */
      private function runTransfers($mh)
      {
        $runningHandles = 0;

        do {
          // Pull in any new data, or at least handle timeouts
          do {
            $execReturnValue = curl_multi_exec($mh, $runningHandles);
          } while ($execReturnValue == CURLM_CALL_MULTI_PERFORM);

          if ($runningHandles && $execReturnValue == CURLM_OK) {
            // Wait for network activity. A return value of -1 means the wait
            // failed; pause briefly and call curl_multi_exec() again anyway,
            // otherwise the loop would spin without making any progress.
            if (curl_multi_select($mh) == -1) {
              usleep(1000);
            }
          }
        } while ($runningHandles && $execReturnValue == CURLM_OK);

        return $execReturnValue;
      }

    }
    ?>
  • 写回答

1条回答 默认 最新

  • doushun9875 2010-09-27 07:10
    关注

    You need to enable/use cookies with curl as well. Look for it in the documentation, and don't forget to create the cookie files (empty files) with read and write permission for curl.

        // Create an empty file that will hold the session cookies
        $cookie = tempnam ("/tmp", "CURLCOOKIE");
        $ch = curl_init();
        curl_setopt( $ch, CURLOPT_URL, $url );
        // Save the cookies received from the server to the file when the handle is closed...
        curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );
        // ...and send the cookies already stored in that file with each request
        curl_setopt( $ch, CURLOPT_COOKIEFILE, $cookie );
    
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 逻辑谓词和消解原理的运用
  • ¥15 三菱伺服电机按启动按钮有使能但不动作
  • ¥15 js,页面2返回页面1时定位进入的设备
  • ¥200 关于#c++#的问题,请各位专家解答!网站的邀请码
  • ¥50 导入文件到网吧的电脑并且在重启之后不会被恢复
  • ¥15 (希望可以解决问题)ma和mb文件无法正常打开,打开后是空白,但是有正常内存占用,但可以在打开Maya应用程序后打开场景ma和mb格式。
  • ¥20 ML307A在使用AT命令连接EMQX平台的MQTT时被拒绝
  • ¥20 腾讯企业邮箱邮件可以恢复么
  • ¥15 有人知道怎么将自己的迁移策略布到edgecloudsim上使用吗?
  • ¥15 错误 LNK2001 无法解析的外部符号