duan19913 2013-09-08 18:38
浏览 25
已采纳

如何使用php在cURL中显示href

I am learning web page scraping. Earlier I used Simple HTML DOM Parser, but it was too slow, so I switched to cURL. I am learning from some blogs. Now I want to display the href found between two tags.

<?php
/**
 * Minimal page scraper: downloads a URL with cURL and extracts the fragments
 * that sit between two literal tags.
 *
 * Usage: create an instance, call fetchPage() with a URL, then call
 * parse_array() with the opening and closing tag to collect the matches.
 */
class tagSpider
{
    /** @var mixed Not used by this class; kept so existing property access keeps working. */
    public $crl;

    /** @var string Body of the last fetched page ("" before any fetch or after a failed one). */
    public $html = "";

    /** @var int Legacy binary-transfer flag; kept for compatibility, no longer passed to cURL. */
    public $binary = 0;

    /** @var string URL given to the most recent fetchPage() call. */
    public $url = "";

    /** @var mixed cURL handle of the most recent fetch (already closed once fetchPage() returns). */
    public $ch;

    /**
     * Resets the scraper state.
     *
     * Replaces the PHP 4-style constructor named after the class, which
     * PHP 8 no longer treats as a constructor.
     */
    public function __construct()
    {
        $this->html = "";
        $this->binary = 0;
        $this->url = "";
    }

    /**
     * Downloads $url (following redirects) and stores the body in $this->html.
     *
     * On any cURL failure $this->html is set to "" instead of false, so that
     * parse_array() can always treat it as a string.
     *
     * @param string|null $url Address to fetch; null records the URL but performs no request.
     * @return void
     */
    public function fetchPage($url)
    {
        $this->url = $url;
        if ($url === null) {
            return;
        }

        $handle = curl_init();
        $this->ch = $handle;
        if ($handle === false) {
            $this->html = "";
            return;
        }

        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_URL, $this->url);
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
        // CURLOPT_BINARYTRANSFER is intentionally not set: the PHP manual lists
        // it as no longer used, and it is deprecated as of PHP 8.4.

        $body = curl_exec($handle);
        curl_close($handle);

        // curl_exec() returns false on failure; normalise that to an empty page.
        $this->html = is_string($body) ? $body : "";
    }

    /**
     * Returns every fragment of the fetched page that starts with $beg_tag and
     * ends with the nearest following $close_tag (both included).
     *
     * The tags are matched literally and case-insensitively, and a fragment
     * may span several lines.
     *
     * @param string $beg_tag   Literal opening text, e.g. "<span>".
     * @param string $close_tag Literal closing text, e.g. "</span>".
     * @return array List of matched fragments; empty when nothing matches or the match fails.
     */
    public function parse_array($beg_tag, $close_tag)
    {
        // Quote both tags so characters such as "(", ")", "." or "?" inside
        // them cannot be interpreted as regex syntax.
        $pattern = '~' . preg_quote($beg_tag, '~') . '.*?' . preg_quote($close_tag, '~') . '~si';

        if (!preg_match_all($pattern, (string) $this->html, $matching_data)) {
            return array();
        }

        return $matching_data[0];
    }
}
?>

<?php
// Fetch the page, collect every <span>…</span> fragment and print each one.
$urlrun = 'http://m4.cricbuzz.com/';
list($stag, $etag) = array('<span>', '</span>');

$tspider = new tagSpider();
$tspider->fetchPage($urlrun);
$linkarray = $tspider->parse_array($stag, $etag);

foreach ($linkarray as $result) {
    // Only <br> and <div> markup survives; a separator line follows each fragment.
    echo strip_tags($result, '<br><div>'), "<br>-<br>";
}
?> 

How can I display the href using the same program?

  • 写回答

1条回答 默认 最新

  • duanlun1955 2013-09-09 06:12
    关注
    <?php
    
        /**
         * Minimal page scraper: downloads a URL with cURL, then extracts either
         * the fragments between two literal tags (parse_array) or the link
         * targets inside selected list items (getLinks).
         */
        class tagSpider
        {
            /** @var mixed Not used by this class; kept so existing property access keeps working. */
            public $crl;

            /** @var string Body of the last fetched page ("" before any fetch or after a failed one). */
            public $html = "";

            /** @var int Legacy binary-transfer flag; kept for compatibility, no longer passed to cURL. */
            public $binary = 0;

            /** @var string URL given to the most recent fetchPage() call. */
            public $url = "";

            /** @var mixed cURL handle of the most recent fetch (already closed once fetchPage() returns). */
            public $ch;

            /**
             * Resets the scraper state.
             *
             * Replaces the PHP 4-style constructor named after the class, which
             * PHP 8 no longer treats as a constructor.
             */
            public function __construct()
            {
                $this->html = "";
                $this->binary = 0;
                $this->url = "";
            }

            /**
             * Downloads $url (following redirects) and stores the body in $this->html.
             *
             * On any cURL failure $this->html is set to "" instead of false, so
             * the parsing methods can always treat it as a string.
             *
             * @param string|null $url Address to fetch; null records the URL but performs no request.
             * @return void
             */
            public function fetchPage($url)
            {
                $this->url = $url;
                if ($url === null) {
                    return;
                }

                $handle = curl_init();
                $this->ch = $handle;
                if ($handle === false) {
                    $this->html = "";
                    return;
                }

                curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($handle, CURLOPT_URL, $this->url);
                curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
                // CURLOPT_BINARYTRANSFER is intentionally not set: the PHP manual
                // lists it as no longer used, and it is deprecated as of PHP 8.4.

                $body = curl_exec($handle);
                curl_close($handle);

                // curl_exec() returns false on failure; normalise that to an empty page.
                $this->html = is_string($body) ? $body : "";
            }

            /**
             * Returns every fragment of the fetched page that starts with $beg_tag
             * and ends with the nearest following $close_tag (both included).
             *
             * The tags are matched literally and case-insensitively, and a
             * fragment may span several lines.
             *
             * @param string $beg_tag   Literal opening text, e.g. "<span>".
             * @param string $close_tag Literal closing text, e.g. "</span>".
             * @return array List of matched fragments; empty when nothing matches or the match fails.
             */
            public function parse_array($beg_tag, $close_tag)
            {
                // Quote both tags so characters such as "(", ")", "." or "?"
                // inside them cannot be interpreted as regex syntax.
                $pattern = '~' . preg_quote($beg_tag, '~') . '.*?' . preg_quote($close_tag, '~') . '~si';

                if (!preg_match_all($pattern, (string) $this->html, $matching_data)) {
                    return array();
                }

                return $matching_data[0];
            }

            /**
             * Collects the href of every <a> found inside the <li> elements whose
             * class attribute equals $itemClass exactly.
             *
             * @param string $itemClass Exact class attribute value to look for
             *                          (whitespace included). The default is the
             *                          value this method was originally hard-coded to.
             * @return array List of href strings in document order; empty when the
             *               page is empty or cannot be parsed.
             */
            public function getLinks($itemClass = 'ui-li ui-btn-icon-right ui-btn-up-d ui-odd-match-column ')
            {
                $href = array();

                // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8),
                // so an empty or failed fetch short-circuits to "no links".
                if (!is_string($this->html) || trim($this->html) === '') {
                    return $href;
                }

                // Collect libxml parse warnings internally instead of hiding every
                // error with "@"; the previous setting is restored afterwards.
                $previous = libxml_use_internal_errors(true);
                $dom = new DOMDocument();
                $loaded = $dom->loadHTML($this->html);
                libxml_clear_errors();
                libxml_use_internal_errors($previous);

                if ($loaded === false) {
                    return $href;
                }

                // The former preserveWhiteSpace assignment is dropped: it was set
                // after parsing and only attribute values are read below, so it
                // could not change the result.
                foreach ($dom->getElementsByTagName('li') as $item) {
                    if ($item->getAttribute('class') !== $itemClass) {
                        continue;
                    }
                    foreach ($item->getElementsByTagName('a') as $link) {
                        $href[] = $link->getAttribute('href');
                    }
                }

                return $href;
            }
        }
    ?>
    
    <?php
    
        // Fetch the page and dump the value returned by getLinks().
        $urlrun = 'http://m4.cricbuzz.com/';
        // $stag and $etag are assigned here but not referenced by the rest of this script.
        list($stag, $etag) = array('span', '</span>');

        $tspider = new tagSpider();
        $tspider->fetchPage($urlrun);

        var_dump($linkarray = $tspider->getLinks());
    ?> 
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?