dou9022 2013-10-09 23:56
浏览 47

DOM Xpath wordpress抓取内容

I have a plugin that I want to modify, but I'm stuck. Here is the PHP function:

/**
 * Scrape a listing page and turn the linked detail pages into post arrays.
 *
 * The listing page is fetched once; every link matching
 * //div[@class='boxtitle']//h2/a inside the requested window is then fetched
 * and parsed for a title, the video section and the resource links, which are
 * substituted into $template.
 *
 * @param string $keyword   Keyword substituted for {keyword} / {Keyword} in the template.
 * @param int    $num       Maximum number of posts to build.
 * @param int    $start     Absolute start offset; only its position within a block of 20 is used.
 * @param string $optional  Not used by this function.
 * @param string $comments  Not used by this function.
 * @param array  $options   Plugin options; reads 'wpr_eza_striplinks', 'wpr_eza_split', 'wpr_eza_splitlength'.
 * @param string $template  Post template containing {article}, {authortext}, {keyword}, {Keyword}, {title}, {url}.
 * @param string $ua        User agent string sent with cURL requests.
 * @param string $proxy     Proxy address, or "" for a direct connection.
 * @param string $proxytype "socks" selects SOCKS5; any other value leaves the cURL default.
 * @param string $proxyuser Proxy credentials passed to CURLOPT_PROXYUSERPWD, or empty for none.
 *
 * @return array List of posts (keys "unique", "title", "content"), or an array
 *               with a single "error" entry (keys "module", "reason", "message").
 */
function wpr_ezinemarkpost($keyword,$num,$start,$optional="",$comments="",$options,$template,$ua,$proxy,$proxytype,$proxyuser) {
    // Position of $start inside its block of 20 listing entries.
    $offset = (int) ($start - floor($start / 20) * 20);
    $search_url = "http://www.freewptube.com/demo4/";

    // Single fetch routine shared by the listing request and every detail
    // request. Returns the response body, or an array carrying an "error" entry.
    $fetch = function ($url) use ($ua, $proxy, $proxytype, $proxyuser) {
        if (function_exists('curl_init')) {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_USERAGENT, $ua);
            if ($proxy != "") {
                curl_setopt($ch, CURLOPT_PROXY, $proxy);
                if ($proxyuser) {
                    curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
                }
                if ($proxytype == "socks") {
                    curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
                }
            }
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_FAILONERROR, true);
            curl_setopt($ch, CURLOPT_AUTOREFERER, true);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_TIMEOUT, 45);
            $body = curl_exec($ch);
            if (!$body) {
                // Read the error details before closing, then always release the handle.
                $message = __("cURL Error Number $url","wprobot").curl_errno($ch).": ".curl_error($ch);
                curl_close($ch);
                return array("error" => array(
                    "module"  => "Article",
                    "reason"  => "cURL Error",
                    "message" => $message,
                ));
            }
            curl_close($ch);
            return $body;
        }

        // Fallback when the cURL extension is unavailable.
        $body = @file_get_contents($url);
        if (!$body) {
            return array("error" => array(
                "module"  => "Article",
                "reason"  => "cURL Error",
                "message" => __("cURL is not installed on this server!","wprobot"),
            ));
        }
        return $body;
    };

    $posts = array();

    $html = $fetch($search_url);
    if (is_array($html)) {
        return $html;
    }

    // Parse the listing page; @ silences libxml warnings on malformed HTML.
    $dom = new DOMDocument();
    @$dom->loadHTML($html);
    $xpath = new DOMXPath($dom);
    $links = $xpath->query("//div[@class='boxtitle']//h2/a");

    if ($links->length == 0) {
        $posts["error"]["module"] = "Article";
        $posts["error"]["reason"] = "No content";
        $posts["error"]["message"] = __("No (more) articles found. $search_url","wprobot");
        return $posts;
    }

    // Clamp the requested window to the number of links actually present.
    $end = min($offset + $num, $links->length);
    $x = 0;

    for ($i = $offset; $i < $end; $i++) {
        $link = $links->item($i);

        if (empty($link)) {
            $posts["error"]["module"] = "Article";
            $posts["error"]["reason"] = "No content";
            $posts["error"]["message"] = __("No (more) articles found. $search_url","wprobot");
            return $posts;
        }

        $target_url = $link->getAttribute('href');

        $html = $fetch($target_url);
        if (is_array($html)) {
            return $html;
        }

        $detail = new DOMDocument();
        @$detail->loadHTML($html);
        $detailXpath = new DOMXPath($detail);

        // Title: the node may be absent, so guard before reading textContent.
        $titleNode = $detailXpath->query("//div[@class='textsection']/h2")->item(0);
        $title = $titleNode ? $titleNode->textContent : "";
        if (empty($title)) {
            return array("error" => array(
                "module"  => "Article",
                "reason"  => "IncNum",
                "message" => __("Video content skipped. ","wprobot"),
            ));
        }

        // Body: saveXML(null) would serialize the whole document, so a missing
        // video section yields an empty body instead.
        $bodyNode = $detailXpath->query("//div[@id='screen']/div[@class='videosection']")->item(0);
        $string = $bodyNode ? $detail->saveXML($bodyNode) : "";
        if (isset($options['wpr_eza_striplinks']) && $options['wpr_eza_striplinks'] == 'yes') {
            $string = wpr_strip_selected_tags($string, array('a'));
        }
        // Start fresh for every article so earlier bodies do not leak into later posts.
        $articlebody = $string . ' ';

        // Resource box: concatenated markup of every matching link.
        $ressourcetext = "";
        foreach ($detailXpath->query("//div[@id='extras']//h4/a") as $resourceNode) {
            $ressourcetext .= $detail->saveXML($resourceNode);
        }

        $title = utf8_decode($title);

        // Split into pages
        if (isset($options['wpr_eza_split']) && $options['wpr_eza_split'] == "yes") {
            $articlebody = wordwrap($articlebody, $options['wpr_eza_splitlength'], "<!--nextpage-->");
        }

        $noqkeyword = str_replace('"', '', $keyword);

        $post = wpr_random_tags($template);
        $post = str_replace("{article}", $articlebody, $post);
        $post = str_replace("{authortext}", $ressourcetext, $post);
        $post = str_replace("{keyword}", $noqkeyword, $post);
        $post = str_replace("{Keyword}", ucwords($noqkeyword), $post);
        $post = str_replace("{title}", $title, $post);
        $post = str_replace("{url}", $target_url, $post);
        if (function_exists("wpr_rewrite_partial")) {
            $post = wpr_rewrite_partial($post, $options);
        }
        if (function_exists("wpr_translate_partial")) {
            $post = wpr_translate_partial($post);
        }

        $posts[$x]["unique"] = $target_url;
        $posts[$x]["title"] = $title;
        $posts[$x]["content"] = $post;
        $x++;
    }

    return $posts;
}

I have already made it grab the title and the embedded video, but I also want to grab the thumbnails located on the homepage. How can I make the thumbnails appear above the embedded video code? By the way, this is a WordPress plugin that I am modifying for my own use.

thanks

  • 写回答

0条回答 默认 最新

    报告相同问题?

    悬赏问题

    • ¥15 执行 virtuoso 命令后,界面没有,cadence 启动不起来
    • ¥50 comfyui下连接animatediff节点生成视频质量非常差的原因
    • ¥20 有关区间dp的问题求解
    • ¥15 多电路系统共用电源的串扰问题
    • ¥15 slam rangenet++配置
    • ¥15 有没有研究水声通信方面的帮我改俩matlab代码
    • ¥15 ubuntu子系统密码忘记
    • ¥15 保护模式-系统加载-段寄存器
    • ¥15 电脑桌面设定一个区域禁止鼠标操作
    • ¥15 求NPF226060磁芯的详细资料