dongzao9044 2013-04-10 15:15
浏览 41
已采纳

用于获取Javascript标记内的数据的HTML源代码

I'm trying to grab the HTML source of a web page and then parse it to get data inside a JavaScript tag.

The Javascript tag on that page is like this:

<script>
fullplayer("player", {
            src:"full_width_player.swf",
            wmode:"window"
        }, { 
    key: '#$0c4de1874473849ff8a',
    canvas: {
        backgroundGradient: "none",
        backgroundColor: "#000000"
    },
    audio: {
        provider: 'servstat',
        q: '128'
    },
    playlist: '/get.php?location=/audio/welcome.mp3',
    plugins: {
        youtube: { }
    }
});
</script>

I'm looking for a way to get the value inside playlist:'*****', which means the location of the audio track - /get.php?location=/audio/welcome.mp3

Is this possible with an HTML DOM parser, or do I need to grab everything inside the JavaScript tag and read the data as XML or something like that?

  • 写回答

1条回答 默认 最新

  • douyun1972 2013-04-10 15:20
    关注

    Try this regex:

     // Capture the quoted value that follows "playlist:".
     // The branch-reset group (?|...) keeps the value in group 1 for both quote
     // styles and requires the closing quote to be the same as the opening one
     // (the former class [\'|"] also accepted a literal "|" as a delimiter).
     preg_match_all('~playlist:\s*(?|\'([^\']*)\'|"([^"]*)")~i',$HTML,$Match);
     print_r($Match);
    

    output for your sample:

     Array
     (
       [0] => Array
        (
            [0] => playlist: '/get.php?location=/audio/welcome.mp3'
        )
    
       [1] => Array
        (
            [0] => /get.php?location=/audio/welcome.mp3
        )
    
     )  
    

    Fetching the page with cURL (a response-header parser plus a small wrapper class):

    /**
     * Parse raw HTTP response headers into a list of header blocks.
     *
     * Every line is split at its first ":" into a name and a value. The value is
     * stored exactly as it appears after the colon (leading whitespace included).
     * A line without a colon (for example a status line) is stored under its own
     * text as both key and value. An empty line (or a lone ":") starts the next
     * block, so a response that went through redirects yields one block per hop.
     * A name that occurs more than once inside a block is collected into a list.
     *
     * @param string|array  $response Raw header text, or an array of header lines
     *                                when $String is not 1.
     * @param callable|string $Run    Optional callback invoked as $Run($name, $value)
     *                                for every line, including separator lines.
     * @param int           $String   1 when $response is a single string to split.
     * @return array Block index => (header name => string|list of strings).
     */
    function HeaderProc($response,$Run="",$String=1/*[Is 1 IF Use for String Mode ]*/){
        if($String==1){
            // Accept CRLF as well as bare LF / CR line endings.
            $response=preg_split('/\r\n|\n|\r/',(string)$response);
        }
        $PartHeader=0;
        $out=array($PartHeader=>array());
        foreach($response as $val){
            // Lines passed in array mode may still carry their line terminator.
            $val=rtrim((string)$val,"\r\n");
            $colon=strpos($val,':');
            $flag=($colon!==false);
            if($flag){
                $name=(string)substr($val,0,$colon);
                $value=(string)substr($val,$colon+1);
            }else{
                $name=$val;
                $value='';
            }

            // A colon-less line is stored under its own text; a "name: value"
            // line is stored under its name.
            $stored=$flag ? $value : $name;
            $isSeparator=$flag ? ($name==='' && $value==='') : ($name==='');

            if($isSeparator){
                $PartHeader++;
            }elseif(!isset($out[$PartHeader][$name])){
                $out[$PartHeader][$name]=$stored;
            }elseif(is_array($out[$PartHeader][$name])){
                $out[$PartHeader][$name][]=$stored;
            }else{
                // Second occurrence: promote the single value to a list.
                $out[$PartHeader][$name]=array($out[$PartHeader][$name],$stored);
            }

            if(is_callable($Run)){
                $Run($name,$value);
            }
        }
        return $out;
    }
    
    /**
     * Small wrapper around the PHP cURL extension for GET and POST requests.
     *
     * Each request returns an array with the keys:
     *   'Header'     parsed response headers as produced by HeaderProc()
     *   'Body'       response body
     *   'HTTP_State' HTTP status code reported by cURL
     *   'URL'        effective URL after redirects
     *   'Error'      cURL error message, empty string when the transfer succeeded
     */
    class cURL {
        /** Extra request headers sent through CURLOPT_HTTPHEADER. */
        public $headers = array();
        /** Value passed to CURLOPT_USERAGENT (header value only, no "User-Agent:" prefix). */
        public $user_agent;
        /** Value passed to CURLOPT_ENCODING. */
        public $compression;
        /** Path of the cookie jar file, set by cookie(). */
        public $cookie_file;
        /** Proxy passed to CURLOPT_PROXY when non-empty. */
        public $proxy;
        /** Result of the most recent CookieAnalysis() call. */
        public $Cookie;
        /** Whether the cookie jar file is used for requests. */
        public $cookies = false;

        /**
         * Convert a Set-Cookie header value into array(name => value).
         *
         * Only the first "name=value" pair is extracted; attributes such as
         * path or expires are ignored. Returns an empty array when the string
         * holds no "name=value" pair.
         */
        public function CookieAnalysis($Cookie){
            $this->Cookie=array();
            // The padding guarantees a terminating ";" after the last pair.
            if(preg_match("~(.*?)=(.*?);~si",' '.$Cookie.'; ',$M)){
                $this->Cookie[trim($M[1])]=trim($M[2]);
            }
            return $this->Cookie;
        }

        /**
         * @param bool   $cookies     Enable the cookie jar file.
         * @param string $cookie      Cookie jar path, created when missing.
         * @param string $compression Value for CURLOPT_ENCODING.
         * @param string $proxy       Proxy address, empty for none.
         */
        public function __construct($cookies=false,$cookie='cookies.txt',$compression='gzip',$proxy='') {
            $this->headers[] = 'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
            $this->headers[] = 'Accept-Charset:ISO-8859-1,utf-8;q=0.7,*;q=0.3';
            $this->headers[] = 'Accept-Encoding:gzip,deflate,sdch';
            $this->headers[] = 'Accept-Language:en-US,en;q=0.8';
            $this->headers[] = 'Cache-Control:max-age=0';
            $this->headers[] = 'Connection:keep-alive';
            // CURLOPT_USERAGENT takes the header value only; a "User-Agent:" prefix
            // here would be sent as part of the value.
            $this->user_agent = 'Mozilla/5.0 (SepidarSoft [Organic Search Engine Crawler] Linux Edition) AppleWebKit/536.5 (KHTML, like Gecko) SepidarBrowser/1.0.100.52 Safari/536.5';
            $this->compression=$compression;
            $this->proxy=$proxy;
            $this->cookies=$cookies;
            if ($this->cookies == TRUE) $this->cookie($cookie);
        }

        /**
         * Select the cookie jar file, creating an empty one when it does not exist.
         * Calls error() (which terminates the script) when the file cannot be created.
         */
        public function cookie($cookie_file) {
            if (!file_exists($cookie_file)) {
                $handle = fopen($cookie_file,'w');
                if ($handle === false) {
                    $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
                }
                fclose($handle);
            }
            $this->cookie_file=$cookie_file;
        }

        /** Perform a GET request; see the class comment for the result layout. */
        public function GET($url) {
            return $this->request($url, false, null);
        }

        /** Perform a POST request sending $data as CURLOPT_POSTFIELDS. */
        public function POST($url,$data) {
            return $this->request($url, true, $data);
        }

        /** Shared transfer logic for GET() and POST(). */
        private function request($url, $isPost, $data) {
            $process = curl_init($url);
            curl_setopt($process, CURLOPT_HTTPHEADER, $this->headers);
            curl_setopt($process, CURLOPT_HEADER, 1);
            curl_setopt($process, CURLOPT_USERAGENT, $this->user_agent);
            if ($this->cookies == TRUE) {
                curl_setopt($process, CURLOPT_COOKIEFILE, $this->cookie_file);
                curl_setopt($process, CURLOPT_COOKIEJAR, $this->cookie_file);
            }
            curl_setopt($process, CURLOPT_ENCODING, $this->compression);
            curl_setopt($process, CURLOPT_TIMEOUT, 30);
            if ($this->proxy) curl_setopt($process, CURLOPT_PROXY, $this->proxy);
            if ($isPost) curl_setopt($process, CURLOPT_POSTFIELDS, $data);
            curl_setopt($process, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($process, CURLOPT_FOLLOWLOCATION, 1);
            if ($isPost) curl_setopt($process, CURLOPT_POST, 1);

            $response = curl_exec($process);
            $result = array();
            $result['Error'] = '';
            if ($response === false) {
                // Transfer failed: report the reason and continue with an empty response.
                $result['Error'] = curl_error($process);
                $response = '';
            }

            $header_size = curl_getinfo($process,CURLINFO_HEADER_SIZE);
            $result['Header'] = HeaderProc((string)substr($response, 0, $header_size),'',1);
            foreach($result['Header'] as $HeaderK=>$HeaderP){
                // NOTE: only repeated Set-Cookie headers (a list) are converted; a
                // single Set-Cookie header is left as its raw string, as before.
                if(!isset($HeaderP['Set-Cookie']) || !is_array($HeaderP['Set-Cookie']))continue;
                foreach($HeaderP['Set-Cookie'] as $key=>$val){
                    $result['Header'][$HeaderK]['Set-Cookie'][$key]=$this->CookieAnalysis($val);
                }
            }
            $result['Body'] = (string)substr($response, $header_size);
            $result['HTTP_State'] = curl_getinfo($process,CURLINFO_HTTP_CODE);
            $result['URL'] = curl_getinfo($process,CURLINFO_EFFECTIVE_URL);
            curl_close($process);
            return $result;
        }

        /** Print an HTML error box and terminate the script. */
        public function error($error) {
            echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
            die;
        }
    }
    

    sample:

      // Fetch the page, then pull the quoted playlist value out of the body.
      $cc = new cURL(); 
      $Data=$cc->GET('http://www.yahoo.com');
      // The branch-reset group (?|...) keeps the value in group 1 for both quote
      // styles and requires matching opening and closing quotes.
      preg_match_all('~playlist:\s*(?|\'([^\']*)\'|"([^"]*)")~i',$Data['Body'],$Match);
      print_r($Match);
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 phython读取excel表格报错 ^7个 SyntaxError: invalid syntax 语句报错
  • ¥20 @microsoft/fetch-event-source 流式响应问题
  • ¥15 ogg dd trandata 报错
  • ¥15 高缺失率数据如何选择填充方式
  • ¥50 potsgresql15备份问题
  • ¥15 Mac系统vs code使用phpstudy如何配置debug来调试php
  • ¥15 目前主流的音乐软件,像网易云音乐,QQ音乐他们的前端和后台部分是用的什么技术实现的?求解!
  • ¥60 pb数据库修改与连接
  • ¥15 spss统计中二分类变量和有序变量的相关性分析可以用kendall相关分析吗?
  • ¥15 拟通过pc下指令到安卓系统,如果追求响应速度,尽可能无延迟,是不是用安卓模拟器会优于实体的安卓手机?如果是,可以快多少毫秒?