dongmu1390 2012-11-11 06:56 采纳率: 100%
浏览 58
已采纳

如何将PHP转换为XML输出

I have some PHP code that currently outputs HTML, and I need to modify it to output XML instead. Any ideas on how I should go about this? Is there an XML library that does the job directly, or do I have to create each node manually?

My php code is:

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

<style>
a {text-decoration:none; color:black;}
</style>
</head>

<body>


<?php

// Searches IMDb for the submitted title and renders up to five hits as an
// HTML table (image, title, year, director, rating, link to the movie).

// Read the form fields defensively: a request without POST data (or with an
// array value) must not raise "undefined index" notices or hand a non-string
// to urlencode().
$a = (isset($_POST["title"]) && is_string($_POST["title"])) ? $_POST["title"] : "";
$b = (isset($_POST["name"]) && is_string($_POST["name"])) ? $_POST["name"] : "";

$c="http://www.imdb.com/search/title?title=".urlencode($a)."&title_type=".urlencode($b);
$d=file_get_contents($c);
if ($d === false) {
    // The download failed: scan an empty page so every pattern below simply
    // finds nothing and the "No movies found!" branch is taken.
    $d = "";
}


// "No results." marker that the search page prints when nothing matched.
preg_match_all('/<div id="main">
(No results.)/', $d,$nore);


preg_match_all('#<img src="(.*)"#Us', $d, $img);//image

preg_match_all('/<a\s*href="\/title\/tt[0-9]*\/">((?:[a-z]*(?:&*[.]*)?\s*-*[a-z]*[0-9]*[^<])+)/i',$d,$tit);  //title

preg_match_all('/<span\sclass="year_type">\s*\(([\d]*)/',$d,$ye); //movie year

preg_match_all('#<span class="credit">
    Dir: (.*)
(?:    With:)?#Us',$d,$dir); //director

preg_match_all('/<span class="rating-rating"><span class="value">([\w]*.[\w]*)/i',$d,$rat); //rating

// NOTE(review): "[title]+" is a character class (any of t, i, l, e), not the
// literal word "title"; the pattern is kept unchanged here.
preg_match_all('/<a\shref="(\/title\/tt[0-9]*\/)"\s*[title]+/i',$d,$lin); //link


// Show at most five results.
$cnt = min(count($tit[1]), 5);

// The search terms come straight from the request, so escape them before
// echoing them back into the page (prevents reflected XSS).
$safeTitle = htmlspecialchars($a, ENT_QUOTES, 'UTF-8');
$safeType = htmlspecialchars($b, ENT_QUOTES, 'UTF-8');

echo "<center><b>Search Result</b></center>";
echo "<br/>";
echo "<center><b>\"$safeTitle\"of type\"$safeType\":</b></center>";
echo "<br/>";

$noResults = isset($nore[1][0]) && $nore[1][0] == "No results.";

if ($noResults || $cnt === 0) {
    // Covers the explicit marker, a failed download, and a page without titles.
    echo "<center><b>No movies found!</b></center>";
} else {
    echo "<center><table border=1><tr><td><center>Image</center></td><td><center>Title</center></td><td><center>Year</center></td><td><center>Director</center></td><td><center>Rating(10)</center></td><td><center>Link to Movie</center></td></tr>";
    for ($j = 0; $j < $cnt; $j++) {
        // Image matches are read with an offset of two, as in the original
        // lookup (presumably the first two <img> tags on the page are not
        // covers - verify against the current markup). The tag is rebuilt
        // from the captured URL because the pattern's full match stops at the
        // closing quote and therefore has no ">".
        $cover = isset($img[1][$j + 2]) ? '<img src="' . $img[1][$j + 2] . '">' : '';
        $title = isset($tit[1][$j]) ? $tit[1][$j] : '';
        $year = isset($ye[1][$j]) ? $ye[1][$j] : '';

        // A missing or empty director is shown as "N/A".
        $director = (isset($dir[1][$j]) && $dir[1][$j] != "") ? $dir[1][$j] : "N/A";

        // The page uses "-" for unrated titles; show "N/A" instead.
        $rating = isset($rat[1][$j]) ? $rat[1][$j] : '';
        if ($rating === "-") {
            $rating = "N/A";
        }

        $link = isset($lin[1][$j]) ? $lin[1][$j] : '';

        echo "<tr>";
        echo "<td>".$cover."</td>";
        echo "<td><center>".$title."</center></td>";
        echo "<td><center>".$year."</center></td>";
        echo "<td><center>".$director."</center></td>";
        echo "<td><center>".$rating."</center></td>";
        echo '<td><center><a style="text-decoration:underline; color:blue;" href="http://www.imdb.com'.$link.'">Details</a></center></td>';
        echo "</tr>";
    }

    echo "</table></center>";
}

?>

</body>
</html>

Expected XML output:

<results>
<result cover="http://ia.mediaimdb.com/images/M/MV5BMjMyOTM4MDMxNV5BMl5BanBnXkFtZTcwNjIyNzExOA@@._V1._SX54_CR0,0,54,74_.jpg"
        title="The Amazing Spider-Man(2012)" year="2012"
        director="Marc Webb" rating="7.5"
        details="http://www.imdb.com/title/tt0948470"/>

<result cover="http://ia.mediaimdb.com/images/M/MV5BMzk3MTE5MDU5NV5BMl5BanBnXkFtZTYwMjY3NTY3._V1._SX54_CR0,0,54,74_.jpg"
        title="Spider-Man(2002)" year="2002"
        director="Sam Raimi" rating="7.3"
        details="http://www.imdb.com/title/tt0145487"/>

<result cover="http://ia.mediaimdb.com/images/M/MV5BODUwMDc5Mzc5M15BMl5BanBnXkFtZTcwNDgzOTY0MQ@@._V1._SX54_CR0,0,54,74_.jpg"
        title="Spider-Man 3 (2007)" year="2007"
        director="Sam Raimi" rating="6.3"
        details="http://www.imdb.com/title/tt0413300"/>

<result cover="http://i.mediaimdb.com/images/SF1f0a42ee1aa08d477a576fbbf7562eed/realm/feature.gif"
        title="The Amazing Spider-Man 2 (2014)" year="2014"
        director="Sam Raimi" rating="6.3"
        details="http://www.imdb.com/title/tt1872181"/>

<result cover="http://ia.mediaimdb.com/images/M/MV5BMjE1ODcyODYxMl5BMl5BanBnXkFtZTcwNjA1NDE3MQ@@._V1._SX54_CR0,0,54,74_.jpg"
        title="Spider-Man 2 (2004)" year="2004"
        director="Sam Raimi" rating="7.5"
        details="http://www.imdb.com/title/tt0316654"/>
</results>
  • 写回答

1条回答 默认 最新

  • dso0139 2012-11-11 09:40
    关注

    First thing, you're parsing your html result with regex which is inefficient, unnecessary, and... well, you're answering to the cthulhu call!

    Second, parsing IMDB HTML to retrieve results, although valid, might be unnecessary. There are some neat 3rd party APIs that do the job for you, like http://imdbapi.org

    If you don't want to use any 3rd party API though, IMHO, you should, instead, parse the HTML using a DOM parser/manipulator, like DOMDocument, for instance, which is safer, better and, at the same time, can solve your HTML to XML problem.

    Here's the bit you asked (build XML and HTML from results):

    /**
     * Appends a <td> holding $text (as an escaped text node) to $row.
     *
     * A text node is used because the value argument of
     * DOMDocument::createElement() is not escaped, so a bare "&" would break it.
     */
    function _appendResultCell($doc, $row, $text)
    {
        $cell = $doc->createElement('td');
        $cell->appendChild($doc->createTextNode((string) $text));
        $row->appendChild($cell);
        return $cell;
    }

    /**
     * Builds an HTML table from the scraped results.
     *
     * Each result becomes one <tr> with five cells: title, year, rating, an
     * <img> built from 'img_src', and an <img> imported from the serialized
     * 'img' markup. The two image cells demonstrate two alternative
     * techniques; in real output only one of them is needed.
     *
     * @param array $results List of arrays with the optional keys 'title',
     *                       'year', 'rating', 'img_src' and 'img'.
     * @return DOMDocument Document whose root element is the <table>;
     *                     serialize it with saveHTML().
     */
    function resultsToHTML($results)
    {
        $doc = new DOMDocument();
        $table = $doc->createElement('table');
        // The table must be attached to the document or it is never output.
        $doc->appendChild($table);

        foreach ($results as $r) {
            $row = $doc->createElement('tr');
            // Rows belong to the table, not directly to the document.
            $table->appendChild($row);

            // Results may be sparse, so every key falls back to an empty string.
            _appendResultCell($doc, $row, isset($r['title']) ? $r['title'] : '');
            _appendResultCell($doc, $row, isset($r['year']) ? $r['year'] : '');
            _appendResultCell($doc, $row, isset($r['rating']) ? $r['rating'] : '');

            // Alternative 1: create a fresh <img> tag from the source URL.
            $imgTD = $doc->createElement('td');
            $img = $doc->createElement('img');
            $img->setAttribute('src', isset($r['img_src']) ? $r['img_src'] : '');
            $imgTD->appendChild($img);
            $row->appendChild($imgTD);

            // Alternative 2: import the <img> node from its serialized markup.
            $imgTD = $doc->createElement('td');
            if (isset($r['img']) && is_string($r['img']) && $r['img'] !== '') {
                $fauxDoc = new DOMDocument();
                // The markup is an HTML fragment (e.g. an unclosed <img>), so
                // it has to be parsed as HTML rather than XML.
                $fauxDoc->loadHTML($r['img']);
                $img = $fauxDoc->getElementsByTagName('img')->item(0);
                if (!is_null($img)) {
                    $importedImg = $doc->importNode($img, true);
                    $imgTD->appendChild($importedImg);
                }
            }
            $row->appendChild($imgTD);
        }
        return $doc;
    }
    

    /**
     * Builds the XML document for the scraped results.
     *
     * Produces a <results> root with one empty <result> element per entry,
     * carrying the attributes cover, title, year and rating. Attribute values
     * are escaped by setAttribute().
     *
     * @param array $results List of arrays with the optional keys 'img_src',
     *                       'title', 'year' and 'rating'.
     * @return DOMDocument Serialize it with saveXML().
     */
    function resultsToXML($results)
    {
        $doc = new DOMDocument();
        $root = $doc->createElement('results');

        // Attribute name => key of the result array it is read from.
        $attributeMap = array(
            'cover' => 'img_src',
            'title' => 'title',
            'year' => 'year',
            'rating' => 'rating',
        );

        foreach ($results as $r) {
            // Elements are created by the document; DOMElement has no
            // createElement() method.
            $element = $doc->createElement('result');
            foreach ($attributeMap as $attribute => $key) {
                // Results may be sparse, so a missing key becomes "".
                $element->setAttribute($attribute, isset($r[$key]) ? (string) $r[$key] : '');
            }
            $root->appendChild($element);
        }
        $doc->appendChild($root);
        return $doc;
    }
    

    To print them, you just need to do this:

    // Build the XML document from the results, then write its serialized
    // form to the output.
    $xml = resultsToXML($results);
    echo $xml->saveXML();
    

    The same goes for the HTML version: call resultsToHTML($results) and print the result of saveHTML().


    Here's a refactor of your code with DOMDocument, based on your post:

    <?php
    // Scrapes an IMDb title-search page with DOMDocument and collects one
    // entry per result row in $results. Each entry may carry the keys
    // 'title', 'year', 'rating', 'img' (serialized <img> markup) and
    // 'img_src'; a key is only set when the row provided a value for it.

    //Mock IMDB Link
    $a = 'The Amazing Spider-Man';
    $b = 'title';
    $c = "http://www.imdb.com/search/title?title=".urlencode($a)."&title_type=".urlencode($b);

    // HTML might be malformed so we want DOMDocument to be quiet
    libxml_use_internal_errors(true);
    //Initialize DOMDocument parser
    $doc = new DOMDocument();

    //Download and parse the search page
    $doc->loadHTMLFile($c);

    //initialize array to store results
    $results = array();

    // get table of results and extract a list of rows
    $listOfTables = $doc->getElementsByTagName('table');
    $rows = getResultRows($listOfTables);
    if (is_null($rows)) {
        // No results table on the page: there is nothing to iterate over.
        $rows = array();
    }

    $i = 0;
    //loop through all rows to retrieve information
    foreach ($rows as $row) {
        if ($title = getTitle($row)) {
            $results[$i]['title'] = $title;
        }
        if ($year = getYear($row)) {
            $results[$i]['year'] = $year;
        }
        if ($rating = getRating($row)) {
            $results[$i]['rating'] = $rating;
        }
        // getImage() serializes the node through its owning document, so the
        // document has to be passed along with the row.
        if ($img = getImage($row, $doc)) {
            $results[$i]['img'] = $img;
        }
        if ($src = getImageSrc($row)) {
            $results[$i]['img_src'] = $src;
        }
        ++$i;
    }

    //the first result can be a false positive due to the
    // results' table header, so we remove it
    if (isset($results[0])) {
        array_shift($results);
    }
    

    FUNCTIONS

    /**
     * Returns the <tr> elements of the first table whose class attribute is
     * exactly "results".
     *
     * @param mixed $listOfTables Iterable of table elements.
     * @return DOMNodeList|null The rows, or null when no such table exists.
     */
    function getResultRows($listOfTables)
    {
        foreach ($listOfTables as $candidate) {
            if ($candidate->getAttribute('class') !== 'results') {
                continue;
            }
            return $candidate->getElementsByTagName('tr');
        }
        return null;
    }
    
    /**
     * Returns the src attribute of the first <img> inside the row.
     *
     * @param DOMElement $row Table row to inspect.
     * @return string|false The src value, or false when the row has no image.
     */
    function getImageSrc($row)
    {
        $firstImage = $row->getElementsByTagName('img')->item(0);

        return is_null($firstImage) ? false : $firstImage->getAttribute('src');
    }
    
    /**
     * Returns the HTML markup of the first <img> inside the row.
     *
     * @param DOMElement  $row Table row to inspect.
     * @param DOMDocument $doc Document used to serialize the node.
     * @return string|false The serialized tag, or false when the row has no image.
     */
    function getImage($row, $doc)
    {
        $firstImage = $row->getElementsByTagName('img')->item(0);

        return is_null($firstImage) ? false : $doc->saveHTML($firstImage);
    }
    
    
    /**
     * Returns the text of the first link inside the row's "title" cell.
     *
     * @param DOMElement $row Table row to inspect.
     * @return string|false The link text, or false when the row has no title
     *                      cell or the cell contains no link.
     */
    function getTitle($row)
    {
        $tdInfo = getTDInfo($row->getElementsByTagName('td'));
        if (is_null($tdInfo)) {
            return false;
        }

        // getElementsByTagName() always returns a list, so the emptiness
        // check has to be made on the first item, which is null when the
        // cell contains no <a>.
        $firstLink = $tdInfo->getElementsByTagName('a')->item(0);

        return is_null($firstLink) ? false : $firstLink->nodeValue;
    }
    
    
    /**
     * Returns the text of the first span with class "year_type" inside the
     * row's "title" cell, with all parentheses removed.
     *
     * @param DOMElement $row Table row to inspect.
     * @return string|null The text, or null when no such span is found.
     */
    function getYear($row)
    {
        $tdInfo = getTDInfo($row->getElementsByTagName('td'));
        if (is_null($tdInfo)) {
            return null;
        }

        foreach ($tdInfo->getElementsByTagName('span') as $candidate) {
            if ($candidate->getAttribute('class') !== 'year_type') {
                continue;
            }
            // Remove both kinds of parentheses in a single pass.
            return str_replace(array('(', ')'), '', $candidate->nodeValue);
        }

        return null;
    }
    
    /**
     * Returns the text of the first span with class "rating-rating" inside
     * the row's "title" cell.
     *
     * @param DOMElement $row Table row to inspect.
     * @return string|null The text, or null when no such span is found.
     */
    function getRating($row)
    {
        $tdInfo = getTDInfo($row->getElementsByTagName('td'));
        if (is_null($tdInfo)) {
            return null;
        }

        foreach ($tdInfo->getElementsByTagName('span') as $candidate) {
            if ($candidate->getAttribute('class') !== 'rating-rating') {
                continue;
            }
            return $candidate->nodeValue;
        }

        return null;
    }
    
    
    /**
     * Returns the first cell whose class attribute equals "title".
     *
     * @param mixed $tds Iterable of <td> elements.
     * @return DOMElement|null The matching cell, or null when none matches.
     */
    function getTDInfo($tds)
    {
        foreach ($tds as $cell) {
            if ($cell->getAttribute('class') != 'title') {
                continue;
            }
            return $cell;
        }
        return null;
    }
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 如何让企业微信机器人实现消息汇总整合
  • ¥50 关于#ui#的问题:做yolov8的ui界面出现的问题
  • ¥15 如何用Python爬取各高校教师公开的教育和工作经历
  • ¥15 TLE9879QXA40 电机驱动
  • ¥20 对于工程问题的非线性数学模型进行线性化
  • ¥15 Mirare PLUS 进行密钥认证?(详解)
  • ¥15 物体双站RCS和其组成阵列后的双站RCS关系验证
  • ¥20 想用ollama做一个自己的AI数据库
  • ¥15 关于qualoth编辑及缝合服装领子的问题解决方案探寻
  • ¥15 请问怎么才能复现这样的图呀