I am scraping HTML with DOM to create a custom RSS feed from an external website. I have all the values I need in an array called $jobs
. I can print those values like this:
function jobscrape($title, $link, $root, $description, $job_location) {
$jobs = array();
$html = file_get_contents($link);
$doc = new DOMDocument();
libxml_use_internal_errors(TRUE);
if(!empty($html)) {
$doc->loadHTML($html);
libxml_clear_errors(); // remove errors for yucky html
$xpath = new DOMXPath($doc);
$row = $xpath->query($job_location);
if ($row->length > 0) {
foreach ($row as $job) {
$jobs['title'] = $job->nodeValue;
$jobs['description'] = "This is a description";
$jobs['link'] = $job->getAttribute('href');
}
}
else { echo "row is less than 0";}
}
else { echo "this is empty";}
}
}
However, I need the array in this format, where each 'sub-array' is one iteration of the three variables (I'm just using three here as an example):
$entries = array(
array(
"title" => "My first test entry",
"description" => "This is the first article's description",
"link" => "http://leolabs.org/my-first-article-url"
),
array(
"title" => "My second test entry",
"description" => "This is the second article's description",
"link" => "http://leolabs.org/my-second-article-url"
),
array(
"title" => "My third test entry",
"description" => "This is the third article's description",
"link" => "http://leolabs.org/my-third-article-url"
)
);
UPDATE
After trying Durgesh's solution, this is my new code:
function jobscrape($title, $link, $root, $description, $job_location) {
header("Content-Type: application/rss+xml; charset=UTF-8");
$xml = new SimpleXMLElement('<rss/>');
$xml->addAttribute("version", "2.0");
$channel = $xml->addChild("channel");
$channel->addChild("title", $title);
$channel->addChild("link", $link);
$channel->addChild("description", "This is a description");
$channel->addChild("language", "en-us");
$html = file_get_contents($link);
$doc = new DOMDocument();
libxml_use_internal_errors(TRUE);
if(!empty($html)) {
$doc->loadHTML($html);
libxml_clear_errors(); // remove errors for yucky html
$xpath = new DOMXPath($doc);
$row = $xpath->query($job_location);
if ($row->length > 0) {
foreach ($row as $job) {
$jobs = array();
$entries = array();
$jobs['title'] = $job->nodeValue;
$jobs['description'] = "This is a description";
$jobs['link'] = $job->getAttribute('href');
array_push($entries,$jobs);
foreach ($entries as $entry) {
$item = $channel->addChild("item");
$item->addChild("title", $entry['title']);
$item->addChild("link", $entry['link']);
$item->addChild("description", $entry['description']);
}
echo $xml->asXML();
}
}
else { echo "row is less than 0";}
}
else {
echo "this is empty";
}
}
However, my RSS is formatting incorrectly, adding the following to every <item>
rather than just in the header:
<?xml version="1.0"?>
<rss version="2.0"><channel><title>Media Muppet</title><link>http://www.mediargh.com/jobs</link><description>This is a description</description><language>en-us</language>