Here's a small function I wrote that uses DOMDocument to collect all the links on a page. Hopefully it will be of use to others.
<?php
/**
* @author Jay Gilford
*/
/**
 * get_links()
 *
 * Loads the HTML document at $url into a DOMDocument and collects every <a>
 * element found in it, in document order.
 *
 * Real-world pages are rarely valid HTML, so libxml parse diagnostics are
 * captured internally while the document is loaded instead of being emitted
 * as PHP warnings. The caller's libxml error-handling setting is restored
 * before the function returns.
 *
 * @param string $url Path or URL of the HTML document to read
 * @return array List of array('url' => ..., 'text' => ...) entries, one per
 *               <a> element. 'url' is the href attribute (an empty string
 *               when the anchor has none) and 'text' is the element's
 *               nodeValue. The list is empty when the document cannot be
 *               loaded or contains no <a> elements.
 */
function get_links($url) {
    // Buffer libxml diagnostics instead of raising an E_WARNING per markup
    // problem; remember the caller's setting so it can be put back afterwards.
    $previous = libxml_use_internal_errors(true);

    try {
        // DOM document that will hold the parsed page structure
        $xml = new DOMDocument();

        // Nothing to walk if the document could not be loaded
        if (!$xml->loadHTMLFile($url)) {
            return [];
        }

        // Collect the target and text of every <a> element
        $links = [];
        foreach ($xml->getElementsByTagName('a') as $link) {
            $links[] = [
                'url' => $link->getAttribute('href'),
                'text' => $link->nodeValue,
            ];
        }

        return $links;
    } finally {
        // If the caller was already buffering libxml errors, leave the buffer
        // alone so they can still inspect it; otherwise drop what we collected.
        if (!$previous) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }
}
?>