Principle: Fetch the HTML source of a web page and use a regular expression to extract every link (href attribute) it contains.
<?php
/********** Qiushuiwuhen (2002-5-20) ***********/
/*
 * Link lister: downloads one page, pulls every href attribute out of its
 * HTML with a regular expression, and prints each link as an absolute URL
 * in the form "LinkN:url<br/>".
 *
 * The page address is taken from $URL when an including script has already
 * set it; otherwise the default below is used.
 */

/**
 * Returns the scheme-and-host part of a URL, without a trailing slash.
 *
 * Example: "http://example.com/dir/page.html" gives "http://example.com".
 *
 * @param string $url Absolute URL of the page being scanned.
 * @return string Everything before the first "/", "?" or "#" that follows
 *                the "://" separator; the whole URL when there is none.
 */
function linkSiteRoot($url)
{
    $schemeEnd = strpos($url, '://');
    $hostStart = ($schemeEnd === false) ? 0 : $schemeEnd + 3;

    // Length of the host[:port] run, i.e. up to the first path/query/fragment delimiter.
    $hostLength = strcspn($url, '/?#', $hostStart);

    return substr($url, 0, $hostStart + $hostLength);
}

/**
 * Returns the directory of a URL, always ending in "/".
 *
 * Example: "http://example.com/dir/page.html?x=1" gives "http://example.com/dir/".
 *
 * @param string $url Absolute URL of the page being scanned.
 * @return string Site root plus the path up to and including its last "/".
 */
function linkBaseDirectory($url)
{
    $site = linkSiteRoot($url);
    $rest = (string) substr($url, strlen($site));

    // Slashes inside the query string or fragment are not directory separators.
    $path = (string) substr($rest, 0, strcspn($rest, '?#'));

    $lastSlash = strrpos($path, '/');
    if ($lastSlash === false) {
        // No path at all (e.g. "http://example.com"): the directory is the site root.
        return $site . '/';
    }

    return $site . substr($path, 0, $lastSlash + 1);
}

/**
 * Extracts the value of every href attribute found in an HTML string.
 *
 * Matching is case-insensitive and accepts double-quoted, single-quoted and
 * unquoted values. HTML entities in the value (such as "&amp;") are decoded.
 *
 * @param string $html Raw HTML.
 * @return array List of non-empty href values, in document order.
 */
function extractHrefs($html)
{
    // (?| ... ) is a branch-reset group, so the value is always capture group 1.
    // The look-behind keeps attributes such as "data-href" from matching.
    $pattern = '~(?<![\w-])href\s*=\s*(?|"([^"]+)"|\'([^\']+)\'|([^\s"\'>]+))~i';

    if (!preg_match_all($pattern, $html, $matches)) {
        return [];
    }

    $hrefs = [];
    foreach ($matches[1] as $raw) {
        $href = trim(html_entity_decode($raw, ENT_QUOTES, 'UTF-8'));
        if ($href !== '') {
            $hrefs[] = $href;
        }
    }

    return $hrefs;
}

/**
 * Turns one href value into an absolute URL.
 *
 * "." and ".." path segments are not normalised; the href is simply appended.
 *
 * @param string $href Link target as written in the page.
 * @param string $site Site root without trailing slash (see linkSiteRoot()).
 * @param string $base Directory of the page with trailing slash (see linkBaseDirectory()).
 * @return string The href unchanged when it already carries a scheme,
 *                otherwise the href prefixed with the scheme, site or base.
 */
function resolveLink($href, $site, $base)
{
    // Already absolute: "http://...", but also "mailto:", "javascript:", "tel:" and so on.
    if (preg_match('~^[a-z][a-z0-9+.\-]*:~i', $href)) {
        return $href;
    }

    // Protocol-relative ("//host/path"): reuse the scheme of the scanned page.
    if (strncmp($href, '//', 2) === 0) {
        $colon = strpos($site, ':');
        return ($colon === false) ? $href : substr($site, 0, $colon + 1) . $href;
    }

    // Root-relative: resolved against the site root.
    if (strncmp($href, '/', 1) === 0) {
        return $site . $href;
    }

    // Anything else is relative to the directory of the current page.
    return $base . $href;
}

if (empty($URL)) {
    $URL = "http://clin003.com/"; // default page to scan
}

$site = linkSiteRoot($URL);      // scheme + host
$base = linkBaseDirectory($URL); // directory containing the page

$contents = file_get_contents($URL);
if ($contents === false) {
    // The page could not be fetched; report it instead of looping on an invalid handle.
    echo "Unable to read " . htmlspecialchars($URL, ENT_QUOTES, 'UTF-8') . "<br/>";
} else {
    foreach (extractHrefs($contents) as $i => $href) {
        $link = resolveLink($href, $site, $base);
        // The link text comes from a remote page, so escape it before printing it as HTML.
        echo "Link" . ($i + 1) . ":" . htmlspecialchars($link, ENT_QUOTES, 'UTF-8') . "<br/>";
    }
}
?>