This tool will reach out to a web page, pull out the chunk of that page's source between a start string and an end string, extract all the hyperlinks from that chunk, and return them in Mike's Newsfeed format for immediate consumption.
Here's how it works:
This script is not elegant by any means. It will not work on some complex pages or on pages with CGI query strings (e.g. ?stuff=morestuff&somenumber=738902 in the URL). Let's give it a try...
exit(0); } else { $url = ereg_replace("http://","",$url); $urlcheck = urlencode($url); if (file_exists("./cache/$urlcheck")) { # file exists, check date $timecheck = time(); $timecheck = $timecheck - 0; if (filectime("./cache/$urlcheck") > $timecheck) { #file is new and cached $file = @join('', file("./cache/$urlcheck")); $cachecheck = 1; } } if (!$cachecheck) { $remoteurl = "http://".$url.""; # the following was modified on December 5th because the PHP file() quit retriving pages. $execute_fetch = exec("./fetch.pl $remoteurl"); # Fetch the file using PERL $file = @join('', file("./cache/temp.txt")); # Load the page data from a temp file created by the perl script. $localfilename = ereg_replace("http://","",$remoteurl); $localfilename = urlencode($localfilename); $filehandle = fopen("./cache/$localfilename","w+"); fwrite ($filehandle, $file); } # first fetch the page $page = $file; # $file = @join('', file("http://liquidtheater.com/")); # echo $file; # now filter out the bad stuff and keep the good stuff in reviewchunk ereg ("$start_string(.*)$end_string", $page, $linkchunk); # get rid of nasty array and go to nice simple var $linkchunk = $linkchunk[0]; # echo $linkchunk; # Filter out any non-link HTML characters $linkchunk = strip_tags($linkchunk, ""); #get back hyperlinks from function above $links = parse_hrefs($linkchunk); # start displaying an html file echo "