#!/usr/bin/perl -w
#
# CGI script that scrapes a forum index page and re-publishes its topic
# links as an RSS feed.
#
# Request parameters:
#   title        - channel title, echoed into the feed
#   url          - address of the forum page to scrape (http/https only)
#   description  - channel description, echoed into the feed
#
# The fetched page is kept in ./cache/ and re-used while it is younger than
# $CACHE_TTL_SECONDS.
#
# NOTE(review): the copy of this script under review had every "<...>" span
# stripped and HTML entities decoded.  The RSS markup, the <FILE> read, the
# "Sticky:" substitution and the &amp; handling below are reconstructions of
# what was lost; each one is marked where it occurs and should be confirmed
# against a pristine copy if one exists.

use strict;
use warnings;

use CGI qw(:standard);
use LWP::Simple;
use HTML::TokeParser;

my $MAX_RESULTS       = 15;           # number of items to emit
my $CACHE_DIR         = './cache';
my $CACHE_TTL_SECONDS = 30 * 60;      # the original compared against
                                      # "time() - 0", which never used the cache

# A link is a topic candidate when its href matches any of these ...
my @TOPIC_LINK_PATTERNS = (
    qr/showMessage\?topicID/,         # EZboard message check
    qr/showthread\.php\?s/,           # vBulletin message
    qr/viewtopic\.php/,               # Safehouse
    qr/showtopic=/,                   # Berserker.org
    qr/ShowPost\.aspx/,               # bards
);

# ... and is thrown away again when its href matches any of these ...
my @REJECTED_LINK_PATTERNS = (
    qr/pagenumber/,                   # No page selector links
    qr/start/,                        # safehouse check
    qr/jump=true/,                    # EQ Live check
    qr/getlastpost/,                  # Berserker Check
    qr/st=/,                          # Berserker Check
    qr/PageIndex/,                    # Bards Check
    qr/page=/,                        # EQ Boards Check
);

# ... or when its link text matches any of these.
my @REJECTED_TEXT_PATTERNS = (
    qr/Sticky/,                       # No sticky message links
    qr/Go to last post/,              # not a link to last post
    qr/Last Page/,                    # Graffes
    qr/Last page/,                    # EQClerics
    qr/About Healers United/,
    qr/Join Healers United/,
    qr/View latest post/,
    qr/IMG/,                          # Bards Check
);

# Return the named CGI parameter, or '' when it was not supplied.
sub param_or_empty {
    my ($name) = @_;
    my $value = param($name);
    return defined $value ? $value : '';
}

# Escape text for use as XML element content.  An already-encoded "&amp;" is
# decoded first so that it does not end up double-encoded.
sub xml_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&amp;/&/ig;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# True when $url is an http(s) URL with a host part.  The url parameter is
# untrusted, so other schemes LWP understands (file:, ...) are refused.
sub is_fetchable_url {
    my ($url) = @_;
    return $url =~ m{\Ahttps?://[^/\s]}i ? 1 : 0;
}

# Build the cache file name for $url: drop "http://" and flatten every "/"
# to "-" so the name cannot leave $CACHE_DIR.
sub cache_file_for {
    my ($url) = @_;
    my $name = $url;
    $name =~ s/http:\/\///ig;
    $name =~ s/\//-/g;
    return "$CACHE_DIR/$name";
}

# Everything in $url up to and including its last "/", used to absolutise
# relative links.  Returns '' when $url contains no "/".
# (The original split on "/" and re-joined all but the last piece, which
# also dropped the final directory of a URL ending in "/".)
sub base_path_of {
    my ($url) = @_;
    return $url =~ m{\A(.*/)}s ? $1 : '';
}

# True when $file exists and was modified within the last
# $CACHE_TTL_SECONDS seconds.
sub cache_is_fresh {
    my ($file) = @_;
    my $mtime = (stat $file)[9];
    return 0 unless defined $mtime;
    return $mtime >= time() - $CACHE_TTL_SECONDS ? 1 : 0;
}

# Download $url into $file, then rewrite the file so that links next to
# "Sticky:" markers are no longer seen as anchors by the parser.
# Failures are reported with warn() and otherwise ignored, so that a stale
# cache file (if any) can still be served.
sub refresh_cache {
    my ($url, $file) = @_;

    my $status = getstore($url, $file);
    if (!is_success($status)) {
        warn "fetching $url failed with HTTP status $status\n";
        return;
    }

    my $in;
    if (!open $in, '<', $file) {
        warn "cannot read $file: $!\n";
        return;
    }
    my $contents = do { local $/; <$in> };    # slurp without clobbering $/
    close $in;
    $contents = '' unless defined $contents;

    # hack to break URLs that are near Sticky topics
    # NOTE(review): only "s/Sticky:.*?" survived in the reviewed copy.  The
    # tail of the pattern, the replacement and the flags are assumed here:
    # cut from "Sticky:" through the start of the next "<a " on the same
    # line, which leaves that link without an opening anchor tag.  Confirm.
    $contents =~ s/Sticky:.*?<a\s//ig;

    my $out;
    if (!open $out, '>', $file) {
        warn "cannot rewrite $file: $!\n";
        return;
    }
    print {$out} $contents or warn "cannot write $file: $!\n";
    close $out or warn "cannot close $file: $!\n";
    return;
}

# Decide whether an anchor (href $link, link text $text) is a forum topic:
# it must look like a topic link for one of the supported boards and must
# not be a sticky, pager, "last post" or similar navigation link.
sub is_topic_link {
    my ($link, $text) = @_;

    my $is_candidate = grep { $link =~ $_ } @TOPIC_LINK_PATTERNS;

    # EQ Forums: message links, except those whose text carries AM/PM
    $is_candidate ||=
        ( $link =~ /message\.id/ && $text !~ /AM/ && $text !~ /PM/ );

    return 0 unless $is_candidate;
    return 0 if grep { $link =~ $_ } @REJECTED_LINK_PATTERNS;
    return 0 if grep { $text =~ $_ } @REJECTED_TEXT_PATTERNS;
    return 1;
}

# Reduce link text to a plain item title.
sub clean_title {
    my ($text) = @_;
    $text =~ s/[^A-Za-z0-9 ,\.:'\/\\-]//g;    # Parse out weird characters
    $text =~ s/Last Post //ig;                # Remove "last post" from vBulletin
    return $text;
}

# Strip session ids from $link and make it absolute using $base_path.
# The result is NOT yet XML-escaped.
sub clean_link {
    my ($link, $base_path) = @_;

    # NOTE(review): the reviewed copy showed "s/&/&/" twice (decode, then
    # encode), i.e. the entity text was lost.  Here the link is decoded
    # first, the session-id patterns run on the decoded form exactly as
    # they appear in that copy, and encoding is left to xml_escape() at
    # output time.
    $link =~ s/&amp;/&/ig;              # decode &
    $link =~ s/&sid=.*//ig;             # remove session IDs from Safehouse
    $link =~ s/\?s=(.*)&/\?/ig;         # remove session IDs from EQClerics
                                        # (greedy: cuts up to the LAST "&")

    # Rebuild the link path if it has none (vBulletin).  "https:" is
    # accepted too; the original only recognised "http:".
    $link = $base_path . $link if $link !~ /https?:/;

    # EQ Live / EQ2 links come out with a doubled path segment.
    $link =~ s/\/eq\/\/eq/\/eq/ig     if $link =~ /eqforums\.station/;
    $link =~ s/\/eq2\/\/eq2/\/eq2/ig  if $link =~ /eqiiforums\.station/;

    return $link;
}

# Walk every <a> in $parser and print an RSS <item> for each topic link,
# stopping after $MAX_RESULTS items.
sub emit_items {
    my ($parser, $base_path) = @_;

    my $count = 0;
    while (my $token = $parser->get_tag("a")) {
        my $link = $token->[1]{href} || "-";
        my $text = $parser->get_trimmed_text("/a");

        next unless is_topic_link($link, $text);

        my $item_title = clean_title($text);
        my $item_link  = xml_escape(clean_link($link, $base_path));

        # NOTE(review): element names reconstructed; only the "\n"s and the
        # two variables survived in the reviewed copy.
        print "<item>\n<title>$item_title</title>\n<link>$item_link</link>\n</item>\n\n";

        # The original tested "$count <= $max", which let 16 items through.
        last if ++$count >= $MAX_RESULTS;
    }
    return;
}

sub main {
    # NOTE(review): header() without arguments sends CGI.pm's default
    # Content-Type; left as in the original, but a feed would normally be
    # served as XML.
    print header;

    my $title       = param_or_empty("title");
    my $url         = param_or_empty("url");
    my $description = param_or_empty("description");

    # begin RSS output
    # NOTE(review): markup reconstructed; the RSS version of the original
    # is unknown.
    print qq{<?xml version="1.0"?>\n},
          qq{<rss version="2.0">\n},
          qq{<channel>\n},
          '<title>',       xml_escape($title),       "</title>\n",
          '<link>',        xml_escape($url),         "</link>\n",
          '<description>', xml_escape($description), "</description>\n";

    # Fetch the page unless a recent enough copy is already cached.
    my $source;
    if (is_fetchable_url($url)) {
        my $file = cache_file_for($url);
        refresh_cache($url, $file) unless cache_is_fresh($file);
        $source = $file;
    }
    else {
        warn "refusing to fetch url parameter '$url'\n";
    }

    # A file named on the command line overrides the cache file, as in the
    # original "shift||$file".  This is ignored under CGI, where the server
    # may fill @ARGV from the query string.
    if (!$ENV{GATEWAY_INTERFACE} && @ARGV) {
        my $override = shift @ARGV;
        $source = $override if $override && -e $override;
    }

    # Use HTML::TokeParser to parse HTML and pull out titles and URLs.
    # new() returns undef when the file cannot be opened; in that case the
    # feed simply has no items.
    my $parser = defined $source ? HTML::TokeParser->new($source) : undef;
    if ($parser) {
        emit_items($parser, base_path_of($url));
    }
    elsif (defined $source) {
        warn "cannot parse $source: $!\n";
    }

    # End of the RSS feed
    print "</channel>\n</rss>\n";
    return;
}

main();
exit(0);