#!/usr/bin/perl # ----------------------Copyright Information----------------------------------------------- # Copyright (c) 2005, Rami Chowdhury & Robert Bradley # All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, are # permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, this # list of conditions and the following disclaimer in the documentation and/or # other materials provided with the distribution. # * The names of the authors may not be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT # SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCI- # DENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSI- # NESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #------------------------------------------------------------------------------------------------ # IF YOU WANT TO CUSTOMIZE YOUR SCRIPT PLEASE SEE THE GLOBAL CONFIGURATION # SECTION BELOW use warnings; use strict; use File::Find; ######################################################### # SCRIPT GLOBAL CONFIGURATION. PLEASE MODIFY THESE TO FIT YOUR NEEDS. 
# NO TRAILING / IS NEEDED

# This is the 'root' directory of your website - calls to http://www.example.com are served from here and its
# subdirectories
my $WEBROOT = "/home/USER/public_html";

# These are the URLs that you use to access your own site from the Web - http://www.example.com, for instance
# All slashes have to have a backslash placed before them, to make sure the regular expressions that will use
# them don't choke.
my @SITEURLS = (
    "http:\/\/www.example.org",
    "http:\/\/www.example.com"
);

# These specify how verbose the script's screen output will be (0 for none, 1 for broken links, 2 for all),
# and whether or not it will log data to a file (0 for no log, otherwise specify filename inside quotes)
my $VERBOSE = 1;
my $LOG = 0;#"/home/USER/links.log";
my $DIRS = 0;
# END CONFIGURATION
#########################################################

#######################################################
# SUBROUTINES AND STUFF

# Stores the main data
my @AllLinks;
my @CheckedLinks;
my @BrokenLinks;

# Puts the URLs' regular expression in a variable
# foreach my $URL (@SITEURLS) {
#     $URL =~ s|/|\/|gi;
# }

# getRelPath($path)
#
# Turns a filesystem path into a path relative to $WEBROOT.
#
#   $path - a directory path, normally one that starts with $WEBROOT.
#
# Returns the path with the first (case-insensitive) occurrence of $WEBROOT
# removed, and with exactly one "/" guaranteed at the start and at the end.
# The web root itself therefore comes back as "/".
sub getRelPath {
    my ($string) = @_;

    # \Q...\E makes every character of the configured root literal, so a "."
    # or "(" in $WEBROOT can neither match unrelated text nor break the pattern.
    $string =~ s/\Q$WEBROOT\E//i;

    $string = "/$string" unless $string =~ /^\//;
    $string = "$string/" unless $string =~ /\/$/;

    return $string;
}

# NOTE(review): everything from "sub parsePage" to the end of this span is reproduced
# verbatim. The text appears damaged (an empty "while ()" condition and a substitution
# that runs straight into the --help text), so it is left untouched rather than guessed
# at - restore it from a pristine copy of the script.
sub parsePage { my ($pageName,$relPath) = @_; my @links; open(DATA,"< $pageName") or die("Unable to open $relPath$pageName"); my $lineNumber = 1; while () { my $line = $_; while ($line =~ s/Parent --log="FILE" Sets it to write results to FILE Any or all of these can be used in conjunction, though --help will stop program execution. Have fun! ENDHELP exit; } else { print "Invalid option. 
Use -h or --help for usage instructions.\n"; die("Invalid option specified"); } } }

# Walk $WEBROOT recursively with File::Find and hand every page to parsePage.
find(
    sub {
        # Skip names that start with a dot.
        return if /^\./;

        # Only names containing .htm, .php or .shtm are treated as pages.
        return unless /\.(htm|php|shtm)/;

        # A directory can carry a matching name too; only plain files are parsed.
        return unless -f $_;

        parsePage($_, getRelPath($File::Find::dir));
        print ">";    # progress marker, one per parsed page
    },
    $WEBROOT
);

# Check every collected link.
# NOTE(review): checkLink is defined outside this span; it is presumably what
# fills @CheckedLinks and @BrokenLinks - confirm against its definition.
foreach my $link (@AllLinks) {
    checkLink(@$link);
}

# describeLinkRecord($record)
#
# Formats one link record (an array reference with at least four elements)
# as the single report line used on screen and in the log file.
# Returns the line without a trailing newline.
sub describeLinkRecord {
    my ($record) = @_;
    my @field = @$record;
    return "$field[0] link at line $field[1], in $field[2]$field[3]";
}

# Summary figures for the simple report.
my $numOfLinks = @CheckedLinks;
my $numBroken  = @BrokenLinks;

# Guard against a site with no links at all: dividing by zero would kill the
# script before any report is produced.
my $percentBroken = $numOfLinks ? ($numBroken / $numOfLinks) * 100 : 0;

print "\n--$numOfLinks links in total, of which $numBroken ($percentBroken%) are broken--\n";

if ($VERBOSE > 0) {
    print "Broken links: ($numBroken)\n";

    # Plain print, not printf: the record text is data, and a "%" inside a
    # file name or link must not be interpreted as a format directive.
    foreach my $record (@BrokenLinks) {
        print describeLinkRecord($record), "\n";
    }

    if ($VERBOSE > 1) {
        print "All links: ($numOfLinks)\n";
        foreach my $record (@CheckedLinks) {
            print describeLinkRecord($record), "\n";
        }
    }
}

# Write the same report to the log file when one is configured
# ($LOG is 0 when logging is switched off).
if ($LOG) {
    # Three-argument open with a lexical handle: the file name can never be
    # mistaken for an open mode, and the OS error is reported on failure.
    open(my $logHandle, '>', $LOG) or die("Unable to open log file '$LOG': $!");

    my $now = localtime time;
    print {$logHandle} "Link Checker Activity Log - Program executed on $now\n";
    print {$logHandle} "\n--$numOfLinks links in total, of which $numBroken ($percentBroken%) are broken--\n";

    print {$logHandle} "Broken links:\n";
    foreach my $record (@BrokenLinks) {
        print {$logHandle} "\t", describeLinkRecord($record), "\n";
    }
    print {$logHandle} "\n";

    print {$logHandle} "All checked links:\n";
    foreach my $record (@CheckedLinks) {
        print {$logHandle} "\t", describeLinkRecord($record), "\n";
    }

    # Buffered write errors (full disk, quota) only surface at close time.
    close($logHandle) or die("Unable to finish writing log file '$LOG': $!");
}