#!/usr/local/bin/perl
# unbib
$version = "0.95BETA";
# by Jon Van Oast (jon@kzsu.stanford.edu)
# see also: http://kzsu.stanford.edu/uwi/about-unbib.html
#
# It is sloppy perl code that checks NCSA httpd(1.4)-made 'referer_log'
# files and builds a list of all/some local web pages and the URLs that could
# potentially point at them.
#
# A "data file" is used to store the list for the next usage (when it
# is then read in), and standard out is the lucky recipient of the
# HTML marked-up junk that will contain the back-links.
# Raw data (array) is one line per local page, laid out (in this version) as:
#   LOCAL_URL{count<ct1{FROM_URL1<ct2{FROM_URL2<...
# (the more times a FROM_URL shows up in the log, the higher its count --
# and the stronger its back-links [i.e., more valid].)
#
# Example usage: unbib unbib.dat /tmp/htlogs/referer_log > unbib.html

$IGNORE = 0;
$STRONG = 7;
$SHOW_COUNT = 0;

print "\n";

# $STORED is the first expected arg -- it is where to save for next time;
# $FILE is the second -- the referer_log to read.
($STORED,$FILE) = @ARGV;
die "Usage: unbib <data-file> <referer_log>\n" if (($STORED eq "") || ($FILE eq ""));

if (open(STORED,$STORED)) {
  @LINKS = <STORED>;
  chop(@LINKS);
  close(STORED);
} else {
  #no data file yet -- start with an empty list
  print "\n";
}

#fairly useful stats
$prior_to_count = @LINKS;
$to_count = $prior_to_count;
$from_count = 0;

#the referer_log used to be hard-coded here; now it comes in as the second arg.
#$FILE = "/tmp/htlogs/referer_log";
open(FILE,$FILE) || die "Bad referer_log $FILE!\n";
chop(@REF = <FILE>);
close(FILE);

@REF = grep(/ (\/uwi|\/jon)/,@REF);

foreach $REF (@REF) {
  #this first conditional is a quick weed-out to ignore any local docs you
  #don't care about.  since referer_log entries are of the form
  #REFERER_URL -> LOCAL_FILE, i just look for " /uwi" (or " /jon") in the
  #line to mean that we should process this one.
  #(the grep above does the weeding now, so the if is commented out.)
  #if (/ \/uwi/) {
  {
    ($from,$to) = split(/ \-> /,$REF);

    #these next two lines may be unnecessary, but i was having trouble
    #with url args (the +) causing probs.  the *next* pair of lines
    #should solve the problem, so expect these gone soon.
    #$to =~ tr/\+/\-/;
    #$from =~ tr/\+/\-/;

    #these two lines chop off anything after (and including) a '?' in
    #the url.  i did this cuz i dont care about args... cuz, well, i dont.
    #(they also chop at '<', since that is the record delimiter.)
    $to = $` if ($to =~ /\?|</);
    $from = $` if ($from =~ /\?|</);

    #file this from-url under its local page.  (this bookkeeping is a
    #sketch that assumes the record layout described up top.)
    $found = 0;
    for ($i = 0; $i <= $#LINKS; $i++) {
      next unless (index($LINKS[$i], $to."{") == 0);
      $found = 1;
      ($head,@rest) = split(/\</, $LINKS[$i]);
      ($orig,$count) = split(/\{/, $head);
      $count++;
      $hit = 0;
      for ($j = 0; $j <= $#rest; $j++) {
        ($ct,$url) = split(/\{/, $rest[$j]);
        if ($url eq $from) {
          $ct++;
          $rest[$j] = $ct."{".$url;
          $hit = 1;
          last;
        }
      }
      if (!$hit) {
        push(@rest, "1{".$from);
        $from_count++;
      }
      $LINKS[$i] = join("<", $orig."{".$count, @rest);
      last;
    }
    if (!$found) {
      push(@LINKS, $to."{1<1{".$from);
      $to_count++;
      $from_count++;
    }
  }
}

#save the updated list for next time
if (open(STORED,">".$STORED)) {
  $ok = 1;
} else {
  #(theoretically) if can't write to $STORED dump raw data to stdout
  print "*** UNABLE TO OPEN $STORED AS DATA FILE ***\n";
  print "*** DATA FILE SENT TO STANDARD OUT ***\n\n";
  print join("\n",@LINKS);
  print "\n\n";
}

#dont think this is necessary --
#$from_count++;

chop($now = `date`);

#makes the html part, to stdout
$c1 = $IGNORE + 1;
$c2 = $STRONG + 1;
print "<TITLE>Unbibliography</TITLE>
<BODY>
<H1>Unbibliography</H1>
<A HREF=\"http://kzsu.stanford.edu/uwi/about-unbib.html\">About</A> unbib $version.
<P>
File updated: $now<BR>
Count of pages linked: $to_count ($prior_to_count from before)<BR>
New from-links added: $from_count<BR>
Ignore &lt;= $c1 &gt; Weak (.) &lt;= $c2 &gt; Strong (#)<BR>
<HR>
<UL>
\n";

$total = 0;

foreach $link (@LINKS) {
  print STORED "$link\n" if ($ok);
  @EACH = split(/\</,$link);
  ($orig,$count) = split(/\{/,shift(@EACH));
  $total += @EACH;    #running tally of stored from-links
  print "<LI><A HREF=\"$orig\">$orig</A> ($count) - ";
  foreach $each (@EACH) {
    ($each_ct,$each) = split(/\{/,$each);
    if (($each_ct > $IGNORE) && ($each_ct < $STRONG)) {
      $char = '.';
    } else {
      $char = "#";
    }
    $char = "$each_ct:$char" if ($SHOW_COUNT);
    print "<A HREF=\"$each\">($char)</A> " if ($each_ct > $IGNORE);
  }
}

print "
</UL>
<HR>
$total unbibliographical references
</BODY>
\n";

close(STORED) if ($ok);
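
# For reference, a small worked example (the hostnames and page names
# below are made up, and the record layout is the one described at the
# top of this file -- adjust if your data file differs):
#
# a referer_log line looks like:
#
#   http://somewhere.example.com/links.html -> /uwi/index.html
#
# after a few runs the data file holds one line per local page, e.g.:
#
#   /uwi/index.html{3<2{http://somewhere.example.com/links.html<1{http://other.example.org/hotlist.html
#
# which the html part turns into a <LI> for /uwi/index.html followed by
# one (.) or (#) marker per from-url -- (#) once a from-url has shown up
# $STRONG or more times.
#
# Typical invocation:
#
#   unbib unbib.dat /tmp/htlogs/referer_log > unbib.html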