doc/data/cgi/search.cgi
John Fieber 38df6f51c6 Web pages, may I introduce you to CVS. CVS, these are the web pages.
Yes, this is supposed to be a new top level in the repository.

For the moment any changes to this area must be cleared by myself
or Jordan.  Once the kinks are worked out, the policy will probably
be relaxed.
1996-09-24 17:46:04 +00:00

257 lines
6.8 KiB
Perl
Executable file

#!/usr/bin/perl
#
# mail-archive.pl -- a CGI interface to a wais indexed maling list archive.
#
# Origin:
# Tony Sanders <sanders@bsdi.com>, Nov 1993
#
# Hacked beyond recognition by:
# John Fieber <jfieber@cs.smith.edu>, Nov 1994
#
# Format the mail messages a little nicer.
# Add code to check database status before searching.
# John Fieber <jfieber@indiana.eud>, Aug 1996
#
# Disclaimer:
# This is pretty ugly in places.
$server_root = '/usr/local/www';
$waisq = "/usr/local/www/bin/waisq";
$sourcepath = "/f/jfieber/index";
$hints = "/searchhints.html";
$myurl = "/cgi/search.cgi";
require "open2.pl";
require "cgi-lib.pl";
require "cgi-style.pl";
@months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');
sub do_wais {
&ReadParse;
@FORM_words = split(/ /, $in{"words"});
@FORM_source = split(/\0/, $in{"source"});
$FORM_max = $in{"max"};
$FORM_docnum = $in{"docnum"};
@AVAIL_source = &checksource(@FORM_source);
if ($#FORM_source != $#AVAIL_source) {
$j = 0;
$k = 0;
foreach $i (0 .. $#FORM_source) {
if ($FORM_source[$i] ne $AVAIL_source[$j]) {
$badsource[$k] = $FORM_source[$i];
$k++;
} else {
$j++;
}
}
$badsource = join("</em>, <em>", @badsource);
$badsource =~ s/,([^,]*)$/ and $1/;
if ($#FORM_source - $#AVAIL_source > 1) {
$availmsg = "<p>[The <em>$badsource</em> archives are currently unavailable.]</p>";
} else {
$availmsg = "<p>[The <em>$badsource</em> archive is currently unavailable.]</p>";
}
}
if ($#AVAIL_source < 0) {
$i = join("</em>, <em>", @FORM_source);
$i =~ s/,([^,]*)$/ and $1/;
print &html_header("Mail Archive Search") .
"<p>None of the archives you requested (<em>$i</em>) are available at " .
"this time.</p>\n";
print "<p>Please try again later, or return to the " .
"search page and select a different archive.</p>\n";
print &html_footer;
exit 0;
}
# Now we formulate the question to ask the server
foreach $i (@AVAIL_source) {
$w_sources .= "(:source-id\n :filename \"$i.src\"\n ) ";
}
$w_question = "\n (:question
:version 2
:seed-words \"@FORM_words\"
:relevant-documents
( )
:sourcepath \"$sourcepath/:\"
:sources
( $w_sources )
:maximum-results $FORM_max
:result-documents
( )
)\n";
#
# First case, no document number so this is a regular search
#
if (length($FORM_docnum) == 0) {
print &html_header("Search Results");
print $availmsg;
if ($#AVAIL_source > 0) {
$src = join("</em>, <em>", @AVAIL_source);
$src =~ s/,([^,]*)$/ and $1/;
print "<p>The archives <em>$src</em> contain ";
}
else {
print "The archive <em>@AVAIL_source</em> contains ";
}
print " the following items relevant to \`@FORM_words\':\n";
print "<OL>\n";
&open2(WAISOUT, WAISIN, $waisq, "-g");
print WAISIN $w_question;
local($hits, $score, $headline, $lines, $bytes, $type, $date, $file);
while (<WAISOUT>) {
/:score\s+(\d+)/ && ($score = $1);
/:filename "(.*)"/ && ($file = $1);
/:number-of-lines\s+(\d+)/ && ($lines = $1);
/:number-of-bytes\s+(\d+)/ && ($bytes = $1);
/:type "(.*)"/ && ($type = $1);
/:headline "(.*)"/ && ($headline = $1); # XXX
/:date "(\d+)"/ && ($date = $1, $hits++, &docdone);
}
print "</OL>\n";
print "<hr><p>Didn't get what you expected? ";
print "<a href=\"$hints\">Look here for searching hints</a>.";
if ($hits == 0) {
print "Nothing found.\n";
}
print &html_footer;
close(WAISOUT);
close(WAISIN);
}
#
# Second Case, a document number was supplied
#
else {
print &html_header("Search Results: Document");
&open2(WAISOUT, WAISIN, $waisq, "-g");
print WAISIN $w_question;
while (<WAISOUT>) {
s/search_word: boolean \`and\' scored//g;
$w_result .= $_;
}
close(WAISOUT);
close(WAISIN);
&open2(WAISOUT, WAISIN, $waisq, "-v", $FORM_docnum);
print WAISIN $w_result;
while (<WAISOUT>) {
$foo .= $_;
}
&printdoc($foo);
print &html_footer;
close(WAISOUT);
close(WAISIN);
}
}
# Given an array of sources (sans .src extension), this routine
# checks to see if they actually exist, and if they do, if they
# are currently available (ie, not being updated). It returns
# an array of sources that are actually available.
sub checksource {
local (@sources) = @_;
$j = 0;
foreach $i (@sources) {
if (stat("$sourcepath/$i.src")) {
if (!stat("$sourcepath/$i.update.lock")) {
$goodsources[$j] = $i;
$j++;
}
}
}
return(@goodsources);
}
# Print a mail message in HTML form
sub printdoc {
local ($doc) = @_;
($header, $body) = split(/\n\n/, $doc, 2);
$body = &htmlescape($body);
$header = &htmlescape($header);
$header =~ s/\n */ /g;
foreach $i (split(/\n/, $header)) {
($field, $data) = split(/ /, $i, 2);
$hdr{$field} = $data;
}
print "<BODY>\n<pre>\n";
if (length($hdr{'Date:'}) > 0) {
print "<strong>Date: </strong> $hdr{'Date:'}\n";
}
if (length($hdr{'From:'}) > 0) {
print "<strong>From: </strong> $hdr{'From:'}\n";
}
if (length($hdr{'To:'}) > 0) {
print "<strong>To: </strong> $hdr{'To:'}\n";
}
if (length($hdr{'Cc:'}) > 0) {
print "<strong>Cc: </strong> $hdr{'Cc:'}\n";
}
if (length($hdr{'Sender:'}) > 0) {
print "<strong>Sender: </strong> $hdr{'Sender:'}\n";
}
if (length($hdr{'Subject:'}) > 0) {
print "<strong>Subject: </strong> $hdr{'Subject:'}\n";
}
print "</pre>\n";
print "<hr>\n<pre>\n$body\n</pre>\n";
}
sub htmlescape {
local ($data) = @_;
$data =~ s/&/&amp;/g;
$data =~ s/</&lt;/g;
return $data;
}
sub docdone {
$file =~ s/\.src$//;
if ($headline =~ /Search produced no result/) {
print "<p>The archive <em>$file</em> contains no relevant documents.</p>"
} else {
$headline = &htmlescape($headline);
$headline =~ s/\\"/\"/g;
print "<li><A HREF=\"${myurl}?$ENV{'QUERY_STRING'}&docnum=$hits\">$headline</A>\n";
print "<br>";
# print "<input type=\"checkbox\" name=\"rf\" value=\"$docnum\">";
print "Score: <em>$score</em>; ";
$_ = $date;
/(..)(..)(..)/ && ($yr = $1 + 1900, $mo = $months[$2 - 1], $dy = $3);
print "Lines: <em>$lines</em>; ";
print "${dy}-${mo}-${yr}; ";
print "Archive: <em>$file</em>";
print "<p></p></li>\n";
}
$score = $headline = $lines = $bytes = $type = $date = $file = '';
}
$| = 1;
open (STDERR,"> /dev/null");
eval '&do_wais';