Script to prune bogus and otherwise worthless
gallery entries from the gallery.db file.
This commit is contained in:
parent
ee10650ab7
commit
932548a791
Notes:
svn2git
2020-12-08 03:00:23 +00:00
svn path=/www/; revision=3981
1 changed files with 81 additions and 0 deletions
81
en/gallery/prune.pl
Executable file
81
en/gallery/prune.pl
Executable file
|
@ -0,0 +1,81 @@
|
|||
#!/usr/local/bin/perl
|
||||
#
|
||||
# prune.pl - Script to prune gallery.db file of malformed
|
||||
# and otherwise worthless entries. And while we're at it,
|
||||
# count the damned things.
|
||||
#
|
||||
# Syntax: prune.pl galleryfile.db outfile.db
|
||||
#
|
||||
# yymmdd own comment
|
||||
# ------ --- ------------------------------------------------
|
||||
# 981229 nsj First pass
|
||||
|
||||
# Setup
|
||||
# Which sort and uniq programs are we using?
|
||||
$sort = "/usr/bin/sort";
|
||||
$uniq = "/usr/bin/uniq";
|
||||
|
||||
# Open a pipe from a unix sort of the db file (case insensitive)
|
||||
if (-f $ARGV[0])
|
||||
{
|
||||
$infile = $ARGV[0];
|
||||
} else {
|
||||
die "File $ARGV[0] is unreadable or does not exist";
|
||||
}
|
||||
|
||||
if ($ARGV[1] eq "") {
|
||||
die "You must supply an output filename.";
|
||||
} else {
|
||||
# Open the output file for writing.
|
||||
open(OUTFILE,">$ARGV[1]");
|
||||
};
|
||||
|
||||
# Open the (sorted) file
|
||||
open(DBFILE, "$sort -f $infile | $uniq |");
|
||||
|
||||
# Set variables so that we can remove dupe entries.
|
||||
my $lastname = "";
|
||||
my $lasturl = "";
|
||||
|
||||
# Initialize counter variables.
|
||||
$numc = 0;
|
||||
$numnp = 0;
|
||||
$nump = 0;
|
||||
|
||||
# Iterate through each line, throwing out those that don't
|
||||
# go in this gallery. Output each entry as list elements.
|
||||
while (<DBFILE>)
|
||||
{
|
||||
chomp;
|
||||
|
||||
# Split the db line into its component parts.
|
||||
($type, $name, $url, $description, $email, $dateadd, $datever) =
|
||||
m/([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]*)\t([^\t]+)\t([^\t]+)\t([^\t]+)/;
|
||||
|
||||
# Skip bogus and worthless entries while cleaning up malformed
|
||||
# http headers.
|
||||
next if ($name =~ m/^$|^\s+$/);
|
||||
next if ($url =~ m/^$|^\s+$|^http:\/\/\s+$/);
|
||||
next if ( ($lastname eq $name) && ($lasturl eq $url) );
|
||||
$url = "http://" . $url unless ($url =~ m/^http:\/\/.*$/);
|
||||
|
||||
print OUTFILE "$type\t$name\t$url\t$description\t$email\t$dateadd\t$datever\n";
|
||||
|
||||
$numc++ if ($type =~ m/commercial/);
|
||||
$numnp++ if ($type =~ m/non\s?profit/);
|
||||
$nump++ if ($type =~ m/personal/);
|
||||
|
||||
$lastname = $name;
|
||||
$lasturl = $url;
|
||||
|
||||
};
|
||||
|
||||
# Close the pipe & file like good little daemons.
|
||||
close(DBFILE);
|
||||
close(OUTFILE);
|
||||
|
||||
# Print Statistics
|
||||
print "Commercial: $numc\n";
|
||||
print "Non-profit: $numnp\n";
|
||||
print "Personal: $nump\n";
|
||||
print "Total: ",$numc+$numnp+$nump,"\n";
|
Loading…
Reference in a new issue