doc/en_US.ISO8859-1/htdocs/cgi/GnatsPR.pm
Hiroki Sato 35babe0ae5 - Set svn:ketwords and svn:mime-type. Note that application/octet-stream is
set on some files as a workaround for binary check.
- Fix pathname for svn co in the webupdate script.

Approved by:	doceng (implicit)
2012-05-17 19:12:14 +00:00

641 lines
17 KiB
Perl

#!/usr/bin/perl -Tw
#------------------------------------------------------------------------------
# Copyright (C) 2011, Shaun Amott <shaun@FreeBSD.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
#------------------------------------------------------------------------------
package GnatsPR;
#use MIME::Base64; # ports/converters/p5-MIME-Base64
#use MIME::QuotedPrint; #
#use Convert::UU qw(uudecode uuencode); # ports/converters/p5-Convert-UU
use GnatsPR::Section::Text;
use GnatsPR::Section::Patch;
use GnatsPR::Section::Email;
use GnatsPR::Section::StateChange;
use GnatsPR::Section::FieldStart;
use GnatsPR::SectionIterator;
use strict;
require 5.006;
#------------------------------------------------------------------------------
# Constants
#------------------------------------------------------------------------------
use constant ENCODING_BASE64 => 1;
use constant ENCODING_QP => 2;
use constant PATCH_ANY => 0x0001;
use constant PATCH_DIFF => 0x0002;
use constant PATCH_UUENC => 0x0004;
use constant PATCH_UUENC_BIN => 0x0008;
use constant PATCH_SHAR => 0x0010;
use constant PATCH_BASE64 => 0x0020;
#------------------------------------------------------------------------------
# Func: new()
# Desc: Constructor - Parses data if provided.
#
# Args: [data] - Raw data (or ref. to) from query-pr (optional)
#
# Retn: $self
#------------------------------------------------------------------------------
sub new
{
my $class = shift;
my ($data) = @_;
my $self = {
blobs_single => {}, # Raw text, single-line fields
blobs_multi => {}, # Raw text, multi-line fields
header => {}, # E-mail header bits
sections => {}, # Hash of arrayrefs of sections
fromwebform => 0, # PR came from the web form?
numfields => 0 # Number of fields we have
};
bless $self, $class;
if (defined $data) {
ref $data
? $self->Parse($data)
: $self->Parse(\$data);
}
return $self;
}
#------------------------------------------------------------------------------
# Func: Header()
# Desc: Return a value from the header hash.
#
# Args: $key - Header name, case insensitive.
#
# Retn: $val - Value.
#------------------------------------------------------------------------------
sub Header
{
my $self = shift;
my ($key) = @_;
return $self->{header}->{lc $key};
}
#------------------------------------------------------------------------------
# Func: FieldSingle()
# Desc: Return a single line field value.
#
# Args: $key - Field name.
#
# Retn: $val - Value.
#------------------------------------------------------------------------------
sub FieldSingle
{
my $self = shift;
my ($key) = @_;
return $self->{blobs_single}->{$key};
}
#------------------------------------------------------------------------------
# Func: Parse()
# Desc: Parse a blob of text from query-pr into a structured unit for easy
# manipulation.
#
# Args: \$data - Raw data from query-pr (non-ref scalar is acceptable too).
#
# Retn: n/a
#------------------------------------------------------------------------------
sub Parse
{
my $self = shift;
my ($data) = @_;
my $ismulti = 0;
my $pastheader = 0;
# GNATS ensures that > isn't allowed as the first
# character on a line, except for field headers.
# Any lines beginning with > will be shunted into
# 'Unformatted'
my @fieldblobs = split /\n>(?!\s)/m, $$data;
# In the rare case the Unformatted field did
# have some debris, be sure to assemble it back
# into a complete section.
while ($fieldblobs[$#fieldblobs] !~ /^Unformatted:/) {
my $last = pop @fieldblobs;
exists $fieldblobs[$#fieldblobs] or last;
$fieldblobs[$#fieldblobs] .= $last;
}
foreach my $blob (@fieldblobs) {
my $key;
# Parse e-mail header; we only care about a few
# fields, not the e-mail routing stuff.
if (!$pastheader) {
foreach my $line (split /\n/, $blob) {
if ($line =~ /^(\S+):\s*(.*)$/) {
my $val = $2;
$key = lc $1;
# Ignore multiple defs (e.g. Received: headers)
exists $self->{header}->{$key}
and next;
$self->{header}->{$key} = $val;
} elsif ($line =~ /^\s+(.*)$/) {
my $val = $1;
defined $key
or next;
# No field to append to
exists $self->{header}->{$key}
or next;
$self->{header}->{$key} .= "\n$val";
}
}
$pastheader = 1;
next;
}
if ($blob =~ s/^([^:]+):(\n|\s*)//) {
$key = $1;
#$ismulti = ($2 and $2 eq "\n");
# It's multi-liners from here on in
$key eq 'Organization'
and $ismulti = 1;
} else {
# Hmm...
next;
}
# Remove leading/trailing whitespace
$blob =~ s/^[\n\s]+//;
$blob =~ s/[\n\s]+$//;
if ($ismulti) {
$self->{blobs_multi}->{$key} = $blob;
} else {
$self->{blobs_single}->{$key} = $blob;
}
}
$self->{numfields} = scalar @fieldblobs;
$self->{fromwebform} =
$self->{header}->{'x-send-pr-version'} =~ /^www-/;
$self->ParseBlobs();
}
#------------------------------------------------------------------------------
# Func: ParseBlobs()
# Desc: Parse all the raw field "blobs" into section objects.
#
# Args: n/a
#
# Retn: n/a
#------------------------------------------------------------------------------
sub ParseBlobs
{
my $self = shift;
foreach my $field (keys %{$self->{blobs_multi}}) {
$self->{sections}->{$field} = [];
push @{$self->{sections}->{$field}},
new GnatsPR::Section::FieldStart($field);
if ($field eq 'Fix') {
$self->ParsePatches($field, \($self->{blobs_multi}->{$field}));
next;
}
if ($field eq 'Audit-Trail') {
# We'll break up the Audit-Trail field by change events.
# This is the most reliable way to split, although it's far
# from perfect. We'll then look for e-mail responses inside
# each chunk for further splitting later.
#
# Notes/Caveats:
# - If someone happened to paste an audit trail event
# inside another's "Why" field, it'd break this. I haven't
# seen this yet and don't expect to.
# - The From-To field has to come first. No reason it wouldn't
# under normal circumstances.
# - Pasted e-mails in the Why field will be promoted to
# responses, although they often break the GNATS conventions
# we (ab)use to find e-mails (e.g.: leading space on message
# body lines), which makes this more difficult.
my @auditevents =
split /(?=^(?:[A-Za-z_]+)-Changed-From-To: (?:.*?)\s*$)/m,
$self->{blobs_multi}->{$field};
foreach my $evt (@auditevents) {
my $sect = new GnatsPR::Section::StateChange;
my $gotwhat = 0;
my $gotsect = 0;
while ($evt =~ s/^([A-Za-z_]+)-Changed-([A-Za-z_-]+?): (.*?)\s*\n//) {
my ($what, $key, $val) = ($1, $2, $3);
if (!$gotwhat) {
$sect->what($what);
$gotwhat = 1;
}
$gotsect = 1;
if ($key eq 'From-To') {
my $fromto = $val;
if ($fromto =~ /^(.*)->(.*)$/) {
$sect->from($1);
$sect->to($2);
}
} elsif ($key eq 'When') {
$sect->when($val);
} elsif ($key eq 'By') {
$sect->by($val);
} elsif ($key eq 'Why') {
# This is the last one; it's a multi-line
# field (remainder of the text.)
last;
}
}
push @{$self->{sections}->{$field}}, $sect
if ($gotsect);
# Now look for blocks that appear to be e-mail replies
# Note: these header fields are the only ones we allow
# as the first header; we could feasibly back-
# track to find the start of the block (in case
# we're not there already), but the more headers
# we accept the more likely this will break on
# some unexpected content.
my $next_email = qr/^(From|To|Cc|Subject|Date): (.*)$/m;
my $gotwhy = 0;
while ($evt =~ /$next_email/) {
my $match_start = $-[0];
my ($header, $body, $indented);
my $why;
$match_start > 0
and $why = substr($evt, 0, $match_start, '');
if ($gotsect) {
# We now know where "Why" terminates
$sect->why($why) if $sect;
} elsif ($why) {
# If the first block was a date block,
# we need to use a text section for the
# intermediate text instead.
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text($why)
unless ($why =~ /^[\n\s]+$/);
}
$gotwhy = 1;
$sect = undef;
# Chop leading blank lines
$evt =~ s/^\n+//;
if ($evt =~ /^$/m) {
# First blank line signals the end of the
# e-mail header
$header = substr($evt, 0, $+[0]+1, '');
# Deciding where the body ends is more
# difficult...
# First, let's see if the message is
# indented (per GNATS standards)
$indented = ($evt =~ /^ /);
# If, so, find the next blank line, which
# signals the body end.
if ($indented) {
if ($evt =~ /^$/m) {
$body = substr($evt, 0, $-[0], '');
} else {
$body = $evt;
}
$body =~ s/^ //mg; # Remove indent char
} else {
# Look for another e-mail block
if ($evt =~ /$next_email/) {
$body = substr($evt, 0, $-[0], '');
} else {
# Otherwise, use the whole section
$body = $evt;
}
}
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Email($header, $body);
} else {
# No end-of-header marker: no choice but to
# dump the (possible) e-mail into the Why field
if ($gotsect && $sect) {
$sect->why($evt);
} elsif ($evt) {
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text($evt);
}
}
}
# Check for dangling "Why" block
if (!$gotwhy) {
if ($gotsect && $sect) {
$sect->why($evt);
} elsif ($evt) {
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text($evt);
}
}
}
next;
}
# Everything else is just text
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text($self->{blobs_multi}->{$field});
}
}
#------------------------------------------------------------------------------
# Func: ParsePatches()
# Desc: Parse the patches out of the given blob of text, emitting Patch and
# Text sections as appropriate.
#
# Args: $field - Field to push new sections to.
# \$text - Raw text
#
# Retn: n/a
#------------------------------------------------------------------------------
sub ParsePatches
{
my $self = shift;
my ($field, $text) = @_;
while (my $pi = $self->FindPatchStart($text)) {
# Everything up to this fragment can be
# promoted to a text section
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text(substr(
$$text,
0,
$pi->{start},
''
))
unless $pi->{start} == 0;
$pi->{start} = 0;
$self->FindPatchEnd($text, $pi);
# Try to determine if a web/send-pr attachment
# has another type of patch inside.
if ($pi->{type} eq 'stdattach' or $pi->{type} eq 'webattach') {
if (my $pi2 = $self->FindPatchStart($text)) {
# Upgrade to more specific type
$pi->{type} = $pi2->{type}
if ($pi2->{start} == 0);
}
}
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Patch(substr(
$$text,
0,
$pi->{size},
''
), $pi->{name}, $pi->{type});
$$text =~ s/^[\n\s]+//;
}
# Rest of the field is text
push @{$self->{sections}->{$field}},
new GnatsPR::Section::Text($$text)
if ($$text);
$text = '';
}
#------------------------------------------------------------------------------
# Func: FindPatchStart()
# Desc: Find the beginning of the first patch inside the given text blob,
# if there is one.
#
# Args: \$text - Raw text
#
# Retn: \%pi - Hash of patch info (or undef):
# - start - Start offset of patch
# - type - Type of attachment found
# - name - Filename, if available
#------------------------------------------------------------------------------
sub FindPatchStart
{
my $self = shift;
my ($text) = @_;
# Patch from web CGI script. Characteristics:
# - Only ever one of them.
# - Appended to the end of Fix:
# - Blank line after header line
# - Could contain other types of patch (e.g. shar(1) archive)
if ($$text =~ /^Patch attached with submission follows:$/m && $self->{fromwebform}) {
my $start = $+[0]; # The newline on the above
# Next non-blank line (i.e. start of patch)
if ($$text =~ /\G^./m) {
$start += $+[0]+1;
return {start => $start, type => 'webattach'};
}
return undef;
}
# Patch from send-pr(1). Characteristics:
# - Has header and footer line.
# - Appended to the end of Fix:
# - User has an opportunity to edit/mangle.
# - Could contain other types of patch (e.g. shar(1) archive)
if ($$text =~ /^---{1,8}\s?([A-Za-z0-9-_.,:%]+) (begins|starts) here\s?---+\n/mi) {
my $r = {start => $-[0], type => 'stdattach', name => $1};
# Chop header line
substr($$text, $-[0], $+[0] - $-[0], '');
return $r;
}
# Patch files from diff(1). Characteristics:
# - Easy to find start.
# - Difficult to find the end.
$$text =~ /^((?:(?:---|\*\*\*)\ (?:\S+)\s*(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\ .*)
|(?:(?:---|\*\*\*)\ (?:\S+)\s*(?:\d\d\d\d-\d\d-\d\d\ \d\d:\d\d:\d\d\.\d+)\ .*)
|(diff\ -.*?\ .*?\ .*)|(Index:\ \S+)
|(\*{3}\ \d+,\d+\ \*{4}))$/mx
and return {start => $-[0], type => 'diff'};
# Shell archive from shar(1)
$$text =~ /^# This is a shell archive\. Save it in a file, remove anything before/m
and return {start => $-[0], type => 'shar'};
# UUencoded file. Characteristics:
# - Has header and footer.
$$text =~ /^begin \d\d\d (.*)/m
and return {start => $-[0], type => 'uuencoded', name => $1};
# Base64 encoded file. Characteristics:
# - Has header and footer.
$$text =~ /^begin-base64 \d\d\d (.*)/m
and return {start => $-[0], type => 'base64', name => $1};
return undef;
}
#------------------------------------------------------------------------------
# Func: FindPatchEnd()
# Desc: Find the end of the first patch inside the given text blob, if any.
#
# Args: \$text - Raw text
# \%pi - Patch info hash from FindPatchStart(). We'll add more data:
# - size - Length of the patch.
#
# Retn: \%pi - Same as above, except undef will be returned if no actual
# endpoint was found (size in pi would extend to the end of the
# text blob in this case.)
#------------------------------------------------------------------------------
sub FindPatchEnd
{
my $self = shift;
my ($text, $pi) = @_;
$pi->{size} = 0;
if ($pi->{type} eq 'webattach') {
$$text =~ /$/
and $pi->{size} = $+[0];
} elsif ($pi->{type} eq 'stdattach') {
$$text =~ /^---{1,8}\s?\Q$pi->{name}\E ends here\s?---+/mi
and $pi->{size} = $-[0]-1;
# Chop footer line
substr($$text, $-[0], $+[0] - $-[0], '');
} elsif ($pi->{type} eq 'diff') {
# XXX: could do better
$$text =~ /^$/m
and $pi->{size} = $-[0]-1;
} elsif ($pi->{type} eq 'shar') {
$$text =~ /^exit$/m
and $pi->{size} = $+[0];
} elsif ($pi->{type} eq 'uuencoded') {
$$text =~ /^end$/m
and $pi->{size} = $+[0];
} elsif ($pi->{type} eq 'base64') {
$$text =~ /^====$/m
and $pi->{size} = $+[0];
}
if ($pi->{size} == 0) {
$pi->{size} = length $$text;
return undef;
}
return $pi;
}
#------------------------------------------------------------------------------
# Func: GetAttachment()
# Desc: Recursively search sections for a downloadable attachment.
#
# Args: $num - Attachment index (counts from 1)
#
# Retn: $sec - Attachment section (or undef)
#------------------------------------------------------------------------------
sub GetAttachment
{
my $self = shift;
my ($num) = @_;
my $cur = 1;
my $iter = GnatsPR::SectionIterator->new($self, 'Fix', 'Audit-Trail');
while (my $item = $iter->next()) {
if (ref $item eq 'GnatsPR::Section::Patch') {
# Patch sections
return $item if ($cur++ == $num);
} elsif (ref $item eq 'GnatsPR::Section::Email') {
# Attachments from MIME messages
my $mime_iter = GnatsPR::MIMEIterator->new($item);
while (my $part = $mime_iter->next()) {
if ($part->isattachment) {
return $part if ($cur++ == $num);
}
}
}
}
return undef;
}
1;