set on some files as a workaround for binary check. - Fix pathname for svn co in the webupdate script. Approved by: doceng (implicit)
641 lines
17 KiB
Perl
641 lines
17 KiB
Perl
#!/usr/bin/perl -Tw
|
|
#------------------------------------------------------------------------------
|
|
# Copyright (C) 2011, Shaun Amott <shaun@FreeBSD.org>
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
# SUCH DAMAGE.
|
|
#
|
|
# $FreeBSD$
|
|
#------------------------------------------------------------------------------
|
|
|
|
package GnatsPR;
|
|
|
|
#use MIME::Base64; # ports/converters/p5-MIME-Base64
|
|
#use MIME::QuotedPrint; #
|
|
#use Convert::UU qw(uudecode uuencode); # ports/converters/p5-Convert-UU
|
|
|
|
use GnatsPR::Section::Text;
|
|
use GnatsPR::Section::Patch;
|
|
use GnatsPR::Section::Email;
|
|
use GnatsPR::Section::StateChange;
|
|
use GnatsPR::Section::FieldStart;
|
|
|
|
use GnatsPR::SectionIterator;
|
|
|
|
use strict;
|
|
|
|
require 5.006;
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Constants
|
|
#------------------------------------------------------------------------------
|
|
|
|
use constant ENCODING_BASE64 => 1;
|
|
use constant ENCODING_QP => 2;
|
|
|
|
use constant PATCH_ANY => 0x0001;
|
|
use constant PATCH_DIFF => 0x0002;
|
|
use constant PATCH_UUENC => 0x0004;
|
|
use constant PATCH_UUENC_BIN => 0x0008;
|
|
use constant PATCH_SHAR => 0x0010;
|
|
use constant PATCH_BASE64 => 0x0020;
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: new()
|
|
# Desc: Constructor - Parses data if provided.
|
|
#
|
|
# Args: [data] - Raw data (or ref. to) from query-pr (optional)
|
|
#
|
|
# Retn: $self
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub new
|
|
{
|
|
my $class = shift;
|
|
my ($data) = @_;
|
|
|
|
my $self = {
|
|
blobs_single => {}, # Raw text, single-line fields
|
|
blobs_multi => {}, # Raw text, multi-line fields
|
|
header => {}, # E-mail header bits
|
|
sections => {}, # Hash of arrayrefs of sections
|
|
fromwebform => 0, # PR came from the web form?
|
|
numfields => 0 # Number of fields we have
|
|
};
|
|
|
|
bless $self, $class;
|
|
|
|
if (defined $data) {
|
|
ref $data
|
|
? $self->Parse($data)
|
|
: $self->Parse(\$data);
|
|
}
|
|
|
|
return $self;
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: Header()
|
|
# Desc: Return a value from the header hash.
|
|
#
|
|
# Args: $key - Header name, case insensitive.
|
|
#
|
|
# Retn: $val - Value.
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub Header
|
|
{
|
|
my $self = shift;
|
|
my ($key) = @_;
|
|
|
|
return $self->{header}->{lc $key};
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: FieldSingle()
|
|
# Desc: Return a single line field value.
|
|
#
|
|
# Args: $key - Field name.
|
|
#
|
|
# Retn: $val - Value.
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub FieldSingle
|
|
{
|
|
my $self = shift;
|
|
my ($key) = @_;
|
|
|
|
return $self->{blobs_single}->{$key};
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: Parse()
|
|
# Desc: Parse a blob of text from query-pr into a structured unit for easy
|
|
# manipulation.
|
|
#
|
|
# Args: \$data - Raw data from query-pr (non-ref scalar is acceptable too).
|
|
#
|
|
# Retn: n/a
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub Parse
|
|
{
|
|
my $self = shift;
|
|
my ($data) = @_;
|
|
|
|
my $ismulti = 0;
|
|
my $pastheader = 0;
|
|
|
|
# GNATS ensures that > isn't allowed as the first
|
|
# character on a line, except for field headers.
|
|
# Any lines beginning with > will be shunted into
|
|
# 'Unformatted'
|
|
my @fieldblobs = split /\n>(?!\s)/m, $$data;
|
|
|
|
# In the rare case the Unformatted field did
|
|
# have some debris, be sure to assemble it back
|
|
# into a complete section.
|
|
while ($fieldblobs[$#fieldblobs] !~ /^Unformatted:/) {
|
|
my $last = pop @fieldblobs;
|
|
exists $fieldblobs[$#fieldblobs] or last;
|
|
$fieldblobs[$#fieldblobs] .= $last;
|
|
}
|
|
|
|
foreach my $blob (@fieldblobs) {
|
|
my $key;
|
|
|
|
# Parse e-mail header; we only care about a few
|
|
# fields, not the e-mail routing stuff.
|
|
if (!$pastheader) {
|
|
foreach my $line (split /\n/, $blob) {
|
|
if ($line =~ /^(\S+):\s*(.*)$/) {
|
|
my $val = $2;
|
|
$key = lc $1;
|
|
|
|
# Ignore multiple defs (e.g. Received: headers)
|
|
exists $self->{header}->{$key}
|
|
and next;
|
|
|
|
$self->{header}->{$key} = $val;
|
|
} elsif ($line =~ /^\s+(.*)$/) {
|
|
my $val = $1;
|
|
|
|
defined $key
|
|
or next;
|
|
|
|
# No field to append to
|
|
exists $self->{header}->{$key}
|
|
or next;
|
|
|
|
$self->{header}->{$key} .= "\n$val";
|
|
}
|
|
}
|
|
|
|
$pastheader = 1;
|
|
next;
|
|
}
|
|
|
|
if ($blob =~ s/^([^:]+):(\n|\s*)//) {
|
|
$key = $1;
|
|
#$ismulti = ($2 and $2 eq "\n");
|
|
|
|
# It's multi-liners from here on in
|
|
$key eq 'Organization'
|
|
and $ismulti = 1;
|
|
} else {
|
|
# Hmm...
|
|
next;
|
|
}
|
|
|
|
# Remove leading/trailing whitespace
|
|
$blob =~ s/^[\n\s]+//;
|
|
$blob =~ s/[\n\s]+$//;
|
|
|
|
if ($ismulti) {
|
|
$self->{blobs_multi}->{$key} = $blob;
|
|
} else {
|
|
$self->{blobs_single}->{$key} = $blob;
|
|
}
|
|
}
|
|
|
|
$self->{numfields} = scalar @fieldblobs;
|
|
|
|
$self->{fromwebform} =
|
|
$self->{header}->{'x-send-pr-version'} =~ /^www-/;
|
|
|
|
$self->ParseBlobs();
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: ParseBlobs()
|
|
# Desc: Parse all the raw field "blobs" into section objects.
|
|
#
|
|
# Args: n/a
|
|
#
|
|
# Retn: n/a
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub ParseBlobs
|
|
{
|
|
my $self = shift;
|
|
|
|
foreach my $field (keys %{$self->{blobs_multi}}) {
|
|
$self->{sections}->{$field} = [];
|
|
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::FieldStart($field);
|
|
|
|
if ($field eq 'Fix') {
|
|
$self->ParsePatches($field, \($self->{blobs_multi}->{$field}));
|
|
next;
|
|
}
|
|
|
|
if ($field eq 'Audit-Trail') {
|
|
# We'll break up the Audit-Trail field by change events.
|
|
# This is the most reliable way to split, although it's far
|
|
# from perfect. We'll then look for e-mail responses inside
|
|
# each chunk for further splitting later.
|
|
#
|
|
# Notes/Caveats:
|
|
# - If someone happened to paste an audit trail event
|
|
# inside another's "Why" field, it'd break this. I haven't
|
|
# seen this yet and don't expect to.
|
|
# - The From-To field has to come first. No reason it wouldn't
|
|
# under normal circumstances.
|
|
# - Pasted e-mails in the Why field will be promoted to
|
|
# responses, although they often break the GNATS conventions
|
|
# we (ab)use to find e-mails (e.g.: leading space on message
|
|
# body lines), which makes this more difficult.
|
|
my @auditevents =
|
|
split /(?=^(?:[A-Za-z_]+)-Changed-From-To: (?:.*?)\s*$)/m,
|
|
$self->{blobs_multi}->{$field};
|
|
|
|
foreach my $evt (@auditevents) {
|
|
my $sect = new GnatsPR::Section::StateChange;
|
|
my $gotwhat = 0;
|
|
my $gotsect = 0;
|
|
while ($evt =~ s/^([A-Za-z_]+)-Changed-([A-Za-z_-]+?): (.*?)\s*\n//) {
|
|
my ($what, $key, $val) = ($1, $2, $3);
|
|
|
|
if (!$gotwhat) {
|
|
$sect->what($what);
|
|
$gotwhat = 1;
|
|
}
|
|
|
|
$gotsect = 1;
|
|
|
|
if ($key eq 'From-To') {
|
|
my $fromto = $val;
|
|
if ($fromto =~ /^(.*)->(.*)$/) {
|
|
$sect->from($1);
|
|
$sect->to($2);
|
|
}
|
|
} elsif ($key eq 'When') {
|
|
$sect->when($val);
|
|
} elsif ($key eq 'By') {
|
|
$sect->by($val);
|
|
} elsif ($key eq 'Why') {
|
|
# This is the last one; it's a multi-line
|
|
# field (remainder of the text.)
|
|
last;
|
|
}
|
|
}
|
|
|
|
push @{$self->{sections}->{$field}}, $sect
|
|
if ($gotsect);
|
|
|
|
# Now look for blocks that appear to be e-mail replies
|
|
# Note: these header fields are the only ones we allow
|
|
# as the first header; we could feasibly back-
|
|
# track to find the start of the block (in case
|
|
# we're not there already), but the more headers
|
|
# we accept the more likely this will break on
|
|
# some unexpected content.
|
|
my $next_email = qr/^(From|To|Cc|Subject|Date): (.*)$/m;
|
|
my $gotwhy = 0;
|
|
|
|
while ($evt =~ /$next_email/) {
|
|
my $match_start = $-[0];
|
|
my ($header, $body, $indented);
|
|
my $why;
|
|
|
|
$match_start > 0
|
|
and $why = substr($evt, 0, $match_start, '');
|
|
|
|
if ($gotsect) {
|
|
# We now know where "Why" terminates
|
|
$sect->why($why) if $sect;
|
|
} elsif ($why) {
|
|
# If the first block was a date block,
|
|
# we need to use a text section for the
|
|
# intermediate text instead.
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text($why)
|
|
unless ($why =~ /^[\n\s]+$/);
|
|
}
|
|
|
|
$gotwhy = 1;
|
|
$sect = undef;
|
|
|
|
# Chop leading blank lines
|
|
$evt =~ s/^\n+//;
|
|
|
|
if ($evt =~ /^$/m) {
|
|
# First blank line signals the end of the
|
|
# e-mail header
|
|
$header = substr($evt, 0, $+[0]+1, '');
|
|
|
|
# Deciding where the body ends is more
|
|
# difficult...
|
|
|
|
# First, let's see if the message is
|
|
# indented (per GNATS standards)
|
|
$indented = ($evt =~ /^ /);
|
|
|
|
# If, so, find the next blank line, which
|
|
# signals the body end.
|
|
if ($indented) {
|
|
if ($evt =~ /^$/m) {
|
|
$body = substr($evt, 0, $-[0], '');
|
|
} else {
|
|
$body = $evt;
|
|
}
|
|
$body =~ s/^ //mg; # Remove indent char
|
|
} else {
|
|
# Look for another e-mail block
|
|
if ($evt =~ /$next_email/) {
|
|
$body = substr($evt, 0, $-[0], '');
|
|
} else {
|
|
# Otherwise, use the whole section
|
|
$body = $evt;
|
|
}
|
|
}
|
|
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Email($header, $body);
|
|
} else {
|
|
# No end-of-header marker: no choice but to
|
|
# dump the (possible) e-mail into the Why field
|
|
|
|
if ($gotsect && $sect) {
|
|
$sect->why($evt);
|
|
} elsif ($evt) {
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text($evt);
|
|
}
|
|
}
|
|
}
|
|
|
|
# Check for dangling "Why" block
|
|
if (!$gotwhy) {
|
|
if ($gotsect && $sect) {
|
|
$sect->why($evt);
|
|
} elsif ($evt) {
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text($evt);
|
|
}
|
|
}
|
|
}
|
|
|
|
next;
|
|
}
|
|
|
|
# Everything else is just text
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text($self->{blobs_multi}->{$field});
|
|
}
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: ParsePatches()
|
|
# Desc: Parse the patches out of the given blob of text, emitting Patch and
|
|
# Text sections as appropriate.
|
|
#
|
|
# Args: $field - Field to push new sections to.
|
|
# \$text - Raw text
|
|
#
|
|
# Retn: n/a
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub ParsePatches
|
|
{
|
|
my $self = shift;
|
|
my ($field, $text) = @_;
|
|
|
|
while (my $pi = $self->FindPatchStart($text)) {
|
|
# Everything up to this fragment can be
|
|
# promoted to a text section
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text(substr(
|
|
$$text,
|
|
0,
|
|
$pi->{start},
|
|
''
|
|
))
|
|
unless $pi->{start} == 0;
|
|
|
|
$pi->{start} = 0;
|
|
|
|
$self->FindPatchEnd($text, $pi);
|
|
|
|
# Try to determine if a web/send-pr attachment
|
|
# has another type of patch inside.
|
|
if ($pi->{type} eq 'stdattach' or $pi->{type} eq 'webattach') {
|
|
if (my $pi2 = $self->FindPatchStart($text)) {
|
|
# Upgrade to more specific type
|
|
$pi->{type} = $pi2->{type}
|
|
if ($pi2->{start} == 0);
|
|
}
|
|
}
|
|
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Patch(substr(
|
|
$$text,
|
|
0,
|
|
$pi->{size},
|
|
''
|
|
), $pi->{name}, $pi->{type});
|
|
|
|
$$text =~ s/^[\n\s]+//;
|
|
}
|
|
|
|
# Rest of the field is text
|
|
push @{$self->{sections}->{$field}},
|
|
new GnatsPR::Section::Text($$text)
|
|
if ($$text);
|
|
|
|
$text = '';
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: FindPatchStart()
|
|
# Desc: Find the beginning of the first patch inside the given text blob,
|
|
# if there is one.
|
|
#
|
|
# Args: \$text - Raw text
|
|
#
|
|
# Retn: \%pi - Hash of patch info (or undef):
|
|
# - start - Start offset of patch
|
|
# - type - Type of attachment found
|
|
# - name - Filename, if available
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub FindPatchStart
|
|
{
|
|
my $self = shift;
|
|
my ($text) = @_;
|
|
|
|
# Patch from web CGI script. Characteristics:
|
|
# - Only ever one of them.
|
|
# - Appended to the end of Fix:
|
|
# - Blank line after header line
|
|
# - Could contain other types of patch (e.g. shar(1) archive)
|
|
if ($$text =~ /^Patch attached with submission follows:$/m && $self->{fromwebform}) {
|
|
my $start = $+[0]; # The newline on the above
|
|
|
|
# Next non-blank line (i.e. start of patch)
|
|
if ($$text =~ /\G^./m) {
|
|
$start += $+[0]+1;
|
|
return {start => $start, type => 'webattach'};
|
|
}
|
|
|
|
return undef;
|
|
}
|
|
|
|
# Patch from send-pr(1). Characteristics:
|
|
# - Has header and footer line.
|
|
# - Appended to the end of Fix:
|
|
# - User has an opportunity to edit/mangle.
|
|
# - Could contain other types of patch (e.g. shar(1) archive)
|
|
if ($$text =~ /^---{1,8}\s?([A-Za-z0-9-_.,:%]+) (begins|starts) here\s?---+\n/mi) {
|
|
my $r = {start => $-[0], type => 'stdattach', name => $1};
|
|
|
|
# Chop header line
|
|
substr($$text, $-[0], $+[0] - $-[0], '');
|
|
|
|
return $r;
|
|
}
|
|
|
|
# Patch files from diff(1). Characteristics:
|
|
# - Easy to find start.
|
|
# - Difficult to find the end.
|
|
$$text =~ /^((?:(?:---|\*\*\*)\ (?:\S+)\s*(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\ .*)
|
|
|(?:(?:---|\*\*\*)\ (?:\S+)\s*(?:\d\d\d\d-\d\d-\d\d\ \d\d:\d\d:\d\d\.\d+)\ .*)
|
|
|(diff\ -.*?\ .*?\ .*)|(Index:\ \S+)
|
|
|(\*{3}\ \d+,\d+\ \*{4}))$/mx
|
|
and return {start => $-[0], type => 'diff'};
|
|
|
|
# Shell archive from shar(1)
|
|
$$text =~ /^# This is a shell archive\. Save it in a file, remove anything before/m
|
|
and return {start => $-[0], type => 'shar'};
|
|
|
|
# UUencoded file. Characteristics:
|
|
# - Has header and footer.
|
|
$$text =~ /^begin \d\d\d (.*)/m
|
|
and return {start => $-[0], type => 'uuencoded', name => $1};
|
|
|
|
# Base64 encoded file. Characteristics:
|
|
# - Has header and footer.
|
|
$$text =~ /^begin-base64 \d\d\d (.*)/m
|
|
and return {start => $-[0], type => 'base64', name => $1};
|
|
|
|
return undef;
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: FindPatchEnd()
|
|
# Desc: Find the end of the first patch inside the given text blob, if any.
|
|
#
|
|
# Args: \$text - Raw text
|
|
# \%pi - Patch info hash from FindPatchStart(). We'll add more data:
|
|
# - size - Length of the patch.
|
|
#
|
|
# Retn: \%pi - Same as above, except undef will be returned if no actual
|
|
# endpoint was found (size in pi would extend to the end of the
|
|
# text blob in this case.)
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub FindPatchEnd
|
|
{
|
|
my $self = shift;
|
|
my ($text, $pi) = @_;
|
|
|
|
$pi->{size} = 0;
|
|
|
|
if ($pi->{type} eq 'webattach') {
|
|
$$text =~ /$/
|
|
and $pi->{size} = $+[0];
|
|
} elsif ($pi->{type} eq 'stdattach') {
|
|
$$text =~ /^---{1,8}\s?\Q$pi->{name}\E ends here\s?---+/mi
|
|
and $pi->{size} = $-[0]-1;
|
|
# Chop footer line
|
|
substr($$text, $-[0], $+[0] - $-[0], '');
|
|
} elsif ($pi->{type} eq 'diff') {
|
|
# XXX: could do better
|
|
$$text =~ /^$/m
|
|
and $pi->{size} = $-[0]-1;
|
|
} elsif ($pi->{type} eq 'shar') {
|
|
$$text =~ /^exit$/m
|
|
and $pi->{size} = $+[0];
|
|
} elsif ($pi->{type} eq 'uuencoded') {
|
|
$$text =~ /^end$/m
|
|
and $pi->{size} = $+[0];
|
|
} elsif ($pi->{type} eq 'base64') {
|
|
$$text =~ /^====$/m
|
|
and $pi->{size} = $+[0];
|
|
}
|
|
|
|
if ($pi->{size} == 0) {
|
|
$pi->{size} = length $$text;
|
|
return undef;
|
|
}
|
|
|
|
return $pi;
|
|
}
|
|
|
|
|
|
#------------------------------------------------------------------------------
|
|
# Func: GetAttachment()
|
|
# Desc: Recursively search sections for a downloadable attachment.
|
|
#
|
|
# Args: $num - Attachment index (counts from 1)
|
|
#
|
|
# Retn: $sec - Attachment section (or undef)
|
|
#------------------------------------------------------------------------------
|
|
|
|
sub GetAttachment
|
|
{
|
|
my $self = shift;
|
|
my ($num) = @_;
|
|
my $cur = 1;
|
|
|
|
my $iter = GnatsPR::SectionIterator->new($self, 'Fix', 'Audit-Trail');
|
|
|
|
while (my $item = $iter->next()) {
|
|
if (ref $item eq 'GnatsPR::Section::Patch') {
|
|
# Patch sections
|
|
return $item if ($cur++ == $num);
|
|
} elsif (ref $item eq 'GnatsPR::Section::Email') {
|
|
# Attachments from MIME messages
|
|
my $mime_iter = GnatsPR::MIMEIterator->new($item);
|
|
while (my $part = $mime_iter->next()) {
|
|
if ($part->isattachment) {
|
|
return $part if ($cur++ == $num);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return undef;
|
|
}
|
|
|
|
|
|
1;
|