#!/usr/bin/perl -Tw #------------------------------------------------------------------------------ # Copyright (C) 2011, Shaun Amott # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ #------------------------------------------------------------------------------ package GnatsPR; #use MIME::Base64; # ports/converters/p5-MIME-Base64 #use MIME::QuotedPrint; # #use Convert::UU qw(uudecode uuencode); # ports/converters/p5-Convert-UU use GnatsPR::Section::Text; use GnatsPR::Section::Patch; use GnatsPR::Section::Email; use GnatsPR::Section::StateChange; use GnatsPR::Section::FieldStart; use GnatsPR::SectionIterator; use strict; require 5.006; #------------------------------------------------------------------------------ # Constants #------------------------------------------------------------------------------ use constant ENCODING_BASE64 => 1; use constant ENCODING_QP => 2; use constant PATCH_ANY => 0x0001; use constant PATCH_DIFF => 0x0002; use constant PATCH_UUENC => 0x0004; use constant PATCH_UUENC_BIN => 0x0008; use constant PATCH_SHAR => 0x0010; use constant PATCH_BASE64 => 0x0020; #------------------------------------------------------------------------------ # Func: new() # Desc: Constructor - Parses data if provided. # # Args: [data] - Raw data (or ref. to) from query-pr (optional) # # Retn: $self #------------------------------------------------------------------------------ sub new { my $class = shift; my ($data) = @_; my $self = { blobs_single => {}, # Raw text, single-line fields blobs_multi => {}, # Raw text, multi-line fields header => {}, # E-mail header bits sections => {}, # Hash of arrayrefs of sections fromwebform => 0, # PR came from the web form? numfields => 0 # Number of fields we have }; bless $self, $class; if (defined $data) { ref $data ? $self->Parse($data) : $self->Parse(\$data); } return $self; } #------------------------------------------------------------------------------ # Func: Header() # Desc: Return a value from the header hash. # # Args: $key - Header name, case insensitive. # # Retn: $val - Value. #------------------------------------------------------------------------------ sub Header { my $self = shift; my ($key) = @_; return $self->{header}->{lc $key}; } #------------------------------------------------------------------------------ # Func: FieldSingle() # Desc: Return a single line field value. # # Args: $key - Field name. # # Retn: $val - Value. #------------------------------------------------------------------------------ sub FieldSingle { my $self = shift; my ($key) = @_; return $self->{blobs_single}->{$key}; } #------------------------------------------------------------------------------ # Func: Parse() # Desc: Parse a blob of text from query-pr into a structured unit for easy # manipulation. # # Args: \$data - Raw data from query-pr (non-ref scalar is acceptable too). # # Retn: n/a #------------------------------------------------------------------------------ sub Parse { my $self = shift; my ($data) = @_; my $ismulti = 0; my $pastheader = 0; # GNATS ensures that > isn't allowed as the first # character on a line, except for field headers. # Any lines beginning with > will be shunted into # 'Unformatted' my @fieldblobs = split /\n>(?!\s)/m, $$data; # In the rare case the Unformatted field did # have some debris, be sure to assemble it back # into a complete section. while ($fieldblobs[$#fieldblobs] !~ /^Unformatted:/) { my $last = pop @fieldblobs; exists $fieldblobs[$#fieldblobs] or last; $fieldblobs[$#fieldblobs] .= $last; } foreach my $blob (@fieldblobs) { my $key; # Parse e-mail header; we only care about a few # fields, not the e-mail routing stuff. if (!$pastheader) { foreach my $line (split /\n/, $blob) { if ($line =~ /^(\S+):\s*(.*)$/) { my $val = $2; $key = lc $1; # Ignore multiple defs (e.g. Received: headers) exists $self->{header}->{$key} and next; $self->{header}->{$key} = $val; } elsif ($line =~ /^\s+(.*)$/) { my $val = $1; defined $key or next; # No field to append to exists $self->{header}->{$key} or next; $self->{header}->{$key} .= "\n$val"; } } $pastheader = 1; next; } if ($blob =~ s/^([^:]+):(\n|\s*)//) { $key = $1; #$ismulti = ($2 and $2 eq "\n"); # It's multi-liners from here on in $key eq 'Organization' and $ismulti = 1; } else { # Hmm... next; } # Remove leading/trailing whitespace $blob =~ s/^[\n\s]+//; $blob =~ s/[\n\s]+$//; if ($ismulti) { $self->{blobs_multi}->{$key} = $blob; } else { $self->{blobs_single}->{$key} = $blob; } } $self->{numfields} = scalar @fieldblobs; $self->{fromwebform} = $self->{header}->{'x-send-pr-version'} =~ /^www-/; $self->ParseBlobs(); } #------------------------------------------------------------------------------ # Func: ParseBlobs() # Desc: Parse all the raw field "blobs" into section objects. # # Args: n/a # # Retn: n/a #------------------------------------------------------------------------------ sub ParseBlobs { my $self = shift; foreach my $field (keys %{$self->{blobs_multi}}) { $self->{sections}->{$field} = []; push @{$self->{sections}->{$field}}, new GnatsPR::Section::FieldStart($field); if ($field eq 'Fix') { $self->ParsePatches($field, \($self->{blobs_multi}->{$field})); next; } if ($field eq 'Audit-Trail') { # We'll break up the Audit-Trail field by change events. # This is the most reliable way to split, although it's far # from perfect. We'll then look for e-mail responses inside # each chunk for further splitting later. # # Notes/Caveats: # - If someone happened to paste an audit trail event # inside another's "Why" field, it'd break this. I haven't # seen this yet and don't expect to. # - The From-To field has to come first. No reason it wouldn't # under normal circumstances. # - Pasted e-mails in the Why field will be promoted to # responses, although they often break the GNATS conventions # we (ab)use to find e-mails (e.g.: leading space on message # body lines), which makes this more difficult. my @auditevents = split /(?=^(?:[A-Za-z_]+)-Changed-From-To: (?:.*?)\s*$)/m, $self->{blobs_multi}->{$field}; foreach my $evt (@auditevents) { my $sect = new GnatsPR::Section::StateChange; my $gotwhat = 0; my $gotsect = 0; while ($evt =~ s/^([A-Za-z_]+)-Changed-([A-Za-z_-]+?): (.*?)\s*\n//) { my ($what, $key, $val) = ($1, $2, $3); if (!$gotwhat) { $sect->what($what); $gotwhat = 1; } $gotsect = 1; if ($key eq 'From-To') { my $fromto = $val; if ($fromto =~ /^(.*)->(.*)$/) { $sect->from($1); $sect->to($2); } } elsif ($key eq 'When') { $sect->when($val); } elsif ($key eq 'By') { $sect->by($val); } elsif ($key eq 'Why') { # This is the last one; it's a multi-line # field (remainder of the text.) last; } } push @{$self->{sections}->{$field}}, $sect if ($gotsect); # Now look for blocks that appear to be e-mail replies # Note: these header fields are the only ones we allow # as the first header; we could feasibly back- # track to find the start of the block (in case # we're not there already), but the more headers # we accept the more likely this will break on # some unexpected content. my $next_email = qr/^(From|To|Cc|Subject|Date): (.*)$/m; my $gotwhy = 0; while ($evt =~ /$next_email/) { my $match_start = $-[0]; my ($header, $body, $indented); my $why; $match_start > 0 and $why = substr($evt, 0, $match_start, ''); if ($gotsect) { # We now know where "Why" terminates $sect->why($why) if $sect; } elsif ($why) { # If the first block was a date block, # we need to use a text section for the # intermediate text instead. push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text($why) unless ($why =~ /^[\n\s]+$/); } $gotwhy = 1; $sect = undef; # Chop leading blank lines $evt =~ s/^\n+//; if ($evt =~ /^$/m) { # First blank line signals the end of the # e-mail header $header = substr($evt, 0, $+[0]+1, ''); # Deciding where the body ends is more # difficult... # First, let's see if the message is # indented (per GNATS standards) $indented = ($evt =~ /^ /); # If, so, find the next blank line, which # signals the body end. if ($indented) { if ($evt =~ /^$/m) { $body = substr($evt, 0, $-[0], ''); } else { $body = $evt; } $body =~ s/^ //mg; # Remove indent char } else { # Look for another e-mail block if ($evt =~ /$next_email/) { $body = substr($evt, 0, $-[0], ''); } else { # Otherwise, use the whole section $body = $evt; } } push @{$self->{sections}->{$field}}, new GnatsPR::Section::Email($header, $body); } else { # No end-of-header marker: no choice but to # dump the (possible) e-mail into the Why field if ($gotsect && $sect) { $sect->why($evt); } elsif ($evt) { push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text($evt); } } } # Check for dangling "Why" block if (!$gotwhy) { if ($gotsect && $sect) { $sect->why($evt); } elsif ($evt) { push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text($evt); } } } next; } # Everything else is just text push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text($self->{blobs_multi}->{$field}); } } #------------------------------------------------------------------------------ # Func: ParsePatches() # Desc: Parse the patches out of the given blob of text, emitting Patch and # Text sections as appropriate. # # Args: $field - Field to push new sections to. # \$text - Raw text # # Retn: n/a #------------------------------------------------------------------------------ sub ParsePatches { my $self = shift; my ($field, $text) = @_; while (my $pi = $self->FindPatchStart($text)) { # Everything up to this fragment can be # promoted to a text section push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text(substr( $$text, 0, $pi->{start}, '' )) unless $pi->{start} == 0; $pi->{start} = 0; $self->FindPatchEnd($text, $pi); # Try to determine if a web/send-pr attachment # has another type of patch inside. if ($pi->{type} eq 'stdattach' or $pi->{type} eq 'webattach') { if (my $pi2 = $self->FindPatchStart($text)) { # Upgrade to more specific type $pi->{type} = $pi2->{type} if ($pi2->{start} == 0); } } push @{$self->{sections}->{$field}}, new GnatsPR::Section::Patch(substr( $$text, 0, $pi->{size}, '' ), $pi->{name}, $pi->{type}); $$text =~ s/^[\n\s]+//; } # Rest of the field is text push @{$self->{sections}->{$field}}, new GnatsPR::Section::Text($$text) if ($$text); $text = ''; } #------------------------------------------------------------------------------ # Func: FindPatchStart() # Desc: Find the beginning of the first patch inside the given text blob, # if there is one. # # Args: \$text - Raw text # # Retn: \%pi - Hash of patch info (or undef): # - start - Start offset of patch # - type - Type of attachment found # - name - Filename, if available #------------------------------------------------------------------------------ sub FindPatchStart { my $self = shift; my ($text) = @_; # Patch from web CGI script. Characteristics: # - Only ever one of them. # - Appended to the end of Fix: # - Blank line after header line # - Could contain other types of patch (e.g. shar(1) archive) if ($$text =~ /^Patch attached with submission follows:$/m && $self->{fromwebform}) { my $start = $+[0]; # The newline on the above # Next non-blank line (i.e. start of patch) if ($$text =~ /\G^./m) { $start += $+[0]+1; return {start => $start, type => 'webattach'}; } return undef; } # Patch from send-pr(1). Characteristics: # - Has header and footer line. # - Appended to the end of Fix: # - User has an opportunity to edit/mangle. # - Could contain other types of patch (e.g. shar(1) archive) if ($$text =~ /^---{1,8}\s?([A-Za-z0-9-_.,:%]+) (begins|starts) here\s?---+\n/mi) { my $r = {start => $-[0], type => 'stdattach', name => $1}; # Chop header line substr($$text, $-[0], $+[0] - $-[0], ''); return $r; } # Patch files from diff(1). Characteristics: # - Easy to find start. # - Difficult to find the end. $$text =~ /^((?:(?:---|\*\*\*)\ (?:\S+)\s*(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\ .*) |(?:(?:---|\*\*\*)\ (?:\S+)\s*(?:\d\d\d\d-\d\d-\d\d\ \d\d:\d\d:\d\d\.\d+)\ .*) |(diff\ -.*?\ .*?\ .*)|(Index:\ \S+) |(\*{3}\ \d+,\d+\ \*{4}))$/mx and return {start => $-[0], type => 'diff'}; # Shell archive from shar(1) $$text =~ /^# This is a shell archive\. Save it in a file, remove anything before/m and return {start => $-[0], type => 'shar'}; # UUencoded file. Characteristics: # - Has header and footer. $$text =~ /^begin \d\d\d (.*)/m and return {start => $-[0], type => 'uuencoded', name => $1}; # Base64 encoded file. Characteristics: # - Has header and footer. $$text =~ /^begin-base64 \d\d\d (.*)/m and return {start => $-[0], type => 'base64', name => $1}; return undef; } #------------------------------------------------------------------------------ # Func: FindPatchEnd() # Desc: Find the end of the first patch inside the given text blob, if any. # # Args: \$text - Raw text # \%pi - Patch info hash from FindPatchStart(). We'll add more data: # - size - Length of the patch. # # Retn: \%pi - Same as above, except undef will be returned if no actual # endpoint was found (size in pi would extend to the end of the # text blob in this case.) #------------------------------------------------------------------------------ sub FindPatchEnd { my $self = shift; my ($text, $pi) = @_; $pi->{size} = 0; if ($pi->{type} eq 'webattach') { $$text =~ /$/ and $pi->{size} = $+[0]; } elsif ($pi->{type} eq 'stdattach') { $$text =~ /^---{1,8}\s?\Q$pi->{name}\E ends here\s?---+/mi and $pi->{size} = $-[0]-1; # Chop footer line substr($$text, $-[0], $+[0] - $-[0], ''); } elsif ($pi->{type} eq 'diff') { # XXX: could do better $$text =~ /^$/m and $pi->{size} = $-[0]-1; } elsif ($pi->{type} eq 'shar') { $$text =~ /^exit$/m and $pi->{size} = $+[0]; } elsif ($pi->{type} eq 'uuencoded') { $$text =~ /^end$/m and $pi->{size} = $+[0]; } elsif ($pi->{type} eq 'base64') { $$text =~ /^====$/m and $pi->{size} = $+[0]; } if ($pi->{size} == 0) { $pi->{size} = length $$text; return undef; } return $pi; } #------------------------------------------------------------------------------ # Func: GetAttachment() # Desc: Recursively search sections for a downloadable attachment. # # Args: $num - Attachment index (counts from 1) # # Retn: $sec - Attachment section (or undef) #------------------------------------------------------------------------------ sub GetAttachment { my $self = shift; my ($num) = @_; my $cur = 1; my $iter = GnatsPR::SectionIterator->new($self, 'Fix', 'Audit-Trail'); while (my $item = $iter->next()) { if (ref $item eq 'GnatsPR::Section::Patch') { # Patch sections return $item if ($cur++ == $num); } elsif (ref $item eq 'GnatsPR::Section::Email') { # Attachments from MIME messages my $mime_iter = GnatsPR::MIMEIterator->new($item); while (my $part = $mime_iter->next()) { if ($part->isattachment) { return $part if ($cur++ == $num); } } } } return undef; } 1;