#!/usr/local/bin/perl
use MIME::Parser;
use MIME::Base64;
use MIME::QuotedPrint;

# mail2url is designed to catch attachments sent to a mailing list and
# publish them on a web server, instead of discarding them (as some
# listservers do) or sending them along by mail to the members (as other
# listservers do).
#
# mail2url reads a mail on standard input. If the mail is of content-type
# multipart, mail2url will process it. Mail of all other content-types
# will be passed right through to standard output unaltered.
#
# In case of a multipart mail, mail2url will save the attachments to disk,
# rename them and assign a web URL to each one. The first bodypart of
# content-type text/plain will be considered to be the main message of the
# original mail, and will be printed to standard output, after the headers
# of the original mail (modified as appropriate) and a list of URL:s to
# the saved attachments. Any bodypart of type application/applefile (i.e.
# the Macintosh-specific part of a "appledouble" mail, contains the
# "finder fork" of the file) will be silently discarded.
#
# mail2url can also save a list of converted attachments to a HTML
# formatted file in the web directory. Currently, the From and Subject
# lines from the processed mails will be used as link text. Quotation
# marks and e-mail addresses enclosed in angle brackets will be removed
# from the From line. The characters "Re: " and the list name prefix will
# be removed from the Subject line. Since entries are appended to the file
# at the end, no closing UL or TABLE elements can be used. If those are
# required by your web design, use SSI to merge the raw listing with a
# properly formatted file.
#
# Installation: The mail server should pipe all incoming mail to the
# mailinglist through mail2url before delivering the mail to the list
# server. You also must have a web server serving files from the directory
# where mail2url saves the attachments (configure paths below).
#
# By Anders Hultman, September 2002--July 2003.
# Sample code used written by Magnus Bodin and Eryq.
# Released under the GNU General Public License.
# See the end of file for "known issues".
#
############################################################################
# Configure variables

# Directory to temporarily store incoming files in during processing.
# Use absolute path and no trailing slash in directory name.
  $tmpdir = "/var/mail2url";

# Web directory from where incoming files are published.
# Use absolute path and no trailing slash in directory name.
# NB! $tmpdir and $webdir must be on the same file system (partition).
  $webdir = "/var/www/mail2url";

# Web URL to prefix resulting file names with. NB! Trailing slash!
  $weburl = "http://your.server.com/mail2url/"; 

# File name of table of content listing in the web directory. 
# Leave blank for no file.
  $tocfile = "index.html";

# List prefix in the Subject line to remove from the content list entry.
# Note the trailing space. Leave blank for no prefix removal.
  $listid  = "[mailinglistname] ";

# Phrase to print in the mail in case of successful conversion.
  $conv = "Attachment converted by mail2url:";

# Phrase to print in the mail in case of a failed conversion.
  $noconv = "Attachment NOT converted:";

# Set $clean to TRUE to empty the $tmpdir after each pass.
# Set $clean to FALSE to keep the raw mail and any non-published bodyparts
# (most often only the text part that went into the outgoing mail) in the
# $tmpdir after each pass (useful for debugging).
  $clean = 1;

# No more configuration.
############################################################################

# Get the headers from the original mail
# into one scalar and one array
while (<STDIN>) {
  last if (/^$/);
  $headers .= $_;
  $attachment = 1 if (/^Content-Type:\smultipart/i);
  if (/^From:\s+(.+)/)    { $from = rfc2047_decode_string ($1); }
  if (/^Subject:\s+(.+)/) { $subj = rfc2047_decode_string ($1); }
}

if ($attachment) {
  my $incoming = "$tmpdir/$^T.$$.mail";        
  open (FILE,">$incoming") || ($fel = "open returned $!");
  print FILE "$headers\n" || ($fel = "print returned $!");
  while (<STDIN>) {
    print FILE $_ || ($fel = "print returned $!");
  }
  close(FILE) || ($fel = "close returned $!");

  unless ($fel) {
    # Parse the message
    my $parser = new MIME::Parser;
  
    $parser->output_dir($tmpdir);
    $entity = $parser->parse_open($incoming) or
       ($fel = "couldn't parse MIME stream");
    ## Sometimes the above line sets $! to "No such file or directory"
    ## without really failing. I don't know why but I don't want that so
    ## I'll just shamelessly reset it here.
    $strangeerror = $!; undef($!);
    $body_text = "";
    ($dt,$pd) = get_date(0);
    dump_entity($entity,"");
  }

  # Juggle with the content-type headers
  $headers      =~ s/^(Content-.+)$/X-Outer-$1/img;
  $body_headers =~ s/^(Content-.+)$/X-Inner-$1/img;
  $body_headers .= "Content-Type: text/plain; charset=iso-8859-1\n".
                   "Content-Transfer-Encoding: 8bit\n";

  # Print the mail
  print $headers;
  print "X-Mail2url: 3.2.3\n";
  foreach (@ori_attach) {
    print "X-Attachment$_\n" if ($_); 
  }
  print $body_headers;
  print "X-Strange-Error: $strangeerror\n" if ($strangeerror);
  print "\n";
  if ((@ori_attach) && (! $fel)) {
    print "$conv\n";
    foreach (@new_attach) {
      print "  $weburl$_  \n" if ($_); 
    }
    print "=" x 76, "\n\n";
  }

  $fel = "error: $!" if (($!) && (! $fel));
  if ($fel) {
    print "$noconv\n";
    print "  $fel\n";
    print "=" x 76, "\n\n";
  }
  print "$body_text\n";
  unlink <$tmpdir/*> if ($clean);
} else {
  print "$headers\n";
  print while (<STDIN>);
}     
exit(0);

############################################################################

sub dump_entity {
  my ($entity,$nr) = @_;
  my $IO;

  # Output the body:
  my @parts = $entity->parts;
  if (@parts) {                     # multipart...
    my $i;
    foreach $i (0 .. $#parts) {       # dump each part...
      dump_entity($parts[$i], $nr.$i);
    }
  }
  else {                            # single part...	
    # Get MIME type, and display accordingly...
    my ($type, $subtype) = split('/', $entity->head->mime_type);
    my $body = $entity->bodyhandle;
    if (($type =~ /^(text|message)$/) && ($body_text eq "")) {
      # save first text part 
      $body_headers = $entity->head->original_text;
      if ($IO = $body->open("r")) {
        $body_text .= $_ while (defined($_ = $IO->getline));
        $IO->close;
      }
      else {       # d'oh!
        $body_text = "$0: couldn't find/open $nr: $!";
        $fel = $body_text;
      }
    }
    elsif ($subtype !~ /^applefile$/) { 
      # process all other parts except for ugly "application/applefile"
      my $path = $body->path;
      my $size = ($path ? (-s $path) : '???');
      my $ext;
      $path =~ m/\/([^\/]+)$/;
      my $file = $1;
      $ori_attach[$nr+0] = "-$nr: $file ($type/$subtype $size bytes)";
      if ($file =~ m/.+(\.\w+)$/) { $ext = $1; }
      $ext = ".".$subtype unless ($ext);
      $new_attach[$nr+0] = $dt.$nr.$ext;
      rename($path,"$webdir/$new_attach[$nr+0]") ||
            ($fel = "rename returned $!");
      chmod(0666,"$webdir/$new_attach[$nr+0]") ||
            ($fel = "chmod returned $!");
      $from =~ s/<.+>//;  $from =~ s/"//g; $from =~ s/\s+$//;
      $subj =~ s/^Re: //; $subj =~ s/^\Q$listid\E//;
      open (TOC,">>$webdir/$tocfile");
      print TOC "<tt>$pd</tt> ",
                '<a href="',$new_attach[$nr+0],'">',
                "$from: $subj</a><!-- $file $type/$subtype ",
                "$size bytes --><br>\n";
      close(TOC);
    }
  }
  1;
}

############################################################################

sub get_date ($)
{
    my ($offset) = @_;
    $offset += 0;
    my @time = localtime(time + $offset);
    return( sprintf("%4d%02d%02d-%02d%02d%02d-",
            $time[5]+1900, $time[4]+1, $time[3], 
            $time[2], $time[1], $time[0]),
            sprintf("%4d-%02d-%02d %02d:%02d",
            $time[5]+1900, $time[4]+1, $time[3], 
            $time[2], $time[1])
          );
}

######################################################

sub rfc2047_decode_encoded_word ($)
{
        my ($charset, $encoding, $text) = @_;
        $encoding = lc $encoding; 
        if ($encoding eq 'q')
        {
                $text = decode_qp_in_header($text);
        }
        elsif ($encoding eq 'b')
        {
                $text = decode_base64($text);
        }

        return $text;
}

sub rfc2047_decode_string ($)
{
        my ($line) = @_;
        my ($c, $e, $t);
        $line =~ s/^\s+//;
        $line =~ s{=\?([^\?]+)\?(\w)\?([^\?]*)\?=}
        {$c=$1;$e=$2;$t=$3;&rfc2047_decode_encoded_word($c, $e, $t)}gei;
        # In a more extensive version, we might want to save ALL charsets mentioned above
        # {push(@{$c},$1);push(@{$e},$2);$t=$3;&rfc2047_decode_encoded_word($1, $2, $t)}gei;
        return ($c,$e,$line);
}

sub decode_qp_in_header ($)
{
        my ($line) = @_;
        $line =~ s/=([0-9a-f][0-9a-f])/chr(hex($1))/ige;
        $line =~ s/_/ /g;
        return $line;
}

############################################################################
# Known issues: 
#
# # Naming conflicts with nested multiparts: If a mail has more than ten
#   bodyparts, and any of them is in turn a multipart with more than
#   ten bodyparts, a naming conflict can occur which makes some of the
#   bodyparts not to be published. Any unpublished bodypart will be left
#   in the $tmpdir directory (if $clean is set to FALSE).
# # All kinds of multipart messages is considered to be mails with
#   attachments, not only multipart/mixed but also multipart/alternative
#   which may or may not be what you want.
# # Filetype is determined by suggested filename extension. If missing,
#   the subtype of the content type will be used. This will make some
#   attachments being published with the file extensions ".plain" and
#   ".octet-stream" which may confuse the web server, the web clients or
#   the members of the mailing list. Future versions of mail2url may use
#   the magic number or some other technique to determine file type if 
#   the first tests should fail.
# # If a mail has an "inline" attachment, i.e. a text part, the attachment
#   and a second text part that is also meant to be a part of the main
#   message, the second part will be saved as a attachment too, since only
#   the first text part will be used in the resulting mail.
# # The system needs double the size of the raw mail available on the
#   file system partition to successfully convert the attachments.
#
# END.