#!/usr/local/bin/perl use MIME::Parser; use MIME::Base64; use MIME::QuotedPrint; # mail2url is designed to catch attachments sent to a mailing list and # publish them on a web server, instead of discarding them (as some # listservers do) or sending them along by mail to the members (as other # listservers do). # # mail2url reads a mail on standard input. If the mail is of content-type # multipart, mail2url will process it. Mail of all other content-types # will be passed right through to standard output unaltered. # # In case of a multipart mail, mail2url will save the attachments to disk, # rename them and assign a web URL to each one. The first bodypart of # content-type text/plain will be considered to be the main message of the # original mail, and will be printed to standard output, after the headers # of the original mail (modified as appropriate) and a list of URL:s to # the saved attachments. Any bodypart of type application/applefile (i.e. # the Macintosh-specific part of a "appledouble" mail, contains the # "finder fork" of the file) will be silently discarded. # # mail2url can also save a list of converted attachments to a HTML # formatted file in the web directory. Currently, the From and Subject # lines from the processed mails will be used as link text. Quotation # marks and e-mail addresses enclosed in angle brackets will be removed # from the From line. The characters "Re: " and the list name prefix will # be removed from the Subject line. Since entries are appended to the file # at the end, no closing UL or TABLE elements can be used. If those are # required by your web design, use SSI to merge the raw listing with a # properly formatted file. # # Installation: The mail server should pipe all incoming mail to the # mailinglist through mail2url before delivering the mail to the list # server. You also must have a web server serving files from the directory # where mail2url saves the attachments (configure paths below). # # By Anders Hultman, September--December 2002. # Sample code used written by Magnus Bodin and Eryq. # Released under the GNU General Public License. # See the end of file for "known issues". # ############################################################################ # Configure variables # Directory to temporarily store incoming files in during processing. # Use absolute path and no trailing slash in directory name. $tmpdir = "/var/mail2url"; # Web directory from where incoming files are published. # Use absolute path and no trailing slash in directory name. # NB! $tmpdir and $webdir must be on the same file system (partition). $webdir = "/var/www/mail2url"; # Web URL to prefix resulting file names with. NB! Trailing slash! $weburl = "http://your.server.com/mail2url/"; # File name of table of content listing in the web directory. # Leave blank for no file. $tocfile = "index.html"; # List prefix in the Subject line to remove from the content list entry. # Note the trailing space. Leave blank for no prefix removal. $listid = "[mailinglistname] "; # Phrase to print in the mail in case of successful conversion. $conv = "Attachment converted by mail2url:"; # Phrase to print in the mail in case of a failed conversion. $noconv = "Attachment NOT converted:"; # Set $clean to TRUE to empty the $tmpdir after each pass. # Set $clean to FALSE to keep the raw mail and any non-published bodyparts # (most often only the text part that went into the outgoing mail) in the # $tmpdir after each pass (useful for debugging). $clean = 1; # No more configuration. ############################################################################ # Get the headers from the original mail # into one scalar and one array while () { last if (/^$/); $headers .= $_; $attachment = 1 if (/^Content-Type:\smultipart/i); if (/^From:\s+(.+)/) { $from = rfc2047_decode_string ($1); } if (/^Subject:\s+(.+)/) { $subj = rfc2047_decode_string ($1); } } if ($attachment) { my $incoming = "$tmpdir/$^T.$$.mail"; open (FILE,">$incoming") || ($fel = "open returned $!"); print FILE "$headers\n" || ($fel = "print returned $!"); while () { print FILE $_ || ($fel = "print returned $!"); } close(FILE) || ($fel = "close returned $!"); unless ($fel) { # Parse the message my $parser = new MIME::Parser; $parser->output_dir($tmpdir); $entity = $parser->parse_open($incoming) or ($fel = "couldn't parse MIME stream"); ## Sometimes the above line sets $! to "No such file or directory" ## without really failing. I don't know why but I don't want that so ## I'll just shamelessly reset it here. $strangeerror = $!; undef($!); $body_text = ""; $dt = get_date(0); dump_entity($entity,""); } # Juggle with the content-type headers if ($body_headers) { $headers =~ s/^(Content-.+)$/X-Original-Pt0-$1/img; $body_headers =~ s/^(Content-Transfer-Encoding:)(.*)$/$1 8bit\nX-Original-Pt1-$1$2/im; } # Print the mail print $headers; print "X-Mail2url: 3.2\n"; foreach (@ori_attach) { print "X-Attachment$_\n" if ($_); } print $body_headers if ($body_headers); print "X-Strange-Error: $strangeerror\n" if ($strangeerror); print "\n"; if ((@ori_attach) && (! $fel)) { print "$conv\n"; foreach (@new_attach) { print " $weburl$_ \n" if ($_); } print "=" x 76, "\n\n"; } $fel = "error: $!" if (($!) && (! $fel)); if ($fel) { print "$noconv\n"; print " $fel\n"; print "=" x 76, "\n\n"; } print "$body_text\n"; unlink <$tmpdir/*> if ($clean); } else { print "$headers\n"; print while (); } exit(0); ############################################################################ sub dump_entity { my ($entity,$nr) = @_; my $IO; # Output the body: my @parts = $entity->parts; if (@parts) { # multipart... my $i; foreach $i (0 .. $#parts) { # dump each part... dump_entity($parts[$i], $nr.$i); } } else { # single part... # Get MIME type, and display accordingly... my ($type, $subtype) = split('/', $entity->head->mime_type); my $body = $entity->bodyhandle; if (($type =~ /^(text|message)$/) && ($body_text eq "")) { # save first text part $body_headers = $entity->head->original_text; if ($IO = $body->open("r")) { $body_text .= $_ while (defined($_ = $IO->getline)); $IO->close; } else { # d'oh! $body_text = "$0: couldn't find/open $nr: $!"; $fel = $body_text; } } elsif ($subtype !~ /^applefile$/) { # process all other parts except for ugly "application/applefile" my $path = $body->path; my $size = ($path ? (-s $path) : '???'); my $ext; $path =~ m/\/([^\/]+)$/; my $file = $1; $ori_attach[$nr+0] = "-$nr: $file ($type/$subtype $size bytes)"; if ($file =~ m/.+(\.\w+)$/) { $ext = $1; } $ext = ".".$subtype unless ($ext); $new_attach[$nr+0] = $dt.$nr.$ext; rename($path,"$webdir/$new_attach[$nr+0]") || ($fel = "rename returned $!"); chmod(0666,"$webdir/$new_attach[$nr+0]") || ($fel = "chmod returned $!"); if ($tocfile) { open (TOC,">>$webdir/$tocfile"); $from =~ s/<.+>//; $from =~ s/"//g; $from =~ s/\s+$//; $subj =~ s/Re: //; $subj =~ s/^\Q$listid\E// if ($listid); print TOC '', "$from: $subj
\n"; close(TOC); } } } 1; } ############################################################################ sub get_date ($) { my ($offset) = @_; $offset += 0; my @time = localtime(time + $offset); return sprintf("%4d%02d%02d-%02d%02d%02d-", $time[5]+1900, $time[4]+1, $time[3], $time[2], $time[1], $time[0]); } ###################################################### sub rfc2047_decode_encoded_word ($) { my ($charset, $encoding, $text) = @_; $encoding = lc $encoding; if ($encoding eq 'q') { $text = decode_qp_in_header($text); } elsif ($encoding eq 'b') { $text = decode_base64($text); } return $text; } sub rfc2047_decode_string ($) { my ($line) = @_; my ($c, $e, $t); $line =~ s/^\s+//; $line =~ s{=\?([^\?]+)\?(\w)\?([^\?]*)\?=} {$c=$1;$e=$2;$t=$3;&rfc2047_decode_encoded_word($c, $e, $t)}gei; # In a more extensive version, we might want to save ALL charsets mentioned above # {push(@{$c},$1);push(@{$e},$2);$t=$3;&rfc2047_decode_encoded_word($1, $2, $t)}gei; return ($c,$e,$line); } sub decode_qp_in_header ($) { my ($line) = @_; $line =~ s/=([0-9a-f][0-9a-f])/chr(hex($1))/ige; $line =~ s/_/ /g; return $line; } ############################################################################ # Known issues: # # # Naming conflicts with nested multiparts: If a mail has more than ten # bodyparts, and any of them is in turn a multipart with more than # ten bodyparts, a naming conflict can occur which makes some of the # bodyparts not to be published. Any unpublished bodypart will be left # in the $tmpdir directory (if $clean is set to FALSE). # # All kinds of multipart messages is considered to be mails with # attachments, not only multipart/mixed but also multipart/alternative # which may or may not be what you want. # # Filetype is determined by suggested filename extension. If missing, # the subtype of the content type will be used. This will make some # attachments being published with the file extensions ".plain" and # ".octet-stream" which may confuse the web server, the web clients or # the members of the mailing list. Future versions of mail2url may use # the magic number or some other technique to determine file type if # the first tests should fail. # # If a mail has an "inline" attachment, i.e. a text part, the attachment # and a second text part that is also meant to be a part of the main # message, the second part will be saved as a attachment too, since only # the first text part will be used in the resulting mail. # # The system needs double the size of the raw mail available on the # file system partition to successfully convert the attachments. # # END.