#!/usr/local/bin/perl use MIME::Parser; use MIME::Base64; use MIME::QuotedPrint; # mail2url is designed to catch attachments sent to a mailing list and # publish them on a web server, instead of discarding them (as some # listservers do) or sending them along by mail to the members (as other # listservers do). # # mail2url reads a mail on standard input. If the mail is of content-type # multipart, mail2url will process it. Mail of all other content-types # will be passed right through to standard output unaltered. # # In case of a multipart mail, mail2url will save the attachments to disk, # rename them and assign a web URL to each one. The first bodypart of # content-type text/plain or message/* will be considered to be the main # message of the original mail, and will be printed to standard output, # after the headers of the original mail (modified as appropriate) and a # list of URL:s to the saved attachments. # # In case of a multipart/alternative with both a text/plain part and a # text/html part, the administrator has the option to either discard the # html or to save it in a file as any other attachment. All other cases # of text/html bodyparts will be published as regular attachments # (inluding mails with text/html only). # # Any bodypart of type application/applefile (i.e. the Macintosh-specific # part of a "appledouble" mail, contains the "finder fork" of the file) # will be silently discarded. # # mail2url can also save a list of converted attachments to a HTML # formatted file in the web directory. The "From" and "Subject" # lines from the processed mails will be used as link text. Quotation # marks and e-mail addresses enclosed in angle brackets will be removed # from the "From" line. The characters "Re: " and the list name prefix # will be removed from the "Subject" line. Since entries are appended to # the file at the end, no closing UL or TABLE elements can be used. If # those are required by your web design, use SSI to merge the raw listing # with a properly formatted file. # # Installation: The mail server should pipe all incoming mail to the # mailinglist through mail2url before delivering the mail to the list # server. An example .qmail file for ezmlm would look like this: # |/usr/local/bin/mail2url | /usr/local/bin/ezmlm-send '/path/list' # You also must have a web server serving files from the directory # where mail2url saves the attachments (configure paths below). # # By Anders Hultman, September 2002--August 2005. # Sample code used written by Magnus Bodin and Eryq. # Released under the GNU General Public License. # See the end of file for "known issues". # ############################################################################ # Configure variables # Directory to temporarily store incoming files in during processing. # Use absolute path and no trailing slash in directory name. $tmpdir = "/var/mail2url"; # Web directory from where incoming files are published. # Use absolute path and no trailing slash in directory name. # NB! $tmpdir and $webdir must be on the same file system (partition). $webdir = "/var/www/mail2url"; # Web URL to prefix resulting file names with. NB! Trailing slash! $weburl = "http://your.server.com/mail2url/"; # File name of table of content listing in the web directory. # Leave blank for no file. $tocfile = "index.html"; # List prefix in the Subject line to remove from the content list entry. # Note the trailing space. Leave blank for no prefix removal. $listid = "[mailinglistname] "; # Phrase to print in the mail in case of successful conversion. $conv = "Attachment converted by mail2url:"; # Phrase to print in the mail in case of a failed conversion. $noconv = "Attachment NOT converted:"; # Set $clean to 1 to empty the $tmpdir after each pass. # Set $clean to 0 to keep the raw mail and any non-published bodyparts # (most often only the text part that went into the outgoing mail) in the # $tmpdir after each pass (useful for debugging). $clean = 1; # Set $save_html to 1 to save the HTML part of a multipart/alternative # mail just like any other attachment # Set $save_html to 0 to discard the HTML part of a multipart/alternative $save_html = 0; # No more configuration. ############################################################################ # Get the headers from the original mail # into one scalar and one array while () { last if (/^$/); $headers .= $_; $attachment = 1 if (/^Content-Type:\smultipart/i); $alternative = 1 if (/^Content-Type:\smultipart\/alternative/i); if (/^From:\s+(.+)/) { $from = rfc2047_decode_string ($1); } if (/^Subject:\s+(.+)/) { $subj = rfc2047_decode_string ($1); } } if ($attachment) { my $incoming = "$tmpdir/$^T.$$.mail"; open (FILE,">$incoming") || ($fel = "open returned $!"); print FILE "$headers\n" || ($fel = "print returned $!"); while () { print FILE $_ || ($fel = "print returned $!"); } close(FILE) || ($fel = "close returned $!"); # Arrange parameters for the file listing page $from =~ s/"//g; $from =~ s/\s+$//; $ori_from = $from; $from =~ s/<.+>//; $ori_from =~ s///; $from = $ori_from unless ($from); $subj =~ s/^Re: //; $subj =~ s/^\Q$listid\E//; $subj =~ s/\{\Q$pixels\E\}\s*// if ($pixels); unless ($fel) { # Parse the message my $parser = new MIME::Parser; $parser->output_dir($tmpdir); $entity = $parser->parse_open($incoming) or ($fel = "couldn't parse MIME stream"); ## Sometimes the above line sets $! to "No such file or directory" ## without really failing. I don't know why but I don't want that so ## I'll just shamelessly reset it here. $strangeerror = $!; undef($!); $body_text = ""; ($dt,$pd) = get_date(0); dump_entity($entity,""); } # Juggle with the content-type headers $headers =~ s/^(Content-.+)$/X-Outer-$1/img; $body_headers =~ s/^(Content-.+)$/X-Inner-$1/img; if ($body_headers =~ /utf\-8/im) { $body_headers .= "Content-Type: text/plain; charset=utf-8\n". "Content-Transfer-Encoding: 8bit\n"; } else { $body_headers .= "Content-Type: text/plain; charset=iso-8859-1\n". "Content-Transfer-Encoding: 8bit\n"; } # Print the mail print $headers; print "X-Mail2url: 3.3\n"; foreach (@ori_attach) { print "X-Attachment$_\n" if ($_); } print $body_headers if ($body_headers); print "X-Strange-Error: $strangeerror\n" if ($strangeerror); print "\n"; if ((@ori_attach) && (! $fel)) { print "$conv\n"; foreach (@new_attach) { print " $weburl$_ \n" if ($_); } print "-" x 76, "\n\n"; } $fel = "error: $!" if (($!) && (! $fel)); if ($fel) { print "$noconv\n"; print " $fel\n"; print "-" x 76, "\n\n"; } print "$body_text\n"; unlink <$tmpdir/*> if ($clean); } else { print "$headers\n"; print while (); } exit(0); ############################################################################ sub dump_entity { my ($entity,$nr) = @_; my $IO; # Output the body: my @parts = $entity->parts; if (@parts) { # multipart... my $i; foreach $i (0 .. $#parts) { # dump each part... dump_entity($parts[$i], $nr.$i); } } else { # single part... # Get MIME type, and display accordingly... my ($type, $subtype) = split('/', $entity->head->mime_type); my $body = $entity->bodyhandle; if (((($type =~ /^text$/i) && ($subtype =~ /^plain$/i)) || ($type =~ /^message$/i)) && ($body_text eq "")) { # save first proper text part $body_headers = $entity->head->original_text; if ($IO = $body->open("r")) { $body_text .= $_ while (defined($_ = $IO->getline)); $IO->close; } else { # d'oh! $body_text = "$0: couldn't find/open $nr: $!"; $fel = $body_text; } } elsif ($subtype =~ /^applefile$/) { # silently discard ugly "application/applefile" } elsif (($alternative) && (! $save_html)) { # silently discard HTML part of multipart/alternative } else { # process all other parts my $path = $body->path; my $size = ($path ? (-s $path) : '???'); my $ext; $path =~ m/\/([^\/]+)$/; my $file = $1; $ori_attach[$nr+0] = "-$nr: $file ($type/$subtype $size bytes)"; if ($file =~ m/.+(\.\w+)$/) { $ext = $1; } $ext = ".".$subtype unless ($ext); $new_attach[$nr+0] = $dt.$nr.$ext; rename($path,"$webdir/$new_attach[$nr+0]") || ($fel = "rename returned $!"); chmod(0666,"$webdir/$new_attach[$nr+0]") || ($fel = "chmod returned $!"); open (TOC,">>$webdir/$tocfile"); print TOC "$pd ", '', "$from: $subj
\n"; close(TOC); } } 1; } ############################################################################ sub get_date ($) { my ($offset) = @_; $offset += 0; my @time = localtime(time + $offset); return( sprintf("%4d%02d%02d-%02d%02d%02d-", $time[5]+1900, $time[4]+1, $time[3], $time[2], $time[1], $time[0]), sprintf("%4d-%02d-%02d %02d:%02d", $time[5]+1900, $time[4]+1, $time[3], $time[2], $time[1]) ); } ###################################################### sub rfc2047_decode_encoded_word ($) { my ($charset, $encoding, $text) = @_; $encoding = lc $encoding; if ($encoding eq 'q') { $text = decode_qp_in_header($text); } elsif ($encoding eq 'b') { $text = decode_base64($text); } return $text; } sub rfc2047_decode_string ($) { my ($line) = @_; my ($c, $e, $t); $line =~ s/^\s+//; $line =~ s{=\?([^\?]+)\?(\w)\?([^\?]*)\?=} {$c=$1;$e=$2;$t=$3;&rfc2047_decode_encoded_word($c, $e, $t)}gei; # In a more extensive version, we might want to save ALL charsets mentioned above # {push(@{$c},$1);push(@{$e},$2);$t=$3;&rfc2047_decode_encoded_word($1, $2, $t)}gei; return ($c,$e,$line); } sub decode_qp_in_header ($) { my ($line) = @_; $line =~ s/=([0-9a-f][0-9a-f])/chr(hex($1))/ige; $line =~ s/_/ /g; return $line; } ############################################################################ # Known issues: # # # Naming conflicts with nested multiparts: If a mail has more than ten # bodyparts, and any of them is in turn a multipart with more than # ten bodyparts, a naming conflict can occur which makes some of the # bodyparts not to be published. Any unpublished bodypart will be left # in the $tmpdir directory (if $clean is set to FALSE). # # Filetype is determined by suggested filename extension. If missing, # the subtype of the content type will be used. This will make some # attachments being published with the file extensions ".plain" and # ".octet-stream" which may confuse the web server, the web clients or # the members of the mailing list. Future versions of mail2url may use # the magic number or some other technique to determine file type if # the first tests should fail. # # If a mail has an "inline" attachment, i.e. a text part, the attachment # and a second text part that is also meant to be a part of the main # message, the second part will be saved as a attachment too, since only # the first text part will be used in the resulting mail. # # Currently, you need to install one copy of mail2url for each mailing # list you want to use it on, each with different settings in the # beginning of the file. A future version may use separate config files # instead. # # A future version may reject certain file types that more often than # not contain viruses, and restrict permitted file types to just images # or some other criteria. # # The system needs double the size of the raw mail available on the # file system partition to successfully convert the attachments. # # END.