secmail.py - whimsy - Git at Google

 #!/usr/bin/python

 """
 The purpose of this script is to find attachments to email messages that
 are sent to secretary@apache.org and commit them into svn:documents/received.

 This task is made more difficult by the fact that email often uses payloads
 for reasons other than attachments, from time to time we get spam, some
 people routinely pgp sign all of their emails, and others use pgp signatures
 to sign forms.

 Deciding what to commit is therefore, necessarily, a bit of heuristics.  When
 in doubt, the intent here is to err on the side of committing more than is
 necessary rather than to miss an email.

 Examples of heuristics:
  * Images less than 10K bytes tend to be decorations for HTML formatted
    spam emails, and are not likely to be scanned forms.
  * text/plain email that contain a PGP signature and the ASF fax number
    are likely to be signed forms.
 """

 import email
 import gzip
 import mailbox
 import rfc822
 import mimetypes
 import os
 from datetime import datetime
 from email.header import decode_header
 from glob import glob
 import re
 from subprocess import Popen, PIPE
 from threading import Thread
 import commands
 import getpass

 try:
   from hashlib import md5
 except ImportError:
   from md5 import new as md5

 # MIME types of message parts which generally are not saved.
 skip = ['multipart/alternative', 'multipart/related', 'multipart/mixed',
         'message/delivery-status', 'text/plain', 'text/html']

 # attachment file names which are still considered for saving even if they
 # come in with one of the 'skip' mime types (detach() additionally checks
 # the content of such parts).
 forms = ['pgp.txt', 'icla.txt', 'icla.txt.asc', 'icla.pdf', 'icla.pdf.asc', 'membership-application.txt']

 # mime types of signature parts (PKCS#7 and PGP); a message whose only
 # eligible attachment has one of these types is not saved by detach().
 sigs  = ['application/pkcs7-signature', 'application/pgp-signature']

 # convert header from whatever encoding it is in to utf-8.  Handle
 # mislabelled encodings.
 def decode(header, field=0):
   """Return one decoded chunk of a mail header as a UTF-8 byte string.

   header -- the raw header value, or an already decoded unicode string
   field  -- index of the chunk, as split by decode_header(), to convert;
             an index beyond the last chunk raises IndexError

   A chunk whose charset is missing, unknown or does not match the data is
   reinterpreted as iso-8859-1, which accepts any byte sequence.
   """
   if isinstance(header, unicode):
     text, charset = header.encode('utf-8'), 'utf-8'
   else:
     text, charset = decode_header(header)[field]

   try:
     return text.decode(charset).encode('utf-8')
   except (LookupError, TypeError, UnicodeError):
     # TypeError:    no charset was declared (charset is None)
     # LookupError:  the declared charset is not known to Python
     # UnicodeError: the data does not match the declared charset
     return text.decode('iso-8859-1').encode('utf-8')

 # Ordered (pattern, replacement) rules applied by asciize().  The patterns
 # match the UTF-8 byte sequences of the characters they replace.  Order
 # matters: the digraphs must be tried before the single letter fallbacks.
 _ASCIIZE_RULES = (
   # digraphs.  May be culturally sensitive
   (r"\xc3\x9f", 'ss'),
   (r"\xc3\xa4|a\xcc\x88", 'ae'),
   (r"\xc3\xa5|a\xcc\x8a", 'aa'),
   (r"\xc3\xa6", 'ae'),
   (r"\xc3\xb1|n\xcc\x83", 'ny'),
   (r"\xc3\xb6|o\xcc\x88", 'oe'),
   (r"\xc3\xbc|u\xcc\x88", 'ue'),

   # latin 1
   (r"\xc3[\xa0-\xa5]", 'a'),
   (r"\xc3\xa7", 'c'),
   (r"\xc3[\xa8-\xab]", 'e'),
   (r"\xc3[\xac-\xaf]", 'i'),
   (r"\xc3[\xb2-\xb6]|\xc3\xb8", 'o'),
   (r"\xc3[\xb9-\xbc]", 'u'),
   (r"\xc3[\xbd\xbf]", 'y'),

   # Latin Extended-A
   (r"\xc4[\x80-\x85]", 'a'),
   (r"\xc4[\x86-\x8d]", 'c'),
   (r"\xc4[\x8e-\x91]", 'd'),
   (r"\xc4[\x92-\x9b]", 'e'),
   (r"\xc4[\x9c-\xa3]", 'g'),
   (r"\xc4[\xa4-\xa7]", 'h'),
   (r"\xc4[\xa8-\xb1]", 'i'),
   (r"\xc4[\xb2-\xb3]", 'ij'),
   (r"\xc4[\xb4-\xb5]", 'j'),
   (r"\xc4[\xb6-\xb8]", 'k'),
   (r"\xc4[\xb9-\xff]|\xc5[\x80-\x82]", 'l'),
   (r"\xc5[\x83-\x8b]", 'n'),
   (r"\xc5[\x8c-\x91]", 'o'),
   (r"\xc5[\x92-\x93]", 'oe'),
   (r"\xc5[\x94-\x99]", 'r'),
   # ends at \xa1: \xc5\xa2 (U+0162, T with cedilla) belongs to the 't' rule
   (r"\xc5[\x9a-\xa1]", 's'),
   (r"\xc5[\xa2-\xa7]", 't'),
   (r"\xc5[\xa8-\xb3]", 'u'),
   (r"\xc5[\xb4-\xb5]", 'w'),
   (r"\xc5[\xb6-\xb8]", 'y'),
   (r"\xc5[\xb9-\xbe]", 'z'),

   # denormalized diacritics
   (r"\xcc[\x80-\xff]|\xcd[\x80-\xaf]", ''),
 )

 def asciize(name):
   """Convert name into a rough ASCII equivalent usable as a file name in SVN.

   name -- the text to convert; the rules match UTF-8 encoded characters

   Accented letters are replaced by unaccented letters or digraphs, and
   every remaining run of characters other than '.' and word characters
   becomes a single '-'.  Returns the converted string.
   """
   # only names with bytes outside of the ASCII range need the rule table
   if re.search(r"[^\x00-\x7F]", name):
     for pattern, replacement in _ASCIIZE_RULES:
       name = re.sub(pattern, replacement, name)

   return re.sub(r"[^.\w]+", '-', name)

 # add svn at sign if necessary
 def svn(command, file):
   """Run an svn subcommand against a single path through the shell.

   command -- the svn subcommand and its options, already in shell syntax
   file    -- the path to operate on

   Returns the status reported by os.system().
   """
   target = file
   # a trailing at sign keeps svn from reading an '@' inside of the path as
   # the start of a peg revision
   if '@' in target: target = target + '@'
   # single quote the path so that the shell passes it on as one literal
   # word, whatever characters it contains
   target = "'" + target.replace("'", "'\\''") + "'"
   return os.system('svn ' + command + ' ' + target)

 # spam assassin client
 def analyze(msg):
   """Pipe a message through the 'spamc' command and return the result.

   msg -- the email message to check

   The output of spamc is parsed into a new message, which is returned with
   the original subject restored and with an additional boolean attribute
   'spam' that is true when its X-Spam-Status header starts with 'Yes'.
   """
   spamc = Popen('spamc', shell=True, stdin=PIPE, stdout=PIPE)

   # writer thread: sends the message to spamc while the output of spamc is
   # parsed below (presumably so that neither pipe can fill up and block)
   class passthru(Thread):
     def __init__(self, stdin, msg):
       Thread.__init__(self)
       self.msg = msg
       self.stdin = stdin
     def run(self):
       try:
         email.generator.Generator(self.stdin).flatten(self.msg)
       except Exception:
         # best effort: whatever spamc did receive is still parsed below
         pass
       self.stdin.close()

   thread = passthru(spamc.stdin, msg)
   thread.start()
   subject = msg['subject']
   result = email.message_from_file(spamc.stdout)
   if subject is not None:
     # spamc mangles encoded strings.  Assigning a header appends to the
     # existing ones, so spamc's copy has to be removed first or it would
     # remain the one that lookups return.
     del result['subject']
     result['subject'] = subject
   setattr(result, 'spam', str(result['X-Spam-Status']).startswith('Yes'))
   thread.join()
   spamc.wait()
   spamc.stdout.close()
   return result

 # main logic for this script: process attachments for a single message
 def _shell_quote(text):
   """Quote text so that the shell passes it to a command as one literal word."""
   return "'" + text.replace("'", "'\\''") + "'"

 def _repr_body(value):
   """Return repr(value) without its string prefix and surrounding quotes."""
   literal = repr(value)
   return literal[literal.index(literal[-1]) + 1:-1]

 def detach(msg):
   """Save the eligible attachments of one multipart message into svn.

   msg -- a multipart email message; its payload must be a list of parts

   Messages without a Message-Id, messages for which a file already exists
   in the 'tally' directory and mail from known spammers are ignored.  The
   remaining eligible parts are written below 'received' and added to svn
   together with their mime type and a set of email:* properties.  A summary
   of the message is stored in the tally file, which also serves as the
   commit message.  Nothing is returned.
   """
   # quick exit if we have seen this entry before
   if not msg['message-id']: return
   digest = md5(msg['message-id']).hexdigest()
   if os.path.exists(os.path.join('tally',digest)): return

   # known spammers (a message without a From header must not end the run)
   if '<r_ieftin@yahoo.ro>' in (msg['from'] or ''):
     return

   # collect eligible attachments
   attachments = []
   for payload in msg.get_payload():

     # progress into multipart/mixed
     if payload.get_content_type() == 'multipart/mixed':
       parts = payload.get_payload()
     else:
       parts = [payload]

     # iterate over (possibly nested) attachments
     for part in parts:
       if part.get_content_type() in skip:
         # normally skipped types are only kept for known form names, and
         # only if the content looks like a form or carries a signature
         if part.get_filename() not in forms: continue
         # a multipart container has no decoded payload of its own
         content = part.get_payload(decode=True) or ''
         if 'License Agreement' not in content and \
           '-----BEGIN PGP SIGNATURE-----' not in content:
           continue
       if part.get_content_type() == 'image/gif':
         # small images tend to be decorations rather than scanned forms
         if len(part.get_payload(decode=True))<10240: continue
       # if not part.get_payload(decode=True): continue

       # get_filename doesn't appear to have an endswith method
       # if part.get_filename().endswith('.gpg'): continue
       attachments.append(part)

   if not attachments: return

   if os.system('svn update received') != 0:
     return

   ## COMMENTED OUT - AS SPAMC IS NOT INSTALLED HERE
   #
   # if 'eFax message from' not in decode(msg['subject']):
   #   msg = analyze(msg)
   #   if msg.spam:
   #     attachments = []

   # determine output file name prefix
   prefix = ''
   if len(attachments) > 1:
     # several attachments go into a directory named after the sender or,
     # if that name is unsuitable or already taken, after the message date
     prefix = rfc822.parseaddr(decode(msg['from']).decode('utf-8'))[1]
     received = os.path.join('received',prefix)
     if (not re.match(r'^[.@\w]+$',prefix)) or os.path.exists(received):
       parsed = msg['date'] and email.utils.parsedate(msg['date'])
       if parsed:
         # only the first six fields (year .. second) are meaningful
         stamp = datetime(*parsed[:6])
       else:
         # missing or unparsable Date header: use the current time
         stamp = datetime.now().replace(microsecond=0)
       prefix = stamp.isoformat().replace(':','_').replace('-','_')
       received = os.path.join('received',prefix)
     if not os.path.exists(received): os.mkdir(received)
     svn('add', received)
     prefix += os.sep
   elif len(attachments) == 1:
     name=asciize(decode(attachments[0].get_filename()))
     if not name: return
     if attachments[0].get_content_type() in sigs: return
     if len(name)<16:
       # short file names are prefixed with the sender to tell them apart
       prefix = decode(msg['from'])
       if prefix.startswith('"eFax"'):
         prefix = 'eFax'
       else:
         # NOTE(review): asciize() has already replaced '<' and '@' with
         # '-', so the two steps below look like no-ops and the whole From
         # header ends up in the prefix -- confirm the intended naming
         # before changing it.
         prefix = asciize(prefix)
         if prefix.find('<')>=0: prefix = prefix.split('<')[1]
         prefix = prefix.split('@')[0]
       prefix = prefix + '-'

   # determine commit message
   summary = "\n".join([
     'Subject: ' + decode(msg['subject']),
     'From: ' + decode(msg['from']),
     'Date: ' + str(msg['date']),
     'Message-Id: ' + str(msg['message-id']),
     'X-Spam-Status: ' + str(msg['X-Spam-Status']),
   ])

   count = 0
   file = None

   # decode payloads and add them to svn
   for attachment in attachments:
     mime = attachment.get_content_type()
     if mime == 'application/octet-stream':
       mime = mimetypes.guess_type(decode(attachment.get_filename()))[0]
     name=asciize(decode(attachment.get_filename()))
     if name=='none': name=str(dict(attachment.get_params()).get('name'))

     content = attachment.get_payload(decode=True)
     if content:
       file=os.path.join('received',(prefix+name).strip('-'))
       if os.path.isdir(file): file = os.path.join(file, 'unnamed')
       # attachments are binary data
       fh=open(file,'wb')
       try:
         fh.write(content)
       finally:
         fh.close()

       svn('add', file)
       # the content type is taken from the mail, so quote it for the shell
       if mime: svn('propset svn:mime-type ' + _shell_quote(mime), file)
       count = count + 1

   # with several files the properties and the commit apply to the directory
   if count>1: file = os.path.join('received',prefix.strip('-'))

   # record sender information as svn properties (best effort)
   try:
     name = decode(msg['from'],0)
     try:
       # encoded display name followed by the address as a second chunk
       addr = rfc822.parseaddr(decode(msg['from'],1))[1]
     except Exception:
       # the header consists of a single chunk: split name and address
       name, addr = rfc822.parseaddr(name)

     if name != 'eFax' and file:
       props = {
         'email:id': msg['message-id'],
         'email:subject': re.sub(r'\n\s*', ' ', decode(msg['subject']))
       }
       if name: props['email:name'] = name
       if addr: props['email:addr'] = addr
       if msg['cc']: props['email:cc'] = re.sub(r'\s+', ' ', decode(msg['cc']))
       for (key, value) in props.items():
         # The values come from the mail and must not reach the shell
         # unquoted.  The escaping done by repr() is kept, so that the
         # stored form of non-ASCII values does not change; only its
         # Python quotes are replaced by real shell quoting.
         svn('propset ' + key + ' ' + _shell_quote(_repr_body(value)), file)
   except Exception:
     # the properties are optional; the attachments are committed without
     pass

   tally = os.path.join('tally',digest)
   fh=open(tally,'w')
   try:
     fh.write(summary + "\n")
   finally:
     fh.close()

   if count>0 and getpass.getuser() != 'www-data':
     if svn('commit --file ' + tally, file) != 0:
       return # try again next cron cycle

 if __name__ == "__main__":
   # locate the mbox archive(s) and the time stamp of the previous run
   if os.path.exists('/home/apmail/private-arch/officers-secretary'):
     archive = '/home/apmail/private-arch/officers-secretary/20*'
     os.chdir('/home/apmail/secretary-mail')
     previous = os.stat('latest').st_mtime
   elif os.path.exists('mailbox'):
     archive = 'mailbox'
     previous = int(os.stat(archive).st_mtime) - 1
   else:
     import sys
     sys.stderr.write("can't find mailbox.  Exiting.\n")
     sys.exit(1)

   latest = previous
   last_processed = None

   # process updated mbox files
   for file in glob(archive):
     # Take the time stamp before reading: a file that changes while it is
     # being processed then still counts as updated on the next run
     # (detach() skips the messages which were already handled).
     modified = os.stat(file).st_mtime
     if int(previous) >= int(modified): continue

     # open gzipped/raw file
     if file.endswith('.gz'):
       fh=gzip.open(file)
     else:
       fh=open(file)

     try:
       # process each multipart message in the mailbox
       for msg in iter(mailbox.UnixMailbox(fh, email.message_from_file)):
         last_processed = msg['Date']

         if msg.is_multipart():
           detach(msg)
         elif '919-573-9199' in msg.get_payload():
           # plain message which mentions the fax number and contains a PGP
           # signature line: wrap it so that it is handled as an attachment
           # named pgp.txt
           if '-----BEGIN PGP SIGNATURE-----' in msg.get_payload().split("\n"):
             msg.add_header('Content-Disposition', 'attachment',
               filename='pgp.txt')
             wrapper=email.message.Message()
             wrapper.attach(msg)
             for header in msg.keys(): wrapper[header]=msg[header]
             detach(wrapper)
     finally:
       fh.close()

     # keep track of the latest
     if latest < modified:
       latest = modified

   # record where we are so that the next run can pick up where we left off
   if latest > previous:
     os.utime('latest', (latest, latest))

   # check for any incomplete removals
   if commands.getoutput('svn status received') != '':
     os.system("svn st received | grep '!' | cut -c 8- | xargs -r svn revert --")

   # check for any incomplete commits
   if commands.getoutput('svn status received') != '':
     if getpass.getuser() != 'www-data':
       os.system('svn commit -m "queued documents" received')

   # update web page with last processed information
   if last_processed and os.path.exists('../public_html/secmail.txt'):
     fh = open('../public_html/secmail.txt', 'w')
     try:
       fh.write("Latest email processed was sent: %s" % last_processed)
     finally:
       fh.close()
	#!/usr/bin/python

	"""
	The purpose of this script is to find attachments to email messages that
	are sent to secretary@apache.org and commit them into svn:documents/received.

	This task is made more difficult by the fact that email often uses payloads
	for reasons other than attachments, from time to time we get spam, some
	people routinely pgp sign all of their emails, and others use pgp signatures
	to sign forms.

	Deciding what to commit is therefore, necessarily, a bit of heuristics. When
	in doubt, the intent here is to err on the side of committing more than is
	necessary rather than to miss an email.

	Examples of heuristics:
	* Images less than 10K bytes tend to be decorations for HTML formatted
	spam emails, and are not likely to be scanned forms.
	* text/plain email that contain a PGP signature and the ASF fax number
	are likely to be signed forms.
	"""

	import email
	import gzip
	import mailbox
	import rfc822
	import mimetypes
	import os
	from datetime import datetime
	from email.header import decode_header
	from glob import glob
	import re
	from subprocess import Popen, PIPE
	from threading import Thread
	import commands
	import getpass

	try:
	from hashlib import md5
	except ImportError:
	from md5 import new as md5

	# MIME types of message parts which generally are not saved.
	skip = ['multipart/alternative', 'multipart/related', 'multipart/mixed',
	'message/delivery-status', 'text/plain', 'text/html']

	# attachment file names which are still considered for saving even if they
	# come in with one of the 'skip' mime types (detach() additionally checks
	# the content of such parts).
	forms = ['pgp.txt', 'icla.txt', 'icla.txt.asc', 'icla.pdf', 'icla.pdf.asc', 'membership-application.txt']

	# mime types of signature parts (PKCS#7 and PGP); a message whose only
	# eligible attachment has one of these types is not saved by detach().
	sigs = ['application/pkcs7-signature', 'application/pgp-signature']

	# convert header from whatever encoding it is in to utf-8. Handle
	# mislabelled encodings.
	def decode(header, field=0):
	  """Return one decoded chunk of a mail header as a UTF-8 byte string.

	  header -- the raw header value, or an already decoded unicode string
	  field  -- index of the chunk, as split by decode_header(), to convert;
	            an index beyond the last chunk raises IndexError

	  A chunk whose charset is missing, unknown or does not match the data is
	  reinterpreted as iso-8859-1, which accepts any byte sequence.
	  """
	  if isinstance(header, unicode):
	    text, charset = header.encode('utf-8'), 'utf-8'
	  else:
	    text, charset = decode_header(header)[field]

	  try:
	    return text.decode(charset).encode('utf-8')
	  except (LookupError, TypeError, UnicodeError):
	    # TypeError:    no charset was declared (charset is None)
	    # LookupError:  the declared charset is not known to Python
	    # UnicodeError: the data does not match the declared charset
	    return text.decode('iso-8859-1').encode('utf-8')

	# Ordered (pattern, replacement) rules applied by asciize().  The patterns
	# match the UTF-8 byte sequences of the characters they replace.  Order
	# matters: the digraphs must be tried before the single letter fallbacks.
	_ASCIIZE_RULES = (
	  # digraphs.  May be culturally sensitive
	  (r"\xc3\x9f", 'ss'),
	  (r"\xc3\xa4|a\xcc\x88", 'ae'),
	  (r"\xc3\xa5|a\xcc\x8a", 'aa'),
	  (r"\xc3\xa6", 'ae'),
	  (r"\xc3\xb1|n\xcc\x83", 'ny'),
	  (r"\xc3\xb6|o\xcc\x88", 'oe'),
	  (r"\xc3\xbc|u\xcc\x88", 'ue'),

	  # latin 1
	  (r"\xc3[\xa0-\xa5]", 'a'),
	  (r"\xc3\xa7", 'c'),
	  (r"\xc3[\xa8-\xab]", 'e'),
	  (r"\xc3[\xac-\xaf]", 'i'),
	  (r"\xc3[\xb2-\xb6]|\xc3\xb8", 'o'),
	  (r"\xc3[\xb9-\xbc]", 'u'),
	  (r"\xc3[\xbd\xbf]", 'y'),

	  # Latin Extended-A
	  (r"\xc4[\x80-\x85]", 'a'),
	  (r"\xc4[\x86-\x8d]", 'c'),
	  (r"\xc4[\x8e-\x91]", 'd'),
	  (r"\xc4[\x92-\x9b]", 'e'),
	  (r"\xc4[\x9c-\xa3]", 'g'),
	  (r"\xc4[\xa4-\xa7]", 'h'),
	  (r"\xc4[\xa8-\xb1]", 'i'),
	  (r"\xc4[\xb2-\xb3]", 'ij'),
	  (r"\xc4[\xb4-\xb5]", 'j'),
	  (r"\xc4[\xb6-\xb8]", 'k'),
	  (r"\xc4[\xb9-\xff]|\xc5[\x80-\x82]", 'l'),
	  (r"\xc5[\x83-\x8b]", 'n'),
	  (r"\xc5[\x8c-\x91]", 'o'),
	  (r"\xc5[\x92-\x93]", 'oe'),
	  (r"\xc5[\x94-\x99]", 'r'),
	  # ends at \xa1: \xc5\xa2 (U+0162, T with cedilla) belongs to the 't' rule
	  (r"\xc5[\x9a-\xa1]", 's'),
	  (r"\xc5[\xa2-\xa7]", 't'),
	  (r"\xc5[\xa8-\xb3]", 'u'),
	  (r"\xc5[\xb4-\xb5]", 'w'),
	  (r"\xc5[\xb6-\xb8]", 'y'),
	  (r"\xc5[\xb9-\xbe]", 'z'),

	  # denormalized diacritics
	  (r"\xcc[\x80-\xff]|\xcd[\x80-\xaf]", ''),
	)

	def asciize(name):
	  """Convert name into a rough ASCII equivalent usable as a file name in SVN.

	  name -- the text to convert; the rules match UTF-8 encoded characters

	  Accented letters are replaced by unaccented letters or digraphs, and
	  every remaining run of characters other than '.' and word characters
	  becomes a single '-'.  Returns the converted string.
	  """
	  # only names with bytes outside of the ASCII range need the rule table
	  if re.search(r"[^\x00-\x7F]", name):
	    for pattern, replacement in _ASCIIZE_RULES:
	      name = re.sub(pattern, replacement, name)

	  return re.sub(r"[^.\w]+", '-', name)

	# add svn at sign if necessary
	def svn(command, file):
	  """Run an svn subcommand against a single path through the shell.

	  command -- the svn subcommand and its options, already in shell syntax
	  file    -- the path to operate on

	  Returns the status reported by os.system().
	  """
	  target = file
	  # a trailing at sign keeps svn from reading an '@' inside of the path as
	  # the start of a peg revision
	  if '@' in target: target = target + '@'
	  # single quote the path so that the shell passes it on as one literal
	  # word, whatever characters it contains
	  target = "'" + target.replace("'", "'\\''") + "'"
	  return os.system('svn ' + command + ' ' + target)

	# spam assassin client
	def analyze(msg):
	  """Pipe a message through the 'spamc' command and return the result.

	  msg -- the email message to check

	  The output of spamc is parsed into a new message, which is returned with
	  the original subject restored and with an additional boolean attribute
	  'spam' that is true when its X-Spam-Status header starts with 'Yes'.
	  """
	  spamc = Popen('spamc', shell=True, stdin=PIPE, stdout=PIPE)

	  # writer thread: sends the message to spamc while the output of spamc is
	  # parsed below (presumably so that neither pipe can fill up and block)
	  class passthru(Thread):
	    def __init__(self, stdin, msg):
	      Thread.__init__(self)
	      self.msg = msg
	      self.stdin = stdin
	    def run(self):
	      try:
	        email.generator.Generator(self.stdin).flatten(self.msg)
	      except Exception:
	        # best effort: whatever spamc did receive is still parsed below
	        pass
	      self.stdin.close()

	  thread = passthru(spamc.stdin, msg)
	  thread.start()
	  subject = msg['subject']
	  result = email.message_from_file(spamc.stdout)
	  if subject is not None:
	    # spamc mangles encoded strings.  Assigning a header appends to the
	    # existing ones, so spamc's copy has to be removed first or it would
	    # remain the one that lookups return.
	    del result['subject']
	    result['subject'] = subject
	  setattr(result, 'spam', str(result['X-Spam-Status']).startswith('Yes'))
	  thread.join()
	  spamc.wait()
	  spamc.stdout.close()
	  return result

	# main logic for this script: process attachments for a single message
	def _shell_quote(text):
	  """Quote text so that the shell passes it to a command as one literal word."""
	  return "'" + text.replace("'", "'\\''") + "'"

	def _repr_body(value):
	  """Return repr(value) without its string prefix and surrounding quotes."""
	  literal = repr(value)
	  return literal[literal.index(literal[-1]) + 1:-1]

	def detach(msg):
	  """Save the eligible attachments of one multipart message into svn.

	  msg -- a multipart email message; its payload must be a list of parts

	  Messages without a Message-Id, messages for which a file already exists
	  in the 'tally' directory and mail from known spammers are ignored.  The
	  remaining eligible parts are written below 'received' and added to svn
	  together with their mime type and a set of email:* properties.  A summary
	  of the message is stored in the tally file, which also serves as the
	  commit message.  Nothing is returned.
	  """
	  # quick exit if we have seen this entry before
	  if not msg['message-id']: return
	  digest = md5(msg['message-id']).hexdigest()
	  if os.path.exists(os.path.join('tally',digest)): return

	  # known spammers (a message without a From header must not end the run)
	  if '<r_ieftin@yahoo.ro>' in (msg['from'] or ''):
	    return

	  # collect eligible attachments
	  attachments = []
	  for payload in msg.get_payload():

	    # progress into multipart/mixed
	    if payload.get_content_type() == 'multipart/mixed':
	      parts = payload.get_payload()
	    else:
	      parts = [payload]

	    # iterate over (possibly nested) attachments
	    for part in parts:
	      if part.get_content_type() in skip:
	        # normally skipped types are only kept for known form names, and
	        # only if the content looks like a form or carries a signature
	        if part.get_filename() not in forms: continue
	        # a multipart container has no decoded payload of its own
	        content = part.get_payload(decode=True) or ''
	        if 'License Agreement' not in content and \
	          '-----BEGIN PGP SIGNATURE-----' not in content:
	          continue
	      if part.get_content_type() == 'image/gif':
	        # small images tend to be decorations rather than scanned forms
	        if len(part.get_payload(decode=True))<10240: continue
	      # if not part.get_payload(decode=True): continue

	      # get_filename doesn't appear to have an endswith method
	      # if part.get_filename().endswith('.gpg'): continue
	      attachments.append(part)

	  if not attachments: return

	  if os.system('svn update received') != 0:
	    return

	  ## COMMENTED OUT - AS SPAMC IS NOT INSTALLED HERE
	  #
	  # if 'eFax message from' not in decode(msg['subject']):
	  #   msg = analyze(msg)
	  #   if msg.spam:
	  #     attachments = []

	  # determine output file name prefix
	  prefix = ''
	  if len(attachments) > 1:
	    # several attachments go into a directory named after the sender or,
	    # if that name is unsuitable or already taken, after the message date
	    prefix = rfc822.parseaddr(decode(msg['from']).decode('utf-8'))[1]
	    received = os.path.join('received',prefix)
	    if (not re.match(r'^[.@\w]+$',prefix)) or os.path.exists(received):
	      parsed = msg['date'] and email.utils.parsedate(msg['date'])
	      if parsed:
	        # only the first six fields (year .. second) are meaningful
	        stamp = datetime(*parsed[:6])
	      else:
	        # missing or unparsable Date header: use the current time
	        stamp = datetime.now().replace(microsecond=0)
	      prefix = stamp.isoformat().replace(':','_').replace('-','_')
	      received = os.path.join('received',prefix)
	    if not os.path.exists(received): os.mkdir(received)
	    svn('add', received)
	    prefix += os.sep
	  elif len(attachments) == 1:
	    name=asciize(decode(attachments[0].get_filename()))
	    if not name: return
	    if attachments[0].get_content_type() in sigs: return
	    if len(name)<16:
	      # short file names are prefixed with the sender to tell them apart
	      prefix = decode(msg['from'])
	      if prefix.startswith('"eFax"'):
	        prefix = 'eFax'
	      else:
	        # NOTE(review): asciize() has already replaced '<' and '@' with
	        # '-', so the two steps below look like no-ops and the whole From
	        # header ends up in the prefix -- confirm the intended naming
	        # before changing it.
	        prefix = asciize(prefix)
	        if prefix.find('<')>=0: prefix = prefix.split('<')[1]
	        prefix = prefix.split('@')[0]
	      prefix = prefix + '-'

	  # determine commit message
	  summary = "\n".join([
	    'Subject: ' + decode(msg['subject']),
	    'From: ' + decode(msg['from']),
	    'Date: ' + str(msg['date']),
	    'Message-Id: ' + str(msg['message-id']),
	    'X-Spam-Status: ' + str(msg['X-Spam-Status']),
	  ])

	  count = 0
	  file = None

	  # decode payloads and add them to svn
	  for attachment in attachments:
	    mime = attachment.get_content_type()
	    if mime == 'application/octet-stream':
	      mime = mimetypes.guess_type(decode(attachment.get_filename()))[0]
	    name=asciize(decode(attachment.get_filename()))
	    if name=='none': name=str(dict(attachment.get_params()).get('name'))

	    content = attachment.get_payload(decode=True)
	    if content:
	      file=os.path.join('received',(prefix+name).strip('-'))
	      if os.path.isdir(file): file = os.path.join(file, 'unnamed')
	      # attachments are binary data
	      fh=open(file,'wb')
	      try:
	        fh.write(content)
	      finally:
	        fh.close()

	      svn('add', file)
	      # the content type is taken from the mail, so quote it for the shell
	      if mime: svn('propset svn:mime-type ' + _shell_quote(mime), file)
	      count = count + 1

	  # with several files the properties and the commit apply to the directory
	  if count>1: file = os.path.join('received',prefix.strip('-'))

	  # record sender information as svn properties (best effort)
	  try:
	    name = decode(msg['from'],0)
	    try:
	      # encoded display name followed by the address as a second chunk
	      addr = rfc822.parseaddr(decode(msg['from'],1))[1]
	    except Exception:
	      # the header consists of a single chunk: split name and address
	      name, addr = rfc822.parseaddr(name)

	    if name != 'eFax' and file:
	      props = {
	        'email:id': msg['message-id'],
	        'email:subject': re.sub(r'\n\s*', ' ', decode(msg['subject']))
	      }
	      if name: props['email:name'] = name
	      if addr: props['email:addr'] = addr
	      if msg['cc']: props['email:cc'] = re.sub(r'\s+', ' ', decode(msg['cc']))
	      for (key, value) in props.items():
	        # The values come from the mail and must not reach the shell
	        # unquoted.  The escaping done by repr() is kept, so that the
	        # stored form of non-ASCII values does not change; only its
	        # Python quotes are replaced by real shell quoting.
	        svn('propset ' + key + ' ' + _shell_quote(_repr_body(value)), file)
	  except Exception:
	    # the properties are optional; the attachments are committed without
	    pass

	  tally = os.path.join('tally',digest)
	  fh=open(tally,'w')
	  try:
	    fh.write(summary + "\n")
	  finally:
	    fh.close()

	  if count>0 and getpass.getuser() != 'www-data':
	    if svn('commit --file ' + tally, file) != 0:
	      return # try again next cron cycle

	if __name__ == "__main__":
	  # locate the mbox archive(s) and the time stamp of the previous run
	  if os.path.exists('/home/apmail/private-arch/officers-secretary'):
	    archive = '/home/apmail/private-arch/officers-secretary/20*'
	    os.chdir('/home/apmail/secretary-mail')
	    previous = os.stat('latest').st_mtime
	  elif os.path.exists('mailbox'):
	    archive = 'mailbox'
	    previous = int(os.stat(archive).st_mtime) - 1
	  else:
	    import sys
	    sys.stderr.write("can't find mailbox. Exiting.\n")
	    sys.exit(1)

	  latest = previous
	  last_processed = None

	  # process updated mbox files
	  for file in glob(archive):
	    # Take the time stamp before reading: a file that changes while it is
	    # being processed then still counts as updated on the next run
	    # (detach() skips the messages which were already handled).
	    modified = os.stat(file).st_mtime
	    if int(previous) >= int(modified): continue

	    # open gzipped/raw file
	    if file.endswith('.gz'):
	      fh=gzip.open(file)
	    else:
	      fh=open(file)

	    try:
	      # process each multipart message in the mailbox
	      for msg in iter(mailbox.UnixMailbox(fh, email.message_from_file)):
	        last_processed = msg['Date']

	        if msg.is_multipart():
	          detach(msg)
	        elif '919-573-9199' in msg.get_payload():
	          # plain message which mentions the fax number and contains a PGP
	          # signature line: wrap it so that it is handled as an attachment
	          # named pgp.txt
	          if '-----BEGIN PGP SIGNATURE-----' in msg.get_payload().split("\n"):
	            msg.add_header('Content-Disposition', 'attachment',
	              filename='pgp.txt')
	            wrapper=email.message.Message()
	            wrapper.attach(msg)
	            for header in msg.keys(): wrapper[header]=msg[header]
	            detach(wrapper)
	    finally:
	      fh.close()

	    # keep track of the latest
	    if latest < modified:
	      latest = modified

	  # record where we are so that the next run can pick up where we left off
	  if latest > previous:
	    os.utime('latest', (latest, latest))

	  # check for any incomplete removals
	  if commands.getoutput('svn status received') != '':
	    # plain '|' characters: a backslash in front of them would make the
	    # shell treat them as literal arguments instead of building a pipeline
	    os.system("svn st received | grep '!' | cut -c 8- | xargs -r svn revert --")

	  # check for any incomplete commits
	  if commands.getoutput('svn status received') != '':
	    if getpass.getuser() != 'www-data':
	      os.system('svn commit -m "queued documents" received')

	  # update web page with last processed information
	  if last_processed and os.path.exists('../public_html/secmail.txt'):
	    fh = open('../public_html/secmail.txt', 'w')
	    try:
	      fh.write("Latest email processed was sent: %s" % last_processed)
	    finally:
	      fh.close()