[Assorted-commits] SF.net SVN: assorted: [704] mailing-list-filter
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-05-07 16:06:46
|
# Revision: 704
#           http://assorted.svn.sourceforge.net/assorted/?rev=704&view=rev
# Author:   yangzhang
# Date:     2008-05-07 09:06:28 -0700 (Wed, 07 May 2008)
#
# Log Message:
# -----------
# added mailing list filter! still exploring how gmail imap starred messages work
#
# Added Paths:
# -----------
#     mailing-list-filter/
#     mailing-list-filter/trunk/
#     mailing-list-filter/trunk/src/
#     mailing-list-filter/trunk/src/filter.py
#
# Added: mailing-list-filter/trunk/src/filter.py
# ===================================================================
# --- mailing-list-filter/trunk/src/filter.py (rev 0)
# +++ mailing-list-filter/trunk/src/filter.py 2008-05-07 16:06:28 UTC (rev 704)
# @@ -0,0 +1,149 @@
#
# NOTE: the code below is the payload of that diff with the leading '+'
# markers removed and line structure restored, plus bug fixes; it is no
# longer byte-identical to what was committed in rev 704.

#!/usr/bin/env python

"""
Given an IMAP mailbox, mark all messages as read except for those threads in
which you were a participant, where thread grouping is performed via the
In-Reply-To and References headers.

Currently, we assume that the server specification points to a mailbox
containing all messages (both sent and received), and a message is determined
to have been sent by you by looking at the From: header field. This should work
well with Gmail. An alternative strategy is to look through two folders, one
that's the Inbox and one that's the Sent mailbox, and treat all messages in
Sent as having been sent by you.
"""

from __future__ import with_statement
from collections import defaultdict
from email import message_from_string
from getpass import getpass
from imaplib import IMAP4_SSL
from argparse import ArgumentParser
from path import path
from re import match
from functools import partial
from commons.decs import pickle_memoized
from commons.log import *
from commons.files import cleanse_filename, soft_makedirs
from commons.misc import default_if_none
from commons.networking import logout
from commons.seqs import concat, grouper
from commons.startup import run_main
from contextlib import closing

# The commons.log helpers are called with an empty first argument throughout
# this script; bind it once here.
info = partial(info, '')
debug = partial(debug, '')
error = partial(error, '')
die = partial(die, '')


def getmail(imap):
    """
    Fetch the flags and threading headers of every message in the mailbox
    currently selected on `imap`.

    Returns the concatenated `data` lists of imaplib's FETCH responses, i.e.
    (envelope, header-text) tuples interleaved with bare closing-paren
    strings. Returns an empty list if the mailbox holds no messages. Raises
    `imap.error` if the server answers SEARCH or FETCH with a non-OK status.
    """
    info('finding max seqno')
    status, data = imap.search(None, 'ALL')
    if status != 'OK':
        raise imap.error('SEARCH failed: %r' % (data,))
    # An empty mailbox yields an empty (or missing) result line; there is
    # nothing to fetch in that case.
    if data:
        seqnos = (data[0] or '').split()
    else:
        seqnos = []
    if not seqnos:
        return []
    maxseqno = int(seqnos[-1])
    del seqnos

    info('actually fetching the messages in chunks')
    # The syntax/fields of the FETCH command is documented in RFC 2060
    # (superseded by RFC 3501). Also, this article contains a brief overview:
    # http://www.devshed.com/c/a/Python/Python-Email-Libraries-part-2-IMAP/3/
    # BODY.PEEK prevents the message from automatically being flagged as \Seen.
    query = '(FLAGS BODY.PEEK[HEADER.FIELDS (Message-ID References In-Reply-To From Subject)])'
    step = 1000
    fetched = []
    for start in xrange(1, maxseqno + 1, step):
        # Clamp the upper bound so the last chunk never names a sequence
        # number beyond the end of the mailbox.
        stop = min(start + step - 1, maxseqno)
        status, chunk = imap.fetch('%d:%d' % (start, stop), query)
        if status != 'OK':
            raise imap.error('FETCH %d:%d failed: %r' % (start, stop, chunk))
        fetched.extend(chunk)
    return fetched


def _parse_server(spec):
    """
    Split a `<host>[:<port>][/<mailbox>]` string into (host, port, mailbox).

    The port defaults to 993 and the mailbox to 'INBOX'. Returns None if
    `spec` is not in that format.
    """
    m = match(r'(?P<host>[^:/]+)(:(?P<port>\d+))?(/(?P<mailbox>.+))?$', spec)
    if m is None:
        return None
    return (m.group('host'),
            int(default_if_none(m.group('port'), 993)),
            default_if_none(m.group('mailbox'), 'INBOX'))


def _read_credentials(credfile):
    """
    Return (user, passwd) taken from the first two lines of `credfile`, or
    None if the file has fewer than two lines. Lines after the second (e.g. a
    trailing blank line) are ignored.
    """
    with open(credfile) as f:
        lines = [line.strip('\r\n') for line in f.readlines()]
    if len(lines) < 2:
        return None
    return lines[0], lines[1]


def _iter_messages(xs):
    """
    Yield the (envelope, data) tuples found in FETCH response data `xs`.

    Everything that is not a 2-tuple (in practice the bare closing parens that
    follow each message) is skipped. Example data:

    [('13300 (BODY[HEADER.FIELDS (Message-ID References In-Reply-To)] {67}',
      'Message-ID: <mai...@py...>\r\n\r\n'),
     ')',
     ('13301 (BODY[HEADER.FIELDS (Message-ID References In-Reply-To)] {59}',
      'Message-Id: <200...@hv...>\r\n\r\n'),
     ')',
     ('13302 (BODY[HEADER.FIELDS (Message-ID References In-Reply-To)] {92}',
      'Message-ID: <C43EAFC0.2E3AE%ni...@ya...>\r\nIn-Reply-To: <481...@gm...>\r\n\r\n')]
    """
    for item in xs:
        if isinstance(item, tuple) and len(item) == 2:
            yield item


def _dump(label, seqno, flags, msg):
    """Print `label`, the message's seqno and flags, and its parsed headers."""
    # Single-argument print with parentheses behaves the same as the print
    # statement under Python 2.
    print(label)
    print('%s %s' % (seqno, flags))
    print('\n'.join(map(str, msg.items())))
    print('')


def main(argv):
    """
    Command-line entry point.

    Parses `argv`, logs in to the IMAP server, fetches (or loads from the
    cache) the headers of all messages, and prints every message that is
    \\Flagged or that references a message sent by `sender`.
    """
    import logging
    config_logging(level=logging.INFO, do_console=True)

    p = ArgumentParser(description=__doc__)
    p.add_argument('--credfile', default=path('~/.mlf.auth').expanduser(),
                   help="File containing your login credentials, with the "
                        "username on the first line and the password on the "
                        "second line. Ignored iff --prompt.")
    p.add_argument('--cachedir', default=path('~/.mlf.cache').expanduser(),
                   help="Directory to use for caching our data.")
    p.add_argument('--prompt', action='store_true',
                   help="Interactively prompt for the username and password.")
    p.add_argument('sender',
                   help="Your email address.")
    p.add_argument('server',
                   help="The server in the format: <host>[:<port>][/<mailbox>].")

    cfg = p.parse_args(argv[1:])

    if cfg.prompt:
        # getpass() prints a prompt of its own, so pass ours in rather than
        # printing a second one in front of it.
        cfg.user = raw_input('username: ')
        cfg.passwd = getpass('password: ')
    else:
        creds = _read_credentials(cfg.credfile)
        if creds is None:
            p.error('Credentials file must contain the username on the first '
                    'line and the password on the second line.')
        cfg.user, cfg.passwd = creds

    server = _parse_server(cfg.server)
    if server is None:
        p.error('Need to specify the server in the correct format.')
    cfg.host, cfg.port, cfg.mailbox = server

    # A --cachedir given on the command line arrives as a plain string; wrap
    # it so the `/` join below works for it as well as for the default.
    cachedir = path(cfg.cachedir)
    soft_makedirs(cachedir)

    with logout(IMAP4_SSL(cfg.host, cfg.port)) as imap:
        imap.login(cfg.user, cfg.passwd)

        # Select the main mailbox (INBOX by default). Do this before entering
        # closing(): CLOSE is only legal once a mailbox is selected, so a
        # failed SELECT would otherwise be masked by the error from CLOSE.
        status, detail = imap.select(cfg.mailbox)
        if status != 'OK':
            raise imap.error('cannot select mailbox %r: %r'
                             % (cfg.mailbox, detail))

        with closing(imap) as imap:
            # Fetch message IDs, references, and senders.
            # NOTE(review): presumably pickle_memoized stores getmail's result
            # in the file named by the lambda and reuses it on later runs; the
            # file name depends only on the sender, not on the server or
            # mailbox -- confirm against commons.decs.
            cached_getmail = pickle_memoized(
                lambda imap: cachedir / cleanse_filename(cfg.sender))(getmail)
            xs = cached_getmail(imap)

            debug('fetched:', xs)

            info('determining the set of messages that were sent by you')

            sent = set()
            for envelope, data in _iter_messages(xs):
                msg = message_from_string(data)
                msgid = msg['Message-ID']
                # Either header may be absent; a message without a Message-ID
                # can never be referenced, so it is not worth recording.
                if msgid is not None and cfg.sender in (msg['From'] or ''):
                    sent.add(msgid.strip())

            info('find the threads in which I am a participant')

            for envelope, data in _iter_messages(xs):
                # `[^)]*` rather than `[^)]+`: a message with no flags at all
                # is reported as `FLAGS ()`.
                m = match(r"(?P<seqno>\d+) \(FLAGS \((?P<flags>[^)]*)\)", envelope)
                if m is None:
                    debug('unrecognized FETCH envelope:', envelope)
                    continue
                seqno = m.group('seqno')
                flags = m.group('flags')
                # Parse before printing so the headers shown belong to this
                # message.
                msg = message_from_string(data)
                # Alternative condition tried while exploring Gmail's flags:
                # flags != r'\Seen' and flags != r'\Seen NonJunk'
                if r'\Flagged' in flags:
                    _dump('FLAG', seqno, flags, msg)
                irt = default_if_none(msg.get_all('In-Reply-To'), [])
                refs = default_if_none(msg.get_all('References'), [])
                refs = set(' '.join(irt + refs).split())
                if refs & sent:
                    _dump('SENT', seqno, flags, msg)
            # TODO: actually mark the non-participant threads as read; for now
            # the matching messages are only printed.

run_main()

# Property changes on: mailing-list-filter/trunk/src/filter.py
# ___________________________________________________________________
# Name: svn:executable
#    + *
#
# This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.