#!/usr/bin/env python
import sys, os, time, getopt, getpass, socket, struct, mmap, stat, email
import shutil, pprint
# Settings the command-line option loop at the bottom of the file may override.
verbose = 0                                      # raised once per -v
mailbox = getpass.getuser()                      # --user
imapdconf = '/etc/imapd.conf'                    # --imapdconf
maildir = os.path.expanduser('~/Maildir.cyrus')  # --maildir

# State filled in by migrate(): the two directories read from imapd.conf and
# the per-user "seen" database.
configdir = partition = seen = None
def usage():
    """Print the command-line help text and exit with status 0.

    The text reports the current values of the module-level ``maildir``,
    ``imapdconf`` and ``mailbox`` settings.  This function never returns: it
    always ends in ``sys.exit(0)``.
    """
    # The option arguments are spelled out as <dir>, <file> and <name>, and
    # --user is listed in the synopsis alongside the other options.
    help_text = """%s [-h] [-v]* [--maildir <dir>] [--imapdconf <file>] [--user <name>]
  -h: produce this help text
  -v: increase verbose level (can be repeated)
  --maildir <dir>: use <dir> as the Maildir target
      currently: %s
  --imapdconf <file>: use <file> as the Cyrus imapd.conf
      currently: %s
  --user <name>: process mailbox <name>
      currently: %s
""" % (os.path.basename(sys.argv[0]), maildir, imapdconf, mailbox)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(help_text)
    sys.exit(0)
# Number of messages converted so far.  onelevel() increments it for every
# message and embeds it in the generated Maildir file name.
counter = 0
# Cyrus databases and the storage format each one uses:
#   deliver.db             berkeley
#   annotations.db         skiplist
#   user/m/mailbox.seen    skiplist
#   user/m/mailbox.sub     flat
# Record type codes found in a skiplist file, mapped to symbolic names.
rectypes = {
    1: 'INORDER',
    2: 'ADD',
    4: 'DELETE',
    255: 'COMMIT',
    257: 'DUMMY',
}


class Skiplist(dict):
    """Read an entire Cyrus skiplist database into memory.

    The instance is a plain dict mapping every live key to its value, both
    kept as the raw byte strings stored in the file.  Records are replayed in
    file order: a later record for a key replaces an earlier one, and a
    DELETE record removes the record it points at.  The DUMMY list-head
    record carries no data and is not stored.

    For the skiplist db format, see Cyrus imapd's source file
    src/cyrusdb_skiplist.c and also this Python implementation:
    http://oss.netfarm.it/download/skiplist.py

    Raises ValueError when the file does not start with the skiplist magic,
    is not format version 1, is truncated, or holds an unknown record type.
    """

    _MAGIC = b"\241\002\213\015skiplist file\0\0\0"
    _HEADER_FORMAT = '!7I'      # major, minor, maxlevel, curlevel, listsize,
                                # log_start, last_recovery
    _END_OF_POINTERS = 0xFFFFFFFF

    def __init__(self, name):
        dict.__init__(self)
        # Binary mode: the file holds packed integers, not text.  The with
        # block closes the file even when parsing fails.
        with open(name, 'rb') as f:
            if f.read(len(self._MAGIC)) != self._MAGIC:
                raise ValueError('%s: not a Cyrus skiplist file' % name)
            header = self._read(f, struct.calcsize(self._HEADER_FORMAT), name)
            major = struct.unpack(self._HEADER_FORMAT, header)[0]
            if major != 1:
                raise ValueError('%s: unsupported skiplist version %d'
                                 % (name, major))
            self._replay(f, name)

    @staticmethod
    def _read(f, count, name):
        """Read exactly *count* bytes from *f*; ValueError on a short read."""
        chunk = f.read(count)
        if len(chunk) != count:
            raise ValueError('%s: truncated skiplist file' % name)
        return chunk

    @classmethod
    def _u32(cls, f, name):
        """Read one big-endian unsigned 32-bit integer from *f*."""
        return struct.unpack('!I', cls._read(f, 4, name))[0]

    def _replay(self, f, name):
        """Apply every record after the file header to this dict, in order."""
        key_at = {}     # file offset of a record -> key stored there
        offset_of = {}  # key -> offset of the record currently providing it
        while True:
            start = f.tell()
            raw = f.read(4)
            if not raw:
                break  # clean EOF on a record boundary
            if len(raw) != 4:
                raise ValueError('%s: truncated skiplist file' % name)
            code = struct.unpack('!I', raw)[0]
            if code not in rectypes:
                raise ValueError('%s: unknown record type %d at offset %d'
                                 % (name, code, start))
            rectype = rectypes[code]
            if rectype == 'COMMIT':
                continue
            if rectype == 'DELETE':
                # The payload is the file offset of the record being deleted.
                # Only drop the key if that record is still the live one.
                target = self._u32(f, name)
                key = key_at.get(target)
                if key is not None and offset_of.get(key) == target:
                    del self[key]
                    del offset_of[key]
                continue
            # INORDER / ADD / DUMMY share one layout: key, data, then the
            # forward pointers.  Key and data are padded to 4-byte boundaries.
            keysize = self._u32(f, name)
            key = self._read(f, keysize, name)
            self._read(f, -keysize % 4, name)
            datasize = self._u32(f, name)
            data = self._read(f, datasize, name)
            self._read(f, -datasize % 4, name)
            # The forward pointers are irrelevant for a sequential read; skip
            # them up to and including the terminator.
            while self._u32(f, name) != self._END_OF_POINTERS:
                pass
            if rectype == 'DUMMY':
                continue
            self[key] = data
            key_at[start] = key
            offset_of[key] = start
# Exact leading content of a Cyrus "cyrus.header" file.  onelevel() asserts
# that every folder header starts with this text before reading the line that
# follows it.
header_magic = """\241\002\213\015Cyrus mailbox header
"The best thing about this system was that it had lots of goals."
\t--Jim Morris on Andrew
"""
# Template for the path of each generated Maildir message.  The process id and
# the hostname are substituted once, here; the doubled %% escapes survive as
# placeholders that onelevel() fills in per message with
# (maildir, folder name, integer timestamp, counter, flags).
rand_base = '%%s/%%s/cur/%%d.%dFMIc%%d.%s:2,%%s' % \
(os.getpid(), socket.gethostname())
def _ensure_dir(path):
    """Create directory *path* (and missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def onelevel(dirpath, dirnames, filenames):
    """Convert one Cyrus folder directory into its Maildir counterpart.

    Intended to be called with one (dirpath, dirnames, filenames) triple from
    os.walk() over the mailbox partition.  The part of *dirpath* below the
    global ``partition`` becomes the Maildir folder name, with '/' replaced
    by '.'.  The folder's tmp/new/cur directories are created if missing, for
    the top-level mailbox as well as for sub-folders, so messages always have
    a ``cur`` directory to land in and a re-run does not fail on directories
    that already exist.

    Every file named "<number>." is copied into the folder's ``cur``
    directory under a freshly generated name, flagged 'S' when its number is
    in the folder's seen set, and given the timestamp returned by
    email_received().  Other files are skipped.  The global ``counter`` is
    incremented once per copied message.

    dirnames is unused; it is accepted so that os.walk() triples can be
    passed straight through.

    Raises ValueError when a directory that contains files has no
    cyrus.header, or when that header does not start with ``header_magic``.
    """
    global counter
    relpath = dirpath[len(partition):].replace('/', '.')
    if verbose:
        print(relpath)
    # os.path.join drops the empty relpath of the top-level mailbox, so this
    # creates maildir/{tmp,new,cur} there and maildir/<folder>/{...} elsewhere.
    for sub in ('tmp', 'new', 'cur'):
        _ensure_dir(os.path.join(maildir, relpath, sub))

    if not filenames:
        return

    # find the uniqueid for the folder
    # see mailbox_read_header in Cyrus src/mailbox.c
    if 'cyrus.header' not in filenames:
        raise ValueError('%s: no cyrus.header in mailbox directory' % dirpath)
    with open(dirpath + '/cyrus.header') as header:
        if header.read(len(header_magic)) != header_magic:
            raise ValueError('%s/cyrus.header: bad header magic' % dirpath)
        line = header.readline()
    uniqueid = line.split('\t')[1].strip()
    # look up the uniqueid in the per-user seen DB
    # to find out which messages have been seen
    seenset = decode_seen(uniqueid)

    for name in filenames:
        # only files named "<number>." are handled as messages
        msgid = None
        if name.endswith('.'):
            try:
                msgid = int(name[:-1])
            except ValueError:
                pass
        if msgid is None:
            if verbose > 1:
                print('skipping %s' % name)
            continue
        # generate the maildir filename
        counter += 1
        # see http://cr.yp.to/proto/maildir.html for flag meanings
        if msgid in seenset:
            flags = 'S'
        else:
            flags = ''
        source = dirpath + '/' + name
        nf = rand_base % (maildir, relpath, int(time.time()), counter, flags)
        if verbose > 2:
            print('%s -> %s' % (source, nf))
        timestamp = email_received(source)
        shutil.copy(source, nf)
        os.utime(nf, (timestamp, timestamp))
def decode_seen(uniqueid):
    """Return the set of message IDs (as ints) marked seen in one folder.

    uniqueid is looked up in the global ``seen`` mapping.  The record is
    split on spaces into at most five fields: the first must be the version
    '1' and the fifth is a comma-separated list of message IDs and
    "begin:end" ranges (inclusive).  See seen_readit in Cyrus src/seen_db.c.

    Every ID is stored as an int, both for ranges and for single entries, so
    the result can be tested against integer message numbers.

    Returns an empty set when the folder has no record in ``seen`` or the
    record has no ID list.  Raises ValueError on an unexpected record
    version or a malformed ID.
    """
    try:
        record = seen[uniqueid]
    except KeyError:
        # this folder probably has never been seen
        # XXX should probably log a warning
        return set()

    seenlist = record.split(' ', 4)
    if seenlist[0] != '1':  # version
        raise ValueError('unsupported seen record version %r for folder %s'
                         % (seenlist[0], uniqueid))
    # The ID list is the fifth field; a shorter record simply has none.
    if len(seenlist) > 4:
        uids = seenlist[4].strip()
    else:
        uids = ''

    seenset = set()
    if not uids:
        return seenset
    if verbose:
        print('seen %s' % uids)
    for interval in uids.split(','):
        interval = interval.strip()
        if not interval:
            continue
        if ':' in interval:
            # a message ID range
            begin, end = interval.split(':')
            seenset.update(range(int(begin), int(end) + 1))
        else:
            # individual message ID
            seenset.add(int(interval))
    return seenset
def email_received(filename):
    """Find the timestamp to use for a message file, trying in order:

    1) the date after the last ';' of the first Received: header
    2) the Date: header
    3) the Unix timestamp (ctime) of the Cyrus mail file

    A source whose date cannot be parsed is skipped and the next one is
    tried.  Timezone offsets in the headers are honoured.

    When there is no Received: header and the From: or Date: header is
    missing as well, a warning listing the message headers is printed.

    Returns the timestamp as seconds since the epoch.
    """
    # Function-scope import: email.utils is the module name that exists on
    # both Python 2.5+ and Python 3.
    import email.utils

    with open(filename) as msgfile:
        m = email.message_from_file(msgfile)

    candidates = []
    received = m.get_all('received')
    if received:
        candidates.append(received[0].split(';')[-1].strip())
    elif 'from' not in m or 'date' not in m:
        print('WARNING: could not determine received date for:')
        pprint.pprint(m.items())
        print('-' * 72)
    if 'date' in m:
        candidates.append(m['date'])

    for text in candidates:
        parsed = email.utils.parsedate_tz(text)
        if parsed is None:
            continue  # unparseable: fall through to the next source
        try:
            return email.utils.mktime_tz(parsed)
        except (ValueError, OverflowError):
            continue  # date fields out of range
    return os.stat(filename)[stat.ST_CTIME]
def migrate():
    """Migrate the Cyrus mailbox named by ``mailbox`` into ``maildir``.

    Steps:
    1) create the ``maildir`` target directory if it does not exist yet;
    2) read ``configdirectory`` and ``partition-default`` from the
       ``imapdconf`` file into the globals ``configdir`` and ``partition``;
    3) narrow ``partition`` to this mailbox ("<partition>/user/<mailbox>");
    4) load the per-user seen database into the global ``seen``;
    5) call onelevel() for every directory os.walk() finds under the mailbox.

    Raises ValueError when the mailbox name is empty or when imapd.conf
    lacks either required option.  Errors creating the target directory or
    reading the files propagate to the caller.
    """
    global configdir, partition, seen
    if not mailbox:
        raise ValueError('empty mailbox name')

    if not os.path.isdir(maildir):
        os.makedirs(maildir)
        print('created maildir target %s' % maildir)

    # get config information from imapd.conf
    with open(imapdconf) as conf:
        for line in conf:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if ':' not in line:
                if verbose:
                    print('ignoring malformed config line: %s' % line)
                continue
            # Split on the first colon only: values may contain colons.
            option, value = line.split(':', 1)
            option = option.strip()
            value = value.strip()
            if option == 'configdirectory':
                configdir = value
            elif option == 'partition-default':
                partition = value
    if not (configdir and partition):
        raise ValueError('%s: configdirectory and partition-default must '
                         'both be set' % imapdconf)
    if verbose:
        print('configdir %s partition %s' % (configdir, partition))

    # process only one mailbox
    partition += '/user/' + mailbox
    # walk the walk
    # mbox name -> mbox type (4 bytes), partition ID, space, ACLs
    # mboxes = Skiplist('%s/mailboxes.db' % configdir)
    # mailbox UUID -> read list
    seen = Skiplist('%s/user/%s/%s.seen' % (configdir, mailbox[0], mailbox))
    for level in os.walk(partition):
        onelevel(*level)
if __name__ == '__main__':
    # Parse the command line, override the module-level settings, then run
    # the migration.  An unknown option or a missing option argument is
    # reported on stderr with exit status 2 instead of a traceback.
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'vh',
            ['maildir=', 'imapdconf=', 'user='])
    except getopt.GetoptError as err:
        sys.stderr.write('%s: %s\n' % (os.path.basename(sys.argv[0]), err))
        sys.stderr.write("use -h for help\n")
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-v':
            verbose += 1
        elif opt == '-h':
            usage()  # prints the help text and exits
        elif opt == '--imapdconf':
            imapdconf = arg
        elif opt == '--maildir':
            maildir = arg
        elif opt == '--user':
            mailbox = arg
    migrate()