#!/usr/bin/env python
import sys, os, time, getopt, getpass, socket, struct, mmap, stat, email
import shutil, pprint

# Defaults for the command-line options.  The __main__ block may override
# verbose, imapdconf, maildir and mailbox before migrate() runs.
verbose = 0  # verbosity level, incremented once per -v
imapdconf = '/etc/imapd.conf'  # path of the Cyrus imapd.conf to read
maildir = os.path.expanduser('~/Maildir.cyrus')  # Maildir target directory
mailbox = getpass.getuser()  # mailbox name to process (default: current user)
# The following three are filled in by migrate().
configdir = None  # value of the 'configdirectory' option in imapd.conf
partition = None  # 'partition-default' value, extended with /user/<mailbox>
seen = None  # Skiplist loaded from the mailbox's .seen database

def usage():
  """Print the command-line help and exit with status 0.

  The text shows the current values of the module-level maildir, imapdconf
  and mailbox settings, so options given before -h are reflected in it.
  """
  # A single parenthesised argument prints identically to the print statement.
  print("""%s [-h] [-v]* [--maildir <dir>] [--imapdconf <file>] [--user <user>]
  -h:                   produce this help text
  -v:                   increase verbose level (can be repeated)
  --maildir <dir>:      use <dir> as the Maildir target
        currently:      %s
  --imapdconf <file>:   use <file> as the Cyrus imapd.conf
           currently:   %s
  --user <user>:        process mailbox name <user>
      currently:        %s
  
  """ % (os.path.basename(sys.argv[0]), maildir, imapdconf, mailbox))
  sys.exit(0)


# Running count of messages copied so far; onelevel() increments it and puts
# it into each generated Maildir file name to keep the names distinct.
counter = 0

# Cyrus databases
# deliver.db           berkeley
# annotations.db       skiplist
# user/m/mailbox.seen  skiplist
# user/m/mailbox.sub   flat

# Numeric record type found at the start of each skiplist record, mapped to
# the symbolic name that Skiplist compares against while parsing.
rectypes = {
  1: 'INORDER',
  2: 'ADD',
  4: 'DELETE',
  255: 'COMMIT',
  257: 'DUMMY',
  }

class Skiplist(dict):
  """Read an entire Cyrus skiplist database into memory.

  The instance is a plain dict mapping each record key to its data, both as
  the raw byte strings stored in the file.  Records are replayed in file
  order: a later record for a key overrides an earlier one, and a DELETE
  log record removes the record it points at.
  """

  def __init__(self, name):
    """Load the skiplist file called name.

    Raises AssertionError if the file does not start with the skiplist
    magic or is not format version 1, KeyError on an unknown record type,
    and struct.error when a 4-byte field is cut short by the end of file.
    """
    dict.__init__(self)
    # for the skiplist db format, see Cyrus imapd's source file:
    #   src/cyrusdb_skiplist.c
    # and also this Python implementation:
    #   http://oss.netfarm.it/download/skiplist.py
    # The file is binary, so open it as such; the with block guarantees the
    # handle is closed even when parsing fails.
    with open(name, 'rb') as f:
      assert f.read(20) == b"\241\002\213\015skiplist file\0\0\0", \
             'not a skiplist file: %s' % name
      # header fields: major, minor, maxlevel, curlevel, listsize,
      # log_start, last_recovery -- only the major version is checked
      major = struct.unpack('!7I', f.read(28))[0]
      # skiplist DB format version 1
      assert major == 1, 'unsupported skiplist version %d' % major

      def read_field():
        # a length-prefixed byte string, padded to a multiple of 4 bytes
        size = struct.unpack('!I', f.read(4))[0]
        value = f.read(size)
        f.read(-size % 4)
        return value

      key_at = {}    # file offset of a record -> its key
      live_at = {}   # key -> offset of the record currently stored for it
      while True:
        start = f.tell()
        raw = f.read(4)
        if not raw: break # EOF
        rectype = rectypes[struct.unpack('!I', raw)[0]]
        if rectype == 'COMMIT': continue
        if rectype == 'DELETE':
          # the payload is the file offset of the record being deleted
          delptr = struct.unpack('!I', f.read(4))[0]
          key = key_at.get(delptr)
          # only drop the key if that record is still the live one
          if key is not None and live_at.get(key) == delptr:
            del self[key]
            del live_at[key]
          continue
        # INORDER, ADD and DUMMY records share the same layout
        key = read_field()
        self[key] = read_field()
        key_at[start] = key
        live_at[key] = start
        # skip the forward pointers, which end with an all-ones word
        while struct.unpack('!I', f.read(4))[0] != 0xFFFFFFFF:
          pass

# Exact text expected at the start of every cyrus.header file; onelevel()
# asserts that a folder's header begins with it before reading the next line.
header_magic = """\241\002\213\015Cyrus mailbox header
"The best thing about this system was that it had lots of goals."
\t--Jim Morris on Andrew
"""
# Template for Maildir message file names.  The process ID and host name are
# filled in here once; the doubled %% escapes survive this first formatting
# and become the placeholders that onelevel() fills with
# (maildir, relpath, timestamp, counter, flags).
rand_base = '%%s/%%s/cur/%%d.%dFMIc%%d.%s:2,%%s' % \
            (os.getpid(), socket.gethostname())
def onelevel(dirpath, dirnames, filenames):
  global counter
  relpath = dirpath[len(partition):].replace('/', '.')
  if verbose:
    print relpath
  if relpath not in ['', '.']:
    os.mkdir(maildir + '/' + relpath)
  os.mkdir(maildir + '/' + relpath + '/tmp')
  os.mkdir(maildir + '/' + relpath + '/new')
  os.mkdir(maildir + '/' + relpath + '/cur')
  # find the uniqueid for the folder
  # see mailbox_read_header in Cyrus src/mailbox.c
  assert 'cyrus.header' in filenames or not filenames
  if filenames:
    f = open(dirpath + '/cyrus.header')
    assert f.read(len(header_magic)) == header_magic
    line = f.readline()
    f.close()
    uniqueid = line.split('\t')[1].strip()
    # look up the uniqueid in the per-user seen DB
    # to find out which messages have been seen
    seenset = decode_seen(uniqueid)
    for f in filenames:
      if f[-1] == '.':
        try:
          msgid = int(f[:-1])
        except ValueError:
          if verbose > 1:
            print 'skipping', f
          continue
      else:
        if verbose > 1:
          print 'skipping', f
        continue
      # generate the maildir filename
      counter += 1
      # see http://cr.yp.to/proto/maildir.html for flag meanings
      if msgid in seenset:
        flags = 'S'
      else:
        flags = ''
      nf = rand_base % (maildir, relpath, int(time.time()), counter, flags)
      if verbose > 2:
        print dirpath + '/' + f, '->', nf
      timestamp = email_received(dirpath + '/' + f)
      shutil.copy(dirpath + '/' + f, nf)
      os.utime(nf, (timestamp, timestamp))

def decode_seen(uniqueid):
  """Return the set of seen message IDs (as ints) for a folder.

  uniqueid is the folder's unique ID, used as the key into the module-level
  seen database.  The last space-separated field of the record is a
  comma-separated list of single IDs and inclusive begin:end ranges.

  Returns an empty set when the folder has no record in the seen database.
  Raises AssertionError if the record is not version 1.
  """
  try:
    record = seen[uniqueid]
  except KeyError:
    # this folder probably has never been seen
    # XXX should probably log a warning
    return set()
  seenlist = record.split(' ', 4)
  # see seen_readit in Cyrus src/seen_db.c
  assert seenlist[0] == '1' # version
  seenset = set()
  uids = seenlist[-1]
  if uids:
    if verbose:
      print('seen ' + uids)
    for interval in uids.split(','):
      if ':' in interval:
        # a message ID range
        begin, end = interval.split(':')
        seenset.update(range(int(begin), int(end) + 1))
      else:
        # individual message ID; stored as an int like the range members so
        # that membership tests with integer message IDs succeed
        seenset.add(int(interval))
  return seenset

def _header_timestamp(datestr):
  """Return the Unix timestamp for an RFC 2822 date string, or None.

  The timezone offset in the string is honoured; a date without one is
  interpreted as local time.  None means the string could not be parsed or
  lies outside the range the platform can represent.
  """
  import email.utils
  parsed = email.utils.parsedate_tz(datestr)
  if parsed is None:
    return None
  try:
    return email.utils.mktime_tz(parsed)
  except (ValueError, OverflowError):
    return None

def email_received(filename):
  """find the timestamp to use by trying in order:
  1) first of the Received: headers
  2) Date: header
  3) Unix timestamp (ctime) of the Cyrus mail file

  A header that is present but cannot be parsed as a date is skipped and
  the next source in the list is tried.
  """
  with open(filename) as msgfile:
    m = email.message_from_file(msgfile)
  received = m.get_all('received')
  if received:
    # the date is the part after the last ';' of the Received: header
    ts = _header_timestamp(received[0].split(';')[-1].strip())
    if ts is not None:
      return ts
  else:
    if 'from' not in m or 'date' not in m:
      print('WARNING: could not determine received date for:')
      pprint.pprint(m._headers)
      print('-'*72)
  if 'date' in m:
    ts = _header_timestamp(m['date'])
    if ts is not None:
      return ts
  return os.stat(filename)[stat.ST_CTIME]

def migrate():
  global configdir, partition, seen
  try:
    os.makedirs(maildir)
    print 'created maildir target', maildir
  except os.error:
    pass
  # get config information from imapd.conf
  f = open(imapdconf)
  for line in f:
    line = line.strip()
    if not line or line.startswith('#'): continue
    option, value = line.split(':', 2)
    option = option.strip()
    value = value.strip()
    if option == 'configdirectory': configdir = value
    if option == 'partition-default': partition = value
  assert configdir and partition
  if verbose: print 'configdir', configdir, 'partition', partition
  # process only one mailbox
  partition += '/user/' + mailbox
  # walk the walk
  # mbox name -> mbox type (4 bytes), partition ID, space, ACLs
  # mboxes = Skiplist('%s/mailboxes.db' % configdir)
  # mailbox UUID -> read list
  seen = Skiplist('%s/user/%s/%s.seen' % (configdir, mailbox[0], mailbox))
  for level in os.walk(partition):
    onelevel(*level)

if __name__ == '__main__':
  # Command-line entry point: apply the options on top of the module-level
  # defaults, then run the migration.
  opts, args = getopt.getopt(sys.argv[1:],
                             'vh',
                             ['maildir=', 'imapdconf=', 'user='])
  for opt, arg in opts:
    # each option matches at most one branch
    if opt == '-h':
      usage()
    elif opt == '-v':
      verbose += 1
    elif opt == '--maildir':
      maildir = arg
    elif opt == '--imapdconf':
      imapdconf = arg
    elif opt == '--user':
      mailbox = arg
  migrate()
    
