#!/usr/bin/env python

"""
    Purpose:
    Nanoblogger-to-Ikiwiki import tool

    Copyright:
    Copyright (C) 2007  Chris Lamb <lamby@debian.org>
    Copyright (C) 2010  W. Trevor King <wking@drexel.edu>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

    Usage: run --help as an argument with this script.

    Notes:
    I added some extra bits to include the [[!tag tags/foo]] stuff in the post,
    as it wasn't before, at all. I'll diff the versions out so you can see
    the mess I made :).

"""

import os, sys
import time
import re

from datetime import datetime
import codecs, htmlentitydefs


class Tag (object):
    """One NanoBlogger tag (category) file.

    The file is read line by line with surrounding whitespace stripped:
    the first line becomes the tag name (``self.tag``) and every
    following line is kept as an entry of ``self.files``.

    Raises IndexError if the file is empty, and whatever ``open`` raises
    if the file cannot be read.
    """

    def __init__(self, path):
        self.path = path
        # Use a context manager so the handle is closed promptly instead
        # of being left for the garbage collector.
        with open(path, 'r') as tag_file:
            lines = [x.strip() for x in tag_file]
        self.tag = lines[0]
        self.files = lines[1:]

    def is_tagged(self, path):
        """Return True if the base name of `path` is listed in this tag."""
        return os.path.basename(path) in self.files

    @staticmethod
    def is_tag_file(filename):
        """Return True for names of the form ``cat_*.db``."""
        return filename.startswith('cat_') and filename.endswith('.db')


def parse_file(path, possible_tags):
    """Parse one post file into a dictionary.

    The file is expected to consist of ``FIELD: value`` header lines, a
    line starting with ``-----``, a ``BODY:`` line, the body itself and a
    final line starting with ``END-----``.

    Arguments:
      path          -- path of the post file to read
      possible_tags -- iterable of objects with a ``tag`` attribute and an
                       ``is_tagged(path)`` method (e.g. ``Tag`` instances)

    Returns a dict holding every header field (lower-cased name, stripped
    value) plus:
      'text'      -- the body, lines right-stripped and joined with '\\n'
      'timestamp' -- seconds since the epoch (UTC) as a float
      'tags'      -- names of the tags in `possible_tags` that list `path`

    Raises AssertionError if the BODY:/END----- markers are missing,
    ValueError if the date does not match the '%c' format once the zone
    name is removed, and NotImplementedError if the date names neither
    EST nor EDT.
    """
    import calendar  # only needed for the UTC conversion below

    # Context manager: do not leak the file handle.
    with open(path, 'r') as post_file:
        lines = post_file.readlines()
    post_dict = {}
    while True:  # parse header
        line = lines.pop(0)
        if line.startswith('-----'):
            break
        field, value = [x.strip() for x in line.split(':', 1)]
        post_dict[field.lower()] = value
    assert lines[0].startswith('BODY:'), lines[0]
    lines.pop(0)
    assert lines[-1].startswith('END-----'), lines[-1]
    lines.pop(-1)

    body = []
    for raw in lines:
        raw = raw.rstrip()
        # Byte strings (what Python 2 reads from the file) are decoded as
        # UTF-8; text that is already decoded is passed through untouched.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8')
        body.append(raw)
    post_dict['text'] = '\n'.join(body)

    date = post_dict['date']
    wall_clock = time.strptime(
        date.replace('EST ', '').replace('EDT ', ''), '%c')
    # The zone offset is applied by hand below, so the parsed wall-clock
    # time must be read as if it were UTC.  time.mktime() would read it in
    # the *local* zone of whatever machine runs the import, which shifts
    # the result whenever that machine is not set to UTC.
    post_dict['timestamp'] = float(calendar.timegm(wall_clock))
    if 'EDT' in date:
        post_dict['timestamp'] += 4*60*60
    elif 'EST' in date:
        post_dict['timestamp'] += 5*60*60
    else:
        raise NotImplementedError('unknown time zone in %s' % date)
    post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
    return post_dict


def format_commit(post_dict, name, email, subdir, branch):
    """Render one post as a ``commit`` command for git fast-import.

    Arguments:
      post_dict -- dict with the keys 'title', 'format', 'timestamp',
                   'text' and 'tags' (as produced by parse_file)
      name      -- committer name
      email     -- committer e-mail address
      subdir    -- directory (inside the repository) for the page file
      branch    -- branch name, used as ``refs/heads/<branch>``

    Returns the command as a byte string.  The page consists of two
    ``[[!meta ...]]`` lines (title and date), the post text and one
    ``[[!tag tags/...]]`` line per tag; its file name is the title with
    spaces replaced by underscores plus the extension for the format.
    The meta date is rendered with datetime.fromtimestamp(), i.e. in the
    local time zone of the running process.

    Raises NotImplementedError for any format other than markdown.
    """
    def as_text(value):
        # Byte strings (Python 2 ``str``) are decoded as UTF-8 so that
        # they can be combined with the unicode post text without an
        # implicit ASCII decode failing on non-ASCII titles or tags.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def as_bytes(value):
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    title = as_text(post_dict['title'])
    stub = title.replace(' ', '_')
    if post_dict['format'].lower() == 'markdown':
        ext = 'mdwn'
    else:
        raise NotImplementedError('Unkown extension for %s'
                                  % post_dict['format'])
    # Encoded up front: the "data <n>" header below must state the length
    # in bytes, not in characters.
    commit_msg = as_bytes('''Importing NanoBlogger post "%s"''' % title)

    lines = [
        '[[!meta  title="%s"]]' % (title.replace('"', r"'")),
        '[[!meta  date="%s"]]' % datetime.fromtimestamp(post_dict['timestamp']),
        as_text(post_dict['text'])]

    if post_dict['tags']:
        lines.append('')
    for tag in post_dict['tags']:
        lines.append(
            '[[!tag tags/%s]]'
            % (as_text(tag).replace(' ', '-').replace('/', '-').lower()))
    lines.append('')
    # NOTE(review): no 'html_replace' error handler is registered in the
    # visible code; the name is only looked up if the encoding fails.
    data = '\n'.join(lines).encode('utf-8', 'html_replace')

    page_path = os.path.join(as_text(subdir), "%s.%s" % (stub, ext))
    ret = [
        as_bytes("commit refs/heads/%s" % as_text(branch)),
        as_bytes("committer %s <%s> %d +0000"
                 % (as_text(name), as_text(email), post_dict['timestamp'])),
        as_bytes("data %d" % len(commit_msg)),
        commit_msg,
        as_bytes("M 644 inline %s" % page_path),
        as_bytes("data %d" % len(data)),
        data,
    ]
    return b'\n'.join(ret)


def main(name, email, data_dir, subdir, branch='master'):
    files = os.listdir(data_dir)
    tags = []
    for x in files:  # read tag (category) files
        if Tag.is_tag_file(x):
            tags.append(Tag(os.path.join(data_dir, x)))
    posts = []
    for x in files:
        if Tag.is_tag_file(x):
            continue
        if x.endswith('.db'):
            continue  # ignore master.db.  it just repeats tag info
        posts.append(parse_file(os.path.join(data_dir, x), tags))
    posts.sort(key=lambda x:x['timestamp'])
    for x in posts:
        print format_commit(x, name, email, subdir, branch)


if __name__ == "__main__":
    # Four or five arguments are expected after the script name:
    # name email datadir subdir [branch]
    if len(sys.argv) not in (5, 6):
        print >>sys.stderr, "%s: usage: %s name email datadir subdir [branch] | git-fast-import " % (sys.argv[0], sys.argv[0])
        # Exit non-zero so that a calling shell or pipeline can tell the
        # invocation was wrong instead of seeing a "successful" empty run.
        sys.exit(1)
    else:
        main(*sys.argv[1:])