#!/usr/bin/env python
"""
Purpose:    Nanoblogger-to-Ikiwiki import tool

Copyright:
    Copyright (C) 2007  Chris Lamb
    Copyright (C) 2010  W. Trevor King

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

Usage:  run --help as an argument with this script.

Notes:
    I added some extra bits to include the [[!tag tags/foo]] stuff in the
    post, as it wasn't before, at all. I'll diff the versions out so you
    can see the mess I made :).
"""

import calendar
import codecs
import os
import re
import sys
import time
from datetime import datetime, timedelta

try:
    import htmlentitydefs  # Python 2 name
except ImportError:  # Python 3 moved the module to html.entities
    import html.entities as htmlentitydefs


# Naive UTC epoch; adding a timedelta to it converts an epoch to a UTC
# wall-clock datetime without depending on the machine's local time zone.
_EPOCH = datetime(1970, 1, 1)


def _to_text(value, encoding='utf-8'):
    """Return *value* as text, decoding it first if it is a byte string."""
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def _to_bytes(value, encoding='utf-8'):
    """Return *value* as a byte string, encoding it first if it is text."""
    if isinstance(value, bytes):
        return value
    return value.encode(encoding)


def _parse_timestamp(date):
    """Convert a NanoBlogger date string into seconds since the UTC epoch.

    *date* must carry an ``EST`` or ``EDT`` zone name and otherwise match
    the ``%c`` format, e.g. ``Mon Jan  1 12:00:00 EST 2007``.  Raises
    NotImplementedError for any other zone.
    """
    # Check the zone before parsing so that an unsupported zone is
    # reported as such instead of as a generic strptime failure.
    if 'EDT' in date:
        utc_offset = 4 * 60 * 60
    elif 'EST' in date:
        utc_offset = 5 * 60 * 60
    else:
        raise NotImplementedError('unknown time zone in %s' % date)
    wall_clock = time.strptime(
        date.replace('EST ', '').replace('EDT ', ''), '%c')
    # timegm() reads the struct as UTC, so the result does not depend on
    # the local zone of the machine running the import (mktime() does).
    return float(calendar.timegm(wall_clock) + utc_offset)


class Tag (object):
    """A NanoBlogger category file (``cat_*.db``).

    The first line of the file is the tag name; every following line is
    the file name of a post carrying that tag.
    """

    def __init__(self, path):
        self.path = path
        with open(path, 'rb') as handle:
            lines = [_to_text(raw.strip()) for raw in handle]
        if not lines:
            raise ValueError('%s: empty tag file' % path)
        self.tag = lines[0]
        self.files = lines[1:]

    def is_tagged(self, path):
        """Return True if the post stored at *path* carries this tag."""
        return _to_text(os.path.basename(path)) in self.files

    @staticmethod
    def is_tag_file(filename):
        """Return True if *filename* is named like a category file."""
        return filename.startswith('cat_') and filename.endswith('.db')


def parse_file(path, possible_tags):
    """Parse the NanoBlogger post at *path* into a dict.

    The dict holds one lower-cased key per ``FIELD: value`` header line,
    plus ``text`` (the body, each line right-stripped), ``timestamp``
    (seconds since the UTC epoch) and ``tags`` (names of the entries of
    *possible_tags* that list this post).  All strings are text, decoded
    from UTF-8.

    Raises ValueError if the file does not follow the
    header / ``-----`` / ``BODY:`` / ``END-----`` layout.
    """
    with open(path, 'rb') as handle:
        raw_lines = handle.readlines()

    post_dict = {}
    remaining = iter(raw_lines)
    for raw in remaining:  # parse header
        if raw.startswith(b'-----'):
            break
        pieces = raw.split(b':', 1)
        if len(pieces) != 2:
            raise ValueError('%s: malformed header line %r' % (path, raw))
        field, value = [_to_text(piece.strip()) for piece in pieces]
        post_dict[field.lower()] = value
    else:
        raise ValueError('%s: missing "-----" header terminator' % path)

    body = list(remaining)
    if not body or not body[0].startswith(b'BODY:'):
        raise ValueError('%s: expected "BODY:" after the header' % path)
    if len(body) < 2 or not body[-1].startswith(b'END-----'):
        raise ValueError('%s: expected "END-----" as the last line' % path)
    body = body[1:-1]

    post_dict['text'] = _to_text('\n').join(
        [_to_text(raw.rstrip()) for raw in body])
    post_dict['timestamp'] = _parse_timestamp(post_dict['date'])
    post_dict['tags'] = [t.tag for t in possible_tags if t.is_tagged(path)]
    return post_dict


def format_commit(post_dict, name, email, subdir, branch):
    """Return one ``git fast-import`` commit that adds the given post.

    *post_dict* is a dict as returned by parse_file(); *name* and *email*
    identify the committer, *subdir* is the directory inside the
    repository that receives the page, and *branch* is the branch to
    commit to.  The result is a UTF-8 encoded byte string.

    Raises NotImplementedError for any post format other than markdown.
    """
    title = _to_text(post_dict['title'])
    stub = title.replace(' ', '_')
    if post_dict['format'].lower() == 'markdown':
        ext = 'mdwn'
    else:
        raise NotImplementedError('Unkown extension for %s'
                                  % post_dict['format'])
    timestamp = post_dict['timestamp']
    commit_msg = _to_bytes('Importing NanoBlogger post "%s"' % title)

    lines = [
        '[[!meta title="%s"]]' % title.replace('"', "'"),
        # The committer line below is stamped +0000, so the page date is
        # rendered in UTC as well.
        '[[!meta date="%s"]]' % (_EPOCH + timedelta(seconds=timestamp)),
        _to_text(post_dict['text']),
    ]
    if post_dict['tags']:
        lines.append('')
        for tag in post_dict['tags']:
            lines.append('[[!tag tags/%s]]' % (
                _to_text(tag).replace(' ', '-').replace('/', '-').lower()))
        lines.append('')
    # UTF-8 can represent everything decoded above, so strict encoding is
    # enough; no custom codec error handler is needed.
    data = _to_bytes(_to_text('\n').join(lines))

    page_path = os.path.join(_to_text(subdir), '%s.%s' % (stub, ext))
    # "data <n>" counts bytes, so both lengths are taken after encoding.
    ret = [
        _to_bytes('commit refs/heads/%s' % _to_text(branch)),
        _to_bytes('committer %s <%s> %d +0000'
                  % (_to_text(name), _to_text(email), timestamp)),
        _to_bytes('data %d' % len(commit_msg)),
        commit_msg,
        _to_bytes('M 644 inline %s' % page_path),
        _to_bytes('data %d' % len(data)),
        data,
    ]
    return b'\n'.join(ret)


def main(name, email, data_dir, subdir, branch='master'):
    """Write a fast-import stream for every post in *data_dir* to stdout.

    Posts are emitted oldest first, one commit per post.
    """
    # os.listdir() order is arbitrary; sorting keeps the tag order and
    # the order of posts with equal timestamps reproducible.
    files = sorted(os.listdir(data_dir))
    # read tag (category) files
    tags = [Tag(os.path.join(data_dir, x))
            for x in files if Tag.is_tag_file(x)]
    posts = []
    for x in files:
        if x.endswith('.db'):
            # category files were read above; master.db is ignored since
            # it just repeats tag info
            continue
        posts.append(parse_file(os.path.join(data_dir, x), tags))
    posts.sort(key=lambda post: post['timestamp'])
    # The stream is binary: use the byte-level stdout where one exists
    # (Python 3) and plain stdout otherwise (Python 2).
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for post in posts:
        out.write(format_commit(post, name, email, subdir, branch) + b'\n')


if __name__ == "__main__":
    if len(sys.argv) not in (5, 6):
        sys.stderr.write(
            "%s: usage: %s name email datadir subdir [branch] | git-fast-import \n"
            % (sys.argv[0], sys.argv[0]))
        sys.exit(2)
    main(*sys.argv[1:])