#!/usr/bin/env python
#
# Copyright (C) 2009-2010, William Trevor King
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Produce `RFC 4287`_ compliant Atom 1.0 XML from the command line.

This module is written for Python 2: it relies on the `unicode`
builtin and its doctests use the `print` statement.

.. _RFC 4287: http://tools.ietf.org/html/rfc4287
"""

from optparse import OptionParser
import sys
import time

from lxml import etree
from lxml import objectify
from lxml.html import XHTML_NAMESPACE


__version__ = '0.2'

GENERATOR_NAME = u'atomgen [based on lxml]'
ATOM_NAMESPACE = 'http://www.w3.org/2005/Atom'

# Clark-notation prefixes (`{namespace}tag`) used when creating elements.
ATOM = '{%s}' % ATOM_NAMESPACE
XHTML = '{%s}' % XHTML_NAMESPACE

# Atom is the default namespace of generated feeds; XHTML gets `html:`.
NSMAP = {
    None: ATOM_NAMESPACE,
    'html': XHTML_NAMESPACE,
    }

# URL schemes that `_id` strips before extracting the domain name.
_URL_SCHEMES = (u'http://', u'https://')


def _id(url, time_published):
    """Convert a URL to an Atom ID

    Following Mark Pilgrim's suggestions_.

    >>> _id('http://example.com/blog#5', 0)
    u'tag:example.com,1970-01-01:/blog/5'

    A leading `https://` is discarded just like `http://`, so both
    forms of a URL map to the same ID.

    >>> _id('https://example.com/blog#5', 0)
    u'tag:example.com,1970-01-01:/blog/5'

    A URL without a path produces a tag with an empty trailer.

    >>> _id('http://example.com', 0)
    u'tag:example.com,1970-01-01:'

    Tags conform to RFC4151 `tag syntax`_.  You're restricted to one
    post per day with a single url.

    `url` is the URL to convert and `time_published` is the
    publication time in seconds since the epoch; only its UTC date
    ends up in the ID.

    .. _suggestions:
      http://diveintomark.org/archives/2004/05/28/howto-atom-id
    .. _tag syntax: http://tools.ietf.org/html/rfc4151#section-2.1
    """
    # Discard everything before the domain name
    for scheme in _URL_SCHEMES:
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    # Change all # characters to /
    url = url.replace(u'#', u'/')
    # Extract the domain name; the trailer keeps its leading slash and
    # is empty when the URL has no path.
    domain, slash, path = url.partition(u'/')
    trailer = slash + path
    # Immediately after the domain name, insert a comma,
    # year-month-date, colon
    time_string = time.strftime("%Y-%m-%d", time.gmtime(time_published))
    return u"tag:%s,%s:%s" % (domain, time_string, trailer)


def _timestamp(seconds=None):
    """Return an `RFC 3339`_ timestamp.

    Complete date plus hours, minutes and seconds::

        YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)

    Where the trailing `Z` designates times in UTC.

    >>> _timestamp(0)
    u'1970-01-01T00:00:00Z'

    `seconds` is a time in seconds since the epoch.  It is passed
    straight to `time.gmtime`, so the default of `None` means the
    current time.

    .. _RFC 3339: http://www.ietf.org/rfc/rfc3339.txt
    """
    utc = time.gmtime(seconds)
    string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
    return unicode(string)


class Command(object):
    """A command exposed via the command line."""
    # Sub-command name as typed on the command line; set by subclasses.
    name = None

    def run(self, argv):
        """Parse `argv` with this command's parser and execute it.

        `argv` is the list of command-specific arguments (without the
        command name).  Returns whatever `_run` returns.
        """
        parser = self._get_parser()
        options, args = parser.parse_args(argv)
        return self._run(options, args)

    def _get_parser(self):
        """Return the `OptionParser` for this command (abstract)."""
        raise NotImplementedError()

    def _run(self, options, args):
        """Execute the command with parsed `options`/`args` (abstract)."""
        raise NotImplementedError()


# NOTE: the first docstring line of each concrete command is shown in
# the top-level usage message (see `main`), so keep it a short summary.
class NewFeedCommand(Command):
    """Create a new feed

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...     '--author-email', 'wking@drexel.edu',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8')  # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@drexel.edu</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
    </feed>
    <BLANKLINE>
    """
    name = 'new'

    def _get_parser(self):
        """Return the option parser for the `new` command."""
        usage = ['%prog [general-options] new [options] URI', '', 'Where',
                 ' URI is a URI used to generate a unique ID for the feed']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-t', '--title', dest='title', metavar='TITLE',
                          help='Feed title')
        parser.add_option('-a', '--author', dest='author', metavar='NAME',
                          help='Feed author name')
        parser.add_option('-u', '--author-uri', dest='author_uri',
                          metavar='URI', help='Feed author homepage URI')
        parser.add_option('-e', '--author-email', dest='author_email',
                          metavar='EMAIL', help='Feed author email address')
        return parser

    def _run(self, options, args):
        """Build and return a new `feed` element.

        `args[0]` is the URI from which the feed ID is generated.  The
        title and author elements are only created for the options
        that were given.
        """
        uri = args[0]
        feed = objectify.Element(ATOM + 'feed', nsmap=NSMAP)
        # Use a single time for both the ID and the `updated` stamp.
        tpub = time.time()
        etree.SubElement(feed, ATOM + 'id')
        feed.id = _id(uri, tpub)
        if options.title:
            etree.SubElement(feed, ATOM + 'title')
            feed.title = options.title
        if options.author or options.author_email or options.author_uri:
            etree.SubElement(feed, ATOM + 'author')
            if options.author:
                etree.SubElement(feed.author, ATOM + 'name')
                feed.author.name = options.author
            if options.author_email:
                etree.SubElement(feed.author, ATOM + 'email')
                feed.author.email = options.author_email
            if options.author_uri:
                etree.SubElement(feed.author, ATOM + 'uri')
                feed.author.uri = options.author_uri
        etree.SubElement(feed, ATOM + 'generator')
        feed.generator = GENERATOR_NAME
        feed.generator.attrib['version'] = __version__
        etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)
        return feed


class AddEntryCommand(Command):
    """Add an entry to an existing feed.

    >>> from os import close, remove
    >>> from StringIO import StringIO
    >>> from tempfile import mkstemp

    First, create a feed to edit.

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...     '--author-email', 'wking@drexel.edu',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-')
    >>> close(fd)
    >>> root = etree.ElementTree(feed)
    >>> root.write(path)

    Now add an entry to that feed.

    >>> c = AddEntryCommand()
    >>> stdin = sys.stdin
    >>> sys.stdin = StringIO('Changes will be noted in this feed.')
    >>> feed = c.run(['--input', path, 'Feed purpose',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> sys.stdin = stdin
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8')  # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@drexel.edu</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
      <entry>
        <title>Feed purpose</title>
        <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
        <link href="http://www.physics.drexel.edu/~wking/phys201"/>
        <published>...</published>
        <updated>...</updated>
        <content type="xhtml">
          <html:div xmlns:html="http://www.w3.org/1999/xhtml">Changes will be noted in this feed.</html:div>
        </content>
      </entry>
    </feed>
    <BLANKLINE>

    Note that we cannot move the html namespace declaration to the
    `<feed>` start tag until there is a way to update namespace maps
    on the fly.  See `lxml bug 555602`_.

    .. _lxml bug 555602: https://bugs.launchpad.net/lxml/+bug/555602

    Cleanup.

    >>> remove(path)
    """
    name = 'add'

    def _get_parser(self):
        """Return the option parser for the `add` command."""
        usage = ['%prog [general-options] add [options] TITLE LINK', '',
                 'Where',
                 ' TITLE is the title of the new entry',
                 ' LINK is the URI of that the entry refers to']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
                          help=('Input file for generated feed '
                                '(defaults to stdin)'))
        parser.add_option('-c', '--content', dest='content', metavar='FILE',
                          help=('Input file for entry content '
                                '(defaults to stdin, unless input is stdin, '
                                'in which case this option is required.)'))
        return parser

    def _run(self, options, args):
        """Append a new entry to an existing feed and return the feed.

        `args[0]` is the entry title and `args[1]` the URI the entry
        links to.  The feed is read from `--input` (or stdin) and the
        entry content from `--content` (or stdin).

        Raises `ValueError` if neither `--input` nor `--content` is
        given, because stdin cannot supply both the feed and the
        content.
        """
        title = unicode(args[0])
        link = unicode(args[1])
        # Checked explicitly (not with `assert`) so that the check
        # survives `python -O`.
        if options.ifilename is None and options.content is None:
            raise ValueError('Need to use one of --input or --content')

        parser = objectify.makeparser()
        if options.ifilename is None:
            root = objectify.parse(sys.stdin, parser=parser)
        else:
            root = objectify.parse(options.ifilename, parser=parser)
        feed = root.getroot()

        if options.content is None:
            content = sys.stdin.read()
        else:
            with open(options.content, 'r') as content_file:
                content = content_file.read()

        entry = etree.SubElement(feed, ATOM + 'entry')
        etree.SubElement(entry, ATOM + 'title')
        entry.title = title
        # Use a single time for the ID and all timestamps of this entry.
        tpub = time.time()
        etree.SubElement(entry, ATOM + 'id')
        entry.id = _id(link, tpub)
        etree.SubElement(entry, ATOM + 'link')
        entry.link.attrib['href'] = link
        etree.SubElement(entry, ATOM + 'published')
        entry.published = _timestamp(tpub)
        etree.SubElement(entry, ATOM + 'updated')
        entry.updated = _timestamp(tpub)
        etree.SubElement(entry, ATOM + 'content')
        entry.content.attrib['type'] = 'xhtml'
        etree.SubElement(entry.content, XHTML + 'div')
        entry.content[XHTML + 'div'] = content

        # Adding an entry changes the feed, so refresh its timestamp,
        # creating the element first if the input feed lacked one.
        if not hasattr(feed, u'updated'):
            etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)
        return feed


def test():
    """Run this module's doctests."""
    import doctest
    doctest.testmod()


def main(argv=None):
    """Run the command line interface and return an exit status.

    `argv` is the list of arguments after the program name; `None`
    makes `optparse` fall back to `sys.argv[1:]`.  A missing or
    unknown command is reported through `parser.error`, which prints
    the usage message and exits with status 2.
    """
    commands = [NewFeedCommand(), AddEntryCommand()]
    command_dict = dict((c.name, c) for c in commands)
    usage = ['%prog [options] command [command-options]', '',
             'Where command is one of']
    usage.extend([' %s\t%s' % (c.name, c.__doc__.splitlines()[0])
                  for c in commands])

    parser = OptionParser(usage='\n'.join(usage))
    # Stop at the command name so that its options reach the command.
    parser.disable_interspersed_args()
    parser.add_option('-o', '--output', dest='ofilename', metavar='FILE',
                      help=('Output file for generated feed '
                            '(defaults to stdout)'))
    parser.add_option('--test', dest='test', action='store_true',
                      help='Run the module test suite')
    options, args = parser.parse_args(argv)

    if options.test:
        test()
        return 0

    if not args:
        parser.error('missing command')
    command_name = args[0]
    if command_name not in command_dict:
        parser.error("unknown command '%s'" % command_name)

    feed = command_dict[command_name].run(args[1:])
    ostring = etree.tostring(
        feed, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    if options.ofilename is None:
        # The serialized feed already ends with a newline.
        sys.stdout.write(ostring)
    else:
        with open(options.ofilename, 'w') as of:
            of.write(ostring)
    return 0


if __name__ == "__main__":
    sys.exit(main())