Physics 201

#!/usr/bin/env python
#
# Copyright (C) 2009-2010, William Trevor King <wking@drexel.edu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Produce `RFC 4287` compliant Atom 1.0 XML from the command line.

.. _RFC 4287: http://tools.ietf.org/html/rfc4287
"""

from optparse import OptionParser
import sys
import time

from lxml import etree
from lxml import objectify
from lxml.html import XHTML_NAMESPACE

__version__ = '0.2'
GENERATOR_NAME = u'atomgen [based on lxml]'

ATOM_NAMESPACE = 'http://www.w3.org/2005/Atom'

ATOM = '{%s}' % ATOM_NAMESPACE
XHTML = '{%s}' % XHTML_NAMESPACE

NSMAP = {
    None : ATOM_NAMESPACE,
    'html': XHTML_NAMESPACE,
    }


def _id(url, time_published):
    """Convert a URL to an Atom ID

    Following Mark Pilgrim's suggestions_.
    >>> _id('http://example.com/blog#5', 0)
    u'tag:example.com,1970-01-01:/blog/5'

    Tags conform to RFC4151 tag syntax.  You're restricted to one post
    per day with a single url.

    .. _suggestions: http://diveintomark.org/archives/2004/05/28/howto-atom-id
    .. _tag syntax: http://tools.ietf.org/html/rfc4151#section-2.1
    """
    # Discard everything before the domain name
    start = u"http://"
    if url.startswith(start):
        url = url[len(start):]
    # Change all # characters to /
    url = url.replace(u'#', u'/')
    # Extract the domain name
    end_of_domain_index = url.find(u'/')
    if end_of_domain_index == -1:
        domain = url
        trailer = u''
    else:
        domain = url[0:end_of_domain_index]
        trailer = url[end_of_domain_index:]
    # Immediately after the domain name, insert a comma, year-month-date, colon
    time_string = time.strftime("%Y-%m-%d", time.gmtime(time_published))
    url = u"tag:%s,%s:%s" % (domain, time_string, trailer)
    return url

def _timestamp(seconds=None):
    """Return an `RFC 3339`_ timestamp.

    Complete date plus hours, minutes and seconds::

        YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)

    Where the the trailing `Z` designates times in UTC.

    >>> _timestamp(0)
    u'1970-01-01T00:00:00Z'

    .. _RFC 3339: http://www.ietf.org/rfc/rfc3339.txt
    """
    utc = time.gmtime(seconds)
    string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
    return unicode(string)


class Command (object):
    """Base class for a sub-command exposed via the command line.

    Subclasses set `name` and implement `_get_parser` and `_run`.
    """
    name = None

    def run(self, argv):
        """Parse `argv` with this command's parser, then execute it.

        Returns whatever `_run` returns.
        """
        parsed_options, positional = self._get_parser().parse_args(argv)
        return self._run(parsed_options, positional)

    def _get_parser(self):
        """Return the option parser for this command (subclass hook)."""
        raise NotImplementedError()

    def _run(self, options, args):
        """Execute the command with parsed `options` and `args`
        (subclass hook)."""
        raise NotImplementedError()


class NewFeedCommand (Command):
    """Create a new feed

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...     '--author-email', 'wking@drexel.edu',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8')  # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@drexel.edu</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
    </feed>
    <BLANKLINE>
    """
    name = 'new'

    def _get_parser(self):
        """Return the option parser for the `new` command."""
        usage = ['%prog [general-options] new [options] URI',
                 '',
                 'Where',
                 '  URI is a URI used to generate a unique ID for the feed']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-t', '--title', dest='title', metavar='TITLE',
                          help='Feed title')
        parser.add_option('-a', '--author', dest='author', metavar='NAME',
                          help='Feed author name')
        parser.add_option('-u', '--author-uri', dest='author_uri',
                          metavar='URI', help='Feed author homepage URI')
        parser.add_option('-e', '--author-email', dest='author_email',
                          metavar='EMAIL', help='Feed author email address')
        return parser

    def _run(self, options, args):
        """Build and return a new `<feed>` element.

        `args[0]` is the URI used to generate the feed ID.  The title
        and author children are only created when the matching option
        was given.  A missing URI is reported through the option
        parser's `error` method (usage message, exit status 2).
        """
        if not args:
            # Fail the same way optparse reports bad options, instead
            # of letting args[0] raise a bare IndexError.
            self._get_parser().error('missing required URI argument')
        uri = args[0]

        feed = objectify.Element(ATOM + 'feed', nsmap=NSMAP)

        # One timestamp feeds both the ID and the <updated> element.
        tpub = time.time()
        etree.SubElement(feed, ATOM + 'id')
        feed.id = _id(uri, tpub)

        if options.title:
            etree.SubElement(feed, ATOM + 'title')
            feed.title = options.title

        # <author> is created once if any author detail was supplied.
        if options.author or options.author_email or options.author_uri:
            etree.SubElement(feed, ATOM + 'author')
        if options.author:
            etree.SubElement(feed.author, ATOM + 'name')
            feed.author.name = options.author
        if options.author_email:
            etree.SubElement(feed.author, ATOM + 'email')
            feed.author.email = options.author_email
        if options.author_uri:
            etree.SubElement(feed.author, ATOM + 'uri')
            feed.author.uri = options.author_uri

        etree.SubElement(feed, ATOM + 'generator')
        feed.generator = GENERATOR_NAME
        feed.generator.attrib['version'] = __version__

        etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)

        return feed


class AddEntryCommand (Command):
    """Add an entry to an existing feed.

    >>> from os import close, remove
    >>> from StringIO import StringIO
    >>> from tempfile import mkstemp

    First, create a feed to edit.

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...     '--author-email', 'wking@drexel.edu',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-')
    >>> close(fd)
    >>> root = etree.ElementTree(feed)
    >>> root.write(path)

    Now add an entry to that feed.

    >>> c = AddEntryCommand()
    >>> stdin = sys.stdin
    >>> sys.stdin = StringIO('Changes will be noted in this feed.')
    >>> feed = c.run(['--input', path, 'Feed purpose',
    ...     'http://www.physics.drexel.edu/~wking/phys201'])
    >>> sys.stdin = stdin
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8')  # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@drexel.edu</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
      <entry>
        <title>Feed purpose</title>
        <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
        <link href="http://www.physics.drexel.edu/~wking/phys201"/>
        <published>...</published>
        <updated>...</updated>
        <content type="xhtml">
          <html:div xmlns:html="http://www.w3.org/1999/xhtml">Changes will be noted in this feed.</html:div>
        </content>
      </entry>
    </feed>
    <BLANKLINE>

    Note that we cannot move the html namespace declaration to the
    `<feed>` start tag until there is a way to update namespace maps
    on the fly.  See `lxml bug 555602`_.

    .. _lxml bug 555602: https://bugs.launchpad.net/lxml/+bug/555602

    Cleanup.

    >>> remove(path)
    """
    name = 'add'

    def _get_parser(self):
        """Return the option parser for the `add` command."""
        usage = ['%prog [general-options] add [options] TITLE LINK',
                 '',
                 'Where',
                 '  TITLE is the title of the new entry',
                 '  LINK is the URI of that the entry refers to']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
                          help=('Input file for generated feed '
                                '(defaults to stdin)'))
        parser.add_option('-c', '--content', dest='content', metavar='FILE',
                          help=('Input file for entry content '
                                '(defaults to stdin, unless input is stdin, '
                                'in which case this option is required.)'))
        return parser

    def _run(self, options, args):
        """Append a new `<entry>` to an existing feed and return the feed.

        `args[0]` is the entry title and `args[1]` the entry link.  The
        feed is read from `--input` (or stdin) and the entry content
        from `--content` (or stdin); both cannot come from stdin.
        Invalid invocations are reported through the option parser's
        `error` method (usage message, exit status 2).
        """
        if len(args) < 2:
            # Fail the same way optparse reports bad options, instead
            # of letting the argument lookups raise a bare IndexError.
            self._get_parser().error('TITLE and LINK arguments are required')
        if options.ifilename is None and options.content is None:
            # stdin cannot supply both the feed and the content.  This
            # is an explicit check because an assert would be stripped
            # when Python runs with -O.
            self._get_parser().error(
                'Need to use one of --input or --content')

        # Formatting into a unicode literal matches unicode(value) on
        # Python 2 without relying on the Python-2-only builtin.
        title = u'%s' % args[0]
        link = u'%s' % args[1]

        parser = objectify.makeparser()

        # The feed is parsed before the content is read.
        if options.ifilename is None:
            root = objectify.parse(sys.stdin, parser=parser)
        else:
            root = objectify.parse(options.ifilename, parser=parser)

        feed = root.getroot()

        if options.content is None:
            content = sys.stdin.read()
        else:
            # Close the content file deterministically.
            with open(options.content, 'r') as content_file:
                content = content_file.read()

        entry = etree.SubElement(feed, ATOM + 'entry')
        etree.SubElement(entry, ATOM + 'title')
        entry.title = title

        # One timestamp feeds the ID, <published> and both <updated>s.
        tpub = time.time()
        etree.SubElement(entry, ATOM + 'id')
        entry.id = _id(link, tpub)

        etree.SubElement(entry, ATOM + 'link')
        entry.link.attrib['href'] = link

        etree.SubElement(entry, ATOM + 'published')
        entry.published = _timestamp(tpub)

        etree.SubElement(entry, ATOM + 'updated')
        entry.updated = _timestamp(tpub)

        etree.SubElement(entry, ATOM + 'content')
        entry.content.attrib['type'] = 'xhtml'
        etree.SubElement(entry.content, XHTML + 'div')
        entry.content[XHTML + 'div'] = content

        # Create the feed-level <updated> if it is missing, then set it.
        if not hasattr(feed, u'updated'):
            etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)

        return feed


def test():
    """Run the doctests of the `__main__` module.

    Returns the `doctest.testmod` result (failure count first, then
    the number of examples attempted) so callers can detect failures
    instead of the outcome being discarded.
    """
    import doctest
    return doctest.testmod()


# Command-line entry point: parse the general options, dispatch to the
# selected sub-command and write the resulting feed.
if __name__ == "__main__":
    commands = [NewFeedCommand(), AddEntryCommand()]
    command_dict = dict((c.name, c) for c in commands)
    usage = ['%prog [options] command [command-options]',
             '',
             'Where command is one of']
    # Each command is listed with the first line of its class docstring.
    usage.extend(['  %s\t%s' % (c.name, c.__doc__.splitlines()[0])
                  for c in commands])

    parser = OptionParser(usage='\n'.join(usage))
    parser.disable_interspersed_args()
    parser.add_option('-o', '--output', dest='ofilename', metavar='FILE',
                      help='Output file for generated feed (defaults to stdout)')
    parser.add_option('--test', dest='test', action='store_true',
                      help='Run the module test suite')
    (options, args) = parser.parse_args()

    if options.test:
        results = test()
        # When test() reports doctest results, a non-zero failure count
        # becomes a non-zero exit status; no result counts as success.
        failed = results[0] if results is not None else 0
        sys.exit(1 if failed else 0)

    # Report a missing or unknown command through optparse (usage
    # message, exit status 2) instead of an IndexError/KeyError traceback.
    if not args:
        parser.error('missing command name')
    command_name = args[0]
    if command_name not in command_dict:
        parser.error('unknown command: %s' % command_name)
    command = command_dict[command_name]
    feed = command.run(args[1:])

    ostring = etree.tostring(
            feed, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    if options.ofilename is None:
        # Emit the serialized feed verbatim, with no extra newline.
        sys.stdout.write(ostring)
    else:
        with open(options.ofilename, 'w') as of:
            of.write(ostring)