#!/usr/bin/env python
# Copyright (C) 2010 W. Trevor King
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Grab stock prices by ticker symbol."""

import logging
import time
from io import StringIO
from urllib.request import urlopen

from lxml import etree


class Grabber(object):
    """Base class for website-specific quote scrapers.

    Subclasses supply a quote-page URL template (via ``__init__``) and
    override `_parse_html` to pull the price out of the fetched page.
    """

    def __init__(self, url, sleep=1):
        # `url` is a %-style template with a single `%s` slot that
        # `_get_url` fills with the ticker symbol.
        self._url = url
        # Seconds to pause after each request, to rate-limit scraping.
        self._sleep_seconds = sleep

    def quote(self, ticker):
        """Return the floating point quote for the `ticker` symbol string."""
        url = self._get_url(ticker)
        # Lazy %-args: formatting is skipped when INFO is disabled.
        logging.info('get quote for %s from %s using %s', ticker, url, self)
        info, html = self._get_html(url)
        return self._parse_html(html)

    def _get_url(self, ticker):
        """Return the URL listing the quote for the `ticker` symbol string."""
        return self._url % ticker

    def _get_html(self, url):
        """Return ``(info, html)``: page metadata and decoded body for `url`.

        The body is decoded using the charset the server declares,
        falling back to UTF-8 with replacement for undecodable bytes.
        """
        with urlopen(url) as f:
            info = f.info()
            charset = info.get_content_charset() or 'utf-8'
            html = f.read().decode(charset, 'replace')
        time.sleep(self._sleep_seconds)  # be polite between requests
        return (info, html)

    def _parse_html(self, html):
        """Extract the floating point quote from the page's `html`.

        This method must be overridden by website-specific subclasses.
        """
        raise NotImplementedError()


class GoogleGrabber(Grabber):
    """Grab quotes from Google Finance.

    From Google's `Terms of Service`_:

      5.3 You agree not to access (or attempt to access) any of the
      Services by any means other than through the interface that is
      provided by Google, unless you have been specifically allowed to
      do so in a separate agreement with Google.  You specifically
      agree not to access (or attempt to access) any of the Services
      through any automated means (including use of scripts or web
      crawlers) and shall ensure that you comply with the instructions
      set out in any robots.txt file present on the Services.

    However, I think the distinction between "browser", which Google
    clearly does allow, and "script run interactively from the command
    line" is pretty blurry.

    .. _Terms of Service: http://www.google.com/accounts/TOS?loc=us
    """

    def __init__(self):
        super(GoogleGrabber, self).__init__(
            url='http://www.google.com/finance?q=%s')

    def _parse_html(self, html):
        """Extract the quote from the ``<span class="pr">`` element,
        whose text content is the price (e.g. ``64.77``).
        """
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)
        root = tree.getroot()
        spans = root.xpath(".//span[@class='pr']")
        if not spans:
            # Page layout changed (or an error page came back); fail
            # loudly rather than with a bare IndexError.
            raise ValueError('no price span found in Google Finance page')
        text = ''.join(spans[0].itertext()).strip()
        return float(text)


class YahooGrabber(Grabber):
    """Grab quotes from Yahoo! Finance.

    Yahoo's `Terms of Service`_ don't seem to have any explicitly
    relevant terms.

    .. _Terms of Service:
      http://info.yahoo.com/legal/us/yahoo/utos/utos-173.html
    """

    def __init__(self):
        super(YahooGrabber, self).__init__(
            url='http://finance.yahoo.com/q?s=%s')

    def _parse_html(self, html):
        """Extract the quote from the table row labeled ``Last Trade:``
        (the first ``<td>`` of that row holds the price, e.g. ``64.74``).

        For the implementation, see the `LXML tutorial`_.

        .. _LXML tutorial:
          http://codespeak.net/lxml/tutorial.html#using-xpath-to-find-text
        """
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)
        root = tree.getroot()
        # Select only rows whose header cell carries the quote label.
        # (The old scan left `has_label`/`row` undefined on a rowless
        # page and validated the result with a strippable `assert`.)
        rows = root.xpath(".//tr[th/text() = 'Last Trade:']")
        if not rows:
            raise ValueError('no "Last Trade:" row found in Yahoo! page')
        data = rows[0].xpath('.//td')[0]
        text = ''.join(data.itertext()).strip()
        return float(text)


# Map lowercased grabber names to their classes, e.g.
# GRABBERS['google'] is GoogleGrabber.  Built from Grabber's direct
# subclasses instead of the old locals()-scanning loop.
GRABBERS = dict(
    (cls.__name__[:-len('Grabber')].lower(), cls)
    for cls in Grabber.__subclasses__())


if __name__ == '__main__':
    from argparse import ArgumentParser

    grabbers = sorted(GRABBERS.keys())
    p = ArgumentParser(description='Grab stock prices by ticker symbol.')
    p.add_argument('-v', '--verbose', default=0, action='count',
                   help='increment verbosity')
    p.add_argument('-g', '--grabber', default='yahoo', choices=grabbers,
                   help='select grabber (default: %(default)s)')
    p.add_argument('tickers', nargs='*', metavar='TICKER',
                   help='ticker symbols to look up')
    args = p.parse_args()

    # Each -v steps ERROR -> WARNING -> INFO -> DEBUG, clamped at DEBUG.
    log_levels = [logging.ERROR, logging.WARNING, logging.INFO,
                  logging.DEBUG]
    log_level = log_levels[min(args.verbose, len(log_levels) - 1)]
    logging.basicConfig(level=log_level)

    g = GRABBERS[args.grabber]()
    print('\t'.join(str(g.quote(ticker)) for ticker in args.tickers))