#!/usr/bin/env python
# Copyright (C) 2010 W. Trevor King
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Grab stock prices by ticker symbol."""

import logging
import time
from io import StringIO
from urllib.request import urlopen

from lxml import etree


class Grabber(object):
    """Base class for website-specific quote scrapers.

    Subclasses supply a quote-page URL template (via ``__init__``) and
    override `_parse_html` to pull the price out of the fetched page.
    """

    def __init__(self, url, sleep=1):
        # `url` is a %-style template with a single `%s` slot that
        # `_get_url` fills with the ticker symbol.
        self._url = url
        # Seconds to pause after each request, to rate-limit scraping.
        self._sleep_seconds = sleep

    def quote(self, ticker):
        """Return the floating point quote for the `ticker` symbol string."""
        url = self._get_url(ticker)
        # Lazy %-args: formatting is skipped when INFO is disabled.
        logging.info('get quote for %s from %s using %s', ticker, url, self)
        info, html = self._get_html(url)
        return self._parse_html(html)

    def _get_url(self, ticker):
        """Return the URL listing the quote for the `ticker` symbol string."""
        return self._url % ticker

    def _get_html(self, url):
        """Return ``(info, html)``: page metadata and decoded body for `url`.

        The body is decoded using the charset the server declares,
        falling back to UTF-8 with replacement for undecodable bytes.
        """
        with urlopen(url) as f:
            info = f.info()
            charset = info.get_content_charset() or 'utf-8'
            html = f.read().decode(charset, 'replace')
        time.sleep(self._sleep_seconds)  # be polite between requests
        return (info, html)

    def _parse_html(self, html):
        """Extract the floating point quote from the page's `html`.

        This method must be overridden by website-specific subclasses.
        """
        raise NotImplementedError()


class GoogleGrabber(Grabber):
    """Grab quotes from Google Finance.

    From Google's `Terms of Service`_:

      5.3 You agree not to access (or attempt to access) any of the
      Services by any means other than through the interface that is
      provided by Google, unless you have been specifically allowed to
      do so in a separate agreement with Google.  You specifically
      agree not to access (or attempt to access) any of the Services
      through any automated means (including use of scripts or web
      crawlers) and shall ensure that you comply with the instructions
      set out in any robots.txt file present on the Services.

    However, I think the distinction between "browser", which Google
    clearly does allow, and "script run interactively from the command
    line" is pretty blurry.

    .. _Terms of Service: http://www.google.com/accounts/TOS?loc=us
    """

    def __init__(self):
        super(GoogleGrabber, self).__init__(
            url='http://www.google.com/finance?q=%s')

    def _parse_html(self, html):
        """Extract the quote from the ``<span class="pr">`` element,
        whose text content is the price (e.g. ``64.77``).
        """
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)
        root = tree.getroot()
        spans = root.xpath(".//span[@class='pr']")
        if not spans:
            # Page layout changed (or an error page came back); fail
            # loudly rather than with a bare IndexError.
            raise ValueError('no price span found in Google Finance page')
        text = ''.join(spans[0].itertext()).strip()
        return float(text)


class YahooGrabber(Grabber):
    """Grab quotes from Yahoo! Finance.

    Yahoo's `Terms of Service`_ don't seem to have any explicitly
    relevant terms.

    .. _Terms of Service:
      http://info.yahoo.com/legal/us/yahoo/utos/utos-173.html
    """

    def __init__(self):
        super(YahooGrabber, self).__init__(
            url='http://finance.yahoo.com/q?s=%s')

    def _parse_html(self, html):
        """Extract the quote from the table row labeled ``Last Trade:``
        (the first ``<td>`` of that row holds the price, e.g. ``64.74``).

        For the implementation, see the `LXML tutorial`_.

        .. _LXML tutorial:
          http://codespeak.net/lxml/tutorial.html#using-xpath-to-find-text
        """
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)
        root = tree.getroot()
        # Select only rows whose header cell carries the quote label.
        # (The old scan left `has_label`/`row` undefined on a rowless
        # page and validated the result with a strippable `assert`.)
        rows = root.xpath(".//tr[th/text() = 'Last Trade:']")
        if not rows:
            raise ValueError('no "Last Trade:" row found in Yahoo! page')
        data = rows[0].xpath('.//td')[0]
        text = ''.join(data.itertext()).strip()
        return float(text)


# Map lowercased grabber names to their classes, e.g.
# GRABBERS['google'] is GoogleGrabber.  Built from Grabber's direct
# subclasses instead of the old locals()-scanning loop.
GRABBERS = dict(
    (cls.__name__[:-len('Grabber')].lower(), cls)
    for cls in Grabber.__subclasses__())


if __name__ == '__main__':
    from argparse import ArgumentParser

    grabbers = sorted(GRABBERS.keys())
    p = ArgumentParser(description='Grab stock prices by ticker symbol.')
    p.add_argument('-v', '--verbose', default=0, action='count',
                   help='increment verbosity')
    p.add_argument('-g', '--grabber', default='yahoo', choices=grabbers,
                   help='select grabber (default: %(default)s)')
    p.add_argument('tickers', nargs='*', metavar='TICKER',
                   help='ticker symbols to look up')
    args = p.parse_args()

    # Each -v steps ERROR -> WARNING -> INFO -> DEBUG, clamped at DEBUG.
    log_levels = [logging.ERROR, logging.WARNING, logging.INFO,
                  logging.DEBUG]
    log_level = log_levels[min(args.verbose, len(log_levels) - 1)]
    logging.basicConfig(level=log_level)

    g = GRABBERS[args.grabber]()
    print('\t'.join(str(g.quote(ticker)) for ticker in args.tickers))