"""
LOG.info('convert medline XML to BibTeX')
LOG.debug('convert from\n%s' % fetch_page)
p = Pipe(cmds=[['med2xml'], ['xml2bib', '-fc'], ['bibclean']],
stdin=fetch_page)
LOG.debug('converted to\n%s' % p.stdout)
return p.stdout
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Parses the options defined below and then runs in one of three modes:
    #   list    (-L)  write the names of the available databases
    #   explain (-X)  write the fields of one database, or the attributes
    #                 of a single field when -F is given
    #   search        run eSearch for SEARCH_TERM and write the hits as
    #                 BibTeX (default), Medline XML (-x) or eLink output (-l)
    # Output goes to the file named by -f, or to stdout when -f is absent.
    from optparse import OptionParser

    usage_string = '\n'.join([
        '',
        ' %prog [options] SEARCH_TERM'
        ' (print medline xml matching search)',
        '| %prog -l [options] SEARCH_TERM'
        ' (print links to entries matching search)',
        '| %prog -L [-f FILE] (list databases)',
        '| %prog -X [-d DATABASE] [-F FIELD] [-f FILE]'
        ' (list fields in a database, or details on a single field)',
        '',
        '2008-2011, W. Trevor King.',
        '',
        'See the docstrings in %prog or',
        ' http://www.ncbi.nlm.nih.gov/books/NBK3837/',
        ' http://www.ncbi.nlm.nih.gov/entrez/query/static/'
        'eutils_help.html',
        ' http://www.ncbi.nlm.nih.gov/entrez/query/static/'
        'eutils_help.html#UserSystemRequirements',
        ' http://www.ncbi.nlm.nih.gov/corehtml/query/static/'
        'einfo_help.html',
        ' http://www.ncbi.nlm.nih.gov/corehtml/query/static/'
        'esearch_help.html',
        ' http://www.ncbi.nlm.nih.gov/corehtml/query/static/'
        'efetch_help.html',
        ' http://www.ncbi.nlm.nih.gov/corehtml/query/static/'
        'elink_help.html',
        'for more details.'
        ])

    parser = OptionParser(
        usage=usage_string, version='%%prog %s' % __version__)

    # Explanation by Jerry Stratton, http://www.hoboes.com/Mimsy/?ART=511
    # "
    # metavar is the name used in the help for that options required
    # text, and dest is the name of the property you'll use to access
    # the value of that option.
    # "

    parser.add_option('-d', '--database', dest='database',
                      help="Search DATABASE (default '%default')",
                      type='string', metavar='DATABASE', default='pubmed')
    parser.add_option('-f', '--file', dest='filename',
                      help='write output to FILE (default stdout)',
                      type='string', metavar='FILE')
    parser.add_option('-v', '--verbose', dest='verbose', action='count',
                      help=('Print minimal debugging information. Use twice '
                            'to get lots of debugging info.'),
                      default=0)

    # mode control options
    mode = 'search'

    def set_mode(option, opt_str, value, parser):
        """optparse callback: switch the global run mode for -L / -X."""
        global mode
        long_option = option.get_opt_string()
        if long_option == '--list-mode':
            mode = 'list'
        elif long_option == '--explain-mode':
            mode = 'explain'

    parser.add_option('-L', '--list-mode', callback=set_mode,
                      help='Run in list mode', action='callback')
    parser.add_option('-X', '--explain-mode', callback=set_mode,
                      help='Run in explain mode', action='callback')

    # search-fetch-xml-to-? options
    output = 'bibtex'

    def set_output(option, opt_str, value, parser):
        """optparse callback: pick the global output format (-x/-b/-l)."""
        global output
        long_option = option.get_opt_string()
        if long_option == '--output-xml':
            output = 'medline'
        if long_option == '--output-bibtex':
            output = 'bibtex'
        if long_option == '--output-link':
            output = 'link'

    parser.add_option('-x', '--output-xml', callback=set_output,
                      help='Output search results as Medline XML',
                      action='callback')
    parser.add_option('-b', '--output-bibtex', callback=set_output,
                      help='Output search results as BibTeX',
                      action='callback')
    parser.add_option('-F', '--field', dest='field',
                      help='Limit SEARCH_TERM to FIELD',
                      type='string', metavar='FIELD')
    parser.add_option('-r', '--reldate', dest='reldate',
                      help='Limit search to dates within DAYS of today',
                      type='string', metavar='DAYS')
    parser.add_option('--mindate', dest='mindate',
                      help=('Limit search to date after MINDATE '
                            "(e.g. '2001/1/1' or '2002')"),
                      type='string', metavar='MINDATE')
    parser.add_option('--maxdate', dest='maxdate',
                      help=('Limit search to date before MAXDATE '
                            "(e.g. '2001/1/1' or '2002')"),
                      type='string', metavar='MAXDATE')
    parser.add_option('-t', '--datetype', dest='datetype',
                      help=("Select field to apply date limits to "
                            "(e.g. 'edat' for Entrez date)"),
                      type='string', metavar='DATETYPE')
    parser.add_option('-m', '--retmax', dest='retmax',
                      help=('Return at most RETMAX items from a successful '
                            'search (default %default)'),
                      type='int', metavar='RETMAX', default=20)
    parser.add_option('-s', '--retstart', dest='retstart',
                      help=('Index of first returned search item from a '
                            'successful search (default %default)'),
                      type='int', metavar='RETSTART', default=0)
    parser.add_option('-V', '--validate', dest='validate',
                      action='store_true',
                      help=('Check that FIELD and field tags in SEARCH_TERM '
                            'are valid for DB'),
                      default=False)

    # output link options
    parser.add_option('-l', '--output-link', callback=set_output,
                      help='Output a link (instead of xml citations).',
                      action='callback')
    parser.add_option('-c', '--link-cmd', dest='link_cmd',
                      help='Select link output',
                      type='string', metavar='LINK_CMD')
    parser.add_option('-T', '--link-term', dest='link_term',
                      help='Limit links to those matching LINK_TERM',
                      type='string', metavar='LINK_TERM')
    parser.add_option('-D', '--from-database', dest='dbfrom',
                      help='Limit links to those from FROMDATABASE',
                      type='string', metavar='FROMDATABASE')
    parser.add_option('-n', '--link-name', dest='linkname',
                      help='Limit links to a specific neighbor',
                      type='string', metavar='LINKNAME')

    (options, args) = parser.parse_args()
    # Check for the positional argument while the parser is still usable:
    # parser.error() prints the usage text, which destroy() tears down.
    if mode == 'search' and not args:
        parser.error('missing SEARCH_TERM')
    parser.destroy()

    if options.verbose == 1:
        LOG.setLevel(_logging.INFO)
    elif options.verbose > 1:
        LOG.setLevel(_logging.DEBUG)

    # open the output file if specified
    if options.filename is None:
        outfile = _sys.stdout
    else:
        outfile = open(options.filename, 'w')

    try:
        LOG.debug('operating in %s mode' % mode)

        if mode == 'list':
            outfile.write('# available databases:\n')
            LOG.info('run eInfo to get list of databases')
            q = EUTILS_CLIENT.service.run_eInfo(tool=TOOL, email=EMAIL)
            if hasattr(q, 'ERROR'):
                raise Exception(q.ERROR)
            for db in q.DbList.DbName:
                outfile.write('%s\n' % db)

        elif mode == 'explain':
            LOG.info('run eInfo on %s' % options.database)
            q = EUTILS_CLIENT.service.run_eInfo(
                db=options.database, tool=TOOL, email=EMAIL)
            if hasattr(q, 'ERROR'):
                raise Exception(q.ERROR)
            if options.field:  # print specific info about this field
                outfile.write(
                    'field %s in %s:\n' % (options.field, options.database))
                fields = dict(
                    (fld.Name, fld) for fld in q.DbInfo.FieldList.Field)
                # An unknown FIELD raises KeyError here.
                field = fields[options.field]
                attributes = sorted(
                    (a, getattr(field, a)) for a in dir(field)
                    if not a.startswith('_'))
                # Right-align attribute names in a column as wide as the
                # longest name.
                name_width = max(
                    [len(attr_name) for attr_name, _value in attributes]
                    or [0])
                for attribute, value in attributes:
                    outfile.write(
                        '%*.*s\t%s\n'
                        % (name_width, name_width, attribute, value))
            else:  # print general info
                outfile.write('database: %s\n' % q.DbInfo.DbName)
                outfile.write('description: %s\n' % q.DbInfo.Description)
                outfile.write('available fields:\n')
                db_fields = q.DbInfo.FieldList.Field
                # Column widths: longest short name and longest full name.
                name_width = max(
                    [len(fld.Name) for fld in db_fields] or [0])
                full_width = max(
                    [len(fld.FullName) for fld in db_fields] or [0])
                for field in db_fields:
                    outfile.write(
                        '%*.*s\t%-*.*s\t%s\n'
                        % (name_width, name_width, field.Name,
                           full_width, full_width, field.FullName,
                           field.Description))

        elif mode == 'search':
            search_term = args[0]
            LOG.debug('output %s' % output)

            # If only one end of the date range is given, fill in the other.
            if options.mindate and not options.maxdate:
                # '%m' is the month; '%M' would be the minute.
                options.maxdate = _time.strftime('%Y/%m/%d')
                LOG.info('fill in maximum date: %s' % options.maxdate)
            elif options.maxdate and not options.mindate:
                options.mindate = '0'
                LOG.info('fill in minimum date: %s' % options.mindate)

            LOG.info('run eSearch on %s' % options.database)
            q = EUTILS_CLIENT.service.run_eSearch(
                db=options.database, term=search_term, tool=TOOL,
                email=EMAIL,
                field=options.field, reldate=options.reldate,
                mindate=options.mindate, maxdate=options.maxdate,
                datetype=options.datetype,
                RetStart=options.retstart, RetMax=options.retmax,
                #sort=
                )
            if hasattr(q, 'ERROR'):
                raise Exception(q.ERROR)

            # IdList has no Id attribute when nothing matched.
            if hasattr(q.IdList, 'Id'):
                ret = len(q.IdList.Id)
            else:
                ret = 0
            LOG.info('search returned %d of %d items' % (ret, int(q.Count)))

            if ret > 0:
                id_list = ','.join(q.IdList.Id)

                if output in ['medline', 'bibtex']:
                    # Try the SOAP client first.  A 404 while building it is
                    # logged and triggers the plain-URL fallback; any other
                    # transport error propagates.
                    efetch_client = None
                    try:
                        efetch_client = _Client(
                            EFETCH_WSDL_URL % options.database)
                    except _TransportError as e:
                        if e.httpcode != 404:
                            raise
                        LOG.warning(str(e))

                    if efetch_client is None:
                        # Fallback to straight URL fetch
                        params = {
                            'id': id_list,
                            'tool': TOOL,
                            'email': EMAIL,
                            'db': options.database,
                            'report': 'xml',
                            }
                        url = '%s?%s' % (
                            EFETCH_PLAIN_URL, _urllib.urlencode(params))
                        LOG.info(
                            'fallback to non-SOAP eFetch request: %s' % url)
                        f = _urllib.urlopen(url)
                        try:
                            xml = f.read()
                        finally:
                            f.close()
                        # Remove wrapping HTML and unescape XML.
                        # NOTE(review): the '<pre>' / '</pre>' delimiters and
                        # the '&quot;' entity were restored from a garbled
                        # copy of this file -- confirm against a live reply.
                        xml = xml.split('<pre>', 1)[-1]
                        xml = xml.split('</pre>', 1)[0]
                        xml = _unescape(xml, {'&quot;': '"'})
                        if not xml.strip():
                            urls = [NCBI_PLAIN_URL % (options.database, uid)
                                    for uid in q.IdList.Id]
                            LOG.warning(
                                'no meaningful output; try:\n%s'
                                % '\n'.join(urls))
                            # Nothing to write or convert; skip the output
                            # step below.
                            output = None
                    else:
                        # Use SOAP eFetch
                        LOG.info('run eFetch on %s' % options.database)
                        f = efetch_client.service.run_eFetch(
                            id=id_list, tool=TOOL, email=EMAIL)
                        if hasattr(f, 'ERROR'):
                            raise Exception(f.ERROR)
                        xml = efetch_client.last_received()

                if output is None:
                    pass  # we're bailing
                elif output == 'medline':
                    outfile.write(str(xml).rstrip()+'\n')
                elif output == 'bibtex':
                    outfile.write(medline_xml_to_bibtex(str(xml)))
                elif output == 'link':
                    LOG.info('run eLink on %s' % options.database)
                    EUTILS_CLIENT.service.run_eLink(
                        db=options.database, id=id_list,
                        #reldate=, mindate=, maxdate=, datetype=,
                        term=options.link_term, dbfrom=options.dbfrom,
                        linkname=options.linkname, cmd=options.link_cmd,
                        tool=TOOL, email=EMAIL)
                    # Write the last reply received by the client rather
                    # than the value returned by run_eLink.
                    outfile.write(
                        str(EUTILS_CLIENT.last_received()).rstrip()+'\n')
                else:
                    raise KeyError(output)
    finally:
        # Close the output file even when a mode fails part-way; stdout is
        # left open.
        if options.filename is not None:
            outfile.close()