Update Doxylink to the latest development version.

This uses a completely new parsing system based on PyParsing (http://pyparsing.wikispaces.com/) but it should be faster and mush more resilient.
If this new dependency is deemed oer the top then PyParsing can probably be directly included in SVN as it is only 1 Python file.
This commit is contained in:
Matt Williams
2010-12-13 17:03:31 +00:00
parent 46c805759c
commit 755f5247d1
2 changed files with 382 additions and 102 deletions

View File

@ -1,61 +1,16 @@
# -*- coding: utf-8 -*-
"""
doxylink
~~~~~~~~
Sphinx extension to link to external Doxygen API documentation.
It works much like the extlinks extension but it does some more processing to link C++ symbols against their Doxygen HTML documentation.
When generating your Doxygen documentation, you need to instruct it to create a 'tag' file. This is an XML file which contains the mapping between symbols and HTML files. To make Doxygen create this file ensure that you have a line like::
GENERATE_TAGFILE = PolyVox.tag
in your ``Doxyfile``.
.. confval:: doxylink
The environment is set up with a dictionary mapping the interpereted text role
to a tuple of tag file and prefix:
.. code-block:: python
doxylink = {
'polyvox' : ('/home/matt/PolyVox.tag', '/home/matt/PolyVox/html/'),
'qtogre' : ('/home/matt/QtOgre.tag', '/home/matt/QtOgre/html/'),
}
This allows one to do:
.. code-block:: rst
:polyvox:`Array <PolyVox::Array>`.
:polyvox:`PolyVox::Volume`
:qtogre:`QtOgre::Log`
:polyvox:`tidyUpMemeory(int) <tidyUpMemory>`
:polyvox:`PolyVox::Array::operator[]`
:requires: Python 2.5
.. todo::
Make the extension atuomatically re-run if the tag file is altered on disc
Find a way to parse the tag file to a DOM tree *once* and then just reference it from then on.
Correct function overloading when arguments are given.
:copyright: Copyright 2010 by Matt Williams
:license: BSD, see LICENSE for details.
"""
from docutils import nodes, utils
import os
from sphinx.util.nodes import split_explicit_title
import xml.etree.ElementTree as ET
import urlparse
import re
import itertools
from docutils import nodes, utils
from sphinx.util.nodes import split_explicit_title
from sphinx.util.console import bold, standout
from parsing import normalise, ParseException
def find_url(doc, symbol):
"""
@ -110,31 +65,50 @@ def find_url(doc, symbol):
for compound in doc.findall('.//compound'):
if compound.find('name').text == namespace:
for member in compound.findall('member'):
# #If this compound object contains the matching member then return it
#If this compound object contains the matching member then return it
if member.find('name').text == endsymbol:
return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
#Then we'll look at unqualified members
for member in doc.findall('.//member'):
if member.find('name').text == symbol:
return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
return None
def parse_tag_file(doc):
"""
Takes in an XML free from a Doxygen tag file and returns a dictionary that looks something like:
Takes in an XML tree from a Doxygen tag file and returns a dictionary that looks something like:
.. code-block:: python
{'PolyVox': {'file': 'namespace_poly_vox.html', 'kind': 'namespace'},
'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html', 'kind': 'class'},
{'PolyVox': {'file': 'namespace_poly_vox.html',
'kind': 'namespace'},
'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html',
'kind': 'class'},
'PolyVox::Array1DDouble': {'file': 'namespace_poly_vox.html#a7a1f5fd5c4f7fbb4258a495d707b5c13',
'kind': 'typedef'},
'PolyVox::Array1DFloat': {'file': 'namespace_poly_vox.html#a879a120e49733eba1905c33f8a7f131b',
'kind': 'typedef'},
'PolyVox::Array1DInt16': {'file': 'namespace_poly_vox.html#aa1463ece448c6ebed55ab429d6ae3e43',
'kind': 'typedef'}}
'kind': 'typedef'},
'QScriptContext::throwError': {'arglist': {'( Error error, const QString & text )': 'qscriptcontext.html#throwError',
'( const QString & text )': 'qscriptcontext.html#throwError-2'},
'kind': 'function'},
'QScriptContext::toString': {'arglist': {'()': 'qscriptcontext.html#toString'},
'kind': 'function'}}
Note the different form for functions. This is required to allow for 'overloading by argument type'.
To access a filename for a symbol you do:
.. code-block:: python
symbol_mapping = mapping[symbol]
if symbol_mapping['kind'] == 'function':
url = symbol_mapping['arglist'][argument_string]
else:
url = symbol_mapping['file']
:Parameters:
doc : xml.etree.ElementTree
@ -142,22 +116,86 @@ def parse_tag_file(doc):
:return: a dictionary mapping fully qualified symbols to files
"""
mapping = {}
for compound in doc.findall(".//compound"):
if compound.get('kind') != 'namespace' and compound.get('kind') != 'class':
continue
function_list = [] #This is a list of function to be parsed and inserted into mapping at the end of the function.
for compound in doc.findall("./compound"):
compound_kind = compound.get('kind')
if compound_kind != 'namespace' and compound_kind != 'class':
continue #Skip everything that isn't a namespace or class
compound_name = compound.findtext('name')
compound_filename = compound.findtext('filename')
#If it's a compound we can simply add it
mapping[compound_name] = {'kind' : compound_kind, 'file' : compound_filename}
mapping[compound.findtext('name')] = {'kind' : compound.get('kind'), 'file' : compound.findtext('filename')}
for member in compound.findall('member'):
mapping[join(compound.findtext('name'), '::', member.findtext('name'))] = {'kind' : member.get('kind'), 'file' : join(member.findtext('anchorfile'),'#',member.findtext('anchor')), 'arglist' : member.findtext('arglist')}
#If the member doesn't have an <anchorfile> element, use the parent compounds <filename> instead
#This is the way it is in the qt.tag and is perhaps an artefact of old Doxygen
anchorfile = member.findtext('anchorfile') or compound_filename
member_symbol = join(compound_name, '::', member.findtext('name'))
member_kind = member.get('kind')
arglist_text = member.findtext('./arglist') #If it has an <arglist> then we assume it's a function. Empty <arglist> returns '', not None. Things like typedefs and enums can have empty arglists
if arglist_text and member_kind != 'variable' and member_kind != 'typedef' and member_kind != 'enumeration':
function_list.append((member_symbol, arglist_text, member_kind, join(anchorfile,'#',member.findtext('anchor'))))
else:
mapping[member_symbol] = {'kind' : member.get('kind'), 'file' : join(anchorfile,'#',member.findtext('anchor'))}
for old_tuple, normalised_tuple in zip(function_list, itertools.imap(normalise, (member_tuple[1] for member_tuple in function_list))):
member_symbol = old_tuple[0]
original_arglist = old_tuple[1]
kind = old_tuple[2]
anchor_link = old_tuple[3]
normalised_arglist = normalised_tuple[1]
if normalised_tuple[1] is not None: #This is a 'flag' for a ParseException having happened
if mapping.get(member_symbol):
mapping[member_symbol]['arglist'][normalised_arglist] = anchor_link
else:
mapping[member_symbol] = {'kind' : kind, 'arglist' : {normalised_arglist : anchor_link}}
else:
print 'Skipping %s %s%s. Error reported from parser was: %s' % (old_tuple[2], old_tuple[0], old_tuple[1], normalised_tuple[0])
#from pprint import pprint; pprint(mapping)
return mapping
def find_url2(mapping, symbol):
print "\n\nSearching for", symbol
"""
Return the URL for a given symbol.
This is where the magic happens.
.. todo::
Maybe print a list of all possible matches as a warning (but still only return the first)
:Parameters:
mapping : dictionary
A dictionary of the form returned by :py:func:`parse_tag_file`
symbol : string
The symbol to lookup in the file. E.g. something like 'PolyVox::Array' or 'tidyUpMemory'
:return: String representing the filename part of the URL
:raises:
LookupError
Raised if the symbol could not be matched in the file
"""
#print "\n\nSearching for", symbol
try:
symbol, normalised_arglist = normalise(symbol)
except ParseException as error:
raise LookupError(error)
#print symbol, normalised_arglist
#If we have an exact match then return it.
if mapping.get(symbol):
return mapping[symbol]
#print ('Exact match')
return return_from_mapping(mapping[symbol], normalised_arglist)
#If the user didn't pass in any arguments, i.e. `arguments == ''` then they don't care which version of the overloaded funtion they get.
#First we check for any mapping entries which even slightly match the requested symbol
#endswith_list = {}
@ -177,9 +215,9 @@ def find_url2(mapping, symbol):
#If there is only one match, return it.
if len(piecewise_list) is 1:
return piecewise_list.values()[0]
return return_from_mapping(piecewise_list.values()[0], normalised_arglist)
print("Still", len(piecewise_list), 'possible matches')
#print("Still", len(piecewise_list), 'possible matches')
#If there is more than one item in piecewise_list then there is an ambiguity
#Often this is due to the symbol matching the name of the constructor as well as the class name itself
@ -189,7 +227,7 @@ def find_url2(mapping, symbol):
if len(classes_list) is 1:
return classes_list.values()[0]
print("Still", len(classes_list), 'possible matches')
#print("Still", len(classes_list), 'possible matches')
#If we exhaused the list by requiring classes, use the list from before the filter.
if len(classes_list) == 0:
@ -198,28 +236,93 @@ def find_url2(mapping, symbol):
no_templates_list = find_url_remove_templates(classes_list, symbol)
if len(no_templates_list) is 1:
return no_templates_list.values()[0]
return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
print("Still", len(no_templates_list), 'possible matches')
#print("Still", len(no_templates_list), 'possible matches')
#If not found by now, just return the first one in the list
if len(no_templates_list) != 0:
return no_templates_list.values()[0]
#TODO return a warning here?
return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
#Else return None if the list is empty
else:
return None
LookupError('Could not find a match')
def return_from_mapping(mapping_entry, normalised_arglist=''):
"""
Return a mapping to a single URL in the form. This is needed since mapping entries for functions are more complicated due to function overriding.
If the mapping to be returned is not a function, this will simply return the mapping entry intact. If the entry is a function it will attempt to get the right version based on the function signature.
:Parameters:
mapping_entry : dict
should be a single entry from the large mapping file corresponding to a single symbol. If the symbol is a function, then ``mappingentry['arglist']`` will be a dictionary mapping normalised signatures to URLs
normalised_arglist : string
the normalised form of the arglist that the user has requested. This can be empty in which case the function will return just the first element of ``mappingentry['arglist']``. This parameter is ignored if ``mappingentry['kind'] != 'function'``
:return: dictionary something like:
.. code-block:: python
{'kind' : 'function', 'file' : 'something.html#foo'}
"""
#If it's a function we need to grab the right signature from the arglist.
if mapping_entry['kind'] == 'function':
#If the user has requested a specific function through specifying an arglist then get the right anchor
if normalised_arglist:
filename = mapping_entry['arglist'].get(normalised_arglist)
if not filename: #If we didn't get the filename because it's not in the mapping then we will just return a random one?
#TODO return a warning here!
filename = mapping_entry['arglist'].values()[0]
else:
#Otherwise just return the first entry (if they don't care they get whatever comes first)
filename = mapping_entry['arglist'].values()[0]
return {'kind' : 'function', 'file' : filename}
elif mapping_entry.get('arglist'):
#This arglist should only be one entry long and that entry should have '' as its key
return {'kind' : mapping_entry['kind'], 'file' : mapping_entry['arglist']['']}
#If it's not a function, then return it raw
return mapping_entry
def find_url_piecewise(mapping, symbol):
#Match the requested symbol reverse piecewise (split on '::') against the tag names to ensure they match exactly (modulo ambiguity)
#So, if in the mapping there is "PolyVox::Volume::FloatVolume" and "PolyVox::Volume" they would be split into:
# ['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
#and reversed:
# ['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
#and truncated to the shorter of the two:
# ['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
#If we're searching for the "PolyVox::Volume" symbol we would compare:
# ['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox']. That doesn't match so we look at the next in the mapping:
# ['Volume', 'PolyVox'] to ['Volume', 'PolyVox']. Good, so we add it to the list
"""
Match the requested symbol reverse piecewise (split on ``::``) against the tag names to ensure they match exactly (modulo ambiguity)
So, if in the mapping there is ``PolyVox::Volume::FloatVolume`` and ``PolyVox::Volume`` they would be split into:
.. code-block:: python
['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
and reversed:
.. code-block:: python
['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
and truncated to the shorter of the two:
.. code-block:: python
['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
If we're searching for the ``PolyVox::Volume`` symbol we would compare:
.. code-block:: python
['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox'].
That doesn't match so we look at the next in the mapping:
.. code-block:: python
['Volume', 'PolyVox'] to ['Volume', 'PolyVox'].
Good, so we add it to the list
"""
piecewise_list = {}
for item, data in mapping.items():
split_symbol = symbol.split('::')
@ -236,27 +339,27 @@ def find_url_piecewise(mapping, symbol):
#print split_symbol, split_item
if split_symbol == split_item:
print symbol + ' : ' + item
#print symbol + ' : ' + item
piecewise_list[item] = data
return piecewise_list
def find_url_classes(mapping, symbol):
#Prefer classes over names of constructors
"""Prefer classes over names of constructors"""
classes_list = {}
for item, data in mapping.items():
if data['kind'] == 'class':
print symbol + ' : ' + item
#print symbol + ' : ' + item
classes_list[item] = data
return classes_list
def find_url_remove_templates(mapping, symbol):
#Now, to disambiguate between "PolyVox::Array< 1, ElementType >::operator[]" and "PolyVox::Array::operator[]" matching "operator[]", we will ignore templated (as in C++ templates) tag names by removing names containing '<'
"""Now, to disambiguate between ``PolyVox::Array< 1, ElementType >::operator[]`` and ``PolyVox::Array::operator[]`` matching ``operator[]``, we will ignore templated (as in C++ templates) tag names by removing names containing ``<``"""
no_templates_list = {}
for item, data in mapping.items():
if '<' not in item:
print symbol + ' : ' + item
#print symbol + ' : ' + item
no_templates_list[item] = data
return no_templates_list
@ -271,42 +374,66 @@ def create_role(app, tag_filename, rootdir):
try:
tag_file = ET.parse(tag_filename)
mapping = parse_tag_file(tag_file)
except (IOError):
cache_name = os.path.basename(tag_filename)
app.info(bold('Checking tag file cache for %s: ' % cache_name), nonl=True)
if not hasattr(app.env, 'doxylink_cache'):
# no cache present at all, initialise it
app.info('No cache at all, rebuilding...')
mapping = parse_tag_file(tag_file)
app.env.doxylink_cache = { cache_name : {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}}
elif not app.env.doxylink_cache.get(cache_name):
# Main cache is there but the specific sub-cache for this tag file is not
app.info('Sub cache is missing, rebuilding...')
mapping = parse_tag_file(tag_file)
app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
elif app.env.doxylink_cache[cache_name]['mtime'] < os.path.getmtime(tag_filename):
# tag file has been modified since sub-cache creation
app.info('Sub-cache is out of date, rebuilding...')
mapping = parse_tag_file(tag_file)
app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
else:
#The cache is up to date
app.info('Sub-cache is up-to-date')
except IOError:
tag_file = None
app.warn('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename)
app.warn(standout('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename))
def find_doxygen_link(name, rawtext, text, lineno, inliner, options={}, content=[]):
text = utils.unescape(text)
# from :name:`title <part>`
has_explicit_title, title, part = split_explicit_title(text)
warning_messages = []
if tag_file:
url = find_url(tag_file, part)
try:
url = find_url2(app.env.doxylink_cache[cache_name]['mapping'], part)
except LookupError as error:
warning_messages.append('Error while parsing `%s`. Is not a well-formed C++ function call or symbol. If this is not the case, it is a doxylink bug so please report it. Error reported was: %s' % (part, error))
if url:
#If it's an absolute path then the link will work regardless of the document directory
if os.path.isabs(rootdir):
#Also check if it is a URL (i.e. it has a 'scheme' like 'http' or 'file')
if os.path.isabs(rootdir) or urlparse.urlparse(rootdir).scheme:
full_url = join(rootdir, url['file'])
#But otherwise we need to add the relative path of the current document to the root source directory to the link
else:
relative_path_to_docsrc = os.path.relpath(app.env.srcdir, os.path.dirname(inliner.document.current_source))
full_url = join(relative_path_to_docsrc, os.sep, rootdir, url['file'])
full_url = join(relative_path_to_docsrc, '/', rootdir, url['file']) #We always use the '/' here rather than os.sep since this is a web link avoids problems like documentation/.\../library/doc/ (mixed slashes)
if url['kind'] == 'function' and app.config.add_function_parentheses:
if url['kind'] == 'function' and app.config.add_function_parentheses and not normalise(title)[1]:
title = join(title, '()')
pnode = nodes.reference(title, title, internal=False, refuri=full_url)
return [pnode], []
#By here, no match was found
env = app.env
env.warn(env.docname, 'Could not find match for `%s` in `%s` tag file' % (part, tag_filename), lineno)
warning_messages.append('Could not find match for `%s` in `%s` tag file' % (part, tag_filename))
else:
env = app.env
env.warn(env.docname, 'Could not find match for `%s` because tag file not found' % (part), lineno)
warning_messages.append('Could not find match for `%s` because tag file not found' % (part))
pnode = nodes.inline(rawsource=title, text=title)
return [pnode], []
return [pnode], [inliner.reporter.warning(message, line=lineno) for message in warning_messages]
return find_doxygen_link