Update Doxylink to the latest development version.

This uses a completely new parsing system based on PyParsing (http://pyparsing.wikispaces.com/), but it should be faster and much more resilient. If this new dependency is deemed over the top, then PyParsing can probably be included directly in SVN, as it is only one Python file.
2010-12-13 17:03:31 +00:00
parent 46c805759c
commit 755f5247d1
2 changed files with 382 additions and 102 deletions
--- a/documentation/_extensions/sphinxcontrib/doxylink.py
+++ b/documentation/_extensions/sphinxcontrib/doxylink.py
@@ -1,61 +1,16 @@
 # -*- coding: utf-8 -*-
-"""
-doxylink
-~~~~~~~~
-
-Sphinx extension to link to external Doxygen API documentation.
-
-It works much like the extlinks extension but it does some more processing to link C++ symbols against their Doxygen HTML documentation.
-
-When generating your Doxygen documentation, you need to instruct it to create a 'tag' file. This is an XML file which contains the mapping between symbols and HTML files. To make Doxygen create this file ensure that you have a line like::
-
-	GENERATE_TAGFILE = PolyVox.tag
-
-in your ``Doxyfile``.
-
-.. confval:: doxylink
-
-	The environment is set up with a dictionary mapping the interpereted text role
-	to a tuple of tag file and prefix:
-
-	.. code-block:: python
-
-		doxylink = {
-			'polyvox' : ('/home/matt/PolyVox.tag', '/home/matt/PolyVox/html/'),
-			'qtogre' : ('/home/matt/QtOgre.tag', '/home/matt/QtOgre/html/'),
-		}
-
-This allows one to do:
-
-.. code-block:: rst
-
-	:polyvox:`Array <PolyVox::Array>`.
-	:polyvox:`PolyVox::Volume`
-	:qtogre:`QtOgre::Log`
-	:polyvox:`tidyUpMemeory(int) <tidyUpMemory>`
-	:polyvox:`PolyVox::Array::operator[]`
-
-:requires: Python 2.5
-
-.. todo::
-	
-	Make the extension atuomatically re-run if the tag file is altered on disc
-	
-	Find a way to parse the tag file to a DOM tree *once* and then just reference it from then on.
-	
-	Correct function overloading when arguments are given.
-
-:copyright: Copyright 2010 by Matt Williams
-:license: BSD, see LICENSE for details.
-"""
-
-from docutils import nodes, utils

 import os
-
-from sphinx.util.nodes import split_explicit_title
-
 import xml.etree.ElementTree as ET
+import urlparse
+import re
+import itertools
+
+from docutils import nodes, utils
+from sphinx.util.nodes import split_explicit_title
+from sphinx.util.console import bold, standout
+
+from parsing import normalise, ParseException

 def find_url(doc, symbol):
 	"""
@@ -110,31 +65,50 @@ def find_url(doc, symbol):
 		for compound in doc.findall('.//compound'):
 			if compound.find('name').text == namespace:
 				for member in compound.findall('member'):
-#					#If this compound object contains the matching member then return it
+					#If this compound object contains the matching member then return it
 					if member.find('name').text == endsymbol:
-						return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
+						return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
 	
 	#Then we'll look at unqualified members
 	for member in doc.findall('.//member'):
 		if member.find('name').text == symbol:
-			return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
+			return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
 	
 	return None

 def parse_tag_file(doc):
 	"""
-	Takes in an XML free from a Doxygen tag file and returns a dictionary that looks something like:
+	Takes in an XML tree from a Doxygen tag file and returns a dictionary that looks something like:
 	
 	.. code-block:: python
 	
-		{'PolyVox': {'file': 'namespace_poly_vox.html', 'kind': 'namespace'},
-		 'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html', 'kind': 'class'},
+		{'PolyVox': {'file': 'namespace_poly_vox.html',
+		             'kind': 'namespace'},
+		 'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html',
+		                    'kind': 'class'},
 		 'PolyVox::Array1DDouble': {'file': 'namespace_poly_vox.html#a7a1f5fd5c4f7fbb4258a495d707b5c13',
 		                            'kind': 'typedef'},
 		 'PolyVox::Array1DFloat': {'file': 'namespace_poly_vox.html#a879a120e49733eba1905c33f8a7f131b',
 		                           'kind': 'typedef'},
 		 'PolyVox::Array1DInt16': {'file': 'namespace_poly_vox.html#aa1463ece448c6ebed55ab429d6ae3e43',
-		                           'kind': 'typedef'}}
+		                           'kind': 'typedef'},
+		 'QScriptContext::throwError': {'arglist': {'( Error error, const QString & text )': 'qscriptcontext.html#throwError',
+		                                            '( const QString & text )': 'qscriptcontext.html#throwError-2'},
+		                                'kind': 'function'},
+		 'QScriptContext::toString': {'arglist': {'()': 'qscriptcontext.html#toString'},
+		                              'kind': 'function'}}
+	
+	Note the different form for functions. This is required to allow for 'overloading by argument type'.
+	
+	To access a filename for a symbol you do:
+	
+	.. code-block:: python
+	
+		symbol_mapping = mapping[symbol]
+		if symbol_mapping['kind'] == 'function':
+			url = symbol_mapping['arglist'][argument_string]
+		else:
+			url = symbol_mapping['file']
 	
 	:Parameters:
 		doc : xml.etree.ElementTree
@@ -142,22 +116,86 @@ def parse_tag_file(doc):
 	
 	:return: a dictionary mapping fully qualified symbols to files
 	"""
+	
 	mapping = {}
-	for compound in doc.findall(".//compound"):
-		if compound.get('kind') != 'namespace' and compound.get('kind') != 'class':
-			continue
+	function_list = [] #This is a list of function to be parsed and inserted into mapping at the end of the function.
+	for compound in doc.findall("./compound"):
+		compound_kind = compound.get('kind')
+		if compound_kind != 'namespace' and compound_kind != 'class':
+			continue #Skip everything that isn't a namespace or class
+		
+		compound_name = compound.findtext('name')
+		compound_filename = compound.findtext('filename')
+		
+		#If it's a compound we can simply add it
+		mapping[compound_name] = {'kind' : compound_kind, 'file' : compound_filename}
 		
-		mapping[compound.findtext('name')] = {'kind' : compound.get('kind'), 'file' : compound.findtext('filename')}
 		for member in compound.findall('member'):
-			mapping[join(compound.findtext('name'), '::', member.findtext('name'))] = {'kind' : member.get('kind'), 'file' : join(member.findtext('anchorfile'),'#',member.findtext('anchor')), 'arglist' : member.findtext('arglist')}
+			
+			#If the member doesn't have an <anchorfile> element, use the parent compounds <filename> instead
+			#This is the way it is in the qt.tag and is perhaps an artefact of old Doxygen
+			anchorfile = member.findtext('anchorfile') or compound_filename
+			member_symbol = join(compound_name, '::', member.findtext('name'))
+			member_kind = member.get('kind')
+			arglist_text = member.findtext('./arglist') #If it has an <arglist> then we assume it's a function. Empty <arglist> returns '', not None. Things like typedefs and enums can have empty arglists
+			
+			if arglist_text and member_kind != 'variable' and member_kind != 'typedef' and member_kind != 'enumeration':
+				function_list.append((member_symbol, arglist_text, member_kind, join(anchorfile,'#',member.findtext('anchor'))))
+			else:
+				mapping[member_symbol] = {'kind' : member.get('kind'), 'file' : join(anchorfile,'#',member.findtext('anchor'))}
+	
+	for old_tuple, normalised_tuple in zip(function_list, itertools.imap(normalise, (member_tuple[1] for member_tuple in function_list))):
+		member_symbol = old_tuple[0]
+		original_arglist = old_tuple[1]
+		kind = old_tuple[2]
+		anchor_link = old_tuple[3]
+		normalised_arglist = normalised_tuple[1]
+		if normalised_tuple[1] is not None: #This is a 'flag' for a ParseException having happened
+			if mapping.get(member_symbol):
+				mapping[member_symbol]['arglist'][normalised_arglist] = anchor_link
+			else:
+				mapping[member_symbol] = {'kind' : kind, 'arglist' : {normalised_arglist : anchor_link}}
+		else:
+			print 'Skipping %s %s%s. Error reported from parser was: %s' % (old_tuple[2], old_tuple[0], old_tuple[1], normalised_tuple[0])
+	
+	#from pprint import pprint; pprint(mapping)
 	return mapping

 def find_url2(mapping, symbol):
-	print "\n\nSearching for", symbol
+	"""
+	Return the URL for a given symbol.
+	
+	This is where the magic happens.
+	
+	.. todo::
+		
+		Maybe print a list of all possible matches as a warning (but still only return the first)
+	
+	:Parameters:
+		mapping : dictionary
+			A dictionary of the form returned by :py:func:`parse_tag_file`
+		symbol : string
+			The symbol to lookup in the file. E.g. something like 'PolyVox::Array' or 'tidyUpMemory'
+	
+	:return: String representing the filename part of the URL
+	
+	:raises:
+		LookupError
+			Raised if the symbol could not be matched in the file
+	"""
+	#print "\n\nSearching for", symbol
+	try:
+		symbol, normalised_arglist =  normalise(symbol)
+	except ParseException as error:
+		raise LookupError(error)
+	#print symbol, normalised_arglist
 	
 	#If we have an exact match then return it.
 	if mapping.get(symbol):
-		return mapping[symbol]
+		#print ('Exact match')
+		return return_from_mapping(mapping[symbol], normalised_arglist)
+	
+	#If the user didn't pass in any arguments, i.e. `arguments == ''`, then they don't care which version of the overloaded function they get.
 	
 	#First we check for any mapping entries which even slightly match the requested symbol
 	#endswith_list = {}
@@ -177,9 +215,9 @@ def find_url2(mapping, symbol):
 	
 	#If there is only one match, return it.
 	if len(piecewise_list) is 1:
-		return piecewise_list.values()[0]
+		return return_from_mapping(piecewise_list.values()[0], normalised_arglist)
 	
-	print("Still", len(piecewise_list), 'possible matches')
+	#print("Still", len(piecewise_list), 'possible matches')
 	
 	#If there is more than one item in piecewise_list then there is an ambiguity
 	#Often this is due to the symbol matching the name of the constructor as well as the class name itself
@@ -189,7 +227,7 @@ def find_url2(mapping, symbol):
 	if len(classes_list) is 1:
 		return classes_list.values()[0]
 	
-	print("Still", len(classes_list), 'possible matches')
+	#print("Still", len(classes_list), 'possible matches')
 	
 	#If we exhaused the list by requiring classes, use the list from before the filter.
 	if len(classes_list) == 0:
@@ -198,28 +236,93 @@ def find_url2(mapping, symbol):
 	no_templates_list = find_url_remove_templates(classes_list, symbol)
 	
 	if len(no_templates_list) is 1:
-		return no_templates_list.values()[0]
+		return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
 	
-	print("Still", len(no_templates_list), 'possible matches')
+	#print("Still", len(no_templates_list), 'possible matches')
 	
 	#If not found by now, just return the first one in the list
 	if len(no_templates_list) != 0:
-		return no_templates_list.values()[0]
+		#TODO return a warning here?
+		return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
 	#Else return None if the list is empty
 	else:
-		return None
+		LookupError('Could not find a match')
+
+def return_from_mapping(mapping_entry, normalised_arglist=''):
+	"""
+	Return a mapping to a single URL in the form shown below. This is needed since mapping entries for functions are more complicated due to function overloading.
+	
+	If the mapping to be returned is not a function, this will simply return the mapping entry intact. If the entry is a function it will attempt to get the right version based on the function signature.
+	
+	:Parameters:
+		mapping_entry : dict
+			should be a single entry from the large mapping file corresponding to a single symbol. If the symbol is a function, then ``mappingentry['arglist']`` will be a dictionary mapping normalised signatures to URLs
+		normalised_arglist : string
+			the normalised form of the arglist that the user has requested. This can be empty in which case the function will return just the first element of ``mappingentry['arglist']``. This parameter is ignored if ``mappingentry['kind'] != 'function'``
+	
+	:return: dictionary something like:
+	
+		.. code-block:: python
+		
+			{'kind' : 'function', 'file' : 'something.html#foo'}
+	
+	"""
+	#If it's a function we need to grab the right signature from the arglist.
+	if mapping_entry['kind'] == 'function':
+		#If the user has requested a specific function through specifying an arglist then get the right anchor
+		if normalised_arglist:
+			filename = mapping_entry['arglist'].get(normalised_arglist)
+			if not filename: #If we didn't get the filename because it's not in the mapping then we will just return a random one?
+				#TODO return a warning here!
+				filename = mapping_entry['arglist'].values()[0]
+		else:
+			#Otherwise just return the first entry (if they don't care they get whatever comes first)
+			filename = mapping_entry['arglist'].values()[0]
+		
+		return {'kind' : 'function', 'file' : filename}
+	elif mapping_entry.get('arglist'):
+		#This arglist should only be one entry long and that entry should have '' as its key
+		return {'kind' : mapping_entry['kind'], 'file' : mapping_entry['arglist']['']}
+	
+	#If it's not a function, then return it raw
+	return mapping_entry

 def find_url_piecewise(mapping, symbol):
-	#Match the requested symbol reverse piecewise (split on '::') against the tag names to ensure they match exactly (modulo ambiguity)
-	#So, if in the mapping there is "PolyVox::Volume::FloatVolume" and "PolyVox::Volume" they would be split into:
-	#    ['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
-	#and reversed:
-	#    ['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
-	#and truncated to the shorter of the two:
-	#    ['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
-	#If we're searching for the "PolyVox::Volume" symbol we would compare:
-	#    ['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox']. That doesn't match so we look at the next in the mapping:
-	#    ['Volume', 'PolyVox'] to ['Volume', 'PolyVox']. Good, so we add it to the list
+	"""
+	Match the requested symbol reverse piecewise (split on ``::``) against the tag names to ensure they match exactly (modulo ambiguity)
+	So, if in the mapping there is ``PolyVox::Volume::FloatVolume`` and ``PolyVox::Volume`` they would be split into:
+	
+	.. code-block:: python
+	
+		['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
+	
+	and reversed:
+	
+	.. code-block:: python
+	
+		['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
+	
+	and truncated to the shorter of the two:
+	
+	.. code-block:: python
+	
+		['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
+	
+	If we're searching for the ``PolyVox::Volume`` symbol we would compare:
+	
+	.. code-block:: python
+	
+		['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox'].
+	
+	That doesn't match so we look at the next in the mapping:
+	
+	.. code-block:: python
+	
+		['Volume', 'PolyVox'] to ['Volume', 'PolyVox'].
+	
+	Good, so we add it to the list
+	
+	"""
 	piecewise_list = {}
 	for item, data in mapping.items():
 		split_symbol = symbol.split('::')
@@ -236,27 +339,27 @@ def find_url_piecewise(mapping, symbol):
 		#print split_symbol, split_item
 		
 		if split_symbol == split_item:
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			piecewise_list[item] = data
 	
 	return piecewise_list

 def find_url_classes(mapping, symbol):
-	#Prefer classes over names of constructors
+	"""Prefer classes over names of constructors"""
 	classes_list = {}
 	for item, data in mapping.items():
 		if data['kind'] == 'class':
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			classes_list[item] = data
 	
 	return classes_list

 def find_url_remove_templates(mapping, symbol):
-	#Now, to disambiguate between "PolyVox::Array< 1, ElementType >::operator[]" and "PolyVox::Array::operator[]" matching "operator[]", we will ignore templated (as in C++ templates) tag names by removing names containing '<'
+	"""Now, to disambiguate between ``PolyVox::Array< 1, ElementType >::operator[]`` and ``PolyVox::Array::operator[]`` matching ``operator[]``, we will ignore templated (as in C++ templates) tag names by removing names containing ``<``"""
 	no_templates_list = {}
 	for item, data in mapping.items():
 		if '<' not in item:
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			no_templates_list[item] = data
 	
 	return no_templates_list
@@ -271,42 +374,66 @@ def create_role(app, tag_filename, rootdir):
 	
 	try:
 		tag_file = ET.parse(tag_filename)
-		mapping = parse_tag_file(tag_file)
-	except (IOError):
+		
+		cache_name = os.path.basename(tag_filename)
+		
+		app.info(bold('Checking tag file cache for %s: ' % cache_name), nonl=True)
+		if not hasattr(app.env, 'doxylink_cache'):
+			# no cache present at all, initialise it
+			app.info('No cache at all, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache = { cache_name : {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}}
+		elif not app.env.doxylink_cache.get(cache_name):
+			# Main cache is there but the specific sub-cache for this tag file is not
+			app.info('Sub cache is missing, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
+		elif app.env.doxylink_cache[cache_name]['mtime'] < os.path.getmtime(tag_filename):
+			# tag file has been modified since sub-cache creation
+			app.info('Sub-cache is out of date, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
+		else:
+			#The cache is up to date
+			app.info('Sub-cache is up-to-date')
+	except IOError:
 		tag_file = None
-		app.warn('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename)
+		app.warn(standout('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename))
 	
 	def find_doxygen_link(name, rawtext, text, lineno, inliner, options={}, content=[]):
 		text = utils.unescape(text)
 		# from :name:`title <part>`
 		has_explicit_title, title, part = split_explicit_title(text)
-		
+		warning_messages = []
 		if tag_file:
 			url = find_url(tag_file, part)
+			try:
+				url = find_url2(app.env.doxylink_cache[cache_name]['mapping'], part)
+			except LookupError as error:
+				warning_messages.append('Error while parsing `%s`. Is not a well-formed C++ function call or symbol. If this is not the case, it is a doxylink bug so please report it. Error reported was: %s' % (part, error))
 			if url:
 				
 				#If it's an absolute path then the link will work regardless of the document directory
-				if os.path.isabs(rootdir):
+				#Also check if it is a URL (i.e. it has a 'scheme' like 'http' or 'file')
+				if os.path.isabs(rootdir) or urlparse.urlparse(rootdir).scheme:
 					full_url = join(rootdir, url['file'])
 				#But otherwise we need to add the relative path of the current document to the root source directory to the link
 				else:
 					relative_path_to_docsrc = os.path.relpath(app.env.srcdir, os.path.dirname(inliner.document.current_source))
-					full_url = join(relative_path_to_docsrc, os.sep, rootdir, url['file'])
+					full_url = join(relative_path_to_docsrc, '/', rootdir, url['file']) #We always use the '/' here rather than os.sep since this is a web link avoids problems like documentation/.\../library/doc/ (mixed slashes)
 				
-				if url['kind'] == 'function' and app.config.add_function_parentheses:
+				if url['kind'] == 'function' and app.config.add_function_parentheses and not normalise(title)[1]:
 					title = join(title, '()')
 				
 				pnode = nodes.reference(title, title, internal=False, refuri=full_url)
 				return [pnode], []
 			#By here, no match was found
-			env = app.env
-			env.warn(env.docname, 'Could not find match for `%s` in `%s` tag file' % (part, tag_filename), lineno)
+			warning_messages.append('Could not find match for `%s` in `%s` tag file' % (part, tag_filename))
 		else:
-			env = app.env
-			env.warn(env.docname, 'Could not find match for `%s` because tag file not found' % (part), lineno)
+			warning_messages.append('Could not find match for `%s` because tag file not found' % (part))
 		
 		pnode = nodes.inline(rawsource=title, text=title)
-		return [pnode], []
+		return [pnode], [inliner.reporter.warning(message, line=lineno) for message in warning_messages]
 	
 	return find_doxygen_link