Update Doxylink to the latest development version.

This uses a completely new parsing system based on PyParsing (http://pyparsing.wikispaces.com/), but it should be faster and much more resilient. If this new dependency is deemed over the top, then PyParsing can probably be included directly in SVN, as it is only one Python file.
2010-12-13 17:03:31 +00:00 · 2010-12-13 17:03:31 +00:00 · 755f5247d1
commit 755f5247d1
parent 46c805759c
2 changed files with 382 additions and 102 deletions
--- a/documentation/_extensions/sphinxcontrib/doxylink.py
+++ b/documentation/_extensions/sphinxcontrib/doxylink.py
@ -1,61 +1,16 @@
 # -*- coding: utf-8 -*-
-"""
-doxylink
-~~~~~~~~
-
-Sphinx extension to link to external Doxygen API documentation.
-
-It works much like the extlinks extension but it does some more processing to link C++ symbols against their Doxygen HTML documentation.
-
-When generating your Doxygen documentation, you need to instruct it to create a 'tag' file. This is an XML file which contains the mapping between symbols and HTML files. To make Doxygen create this file ensure that you have a line like::
-
-	GENERATE_TAGFILE = PolyVox.tag
-
-in your ``Doxyfile``.
-
-.. confval:: doxylink
-
-	The environment is set up with a dictionary mapping the interpereted text role
-	to a tuple of tag file and prefix:
-
-	.. code-block:: python
-
-		doxylink = {
-			'polyvox' : ('/home/matt/PolyVox.tag', '/home/matt/PolyVox/html/'),
-			'qtogre' : ('/home/matt/QtOgre.tag', '/home/matt/QtOgre/html/'),
-		}
-
-This allows one to do:
-
-.. code-block:: rst
-
-	:polyvox:`Array <PolyVox::Array>`.
-	:polyvox:`PolyVox::Volume`
-	:qtogre:`QtOgre::Log`
-	:polyvox:`tidyUpMemeory(int) <tidyUpMemory>`
-	:polyvox:`PolyVox::Array::operator[]`
-
-:requires: Python 2.5
-
-.. todo::
-	
-	Make the extension atuomatically re-run if the tag file is altered on disc
-	
-	Find a way to parse the tag file to a DOM tree *once* and then just reference it from then on.
-	
-	Correct function overloading when arguments are given.
-
-:copyright: Copyright 2010 by Matt Williams
-:license: BSD, see LICENSE for details.
-"""
-
-from docutils import nodes, utils

 import os
-
-from sphinx.util.nodes import split_explicit_title
-
 import xml.etree.ElementTree as ET
+import urlparse
+import re
+import itertools
+
+from docutils import nodes, utils
+from sphinx.util.nodes import split_explicit_title
+from sphinx.util.console import bold, standout
+
+from parsing import normalise, ParseException

 def find_url(doc, symbol):
 	"""
@ -110,31 +65,50 @@ def find_url(doc, symbol):
 		for compound in doc.findall('.//compound'):
 			if compound.find('name').text == namespace:
 				for member in compound.findall('member'):
-#					#If this compound object contains the matching member then return it
+					#If this compound object contains the matching member then return it
 					if member.find('name').text == endsymbol:
-						return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
+						return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
 	
 	#Then we'll look at unqualified members
 	for member in doc.findall('.//member'):
 		if member.find('name').text == symbol:
-			return {'file':member.find('anchorfile').text + '#' + member.find('anchor').text, 'kind':member.get('kind')}
+			return {'file':(member.findtext('anchorfile') or compound.findtext('filename')) + '#' + member.find('anchor').text, 'kind':member.get('kind')}
 	
 	return None

 def parse_tag_file(doc):
 	"""
-	Takes in an XML free from a Doxygen tag file and returns a dictionary that looks something like:
+	Takes in an XML tree from a Doxygen tag file and returns a dictionary that looks something like:
 	
 	.. code-block:: python
 	
-		{'PolyVox': {'file': 'namespace_poly_vox.html', 'kind': 'namespace'},
-		 'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html', 'kind': 'class'},
+		{'PolyVox': {'file': 'namespace_poly_vox.html',
+		             'kind': 'namespace'},
+		 'PolyVox::Array': {'file': 'class_poly_vox_1_1_array.html',
+		                    'kind': 'class'},
 		 'PolyVox::Array1DDouble': {'file': 'namespace_poly_vox.html#a7a1f5fd5c4f7fbb4258a495d707b5c13',
 		                            'kind': 'typedef'},
 		 'PolyVox::Array1DFloat': {'file': 'namespace_poly_vox.html#a879a120e49733eba1905c33f8a7f131b',
 		                           'kind': 'typedef'},
 		 'PolyVox::Array1DInt16': {'file': 'namespace_poly_vox.html#aa1463ece448c6ebed55ab429d6ae3e43',
-		                           'kind': 'typedef'}}
+		                           'kind': 'typedef'},
+		 'QScriptContext::throwError': {'arglist': {'( Error error, const QString & text )': 'qscriptcontext.html#throwError',
+		                                            '( const QString & text )': 'qscriptcontext.html#throwError-2'},
+		                                'kind': 'function'},
+		 'QScriptContext::toString': {'arglist': {'()': 'qscriptcontext.html#toString'},
+		                              'kind': 'function'}}
+	
+	Note the different form for functions. This is required to allow for 'overloading by argument type'.
+	
+	To access a filename for a symbol you do:
+	
+	.. code-block:: python
+	
+		symbol_mapping = mapping[symbol]
+		if symbol_mapping['kind'] == 'function':
+			url = symbol_mapping['arglist'][argument_string]
+		else:
+			url = symbol_mapping['file']
 	
 	:Parameters:
 		doc : xml.etree.ElementTree
@ -142,22 +116,86 @@ def parse_tag_file(doc):
 	
 	:return: a dictionary mapping fully qualified symbols to files
 	"""
+	
 	mapping = {}
-	for compound in doc.findall(".//compound"):
-		if compound.get('kind') != 'namespace' and compound.get('kind') != 'class':
-			continue
+	function_list = [] #This is a list of function to be parsed and inserted into mapping at the end of the function.
+	for compound in doc.findall("./compound"):
+		compound_kind = compound.get('kind')
+		if compound_kind != 'namespace' and compound_kind != 'class':
+			continue #Skip everything that isn't a namespace or class
+		
+		compound_name = compound.findtext('name')
+		compound_filename = compound.findtext('filename')
+		
+		#If it's a compound we can simply add it
+		mapping[compound_name] = {'kind' : compound_kind, 'file' : compound_filename}
 		
-		mapping[compound.findtext('name')] = {'kind' : compound.get('kind'), 'file' : compound.findtext('filename')}
 		for member in compound.findall('member'):
-			mapping[join(compound.findtext('name'), '::', member.findtext('name'))] = {'kind' : member.get('kind'), 'file' : join(member.findtext('anchorfile'),'#',member.findtext('anchor')), 'arglist' : member.findtext('arglist')}
+			
+			#If the member doesn't have an <anchorfile> element, use the parent compounds <filename> instead
+			#This is the way it is in the qt.tag and is perhaps an artefact of old Doxygen
+			anchorfile = member.findtext('anchorfile') or compound_filename
+			member_symbol = join(compound_name, '::', member.findtext('name'))
+			member_kind = member.get('kind')
+			arglist_text = member.findtext('./arglist') #If it has an <arglist> then we assume it's a function. Empty <arglist> returns '', not None. Things like typedefs and enums can have empty arglists
+			
+			if arglist_text and member_kind != 'variable' and member_kind != 'typedef' and member_kind != 'enumeration':
+				function_list.append((member_symbol, arglist_text, member_kind, join(anchorfile,'#',member.findtext('anchor'))))
+			else:
+				mapping[member_symbol] = {'kind' : member.get('kind'), 'file' : join(anchorfile,'#',member.findtext('anchor'))}
+	
+	for old_tuple, normalised_tuple in zip(function_list, itertools.imap(normalise, (member_tuple[1] for member_tuple in function_list))):
+		member_symbol = old_tuple[0]
+		original_arglist = old_tuple[1]
+		kind = old_tuple[2]
+		anchor_link = old_tuple[3]
+		normalised_arglist = normalised_tuple[1]
+		if normalised_tuple[1] is not None: #This is a 'flag' for a ParseException having happened
+			if mapping.get(member_symbol):
+				mapping[member_symbol]['arglist'][normalised_arglist] = anchor_link
+			else:
+				mapping[member_symbol] = {'kind' : kind, 'arglist' : {normalised_arglist : anchor_link}}
+		else:
+			print 'Skipping %s %s%s. Error reported from parser was: %s' % (old_tuple[2], old_tuple[0], old_tuple[1], normalised_tuple[0])
+	
+	#from pprint import pprint; pprint(mapping)
 	return mapping

 def find_url2(mapping, symbol):
-	print "\n\nSearching for", symbol
+	"""
+	Return the URL for a given symbol.
+	
+	This is where the magic happens.
+	
+	.. todo::
+		
+		Maybe print a list of all possible matches as a warning (but still only return the first)
+	
+	:Parameters:
+		mapping : dictionary
+			A dictionary of the form returned by :py:func:`parse_tag_file`
+		symbol : string
+			The symbol to lookup in the file. E.g. something like 'PolyVox::Array' or 'tidyUpMemory'
+	
+	:return: String representing the filename part of the URL
+	
+	:raises:
+		LookupError
+			Raised if the symbol could not be matched in the file
+	"""
+	#print "\n\nSearching for", symbol
+	try:
+		symbol, normalised_arglist =  normalise(symbol)
+	except ParseException as error:
+		raise LookupError(error)
+	#print symbol, normalised_arglist
 	
 	#If we have an exact match then return it.
 	if mapping.get(symbol):
-		return mapping[symbol]
+		#print ('Exact match')
+		return return_from_mapping(mapping[symbol], normalised_arglist)
+	
+	#If the user didn't pass in any arguments, i.e. `arguments == ''` then they don't care which version of the overloaded function they get.
 	
 	#First we check for any mapping entries which even slightly match the requested symbol
 	#endswith_list = {}
@ -177,9 +215,9 @@ def find_url2(mapping, symbol):
 	
 	#If there is only one match, return it.
 	if len(piecewise_list) is 1:
-		return piecewise_list.values()[0]
+		return return_from_mapping(piecewise_list.values()[0], normalised_arglist)
 	
-	print("Still", len(piecewise_list), 'possible matches')
+	#print("Still", len(piecewise_list), 'possible matches')
 	
 	#If there is more than one item in piecewise_list then there is an ambiguity
 	#Often this is due to the symbol matching the name of the constructor as well as the class name itself
@ -189,7 +227,7 @@ def find_url2(mapping, symbol):
 	if len(classes_list) is 1:
 		return classes_list.values()[0]
 	
-	print("Still", len(classes_list), 'possible matches')
+	#print("Still", len(classes_list), 'possible matches')
 	
 	#If we exhaused the list by requiring classes, use the list from before the filter.
 	if len(classes_list) == 0:
@ -198,28 +236,93 @@ def find_url2(mapping, symbol):
 	no_templates_list = find_url_remove_templates(classes_list, symbol)
 	
 	if len(no_templates_list) is 1:
-		return no_templates_list.values()[0]
+		return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
 	
-	print("Still", len(no_templates_list), 'possible matches')
+	#print("Still", len(no_templates_list), 'possible matches')
 	
 	#If not found by now, just return the first one in the list
 	if len(no_templates_list) != 0:
-		return no_templates_list.values()[0]
+		#TODO return a warning here?
+		return return_from_mapping(no_templates_list.values()[0], normalised_arglist)
 	#Else return None if the list is empty
 	else:
-		return None
+		LookupError('Could not find a match')
+
+def return_from_mapping(mapping_entry, normalised_arglist=''):
+	"""
+	Return a mapping to a single URL in the form. This is needed since mapping entries for functions are more complicated due to function overriding.
+	
+	If the mapping to be returned is not a function, this will simply return the mapping entry intact. If the entry is a function it will attempt to get the right version based on the function signature.
+	
+	:Parameters:
+		mapping_entry : dict
+			should be a single entry from the large mapping file corresponding to a single symbol. If the symbol is a function, then ``mappingentry['arglist']`` will be a dictionary mapping normalised signatures to URLs
+		normalised_arglist : string
+			the normalised form of the arglist that the user has requested. This can be empty in which case the function will return just the first element of ``mappingentry['arglist']``. This parameter is ignored if ``mappingentry['kind'] != 'function'``
+	
+	:return: dictionary something like:
+	
+		.. code-block:: python
+		
+			{'kind' : 'function', 'file' : 'something.html#foo'}
+	
+	"""
+	#If it's a function we need to grab the right signature from the arglist.
+	if mapping_entry['kind'] == 'function':
+		#If the user has requested a specific function through specifying an arglist then get the right anchor
+		if normalised_arglist:
+			filename = mapping_entry['arglist'].get(normalised_arglist)
+			if not filename: #If we didn't get the filename because it's not in the mapping then we will just return a random one?
+				#TODO return a warning here!
+				filename = mapping_entry['arglist'].values()[0]
+		else:
+			#Otherwise just return the first entry (if they don't care they get whatever comes first)
+			filename = mapping_entry['arglist'].values()[0]
+		
+		return {'kind' : 'function', 'file' : filename}
+	elif mapping_entry.get('arglist'):
+		#This arglist should only be one entry long and that entry should have '' as its key
+		return {'kind' : mapping_entry['kind'], 'file' : mapping_entry['arglist']['']}
+	
+	#If it's not a function, then return it raw
+	return mapping_entry

 def find_url_piecewise(mapping, symbol):
-	#Match the requested symbol reverse piecewise (split on '::') against the tag names to ensure they match exactly (modulo ambiguity)
-	#So, if in the mapping there is "PolyVox::Volume::FloatVolume" and "PolyVox::Volume" they would be split into:
-	#    ['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
-	#and reversed:
-	#    ['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
-	#and truncated to the shorter of the two:
-	#    ['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
-	#If we're searching for the "PolyVox::Volume" symbol we would compare:
-	#    ['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox']. That doesn't match so we look at the next in the mapping:
-	#    ['Volume', 'PolyVox'] to ['Volume', 'PolyVox']. Good, so we add it to the list
+	"""
+	Match the requested symbol reverse piecewise (split on ``::``) against the tag names to ensure they match exactly (modulo ambiguity)
+	So, if in the mapping there is ``PolyVox::Volume::FloatVolume`` and ``PolyVox::Volume`` they would be split into:
+	
+	.. code-block:: python
+	
+		['PolyVox', 'Volume', 'FloatVolume'] and ['PolyVox', 'Volume']
+	
+	and reversed:
+	
+	.. code-block:: python
+	
+		['FloatVolume', 'Volume', 'PolyVox'] and ['Volume', 'PolyVox']
+	
+	and truncated to the shorter of the two:
+	
+	.. code-block:: python
+	
+		['FloatVolume', 'Volume'] and ['Volume', 'PolyVox']
+	
+	If we're searching for the ``PolyVox::Volume`` symbol we would compare:
+	
+	.. code-block:: python
+	
+		['Volume', 'PolyVox'] to ['FloatVolume', 'Volume', 'PolyVox'].
+	
+	That doesn't match so we look at the next in the mapping:
+	
+	.. code-block:: python
+	
+		['Volume', 'PolyVox'] to ['Volume', 'PolyVox'].
+	
+	Good, so we add it to the list
+	
+	"""
 	piecewise_list = {}
 	for item, data in mapping.items():
 		split_symbol = symbol.split('::')
@ -236,27 +339,27 @@ def find_url_piecewise(mapping, symbol):
 		#print split_symbol, split_item
 		
 		if split_symbol == split_item:
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			piecewise_list[item] = data
 	
 	return piecewise_list

 def find_url_classes(mapping, symbol):
-	#Prefer classes over names of constructors
+	"""Prefer classes over names of constructors"""
 	classes_list = {}
 	for item, data in mapping.items():
 		if data['kind'] == 'class':
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			classes_list[item] = data
 	
 	return classes_list

 def find_url_remove_templates(mapping, symbol):
-	#Now, to disambiguate between "PolyVox::Array< 1, ElementType >::operator[]" and "PolyVox::Array::operator[]" matching "operator[]", we will ignore templated (as in C++ templates) tag names by removing names containing '<'
+	"""Now, to disambiguate between ``PolyVox::Array< 1, ElementType >::operator[]`` and ``PolyVox::Array::operator[]`` matching ``operator[]``, we will ignore templated (as in C++ templates) tag names by removing names containing ``<``"""
 	no_templates_list = {}
 	for item, data in mapping.items():
 		if '<' not in item:
-			print symbol + ' : ' + item
+			#print symbol + ' : ' + item
 			no_templates_list[item] = data
 	
 	return no_templates_list
@ -271,42 +374,66 @@ def create_role(app, tag_filename, rootdir):
 	
 	try:
 		tag_file = ET.parse(tag_filename)
-		mapping = parse_tag_file(tag_file)
-	except (IOError):
+		
+		cache_name = os.path.basename(tag_filename)
+		
+		app.info(bold('Checking tag file cache for %s: ' % cache_name), nonl=True)
+		if not hasattr(app.env, 'doxylink_cache'):
+			# no cache present at all, initialise it
+			app.info('No cache at all, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache = { cache_name : {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}}
+		elif not app.env.doxylink_cache.get(cache_name):
+			# Main cache is there but the specific sub-cache for this tag file is not
+			app.info('Sub cache is missing, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
+		elif app.env.doxylink_cache[cache_name]['mtime'] < os.path.getmtime(tag_filename):
+			# tag file has been modified since sub-cache creation
+			app.info('Sub-cache is out of date, rebuilding...')
+			mapping = parse_tag_file(tag_file)
+			app.env.doxylink_cache[cache_name] = {'mapping' : mapping, 'mtime' : os.path.getmtime(tag_filename)}
+		else:
+			#The cache is up to date
+			app.info('Sub-cache is up-to-date')
+	except IOError:
 		tag_file = None
-		app.warn('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename)
+		app.warn(standout('Could not open tag file %s. Make sure your `doxylink` config variable is set correctly.' % tag_filename))
 	
 	def find_doxygen_link(name, rawtext, text, lineno, inliner, options={}, content=[]):
 		text = utils.unescape(text)
 		# from :name:`title <part>`
 		has_explicit_title, title, part = split_explicit_title(text)
-		
+		warning_messages = []
 		if tag_file:
 			url = find_url(tag_file, part)
+			try:
+				url = find_url2(app.env.doxylink_cache[cache_name]['mapping'], part)
+			except LookupError as error:
+				warning_messages.append('Error while parsing `%s`. Is not a well-formed C++ function call or symbol. If this is not the case, it is a doxylink bug so please report it. Error reported was: %s' % (part, error))
 			if url:
 				
 				#If it's an absolute path then the link will work regardless of the document directory
-				if os.path.isabs(rootdir):
+				#Also check if it is a URL (i.e. it has a 'scheme' like 'http' or 'file')
+				if os.path.isabs(rootdir) or urlparse.urlparse(rootdir).scheme:
 					full_url = join(rootdir, url['file'])
 				#But otherwise we need to add the relative path of the current document to the root source directory to the link
 				else:
 					relative_path_to_docsrc = os.path.relpath(app.env.srcdir, os.path.dirname(inliner.document.current_source))
-					full_url = join(relative_path_to_docsrc, os.sep, rootdir, url['file'])
+					full_url = join(relative_path_to_docsrc, '/', rootdir, url['file']) #We always use the '/' here rather than os.sep since this is a web link avoids problems like documentation/.\../library/doc/ (mixed slashes)
 				
-				if url['kind'] == 'function' and app.config.add_function_parentheses:
+				if url['kind'] == 'function' and app.config.add_function_parentheses and not normalise(title)[1]:
 					title = join(title, '()')
 				
 				pnode = nodes.reference(title, title, internal=False, refuri=full_url)
 				return [pnode], []
 			#By here, no match was found
-			env = app.env
-			env.warn(env.docname, 'Could not find match for `%s` in `%s` tag file' % (part, tag_filename), lineno)
+			warning_messages.append('Could not find match for `%s` in `%s` tag file' % (part, tag_filename))
 		else:
-			env = app.env
-			env.warn(env.docname, 'Could not find match for `%s` because tag file not found' % (part), lineno)
+			warning_messages.append('Could not find match for `%s` because tag file not found' % (part))
 		
 		pnode = nodes.inline(rawsource=title, text=title)
-		return [pnode], []
+		return [pnode], [inliner.reporter.warning(message, line=lineno) for message in warning_messages]
 	
 	return find_doxygen_link

--- a/documentation/_extensions/sphinxcontrib/parsing.py
+++ b/documentation/_extensions/sphinxcontrib/parsing.py
@ -0,0 +1,153 @@
+#import multiprocessing
+import itertools
+
+from pyparsing import Word, Literal, alphas, nums, alphanums, OneOrMore, Optional, SkipTo, ParseException, Group, ZeroOrMore, Suppress, Combine, delimitedList, quotedString, nestedExpr, ParseResults, oneOf
+
+# define punctuation - reuse of expressions helps packratting work better
+LPAR,RPAR,LBRACK,RBRACK,COMMA,EQ = map(Literal,"()[],=")
+
+#Qualifier to go in front of type in the argument list (unsigned const int foo)
+qualifier = OneOrMore(oneOf('const unsigned typename struct enum'))
+
+def turn_parseresults_to_list(s, loc, toks):
+	return ParseResults(normalise_templates(toks[0].asList()))
+
+def normalise_templates(toks, isinstance=isinstance, basestring=basestring):
+	s_list = ['<']
+	s_list_append = s_list.append #lookup append func once, instead of many times
+	for tok in toks:
+		if isinstance(tok, basestring): #See if it's a string
+			s_list_append(' ' + tok)
+		else:
+			#If it's not a string
+			s_list_append(normalise_templates(tok))
+	s_list_append(' >')
+	return ''.join(s_list)
+
+#Skip pairs of brackets.
+angle_bracket_pair = nestedExpr(opener='<',closer='>').setParseAction(turn_parseresults_to_list)
+#TODO Fix for nesting brackets
+parentheses_pair = LPAR + SkipTo(RPAR) + RPAR
+square_bracket_pair = LBRACK + SkipTo(RBRACK) + RBRACK
+
+#The raw type of the input, i.e. 'int' in (unsigned const int * foo)
+#TODO I guess this should be a delimited list (by '::') of name and angle brackets
+input_type = Combine(Word(alphanums + ':_') + Optional(angle_bracket_pair + Optional(Word(alphanums + ':_'))))
+
+#A number. e.g. -1, 3.6 or 5
+number = Word('-.' + nums)
+
+#The name of the argument. We will ignore this but it must be matched anyway.
+input_name = OneOrMore(Word(alphanums + '_') | angle_bracket_pair | parentheses_pair | square_bracket_pair)
+
+#Grab the '&', '*' or '**' type bit in (const QString & foo, int ** bar)
+pointer_or_reference = oneOf('* &')
+
+#The '=QString()' or '=false' bit in (int foo = 4, bool bar = false)
+default_value = Literal('=') + OneOrMore(number | quotedString | input_type | parentheses_pair | angle_bracket_pair | square_bracket_pair | Word('|&^'))
+
+#A combination building up the interesting bit -- the argument type, e.g. 'const QString &', 'int' or 'char*'
+argument_type = Optional(qualifier, default='')("qualifier") + \
+                input_type("input_type") + \
+                Optional(pointer_or_reference, default='')("pointer_or_reference1") + \
+                Optional('const')('const_pointer_or_reference') + \
+                Optional(pointer_or_reference, default='')("pointer_or_reference2")
+
+#Argument + variable name + default
+argument = Group(argument_type('argument_type') + Optional(input_name) + Optional(default_value))
+
+#List of arguments in parentheses with an optional 'const' on the end
+arglist = LPAR + delimitedList(argument)('arg_list') + Optional(COMMA + '...')('var_args') + RPAR
+
+def normalise(symbol):
+	"""
+	Takes a C++ symbol or function and splits it into symbol and a normalised argument list.
+	
+	:Parameters:
+		symbol : string
+			A C++ symbol or function definition like ``PolyVox::Volume``, ``Volume::printAll() const``
+	
+	:return:
+		a tuple consisting of two strings: ``(qualified function name or symbol, normalised argument list)``
+	"""
+	
+	try:
+		bracket_location = symbol.index('(')
+		#Split the input string into everything before the opening bracket and everything else
+		function_name = symbol[:bracket_location]
+		arglist_input_string = symbol[bracket_location:]
+	except ValueError:
+		#If there's no brackets, then there's no function signature. This means the passed in symbol is just a type name
+		return symbol, ''
+	
+	#This is a very common signature so we'll make a special case for it. It requires no parsing anyway
+	if arglist_input_string.startswith('()'):
+		if arglist_input_string in ('()', '()=0'):
+			return function_name, arglist_input_string
+		elif arglist_input_string in ('() const ', '() const', '() const =0'):
+			return function_name, '() const'
+	
+	#By now we're left with something like "(blah, blah)", "(blah, blah) const" or "(blah, blah) const =0"
+	try:
+		closing_bracket_location = arglist_input_string.rindex(')')
+		arglist_suffix = arglist_input_string[closing_bracket_location+1:]
+		arglist_input_string = arglist_input_string[:closing_bracket_location+1]
+	except ValueError:
+		#This shouldn't happen.
+		print 'Could not find closing bracket in %s' % arglist_input_string
+		raise
+	
+	try:
+		result = arglist.parseString(arglist_input_string)
+	except ParseException as error:
+		#print symbol
+		#print pe
+		return str(error), None
+	else:
+		#Will be a list or normalised string arguments
+		#e.g. ['OBMol&', 'vector< int >&', 'OBBitVec&', 'OBBitVec&', 'int', 'int']
+		normalised_arg_list = []
+		
+		#Cycle through all the matched arguments
+		for arg in result.arg_list:
+			#Here is where we build up our normalised form of the argument
+			argument_string_list = ['']
+			if arg.qualifier:
+				argument_string_list.append(''.join((arg.qualifier,' ')))
+			argument_string_list.append(arg.input_type)
+		
+			#Functions can have a funny combination of *, & and const between the type and the name so build up a list of those here:
+			const_pointer_ref_list = []
+			const_pointer_ref_list.append(arg.pointer_or_reference1)
+			if arg.const_pointer_or_reference:
+				const_pointer_ref_list.append(''.join((' ', arg.const_pointer_or_reference, ' ')))
+			# same here
+			const_pointer_ref_list.append(arg.pointer_or_reference2)
+			#And combine them into a single normalised string and add them to the argument list
+			argument_string_list.extend(const_pointer_ref_list)
+		
+			#Finally we join our argument string and add it to our list
+			normalised_arg_list.append(''.join(argument_string_list))
+		
+		#If the function contains a variable number of arguments (int foo, ...) then add them on.
+		if result.var_args:
+			normalised_arg_list.append('...')
+		
+		#Combine all the arguments and put parentheses around it
+		normalised_arg_list_string = ''.join(['(', ', '.join(normalised_arg_list), ')'])
+		
+		#Add a const onto the end
+		if 'const' in arglist_suffix:
+			normalised_arg_list_string += ' const'
+		
+		return function_name, normalised_arg_list_string
+	
+	#TODO Maybe this should raise an exception?
+	return None
+
+def normalise_list(list_of_symbols):
+	#normalise_pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
+	#results = normalise_pool.map(normalise, list_of_symbols)
+	#normalise_pool.terminate()
+	results = itertools.imap(normalise, list_of_symbols)
+	return results