adding developers

Trilarion
2020-02-10 12:35:22 +01:00
parent eeb20a670a
commit 48ade4bc0b
13 changed files with 345 additions and 14 deletions

View File

@ -8,10 +8,10 @@ property: "- " _key ": " _value "\n"
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_value: /.+(?<! )/ // everything until the end of the line, not ending with a space
name: /.+?(?= \()/ // developer name: everything until " ("
number: /[0-9]+/
COMMENT: /^\[comment\]: #.*$\n/m // [comment]: # xxx
_E: /^$\n/m // empty new line
%ignore COMMENT
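For orientation, here is a hedged sketch of the kind of developer.md listing these rules are written against, pieced together from the write_developer_info changes further down in this commit (the developer name, games and contact are made up):

[comment]: # (partly autogenerated content, edit with care, read the manual before)

## Jane Example (2)

- Games: Example Game, Other Game
- Contact: jexample@SF

Roughly, each "- Key: Value" line is a property split into _key and _value, name and number pick the developer name and game count out of the heading, and COMMENT lines are dropped via %ignore.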

View File

@ -0,0 +1,129 @@
"""
Checks the entries and tries to detect additional developer content by retrieving websites or log information from
stored Git repositories.
"""
import os
import requests
from bs4 import BeautifulSoup
from utils import constants as c, utils, osg, osg_github
def developer_info_lookup(name):
    for dev in developer_info:
        if name == dev['name']:
            return dev
    return None
# author names on SourceForge that differ from the names as we store them
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
if __name__ == "__main__":
    # read developer info and write it back (normalizes the formatting of developer.md)
    developer_info = osg.read_developer_info()
    osg.write_developer_info(developer_info)

    # assemble info
    entries = osg.assemble_infos()

    # loop over infos
    developers = ''
    try:
        i = 0
        # active = False
        for entry in entries:
            # if entry['name'] == 'Aleph One':
            #     active = True
            # if not active:
            #     continue

            # for testing purposes: only look at the first 40 entries
            i += 1
            if i > 40:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['name'])
            print(entry_name)

            content = ''
            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']

            # sourceforge project site
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
                response = requests.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find('div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [author[1].a['href'] for author in authors if len(author) == 3]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = 'https://sourceforge.net' + author if not author.startswith('http') else author
                    response = requests.get(url)
                    url = response.url  # could be different now
                    if 'auth/?return_to' in url:
                        # for some reason authorisation is forbidden
                        author_name = author
                        nickname = author
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            urls = []  # note: this overwrites the list above, so the Github lookup below is effectively disabled
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)
    except RuntimeError as e:
        raise e
        # pass
    finally:
        # store developer info
        utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
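For reference, the collected_developer_info.txt written here ends up as blocks of the following shape, one per entry with findings (the entry file, author names and nicknames below are made up):

example.md - Example Game

 Jane Doe : jdoe@SF (not in entry)
 John Roe : jroe@SF (not in devs) (not in entry)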

View File

@ -1,2 +1,3 @@
pygithub
lark-parser
beautifulsoup4

View File

@ -15,7 +15,7 @@ class ListingTransformer(lark.Transformer):
        raise lark.Discard

    def property(self, x):
        return (x[0].value, x[1].value)
        return (x[0].value.lower(), x[1].value)

    def name(self, x):
        return ('name', x[0].value)
@ -71,6 +71,8 @@ code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/',
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
regex_sanitize_name_space_eater = re.compile(r" +")
valid_developer_fields = ('name', 'games', 'contact', 'organization', 'home')
comment_string = '[comment]: # (partly autogenerated content, edit with care, read the manual before)'
@ -378,7 +380,22 @@ def read_developer_info():
    developer_file = os.path.join(c.root_path, 'developer.md')
    grammar_file = os.path.join(c.code_path, 'grammar_listing.lark')
    transformer = ListingTransformer()
    return read_and_parse(developer_file, grammar_file, transformer)
    developers = read_and_parse(developer_file, grammar_file, transformer)
    # now transform a bit more
    for index, dev in enumerate(developers):
        for field in dev.keys():
            if field not in valid_developer_fields:
                raise RuntimeError('Unknown developer field "{}" for developer: {}.'.format(field, dev['name']))
        for field in ('name', 'organization'):
            if field in dev:
                dev[field] = dev[field].strip()
        for field in ('games', 'contact'):
            if field in dev:
                content = dev[field]
                content = content.split(',')
                content = [x.strip() for x in content]
                dev[field] = content
    return developers
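To make the returned structure concrete, a single parsed developer entry would look roughly like this after the extra transformation above, with the comma-separated fields turned into lists (all values here are made up):

{'name': 'Jane Example', 'games': ['Example Game', 'Other Game'], 'contact': ['jexample@GH', 'jexample@SF']}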
def write_developer_info(developers):
@ -386,7 +403,38 @@ def write_developer_info(developers):
    :return:
    """
    # comment
    content = '{}\n'.format(comment_string)

    # number of developers
    content += '# Developer ({})\n\n'.format(len(developers))

    # sort by name
    developers.sort(key=lambda x: str.casefold(x['name']))

    # iterate over them
    for dev in developers:
        # developer name
        content += '## {} ({})\n\n'.format(dev['name'], len(dev['games']))

        # games
        content += '- Games: {}\n'.format(', '.join(sorted(dev['games'], key=str.casefold)))

        # all the remaining fields in alphabetical order
        for field in sorted(dev.keys()):
            if field not in ('name', 'games'):
                value = dev[field]
                field = field.capitalize()
                if isinstance(value, str):
                    content += '- {}: {}\n'.format(field, value)
                else:
                    content += '- {}: {}\n'.format(field, ', '.join(sorted(value, key=str.casefold)))
        content += '\n'

    # write
    developer_file = os.path.join(c.root_path, 'developer.md')
    utils.write_text(developer_file, content)
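Putting the format strings above together, a generated developer.md starts roughly like this (the developer data is made up; only 'games' and 'contact' are lists, so 'Home' stays a plain string):

[comment]: # (partly autogenerated content, edit with care, read the manual before)
# Developer (1)

## Jane Example (2)

- Games: Example Game, Other Game
- Contact: jexample@GH, jexample@SF
- Home: https://example.org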
def read_inspirations_info():

View File

@ -5,16 +5,45 @@ Everything specific to the Github API (via PyGithub).
from github import Github
def normalize_repo_name(repo):
    """
    Brings a repo to the style xxx/yyy.
    """
    prefix = 'https://github.com/'
    if repo.startswith(prefix):
        repo = repo[len(prefix):]
    suffix = '.git'
    if repo.endswith(suffix):
        repo = repo[:-len(suffix)]
    return repo


def repo_get_contributors(repo):
    """
    Resolves the paginated contributor list of a repository into a plain list.
    """
    contributors = []
    c = repo.get_contributors()
    for i in range(c.totalCount):
        contributors.append(c[i])
    return contributors
def retrieve_repo_info(repos):
    """
    For a list of Github repos, retrieves repo information
    For a list of Github repos, retrieves repo information.

    Repos must have the style xxx/yyy, for example "PyGithub/PyGithub".
    """
    single_repo = isinstance(repos, str)
    if single_repo:
        repos = (repos,)
    result = []
    g = Github()
    for repo in repos:
        repo = normalize_repo_name(repo)
        r = g.get_repo(repo)
        e = {'archived': r.archived, 'description': r.description, 'language': r.language,
             'last modified': r.last_modified, 'open issues count': r.open_issues_count,
        e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'description': r.description,
             'language': r.language, 'last modified': r.last_modified, 'open issues count': r.open_issues_count,
             'stars count': r.stargazers_count, 'topics': r.topics, 'repo': repo}
        result.append(e)
    if single_repo:
        result = result[0]
    return result
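A minimal usage sketch of the functions above; the repo is just the PyGithub example from the docstring, and an unauthenticated Github() client is subject to strict API rate limits:

# single repo given as a full URL, normalized internally to 'PyGithub/PyGithub'
info = retrieve_repo_info('https://github.com/PyGithub/PyGithub.git')
print(info['stars count'], info['language'], len(info['contributors']))
# contributors are PyGithub NamedUser objects
for contributor in info['contributors'][:3]:
    print(contributor.login, contributor.name)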