developer import from sourceforge

This commit is contained in:
Trilarion
2021-01-06 16:13:17 +01:00
parent 489adf0f88
commit 32ae77c7da
99 changed files with 2873 additions and 189 deletions

View File

@@ -191,6 +191,13 @@ https://gamejolt.com/ (search there)
https://games.kde.org/ (all of them)
https://games.kde.org/old/kde_arcade.php
https://gdevelop-app.com/
https://github.com/FaronBracy/RogueSharp
https://github.com/jmorton06/Lumos
https://github.com/codenamecpp/carnage3d
https://github.com/zhangdoa/InnocenceEngine
https://github.com/marukrap/RoguelikeDevResources
http://www.gjt.org/ (all there)
https://github.blog/2014-01-06-github-game-off-ii-winners/
https://github.com/00-Evan/shattered-pixel-dungeon
https://github.com/00-Evan/shattered-pixel-dungeon-gdx
https://github.com/acedogblast/Project-Uranium-Godot

code/github_import.py (new file, 28 lines)

@@ -0,0 +1,28 @@
"""
Uses the Github API to learn more about the Github projects.
"""
# Github
urls = [x for x in repos if x.startswith('https://github.com/')]
urls = []
for url in urls:
print(' github repo: {}'.format(url))
github_info = osg_github.retrieve_repo_info(url)
for contributor in github_info['contributors']:
name = contributor.name
dev = developer_info_lookup(name)
in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
in_entry = name in entry_developer
if in_devs and in_entry:
continue # already existing in entry and devs
content += ' {}: {}@GH'.format(name, contributor.login)
if contributor.blog:
content += ' url: {}'.format(contributor.blog)
if not in_devs:
content += ' (not in devs)'
if not in_entry:
content += ' (not in entry)'
content += '\n'
if __name__ == "__main__":

View File

@@ -3,119 +3,8 @@ Checks the entries and tries to detect additional developer content, by retrieving it from the
stored Git repositories.
"""
import os
import sys
import requests
from utils import osg, osg_ui
from bs4 import BeautifulSoup
from utils import constants as c, utils, osg, osg_github
# author names in SF that aren't the author names how we have them
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray',
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
def test():
    # loop over infos
    developers = ''
    try:
        i = 0
        # active = False
        for entry in entries:
            # if entry['Name'] == 'Aleph One':
            #     active = True
            # if not active:
            #     continue

            # for testing purposes
            i += 1
            if i > 40:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['Name'])
            print(entry_name)
            content = ''
            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']

            # sourceforge project site
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
                response = requests.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find('div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [author[1].a['href'] for author in authors if len(author) == 3]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = 'https://sourceforge.net' + author if not author.startswith('http') else author
                    response = requests.get(url)
                    url = response.url  # could be different now
                    if 'auth/?return_to' in url:
                        # for some reason authorisation is forbidden
                        author_name = author
                        nickname = author
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            urls = []
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)
    except RuntimeError as e:
        raise e
        # pass
    finally:
        # store developer info
        utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
from utils import osg_ui
from utils import osg
class DevelopersMaintainer:
@@ -202,6 +91,16 @@ class DevelopersMaintainer:
        self.entries = osg.read_entries()
        print('{} entries read'.format(len(self.entries)))

    def special_ops(self):
        # need entries loaded
        if not self.entries:
            print('entries not yet loaded')
            return
        for entry in self.entries:
            for developer in entry.get('Developer', []):
                if developer.comment:
                    print('{:<25} - {:<25} - {}'.format(entry['File'], developer.value, developer.comment))


if __name__ == "__main__":
@@ -214,6 +113,7 @@ if __name__ == "__main__":
        'Check for orphans': m.check_for_orphans,
        'Check for games in developers not listed': m.check_for_missing_developers_in_entries,
        'Update developers from entries': m.update_developers_from_entries,
        'Special': m.special_ops,
        'Read entries': m.read_entries
    }

View File

@@ -859,21 +859,29 @@ class EntriesMaintainer:
            print('entries not yet loaded')
            return
        # combine content keywords
        n = len('content ')
        # cvs without any git
        for entry in self.entries:
            keywords = entry['Keyword']
            content = [keyword for keyword in keywords if keyword.startswith('content')]
            if len(content) > 1:
                # remove from keywords
                keywords = [keyword for keyword in keywords if keyword not in content]
                # remove prefix
                content = [str(keyword)[n:].strip() for keyword in content]
                # join with +
                content = 'content {}'.format(' + '.join(content))
                keywords.append(osg_parse.ValueWithComment(content))
                entry['Keyword'] = keywords
                print('fixed "{}"'.format(entry['File']))
            repos = entry['Code repository']
            cvs = [repo for repo in repos if 'cvs' in repo]
            git = [repo for repo in repos if 'git' in repo]
            if len(cvs) > 0 and len(git) == 0:
                print('Entry "{}" with repos: {}'.format(entry['File'], repos))
        # # combine content keywords
        # n = len('content ')
        # for entry in self.entries:
        #     keywords = entry['Keyword']
        #     content = [keyword for keyword in keywords if keyword.startswith('content')]
        #     if len(content) > 1:
        #         # remove from keywords
        #         keywords = [keyword for keyword in keywords if keyword not in content]
        #         # remove prefix
        #         content = [str(keyword)[n:].strip() for keyword in content]
        #         # join with +
        #         content = 'content {}'.format(' + '.join(content))
        #         keywords.append(osg_parse.ValueWithComment(content))
        #         entry['Keyword'] = keywords
        #         print('fixed "{}"'.format(entry['File']))
        print('special ops finished')

code/sourceforge_import.py (new file, 152 lines)

@@ -0,0 +1,152 @@
"""
Scrapes Sourceforge project sites and adds (mostly developer) information to our database.
""" # TODO sourceforge sites that are not existing anymore but we have an archive link, also scrape
import os
import json
import requests
from bs4 import BeautifulSoup
from utils import constants as c, utils, osg, osg_parse
sf_entries_file = os.path.join(c.code_path, 'sourceforge_entries.txt')
prefix = 'https://sourceforge.net/projects/'
# author names in SF that aren't the author names how we have them
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'baris yuksel': 'Baris Yuksel',
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic',
'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen'}
SF_ignore_list = ('', 'Arianne Integration Bot')
def collect_sourceforge_entries():
"""
Reads the entries of the database and collects all entries with sourceforge as project site
"""
# read entries
entries = osg.read_entries()
print('{} entries read'.format(len(entries)))
# loop over entries
files = []
for entry in entries:
urls = [x for x in entry['Home'] if x.startswith(prefix)]
if urls:
files.append(entry['File'])
# write to file
print('{} entries with sourceforge projects'.format(len(files)))
utils.write_text(sf_entries_file, json.dumps(files, indent=1))
def sourceforge_import():
"""
:return:
"""
files = json.loads(utils.read_text(sf_entries_file))
all_developers = osg.read_developers()
print(' {} developers read'.format(len(all_developers)))
all_developers_changed = False
# all exceptions that happen will be eaten (but will end the execution)
try:
# loop over each entry
for index, file in enumerate(files):
print(' process {}'.format(file))
# read entry
entry = osg.read_entry(file)
developers = entry.get('Developer', [])
urls = [x.value for x in entry['Home'] if x.startswith('https://sourceforge.net/projects/')]
entry_changed = False
for url in urls:
print(' sf project {}'.format(url))
if not url.endswith('/'):
print('error: sf project does not end with slash')
url += '/'
# members
url_members = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
response = requests.get(url_members)
if response.status_code != 200:
raise RuntimeError('url {} not accessible'.format(url_members))
soup = BeautifulSoup(response.text, 'html.parser')
authors = soup.find('div', id='content_base').find('table').find_all('tr')
authors = [author.find_all('td') for author in authors]
authors = [author[1].a['href'] for author in authors if len(author) == 3]
for author in authors:
# sometimes author already contains the full url, sometimes not
url_author = 'https://sourceforge.net' + author if not author.startswith('http') else author
response = requests.get(url_author)
url_author = response.url # could be different now
if 'auth/?return_to' in url_author:
# for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
author_name = author[3:-1]
nickname = author_name
else:
soup = BeautifulSoup(response.text, 'html.parser')
author_name = soup.h1.get_text()
author_name = SF_alias_list.get(author_name, author_name) # replace by alias if possible
nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
nickname = nickname.replace('\n', '').strip()
nickname += '@SF' # our indication of the platform to search for
if author_name in SF_ignore_list:
continue
# look author up in entry developers
if author_name not in developers:
print(' dev "{}" added to entry {}'.format(author_name, file))
entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(author_name)]
entry_changed = True
developers = entry.get('Developer', [])
# look author and SF nickname up in developers data base
if author_name in all_developers:
dev = all_developers[author_name]
if not nickname in dev.get('Contact', []):
print(' existing dev "{}" added nickname ({}) to developer database'.format(author_name, nickname))
# check that name has not already @SF contact
if any(x.endswith('@SF') for x in dev.get('Contact', [])):
print('warning: already SF contact')
all_developers[author_name]['Contact'] = dev.get('Contact', []) + [nickname]
all_developers_changed = True
else:
print(' dev "{}" ({}) added to developer database'.format(author_name, nickname))
all_developers[author_name] = {'Name': author_name, 'Contact': nickname, 'Games': [entry['Title']]}
all_developers_changed = True
if entry_changed:
# save entry
osg.write_entry(entry)
print(' entry updated')
except:
raise
finally:
# shorten file list
utils.write_text(sf_entries_file, json.dumps(files[index:], indent=1))
# save entry
osg.write_entry(entry)
print(' entry updated')
# maybe save all developers
if all_developers_changed:
# save all developers
osg.write_developers(all_developers)
print('developers database updated')
if __name__ == "__main__":
# collect entries
# collect_sourceforge_entries()
# import information from sf
sourceforge_import()
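
The import is deliberately resumable: the finally block rewrites sourceforge_entries.txt with the files that have not been completed yet, so an aborted run can simply be started again. A rough illustration of that behaviour (a hypothetical driver, not part of the commit; the entry file names are made-up examples and it is assumed to run from the code directory):

# Hypothetical illustration of resuming an aborted run
import json
from utils import utils
from sourceforge_import import collect_sourceforge_entries, sourceforge_import, sf_entries_file

collect_sourceforge_entries()   # writes e.g. ["0ad.md", "warzone_2100.md", ...]
try:
    sourceforge_import()        # may stop midway, e.g. when a members page is not reachable
except Exception:
    remaining = json.loads(utils.read_text(sf_entries_file))
    print('{} entries left, rerun sourceforge_import() to continue'.format(len(remaining)))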

View File

@@ -260,6 +260,35 @@ def read_entries():
    return entries


def read_entry(file):
    """
    Reads a single entry
    :param file: the entry file (without path)
    :return: the entry
    """
    # setup parser and transformer
    grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
    grammar = utils.read_text(grammar_file)
    parse = osg_parse.create(grammar, osg_parse.EntryTransformer)

    # read entry file
    content = utils.read_text(os.path.join(c.entries_path, file))
    if not content.endswith('\n'):
        content += '\n'

    # parse and transform entry content
    try:
        entry = parse(content)
        entry = [('File', file),] + entry  # add file information to the beginning
        entry = check_and_process_entry(entry)
    except Exception as e:
        print('{} - {}'.format(file, e))
        raise RuntimeError(e)

    return entry


def check_and_process_entry(entry):
    message = ''
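
osg_parse.create itself is not shown in this commit. Assuming it is a thin wrapper around lark (which the grammar file and the EntryTransformer suggest), it could look roughly like the sketch below; this is an assumption, not necessarily the project's actual implementation:

# Hypothetical sketch of osg_parse.create built on lark
import lark

def create(grammar, transformer_class):
    # compile the grammar once and return a callable that parses text and transforms the tree
    parser = lark.Lark(grammar)

    def parse(text):
        tree = parser.parse(text)
        return transformer_class().transform(tree)

    return parse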

View File

@@ -21,11 +21,11 @@ class ListingTransformer(lark.Transformer):
    def property(self, x):
        """
        The key of a property will be converted to lower case and the value part is the second part
        Key is first part, values are following.
        :param x:
        :return:
        """
        return x[0], x[1:]
        return x[0].value, x[1:]

    def name(self, x):
        """