A few additions from the backlog and further work on the scripts

Trilarion
2019-09-16 12:37:52 +02:00
parent 49424eb857
commit 4fab32b287
32 changed files with 359 additions and 243 deletions

View File

@ -282,7 +282,9 @@
"https://github.com/aburch/simutrans.git",
"https://github.com/acmepjz/meandmyshadow.git",
"https://github.com/adamenkov/jet-story.git",
"https://github.com/afritz1/OpenTESArena.git",
"https://github.com/ahmetkasif/KittenMaxit.git",
"https://github.com/ajweeks/FlexEngine.git",
"https://github.com/ajxs/jsFO.git",
"https://github.com/akarnokd/open-ig.git",
"https://github.com/albertz/openlierox.git",
@ -290,6 +292,7 @@
"https://github.com/alexdantas/www.git",
"https://github.com/alexknvl/fonline.git",
"https://github.com/alphaonex86/CatchChallenger.git",
"https://github.com/amroibrahim/DIYDoom.git",
"https://github.com/anael-seghezzi/Maratis-4.git",
"https://github.com/andrettin/wyrmsun.git",
"https://github.com/andrewfenn/Hardwar.git",

View File

@ -87,11 +87,8 @@ https://fedoraproject.org/wiki/SIGs/Games#List_of_games_we_will_NOT_package
https://flathub.org/home (use it for Linux packaging) / https://flathub.org/apps/category/Game
https://freegamer.blogspot.com/2015/11/top-3-open-source-pinball-games.html
https://futurepinball.com/
https://github.com/afritz1/OpenTESArena
https://github.com/ajweeks/FlexEngine
https://github.com/amerkoleci/alimer
https://github.com/amerkoleci/vortice
https://github.com/amroibrahim/DIYDoom
https://github.com/Anthonymcqueen21/Pygame---Alien-Invasion
https://github.com/ao-libre/ao-cliente
https://github.com/apsillers/lords-of-the-fey
@ -123,6 +120,7 @@ https://github.com/cubei/FlappyCow
https://github.com/cyberegoorg/cetech
https://github.com/DaemonEngine/Daemon
https://github.com/Dariasteam/TowerJumper
https://github.com/DeflatedPickle/FAOSDance
https://github.com/demonixis/C3DE
https://github.com/digitall/scummvm-deskadv
https://github.com/DigitalPulseSoftware/NazaraEngine
@ -141,6 +139,7 @@ https://github.com/freeboardgame/FreeBoardGame.org
https://github.com/FreeCol/freecol
https://github.com/gamearians
https://github.com/GentenStudios/quartz-engine
https://github.com/grantjenks/free-python-games (check all)
https://github.com/GunshipPenguin/open_flood
https://github.com/hedgewars/hw
https://github.com/hparcells/cards-against-humanity
@ -172,6 +171,7 @@ https://github.com/pelya/commandergenius
https://github.com/pld-linux
https://github.com/ptitSeb/gl4es
https://github.com/raysan5/rfxgen
https://github.com/Realm667/WolfenDoom
https://github.com/romlok/godot-gdhexgrid
https://github.com/RonenNess/GeonBit.UI
https://github.com/RPG-Paper-Maker/RPG-Paper-Maker
@ -220,7 +220,6 @@ https://pyweek.org/4/entries/ (Ascent of Justice)
https://revolutionarygamesstudio.com/ Thrive
https://salsa.debian.org/games-team/etw
https://scratch.mit.edu/ (https://en.scratch-wiki.info/wiki/Scratch_Source_Code)
https://secretchronicles.org/en/
https://sourceforge.net/projects/actiongame/
https://sourceforge.net/projects/deng/
https://sourceforge.net/projects/ettu/

View File

@ -7,48 +7,23 @@ Unique left column names in the game info boxes:
['Code license', 'Code licenses', 'Developer', 'Developers', 'Engine', 'Engines', 'Genre', 'Genres', 'Libraries', 'Library', 'Media license', 'Media licenses', 'P. language', 'P. languages', 'Platforms']
"""
import os
import requests
import json
from bs4 import BeautifulSoup, NavigableString
from utils.utils import *
from bs4 import BeautifulSoup
from utils import constants, utils, osg
def key_selection_gameinfobox(a, b):
def download_lgw_content():
"""
Checks which of the two elements of a is present in b; at most one may be (none is allowed).
:return: the found key and its index, or (None, None)
"""
if len(a) != 2:
raise RuntimeError()
c = [x in b for x in a]
if all(c):
raise RuntimeError
if not any(c):
return None, None
d = [(k, i) for (i, k) in enumerate(a) if c[i]]
return d[0]
def extract_field_content(key, idx, info):
"""
Extracts the comma-separated content of a game info field as a cleaned list of strings.
"""
content = info[key].get_text()
content = content.split(',')
content = [x.strip() for x in content]
content = [x if not (x.endswith('[1]') or x.endswith('[2]')) else x[:-3] for x in content] # remove trailing footnote markers [1] and [2]
content = [x.strip() for x in content]
if not content:
raise RuntimeError
if (len(content) > 1 and idx == 0) or (len(content) == 1 and idx == 1):
print(' warning: {} Sg./Pl. mismatch'.format(key))
return content
if __name__ == "__main__":
# parameters
base_url = 'https://libregamewiki.org'
ignored_gameinfos = ['Contribute', 'Origin', 'Release date', 'Latest release']
destination_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
utils.recreate_directory(destination_path)
# read and process the base url (get all games and categories)
url = base_url + '/Category:Games'
@ -69,89 +44,70 @@ if __name__ == "__main__":
break
url = base_url + next_page['href']
# remove all those that start with user
games = [game for game in games if not any(game[1].startswith(x) for x in ('User:', 'Template:', 'Bullet'))]
print('current number of games in LGW {}'.format(len(games)))
# parse games
counter = 0
unique_gameinfo_fields = set()
entries = []
for game in games:
print(game[1])
url = base_url + game[0]
destination_file = os.path.join(destination_path, osg.canonical_game_name(game[0][1:]) + '.html')
text = requests.get(url).text
utils.write_text(destination_file, text)
def parse_lgw_content():
# paths
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
entries_file = os.path.join(import_path, '_lgw.json')
# iterate over all imported files
files = os.listdir(import_path)
entries = []
for file in files:
if file == '_lgw.json':
continue
text = utils.read_text(os.path.join(import_path, file))
# parse the html
soup = BeautifulSoup(text, 'html.parser')
title = soup.h1.string
title = soup.h1.get_text()
print(title)
entry = {'name': title}
# get all external links
links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
links = [x for x in links if x[0].startswith('http') and not x[0].startswith('https://libregamewiki.org/')]
entry['external links'] = links
# get meta description
description = soup.find('meta', attrs={"name":"description"})
entry['description'] = description['content']
# parse gameinfobox
info = soup.find('div', class_='gameinfobox')
if not info:
infos = soup.find('div', class_='gameinfobox')
if not infos:
print(' no gameinfobox')
else:
info = info.find_all('tr')
info = [(x.th.string, x.td) for x in info if x.th and x.th.string]
info = [x for x in info if x[0] not in ignored_gameinfos]
info = dict(info)
unique_gameinfo_fields.update(info.keys())
# consume fields of gameinfobox
# genre
key, idx = key_selection_gameinfobox(('Genre', 'Genres'), info.keys())
if key:
genres = extract_field_content(key, idx, info)
entry['genre'] = genres
del info[key]
# platforms
key = 'Platforms'
if key in info:
platforms = extract_field_content(key, 1, info)
# platforms = [x if x != 'Mac' else 'macOS' for x in platforms] # replace Mac with macOS
entry['platform'] = platforms
del info[key]
# developer
key, idx = key_selection_gameinfobox(('Developer', 'Developers'), info.keys())
if key:
entry['developer'] = extract_field_content(key, idx, info)
del info[key]
# code license
key, idx = key_selection_gameinfobox(('Code license', 'Code licenses'), info.keys())
if key:
entry['code license'] = extract_field_content(key, idx, info)
del info[key]
# media license
key, idx = key_selection_gameinfobox(('Media license', 'Media licenses'), info.keys())
if key:
entry['assets license'] = extract_field_content(key, idx, info)
del info[key]
# engine
key, idx = key_selection_gameinfobox(('Engine', 'Engines'), info.keys())
if key:
entry['engine'] = extract_field_content(key, idx, info)
del info[key]
# library
key, idx = key_selection_gameinfobox(('Library', 'Libraries'), info.keys())
if key:
entry['library'] = extract_field_content(key, idx, info)
del info[key]
# programming language
key, idx = key_selection_gameinfobox(('P. language', 'P. languages'), info.keys())
if key:
languages = extract_field_content(key, idx, info)
languages = [x for x in languages if x != 'HTML5'] # ignore HTML5
entry['code language'] = languages
del info[key]
# unconsumed
if info:
print('unconsumed gameinfo keys {}'.format(info.keys()))
raise RuntimeError()
infos = infos.find_all('tr')
for x in infos:
if x.th and x.td:
# row with header
key = x.th.get_text()
content = x.td.get_text()
content = content.split(',')
content = [x.strip() for x in content]
entry[key] = content
if not x.th and x.td:
# row without header: contribute section
items = x.find_all('li')
items = [(item.a.string, item.a['href']) for item in items if item.a]
for key, content in items:
entry[key] = content
# parse "for available as package in"
tables = soup.find_all('table', class_='wikitable')
@ -187,18 +143,56 @@ if __name__ == "__main__":
entry['categories'] = categories
entries.append(entry)
# print(entry)
counter += 1
if counter > 20:
# break
pass
unique_gameinfo_fields = sorted(list(unique_gameinfo_fields))
print('unique gameinfo fields: {}'.format(unique_gameinfo_fields))
# save entries
json_path = os.path.join(os.path.dirname(__file__), 'lgw_import.json')
text = json.dumps(entries, indent=1)
write_text(json_path, text)
utils.write_text(entries_file, text)
def clean_lgw_content():
# paths
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
entries_file = os.path.join(import_path, '_lgw.json')
cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')
# load entries
text = utils.read_text(entries_file)
entries = json.loads(text)
# rename keys
key_replacements = (
    ('developer', ('Developer', 'Developers')),
    ('code license', ('Code license', 'Code licenses')),
    ('engine', ('Engine', 'Engines')),
    ('genre', ('Genre', 'Genres')),
)
for index, entry in enumerate(entries):
for new_key, old_keys in key_replacements:
for key in old_keys:
if key in entry:
entry[new_key] = entry[key]
del entry[key]
break
entries[index] = entry
# check for unique field names
unique_fields = set()
for entry in entries:
unique_fields.update(entry.keys())
print('unique lgw fields: {}'.format(sorted(list(unique_fields))))
# which fields are mandatory
for entry in entries:
remove_fields = [field for field in unique_fields if field not in entry]
unique_fields -= set(remove_fields)
print('mandatory lgw fields: {}'.format(sorted(list(unique_fields))))
if __name__ == "__main__":
# stage one
# download_lgw_content()
# stage two
# parse_lgw_content()
# stage three
clean_lgw_content()
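For context, the new row-based parsing in parse_lgw_content expects game info boxes shaped roughly like the snippet below; this is a minimal, self-contained sketch (the HTML is invented for illustration, not actual LGW markup):

from bs4 import BeautifulSoup

html = ('<div class="gameinfobox"><table>'
        '<tr><th>Genre</th><td>Strategy, Simulation</td></tr>'
        '<tr><th>Code license</th><td>GPL-2.0</td></tr>'
        '</table></div>')

entry = {}
for row in BeautifulSoup(html, 'html.parser').find_all('tr'):
    if row.th and row.td:
        # header rows become list-valued fields, as in parse_lgw_content
        entry[row.th.get_text()] = [x.strip() for x in row.td.get_text().split(',')]

print(entry)  # {'Genre': ['Strategy', 'Simulation'], 'Code license': ['GPL-2.0']}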

View File

@ -243,7 +243,7 @@ if __name__ == "__main__":
# determine file name
print('create new entry for {}'.format(lgw_name))
file_name = derive_canonical_file_name(lgw_name)
file_name = canonical_game_name(lgw_name) + '.md'
target_file = os.path.join(games_path, file_name)
if os.path.isfile(target_file):
print('warning: file {} already exists, saving under a slightly different name'.format(file_name))

View File

@ -13,7 +13,9 @@ import http.client
import datetime
import json
import textwrap
from utils.osg import *
import os
import re
from utils import constants as c, utils, osg
def update_readme_and_tocs(infos):
@ -29,12 +31,12 @@ def update_readme_and_tocs(infos):
print('update readme and toc files')
# delete content of toc path
for file in os.listdir(tocs_path):
os.remove(os.path.join(tocs_path, file))
for file in os.listdir(c.tocs_path):
os.remove(os.path.join(c.tocs_path, file))
# read readme
readme_file = os.path.join(root_path, 'README.md')
readme_text = read_text(readme_file)
readme_file = os.path.join(c.root_path, 'README.md')
readme_text = utils.read_text(readme_file)
# compile regex for identifying the building blocks in the readme
regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
@ -55,7 +57,7 @@ def update_readme_and_tocs(infos):
# create by category
categories_text = []
for keyword in recommended_keywords:
for keyword in osg.recommended_keywords:
infos_filtered = [x for x in infos if keyword in x['keywords']]
title = keyword.capitalize()
name = keyword.replace(' ', '-')
@ -67,7 +69,7 @@ def update_readme_and_tocs(infos):
# create by platform
platforms_text = []
for platform in valid_platforms:
for platform in osg.valid_platforms:
infos_filtered = [x for x in infos if platform in x.get('platform', [])]
title = platform
name = platform.lower()
@ -80,7 +82,7 @@ def update_readme_and_tocs(infos):
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end
# write to readme
write_text(readme_file, text)
utils.write_text(readme_file, text)
def create_toc(title, file, entries):
@ -88,7 +90,7 @@ def create_toc(title, file, entries):
"""
# file path
toc_file = os.path.join(tocs_path, file)
toc_file = os.path.join(c.tocs_path, file)
# header line
text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
@ -105,7 +107,7 @@ def create_toc(title, file, entries):
text += '\n'.join(rows)
# write to toc file
write_text(toc_file, text)
utils.write_text(toc_file, text)
def check_validity_external_links():
@ -127,7 +129,7 @@ def check_validity_external_links():
ignored_urls = ('https://git.tukaani.org/xz.git',)  # trailing comma makes this a 1-tuple, not a string
# iterate over all entries
for _, entry_path, content in entry_iterator(games_path):
for _, entry_path, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
@ -169,12 +171,12 @@ def check_template_leftovers():
print('check for template leftovers')
# load template and get all lines
text = read_text(os.path.join(root_path, 'template.md'))
text = utils.read_text(os.path.join(c.root_path, 'template.md'))
text = text.split('\n')
check_strings = [x for x in text if x and not x.startswith('##')]
# iterate over all entries
for _, entry_path, content in entry_iterator(games_path):
for _, entry_path, content in osg.entry_iterator():
for check_string in check_strings:
if content.find(check_string) >= 0:
@ -196,7 +198,7 @@ def fix_entries():
regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -211,7 +213,7 @@ def fix_entries():
elements = list(set(elements))
# get category out
for keyword in recommended_keywords:
for keyword in osg.recommended_keywords:
if keyword in elements:
elements.remove(keyword)
category = keyword
@ -243,13 +245,13 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
# code dependencies
regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -279,13 +281,13 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
# build systems
regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -311,7 +313,7 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
def update_statistics(infos):
@ -324,7 +326,7 @@ def update_statistics(infos):
print('update statistics')
# start the page
statistics_file = os.path.join(root_path, 'statistics.md')
statistics_file = os.path.join(c.root_path, 'statistics.md')
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
@ -524,7 +526,7 @@ def update_statistics(infos):
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
# write to statistics file
write_text(statistics_file, statistics)
utils.write_text(statistics_file, statistics)
def export_json(infos):
@ -585,9 +587,9 @@ def export_json(infos):
db['data'] = entries
# output
json_path = os.path.join(games_path, os.path.pardir, 'docs', 'data.json')
json_path = os.path.join(c.games_path, os.path.pardir, 'docs', 'data.json')
text = json.dumps(db, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def git_repo(repo):
@ -710,9 +712,9 @@ def export_primary_code_repositories_json():
primary_repos[k] = sorted(set(v))
# write them to tools/git
json_path = os.path.join(root_path, 'tools', 'archives.json')
json_path = os.path.join(c.root_path, 'tools', 'archives.json')
text = json.dumps(primary_repos, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def export_git_code_repositories_json():
@ -739,40 +741,31 @@ def export_git_code_repositories_json():
urls.sort()
# write them to tools/git
json_path = os.path.join(root_path, 'tools', 'git_repositories.json')
json_path = os.path.join(c.root_path, 'tools', 'git_repositories.json')
text = json.dumps(urls, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def sort_text_file(file, name):
"""
Reads a text file, splits it into lines, removes duplicates, sorts them case-insensitively, and writes the result back.
"""
text = read_text(file)
text = utils.read_text(file)
text = text.split('\n')
text = sorted(list(set(text)), key=str.casefold)
print('{} contains {} items'.format(name, len(text)))
text = '\n'.join(text)
write_text(file, text)
def strip_url(url):
for prefix in ('http://', 'https://'):
if url.startswith(prefix):
url = url[len(prefix):]
for suffix in ('/', '.git'):
if url.endswith(suffix):
url = url[:-len(suffix)]
return url
utils.write_text(file, text)
def clean_backlog(stripped_game_urls):
# read backlog and split
file = os.path.join(root_path, 'tools', 'backlog.txt')
text = read_text(file)
file = os.path.join(c.root_path, 'tools', 'backlog.txt')
text = utils.read_text(file)
text = text.split('\n')
# remove those that are in stripped_game_urls
text = [x for x in text if strip_url(x) not in stripped_game_urls]
text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]
# remove duplicates and sort
text = sorted(list(set(text)), key=str.casefold)
@ -780,18 +773,14 @@ def clean_backlog(stripped_game_urls):
# join and save again
text = '\n'.join(text)
write_text(file, text)
utils.write_text(file, text)
if __name__ == "__main__":
# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')
# backlog
game_urls = extract_links(games_path)
stripped_game_urls = [strip_url(x) for x in game_urls]
game_urls = osg.extract_links()
stripped_game_urls = [utils.strip_url(x) for x in game_urls]
clean_backlog(stripped_game_urls)
@ -802,7 +791,7 @@ if __name__ == "__main__":
fix_entries()
# assemble info
infos = assemble_infos(games_path)
infos = osg.assemble_infos()
# recount and write to readme and to tocs
update_readme_and_tocs(infos)
@ -823,5 +812,5 @@ if __name__ == "__main__":
# check_validity_external_links()
# sort backlog and rejected
# sort_text_file(os.path.join(root_path, 'tools', 'backlog.txt'), 'backlog')
sort_text_file(os.path.join(root_path, 'tools', 'rejected.txt'), 'rejected games list')
# sort_text_file(os.path.join(c.root_path, 'tools', 'backlog.txt'), 'backlog')
sort_text_file(os.path.join(c.root_path, 'tools', 'rejected.txt'), 'rejected games list')

View File

@ -229,9 +229,9 @@ if __name__ == "__main__":
urls = osgc_entry['url']
if isinstance(urls, str):
urls = [urls]
urls = [strip_url(url) for url in urls]
our_urls = our_entry['home']
our_urls = [x.replace('http://', '').replace('https://', '') for x in our_urls]
urls = [x.replace('http://', '').replace('https://', '') for x in urls]
our_urls = [strip_url(url) for url in our_urls]
for url in urls:
if url not in our_urls:
p += ' home url {} missing\n'.format(url)
@ -309,7 +309,7 @@ if __name__ == "__main__":
# determine file name
print('create new entry for {}'.format(osgc_name))
file_name = derive_canonical_file_name(osgc_name)
file_name = canonical_game_name(osgc_name) + '.md'
target_file = os.path.join(games_path, file_name)
if os.path.isfile(target_file):
print('warning: file {} already exists, saving under a slightly different name'.format(file_name))

tools/utils/constants.py Normal file
View File

@ -0,0 +1,12 @@
"""
Paths, properties.
"""
import os
# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')
local_properties_file = os.path.join(root_path, 'local.properties')
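The other scripts resolve all paths through this module; a short usage sketch:

from utils import constants as c

print(c.root_path)   # repository root, two levels above tools/utils/
print(c.games_path)  # <root>/games
print(c.tocs_path)   # <root>/games/tocs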

View File

@ -3,8 +3,9 @@ Specific functions working on the games.
"""
import re
import os
from difflib import SequenceMatcher
from utils.utils import *
from utils import utils, constants as c
essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
@ -19,38 +20,37 @@ def game_name_similarity(a, b):
return SequenceMatcher(None, str.casefold(a), str.casefold(b)).ratio()
def entry_iterator(games_path):
def entry_iterator():
"""
"""
# get all entries (ignore everything starting with underscore)
entries = os.listdir(games_path)
entries = os.listdir(c.games_path)
# iterate over all entries
for entry in entries:
entry_path = os.path.join(games_path, entry)
entry_path = os.path.join(c.games_path, entry)
# ignore directories ("tocs" for example)
if os.path.isdir(entry_path):
continue
# read entry
content = read_text(entry_path)
content = utils.read_text(entry_path)
# yield
yield entry, entry_path, content
def derive_canonical_file_name(name):
def canonical_game_name(name):
"""
Derives a canonical file name from a game name
Derives a canonical game name from an actual game name (suitable for file names, ...)
"""
name = regex_sanitize_name.sub('', name)
name = regex_sanitize_name_space_eater.sub('_', name)
name = name.replace('_-_', '-')
name = name.casefold()
name = name + '.md'
return name
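The two sanitizer regexes are defined elsewhere in osg.py and are not part of this diff; assuming they drop disallowed characters and collapse runs of spaces, the derivation behaves roughly like this sketch:

import re

# assumed patterns - the real ones live elsewhere in osg.py
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-]+")
regex_sanitize_name_space_eater = re.compile(r" +")

def canonical_game_name(name):
    name = regex_sanitize_name.sub('', name)                # drop disallowed characters
    name = regex_sanitize_name_space_eater.sub('_', name)   # spaces -> underscores
    name = name.replace('_-_', '-')
    return name.casefold()

print(canonical_game_name('Me and My Shadow'))  # me_and_my_shadow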
@ -193,7 +193,7 @@ def parse_entry(content):
return info
def assemble_infos(games_path):
def assemble_infos():
"""
Parses all entries and assembles interesting infos about them.
"""
@ -204,7 +204,7 @@ def assemble_infos(games_path):
infos = []
# iterate over all entries
for entry, _, content in entry_iterator(games_path):
for entry, _, content in entry_iterator():
# parse entry
info = parse_entry(content)
@ -213,12 +213,12 @@ def assemble_infos(games_path):
info['file'] = entry
# check canonical file name
canonical_file_name = derive_canonical_file_name(info['name'])
canonical_file_name = canonical_game_name(info['name']) + '.md'
# we also allow -X with X =2..9 as possible extension (because of duplicate canonical file names)
if canonical_file_name != entry and canonical_file_name != entry[:-5] + '.md':
print('file {} should be {}'.format(entry, canonical_file_name))
source_file = os.path.join(games_path, entry)
target_file = os.path.join(games_path, canonical_file_name)
source_file = os.path.join(c.games_path, entry)
target_file = os.path.join(c.games_path, canonical_file_name)
if not os.path.isfile(target_file):
pass
# os.rename(source_file, target_file)
@ -228,7 +228,8 @@ def assemble_infos(games_path):
return infos
def extract_links(games_path):
def extract_links():
"""
Parses all entries and extracts http(s) links from them
"""
@ -238,7 +239,7 @@ def extract_links(games_path):
# iterate over all entries
urls = set()
for _, _, content in entry_iterator(games_path):
for _, _, content in entry_iterator():
# apply regex
matches = regex.findall(content)

View File

@ -0,0 +1,5 @@
"""
Everything specific to the Github API (via PyGithub).
"""
from github import Github
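So far the module is only a stub; a minimal sketch of how it might be fleshed out with PyGithub (repository name and token handling are assumptions, not part of this commit):

from github import Github

def repo_state(full_name, token=None):
    """Fetches a few archive-relevant properties of a Github repository."""
    g = Github(token) if token else Github()  # anonymous access is heavily rate-limited
    repo = g.get_repo(full_name)  # e.g. 'Trilarion/opensourcegames'
    return {'archived': repo.archived,
            'stars': repo.stargazers_count,
            'default branch': repo.default_branch}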

View File

@ -9,7 +9,6 @@ import tarfile
import time
import urllib.request
import zipfile
import errno
import stat
@ -266,4 +265,34 @@ def unzip(zip_file, destination_directory):
# done creating files, now update dir dt
for name in dirs:
date_time = dirs[name]
os.utime(name, (date_time, date_time))
def strip_url(url):
    for prefix in ('http://', 'https://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    for prefix in ('www.',):  # must be a 1-tuple; ('www') would iterate over single characters
        if url.startswith(prefix):
            url = url[len(prefix):]
    for suffix in ('/', '.git', '/en', '/index.html'):
        if url.endswith(suffix):
            url = url[:-len(suffix)]
    return url
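With the fixed prefix tuple above, for example:

print(strip_url('https://www.example.com/index.html'))  # example.com
print(strip_url('https://github.com/Trilarion/opensourcegames.git'))  # github.com/Trilarion/opensourcegames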
def load_properties(filepath, sep='=', comment_char='#'):
    """
    Reads the file as a properties file (as in Java).
    """
    properties = {}
    with open(filepath, "rt") as file:
        for line in file:
            line = line.strip()
            if not line or line.startswith(comment_char):
                continue  # skip blank lines and comments
            key, value = line.split(sep, 1)  # split only at the first separator
            properties[key.strip()] = value.strip()
    return properties
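Together with local_properties_file from constants.py this keeps machine-local settings out of the repository; a hypothetical local.properties and its use (the key name is invented):

# local.properties contains e.g.:  github-token = abc123
from utils import constants as c, utils

properties = utils.load_properties(c.local_properties_file)
token = properties.get('github-token')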