rename of folder tools to code

2020-01-10 16:46:46 +01:00
parent 30ce6187eb
commit 10625a854a
40 changed files with 30 additions and 25 deletions
--- a/code/README.md
+++ b/code/README.md
@ -0,0 +1,8 @@
+update.py
+---------
+
+Clones and/or pulls many git repositories from the open source games entries, so that one has an archive of them.
+
+Currently requires at least 130 GB of disk space!
+
+Run update.py to update the archive. URLs are stored in archives.json.
--- a/code/archive_detect_submodules.py
+++ b/code/archive_detect_submodules.py
@ -0,0 +1,59 @@
+"""
+Detects the submodules in the Git repositories via "git show HEAD:.gitmodules" and adds them to the list of
+repositories to be checked out. Works on bare repositories.
+"""
+
+import json
+import re
+import urllib.parse
+
+from utils.utils import *
+from utils.archive import *
+
+if __name__ == '__main__':
+
+    regex_submodules = re.compile(r"url = (\S*)", re.MULTILINE)
+
+    # get this folder
+    root_folder = os.path.realpath(os.path.dirname(__file__))
+    archive_folder = os.path.join(root_folder, 'archive')
+
+    # read archives.json
+    text = read_text(os.path.join(root_folder, 'archives.json'))
+    archives = json.loads(text)
+
+    # loop over all git archives
+    submodules = []
+    for repo in archives['git']:
+        git_folder = git_folder_name(repo)
+        folder = os.path.join(archive_folder, 'git', git_folder)
+        if not os.path.isdir(folder):
+            print('Warning: folder {} does not exist'.format(git_folder))
+            continue
+        os.chdir(folder)
+        try:
+            content = subprocess_run(['git', 'show', 'HEAD:.gitmodules'], False)
+        except:
+            continue
+        matches = regex_submodules.findall(content)
+        # resolve relative urls
+        matches = [urllib.parse.urljoin(repo, x) if x.startswith('..') else x for x in matches]
+        submodules.extend(matches)
+
+    # transform git://github.com to https://github.com
+    for a, b in (('git://github.com', 'https://github.com'), ('git@github.com:', 'https://github.com/'), ('git+ssh://git@github.com', 'https://github.com')):
+        submodules = [b + x[len(a):] if x.startswith(a) else x for x in submodules]
+
+    # let all github repos end on ".git"
+    submodules = [x + '.git' if 'github.com' in x and not x.endswith('.git') else x for x in submodules]
+
+    # eliminate those which are duplicates and those which are in archives already
+    submodules = set(submodules) - set(archives['git'])
+    submodules = sorted(list(submodules))
+
+    # TODO single dots are not yet resolved correctly, for example in https://github.com/henkboom/pax-britannica.git
+    submodules = [x for x in submodules if not any([x.startswith(y) for y in ('.', 'git@')])]
+
+    # store them
+    print('found {} submodules'.format(len(submodules)))
+    write_text(os.path.join(root_folder, 'archives.git-submodules.json'), json.dumps(submodules, indent=1))
--- a/code/archive_update.py
+++ b/code/archive_update.py
@ -0,0 +1,178 @@
+"""
+Clones and/or updates all the repositories (git, svn, hg) listed in archives.json
+
+Requires: git executable in the path
+
+Uses 'git clone --mirror' to set up the git locally.
+
+Warning: This may take a long time on the first run and may need a lot of storage space!
+
+TODO are really all existing branches cloned and pulled? (see https://stackoverflow.com/questions/67699/how-to-clone-all-remote-branches-in-git)
+TODO Sourceforge git clone may not work all the time (restarting the script sometimes helps..)
+
+Note: May need to set http.postBuffer (https://stackoverflow.com/questions/17683295/git-bash-error-rpc-failed-result-18-htp-code-200b-1kib-s)
+"""
+
+import json
+
+from utils.utils import *
+from utils.archive import *
+
+
+def git_clone(url, folder):
+    subprocess_run(["git", "clone", "--mirror", url, folder])
+
+
+def git_update(folder):
+    os.chdir(folder)
+    subprocess_run(["git", "fetch", "--all"])
+
+
+def svn_folder_name(url):
+    replaces = {
+        'https://svn.code.sf.net/p': 'sourceforge'
+    }
+    return derive_folder_name(url, replaces)
+
+
+def svn_clone(url, folder):
+    subprocess_run(["svn", "checkout", url, folder])
+
+
+def svn_update(folder):
+    os.chdir(folder)
+    subprocess_run(["svn", "update"])
+
+
+def hg_folder_name(url):
+    replaces = {
+        'https://bitbucket.org': 'bitbucket',
+        'https://hg.code.sf.net/p': 'sourceforge',
+        'http://hg.': ''
+    }
+    return derive_folder_name(url, replaces)
+
+
+def hg_clone(url, folder):
+    subprocess_run(["hg", "clone", url, folder])
+
+
+def hg_update(folder):
+    os.chdir(folder)
+    subprocess_run(['hg', 'pull', '-u'])
+
+
+def run_update(type, urls, type_folder=None):
+    if type_folder is None:
+        type_folder = type
+    print('update {} {} archives'.format(len(urls), type))
+    base_folder = os.path.join(archive_folder, type_folder)
+    if not os.path.exists(base_folder):
+        os.mkdir(base_folder)
+
+    # get derived folder names
+    folders = [folder_name[type](url) for url in urls]
+
+    # find those folders not used anymore
+    existing_folders = [x for x in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, x))]
+    unused_folders = [x for x in existing_folders if x not in folders]
+    print('{} unused archives'.format(len(unused_folders)))
+    if unused_folders:
+        print(unused_folders)
+
+    # new folder, need to clone
+    new_folders = [x for x in folders if x not in existing_folders]
+    print('{} new archives, will clone'.format(len(new_folders)))
+
+    # add root to folders
+    folders = [os.path.join(base_folder, x) for x in folders]
+    os.chdir(base_folder)
+    for folder, url in zip(folders, urls):
+        if not os.path.isdir(folder):
+            print('clone {} into {}'.format(url, folder[len(base_folder):]))
+            try:
+                clone[type](url, folder)
+            except RuntimeError as e:
+                print('error occurred while cloning, will skip')
+
+    # at the end update them all
+    for folder in folders:
+        print('update {}'.format(os.path.basename(folder)))
+        if not os.path.isdir(folder):
+            print('folder not existing, wanted to update, will skip')
+            continue
+        print('update {}'.format(folder[len(base_folder):]))
+        try:
+            update[type](folder)
+        except RuntimeError as e:
+            print('error occurred while updating, will skip')
+
+
+def run_info(type, urls):
+    print('collect info on {}'.format(type))
+
+    # get derived folder names
+    folders = [os.path.join(type, folder_name[type](url)) for url in urls]
+
+    # collect information
+    info = []
+    for folder in folders:
+        print(folder)
+        path = os.path.join(archive_folder, folder)
+        size = folder_size(path) if os.path.isdir(path) else -1
+        info.append([size, folder])
+    return info
+
+
+if __name__ == '__main__':
+
+    supported_types = ['git', 'hg', 'svn']
+
+    folder_name = {
+        'git': git_folder_name,
+        'svn': svn_folder_name,
+        'hg': hg_folder_name,
+    }
+
+    clone = {
+        'git': git_clone,
+        'svn': svn_clone,
+        'hg': hg_clone,
+    }
+
+    update = {
+        'git': git_update,
+        'svn': svn_update,
+        'hg': hg_update,
+    }
+
+    # get this folder
+    root_folder = os.path.realpath(os.path.dirname(__file__))
+    archive_folder = os.path.join(root_folder, 'archive')
+
+    # read archives.json
+    text = read_text(os.path.join(root_folder, 'archives.json'))
+    archives = json.loads(text)
+
+    # read archives.git-submodules.json
+    text = read_text(os.path.join(root_folder, 'archives.git-submodules.json'))
+    archives_git_submodules = json.loads(text)
+
+    # run update on submodules
+    # run_update('git', archives_git_submodules, 'git-submodules')
+
+    # update
+    for type in archives:
+        if type not in supported_types:
+            continue
+        urls = archives[type]
+        run_update(type, urls)
+
+    # collect info
+    infos = []
+    for type in archives:
+        urls = archives[type]
+        infos.extend(run_info(type, urls))
+    infos.sort(key=lambda x: x[0], reverse=True)
+    text = json.dumps(infos, indent=1)
+    write_text(os.path.join(archive_folder, 'infos.json'), text)
--- a/code/archives.git-submodules.json
+++ b/code/archives.git-submodules.json
@ -0,0 +1,556 @@
+[
+ "git://git.guelker.eu/pod-cpp.git",
+ "git://git.guelker.eu/tinyclipboard.git",
+ "git://git.thousandparsec.net/git/schemepy.git",
+ "http://luajit.org/git/luajit-2.0.git",
+ "https://bitbucket.org/ecwolf/sdl.git",
+ "https://bitbucket.org/ecwolf/sdl_mixer-for-ecwolf.git",
+ "https://bitbucket.org/ecwolf/sdl_net.git",
+ "https://bitbucket.org/nyan_developer/nya-engine.git",
+ "https://boringssl.googlesource.com/boringssl",
+ "https://chromium.googlesource.com/breakpad/breakpad",
+ "https://github.com/Aleph-One-Marathon/data-marathon-2.git",
+ "https://github.com/Aleph-One-Marathon/data-marathon-infinity.git",
+ "https://github.com/Aleph-One-Marathon/data-marathon.git",
+ "https://github.com/AmrikSadhra/g3log.git",
+ "https://github.com/AndresTraks/BulletSharpPInvoke.git",
+ "https://github.com/BalazsJako/ImGuiColorTextEdit.git",
+ "https://github.com/BrianGladman/sha.git",
+ "https://github.com/CallumDev/FontConfigSharp.git",
+ "https://github.com/CallumDev/lidgren-network-gen3.git",
+ "https://github.com/DaemonEngine/CBSE-Toolchain.git",
+ "https://github.com/DaemonEngine/breakpad.git",
+ "https://github.com/DaemonEngine/crunch.git",
+ "https://github.com/DaemonEngine/recastnavigation.git",
+ "https://github.com/DusteDdk/list.git",
+ "https://github.com/Extrawurst/cimgui.git",
+ "https://github.com/FIX94/fixNES.git",
+ "https://github.com/FNA-XNA/FNA.git",
+ "https://github.com/Facepunch/Facepunch.Steamworks.git",
+ "https://github.com/GTA-ASM/MirrorLite.git",
+ "https://github.com/GameFoundry/bsf.git",
+ "https://github.com/GentenStudios/bgfx.cmake.git",
+ "https://github.com/GlPortal/RadixEngine.git",
+ "https://github.com/GlPortal/documentation.git",
+ "https://github.com/GlPortal/glportal-data.git",
+ "https://github.com/Grumbel/uitest.git",
+ "https://github.com/Jakz/openmom-editor.git",
+ "https://github.com/JonnyH/glm.git",
+ "https://github.com/JonnyH/libsmacker.git",
+ "https://github.com/JonnyH/miniz.git",
+ "https://github.com/JonnyH/physfs-hg-import.git",
+ "https://github.com/JuliaStrings/utf8proc.git",
+ "https://github.com/KhronosGroup/SPIRV-Headers.git",
+ "https://github.com/KhronosGroup/Vulkan-Headers.git",
+ "https://github.com/LaurentGomila/SFML.git",
+ "https://github.com/LibVNC/libvncserver.git",
+ "https://github.com/Librelancer/Collada141.git",
+ "https://github.com/Lyndir/Pearl.git",
+ "https://github.com/Lyndir/love-lyndir.client.git",
+ "https://github.com/MegaGlest/megaglest-data.git",
+ "https://github.com/MegaGlest/megaglest-masterserver.git",
+ "https://github.com/MegaGlest/mojosetup-fork.git",
+ "https://github.com/Microsoft/DirectXTex.git",
+ "https://github.com/MonoGame/MonoGame.Dependencies.git",
+ "https://github.com/OpenFodder/data.git",
+ "https://github.com/Orphis/boost-cmake.git",
+ "https://github.com/Perlmint/glew-cmake.git",
+ "https://github.com/Ponup/api-cpp-client.git",
+ "https://github.com/Ponup/engine-desktop.git",
+ "https://github.com/REGoth-project/BsZenLib.git",
+ "https://github.com/REGoth-project/CAB-Installer-Extractor.git",
+ "https://github.com/Return-To-The-Roots/languages.git",
+ "https://github.com/Return-To-The-Roots/libendian.git",
+ "https://github.com/Return-To-The-Roots/liblobby.git",
+ "https://github.com/Return-To-The-Roots/libsiedler2.git",
+ "https://github.com/Return-To-The-Roots/libutil.git",
+ "https://github.com/Return-To-The-Roots/mygettext.git",
+ "https://github.com/Return-To-The-Roots/s25edit.git",
+ "https://github.com/Return-To-The-Roots/s25update.git",
+ "https://github.com/RigsOfRods/content.git",
+ "https://github.com/RigsOfRods/ogre-pagedgeometry.git",
+ "https://github.com/Robmaister/SharpFont.git",
+ "https://github.com/Sebanisu/zzzDeArchive.git",
+ "https://github.com/SirCmpwn/fNbt.git",
+ "https://github.com/StbSharp/StbImageSharp.git",
+ "https://github.com/StbSharp/StbImageWriteSharp.git",
+ "https://github.com/SuperTux/SDL_ttf.git",
+ "https://github.com/SuperTux/physfs.git",
+ "https://github.com/SuperTux/sexp-cpp.git",
+ "https://github.com/SuperTux/tinygettext.git",
+ "https://github.com/SuperV1234/SSVLuaWrapper.git",
+ "https://github.com/SuperV1234/SSVMenuSystem.git",
+ "https://github.com/SuperV1234/SSVStart.git",
+ "https://github.com/SuperV1234/SSVUtils.git",
+ "https://github.com/SuperV1234/vrm_cmake.git",
+ "https://github.com/SuperV1234/vrm_pp.git",
+ "https://github.com/TalonBraveInfo/newton-dynamics.git",
+ "https://github.com/TalonBraveInfo/platform.git",
+ "https://github.com/TartanLlama/optional.git",
+ "https://github.com/Tencent/rapidjson.git",
+ "https://github.com/TheAssemblyArmada/BaseConfig.git",
+ "https://github.com/TheAssemblyArmada/Baseconfig.git",
+ "https://github.com/TheAssemblyArmada/CaptainsLog.git",
+ "https://github.com/TheAssemblyArmada/SetSail.git",
+ "https://github.com/TheAssemblyArmada/gamemath.git",
+ "https://github.com/ThePhD/sol2.git",
+ "https://github.com/Try/MoltenTempest.git",
+ "https://github.com/Try/ZenLib.git",
+ "https://github.com/TsudaKageyu/minhook.git",
+ "https://github.com/Unvanquished/libRocket.git",
+ "https://github.com/UnvanquishedAssets/unvanquished_src.dpkdir.git",
+ "https://github.com/ValyriaTear/luabind.git",
+ "https://github.com/ValyriaTear/vt-utils.git",
+ "https://github.com/Wohlstand/libADLMIDI/.git",
+ "https://github.com/aap/geniedoc.git",
+ "https://github.com/accumulators.git",
+ "https://github.com/ajweeks/glm.git",
+ "https://github.com/albertodemichelis/squirrel.git",
+ "https://github.com/alecthomas/entityx.git",
+ "https://github.com/algorithm.git",
+ "https://github.com/align.git",
+ "https://github.com/anttisalonen/libcommon.git",
+ "https://github.com/antze-k/miso.git",
+ "https://github.com/anura-engine/imgui.git",
+ "https://github.com/any.git",
+ "https://github.com/arescentral/antares-data.git",
+ "https://github.com/arescentral/antares-test-data.git",
+ "https://github.com/arescentral/procyon.git",
+ "https://github.com/array.git",
+ "https://github.com/arsenm/sanitizers-cmake.git",
+ "https://github.com/aseprite/freetype2.git",
+ "https://github.com/asio.git",
+ "https://github.com/assert.git",
+ "https://github.com/assign.git",
+ "https://github.com/atomic.git",
+ "https://github.com/atrinik/atrinik-sound.git",
+ "https://github.com/atrinik/resources.git",
+ "https://github.com/auriamg/macdylibbundler.git",
+ "https://github.com/auto_index.git",
+ "https://github.com/bazelregistry/sdl2.git",
+ "https://github.com/bcp.git",
+ "https://github.com/beast.git",
+ "https://github.com/bimap.git",
+ "https://github.com/bind.git",
+ "https://github.com/bjorn/tiled.git",
+ "https://github.com/boost_install.git",
+ "https://github.com/boostbook.git",
+ "https://github.com/boostdep.git",
+ "https://github.com/build.git",
+ "https://github.com/bulletphysics/bullet3.git",
+ "https://github.com/butter-cat-games/cimguilibs.git",
+ "https://github.com/c42f/tinyformat.git",
+ "https://github.com/callable_traits.git",
+ "https://github.com/canta-media.git",
+ "https://github.com/castano/nvidia-texture-tools.git",
+ "https://github.com/cflavio/yyagl.git",
+ "https://github.com/check_build.git",
+ "https://github.com/chocolate-doom/quickcheck.git",
+ "https://github.com/chrono.git",
+ "https://github.com/circular_buffer.git",
+ "https://github.com/cocos2d/bindings-generator.git",
+ "https://github.com/cocos2d/cocos2d-console.git",
+ "https://github.com/cocos2d/cocos2d-html5.git",
+ "https://github.com/colobot/colobot-data.git",
+ "https://github.com/compatibility.git",
+ "https://github.com/compute.git",
+ "https://github.com/conatuscreative/fnalibs.git",
+ "https://github.com/concept_check.git",
+ "https://github.com/config.git",
+ "https://github.com/container.git",
+ "https://github.com/container_hash.git",
+ "https://github.com/context.git",
+ "https://github.com/contract.git",
+ "https://github.com/conversion.git",
+ "https://github.com/convert.git",
+ "https://github.com/core.git",
+ "https://github.com/coroutine.git",
+ "https://github.com/coroutine2.git",
+ "https://github.com/crawl/crawl-fonts.git",
+ "https://github.com/crawl/crawl-freetype.git",
+ "https://github.com/crawl/crawl-libpng.git",
+ "https://github.com/crawl/crawl-lua.git",
+ "https://github.com/crawl/crawl-luajit.git",
+ "https://github.com/crawl/crawl-pcre.git",
+ "https://github.com/crawl/crawl-sdl2-image.git",
+ "https://github.com/crawl/crawl-sdl2-mixer.git",
+ "https://github.com/crawl/crawl-sdl2.git",
+ "https://github.com/crawl/crawl-sqlite.git",
+ "https://github.com/crawl/crawl-zlib.git",
+ "https://github.com/crc.git",
+ "https://github.com/cxong/tinydir.git",
+ "https://github.com/date_time.git",
+ "https://github.com/dbry/adpcm-xq.git",
+ "https://github.com/degenerated1123/ZenLib.git",
+ "https://github.com/degenerated1123/bgfx-cmake.git",
+ "https://github.com/detail.git",
+ "https://github.com/discordapp/discord-rpc.git",
+ "https://github.com/disjoint_sets.git",
+ "https://github.com/dll.git",
+ "https://github.com/docca.git",
+ "https://github.com/dotfloat/fluidsynth-lite.git",
+ "https://github.com/dr-soft/mini_al.git",
+ "https://github.com/droidmonkey/python-cmake-buildsystem.git",
+ "https://github.com/dumganhar/ccs-res.git",
+ "https://github.com/dynamic_bitset.git",
+ "https://github.com/egoboo/egoboo-assets.git",
+ "https://github.com/egoboo/egoboo-external.git",
+ "https://github.com/egoboo/idlib-game-engine.git",
+ "https://github.com/endian.git",
+ "https://github.com/erikd/libsamplerate.git",
+ "https://github.com/etlegacy/etlegacy-libs.git",
+ "https://github.com/exception.git",
+ "https://github.com/fiber.git",
+ "https://github.com/filesystem.git",
+ "https://github.com/flathub/shared-modules.git",
+ "https://github.com/flyweight.git",
+ "https://github.com/fmtlib/fmt.git",
+ "https://github.com/foreach.git",
+ "https://github.com/format.git",
+ "https://github.com/frabert/libdmusic.git",
+ "https://github.com/freeminer/default.git",
+ "https://github.com/freeminer/enet.git",
+ "https://github.com/function.git",
+ "https://github.com/function_types.git",
+ "https://github.com/functional.git",
+ "https://github.com/fusion.git",
+ "https://github.com/fuzzylite/fuzzylite.git",
+ "https://github.com/g-truc/glm.git",
+ "https://github.com/gabomdq/SDL_GameControllerDB.git",
+ "https://github.com/geometry.git",
+ "https://github.com/gerstrong/fheroes2plus.git",
+ "https://github.com/gil.git",
+ "https://github.com/glfw/glfw.git",
+ "https://github.com/gliptic/tl.git",
+ "https://github.com/google/googletest.git",
+ "https://github.com/graph.git",
+ "https://github.com/graph_parallel.git",
+ "https://github.com/grit-engine/grit-bullet.git",
+ "https://github.com/grit-engine/grit-freeimage.git",
+ "https://github.com/grit-engine/grit-lua.git",
+ "https://github.com/grit-engine/grit-ogre.git",
+ "https://github.com/grit-engine/grit-util.git",
+ "https://github.com/grit-engine/grit-windows-prebuilt-dependencies.git",
+ "https://github.com/gulrak/filesystem.git",
+ "https://github.com/hana.git",
+ "https://github.com/hav4ik/tinyai.git",
+ "https://github.com/headers.git",
+ "https://github.com/heap.git",
+ "https://github.com/hhyyrylainen/RubySetupSystem.git",
+ "https://github.com/histogram.git",
+ "https://github.com/hjiang/jsonxx.git",
+ "https://github.com/hof.git",
+ "https://github.com/hoshi10/astar-algorithm-cpp.git",
+ "https://github.com/hydren/rapidxml.git",
+ "https://github.com/icl.git",
+ "https://github.com/inolen/ioq3.git",
+ "https://github.com/inspect.git",
+ "https://github.com/institution/ext.git",
+ "https://github.com/integer.git",
+ "https://github.com/interprocess.git",
+ "https://github.com/interval.git",
+ "https://github.com/intrusive.git",
+ "https://github.com/io.git",
+ "https://github.com/iostreams.git",
+ "https://github.com/iterator.git",
+ "https://github.com/ivucica/glict.git",
+ "https://github.com/ivucica/rules_libsdl12.git",
+ "https://github.com/ivucica/rules_tibia.git",
+ "https://github.com/jarikomppa/soloud.git",
+ "https://github.com/jarro2783/cxxopts.git",
+ "https://github.com/jazztickets/ae.git",
+ "https://github.com/jazztickets/cmake.git",
+ "https://github.com/jhaynie/iphonesim.git",
+ "https://github.com/jonasmr/microprofile.git",
+ "https://github.com/kaadmy/pixture.git",
+ "https://github.com/kcat/openal-soft.git",
+ "https://github.com/kebby/assimp-net.git",
+ "https://github.com/krb5/krb5.git",
+ "https://github.com/kuba--/zip.git",
+ "https://github.com/lairworks/nas2d-core.git",
+ "https://github.com/lambda.git",
+ "https://github.com/laradock/laradock.git",
+ "https://github.com/leethomason/tinyxml2.git",
+ "https://github.com/les-sosna/ios-cmake.git",
+ "https://github.com/lexical_cast.git",
+ "https://github.com/libogg-1.3.0.git",
+ "https://github.com/libretro/libretro-common.git",
+ "https://github.com/libtom/libtommath.git",
+ "https://github.com/libvorbis-1.3.3.git",
+ "https://github.com/lispparser/sexp-cpp.git",
+ "https://github.com/litre.git",
+ "https://github.com/local_function.git",
+ "https://github.com/locale.git",
+ "https://github.com/lockfree.git",
+ "https://github.com/log.git",
+ "https://github.com/logic.git",
+ "https://github.com/logmich/logmich.git",
+ "https://github.com/lubomyr/uae4all2.git",
+ "https://github.com/lubomyr/vice-2.4.git",
+ "https://github.com/lvandeve/lodepng.git",
+ "https://github.com/mat007/turtle.git",
+ "https://github.com/math.git",
+ "https://github.com/meganz/mingw-std-threads.git",
+ "https://github.com/mellinoe/ImGui.NET.git",
+ "https://github.com/memononen/fontstash.git",
+ "https://github.com/metaparse.git",
+ "https://github.com/miloyip/rapidjson.git",
+ "https://github.com/miniupnp/miniupnp.git",
+ "https://github.com/mmatyas/libSDL2pp.git",
+ "https://github.com/mmatyas/unittest-cpp.git",
+ "https://github.com/move.git",
+ "https://github.com/mp11.git",
+ "https://github.com/mpi.git",
+ "https://github.com/mpl.git",
+ "https://github.com/mruby/mruby.git",
+ "https://github.com/msgpack/msgpack-c.git",
+ "https://github.com/msm.git",
+ "https://github.com/multi_array.git",
+ "https://github.com/multi_index.git",
+ "https://github.com/multiprecision.git",
+ "https://github.com/nemtrif/utfcpp.git",
+ "https://github.com/nielsAD/travis-lazarus.git",
+ "https://github.com/nlohmann/json.git",
+ "https://github.com/nothings/stb.git",
+ "https://github.com/nowide.git",
+ "https://github.com/numeric_conversion.git",
+ "https://github.com/numpy/numpydoc.git",
+ "https://github.com/o11c/attoconf.git",
+ "https://github.com/oamldev/oaml.git",
+ "https://github.com/odeint.git",
+ "https://github.com/oolite-binary-resources.git",
+ "https://github.com/oolite-linux-dependencies.git",
+ "https://github.com/oolite-mac-components.git",
+ "https://github.com/oolite-sdl-dependencies.git",
+ "https://github.com/oolite-tests.git",
+ "https://github.com/oolite-windows-dependencies.git",
+ "https://github.com/ooxi/CMake-Gettext.git",
+ "https://github.com/open-source-parsers/jsoncpp.git",
+ "https://github.com/optional.git",
+ "https://github.com/outcome.git",
+ "https://github.com/parameter.git",
+ "https://github.com/parameter_python.git",
+ "https://github.com/paulsapps/Detours.git",
+ "https://github.com/paulsapps/SUDM.git",
+ "https://github.com/paulsapps/TinyXML.git",
+ "https://github.com/paulsapps/boost_1_63_mini.git",
+ "https://github.com/paulsapps/googletest.git",
+ "https://github.com/paulsapps/libdeflate.git",
+ "https://github.com/paulsapps/luabind.git",
+ "https://github.com/paulsapps/nativefiledialog.git",
+ "https://github.com/paulsapps/soxr-0.1.2.git",
+ "https://github.com/paulsapps/sqrat.git",
+ "https://github.com/paulsapps/squirrel.git",
+ "https://github.com/pelya/BasiliskII-android.git",
+ "https://github.com/pelya/Boost-for-Android.git",
+ "https://github.com/pelya/Ninslash.git",
+ "https://github.com/pelya/OpenTTD-JGR-patchpack.git",
+ "https://github.com/pelya/android-keyboard-gadget.git",
+ "https://github.com/pelya/android-shmem.git",
+ "https://github.com/pelya/libiconv-libicu-android.git",
+ "https://github.com/pelya/liero-android.git",
+ "https://github.com/pelya/openarena-engine.git",
+ "https://github.com/pelya/openarena-vm.git",
+ "https://github.com/pelya/openttd-android.git",
+ "https://github.com/pelya/pulseaudio-android.git",
+ "https://github.com/pelya/supertux.git",
+ "https://github.com/pelya/teeworlds.git",
+ "https://github.com/pelya/xmoto.git",
+ "https://github.com/pelya/xserver.git",
+ "https://github.com/performous/compact_enc_det.git",
+ "https://github.com/petroules/solar-cmake.git",
+ "https://github.com/phoenix.git",
+ "https://github.com/poly_collection.git",
+ "https://github.com/polygon.git",
+ "https://github.com/pool.git",
+ "https://github.com/predef.git",
+ "https://github.com/preprocessor.git",
+ "https://github.com/primordialmachine/idlib.git",
+ "https://github.com/process.git",
+ "https://github.com/program_options.git",
+ "https://github.com/property_map.git",
+ "https://github.com/property_tree.git",
+ "https://github.com/proto.git",
+ "https://github.com/ptr_container.git",
+ "https://github.com/pyca/cryptography.git",
+ "https://github.com/python.git",
+ "https://github.com/q-gears/data.git",
+ "https://github.com/q-gears/luajit.git",
+ "https://github.com/qml-box2d/qml-box2d.git",
+ "https://github.com/qnighy/ruby-1.8.1.git",
+ "https://github.com/qnighy/ruby-1.9.2p0.git",
+ "https://github.com/quickbook.git",
+ "https://github.com/qvm.git",
+ "https://github.com/random.git",
+ "https://github.com/range.git",
+ "https://github.com/ratio.git",
+ "https://github.com/rational.git",
+ "https://github.com/rds1983/StbSharp.git",
+ "https://github.com/recastnavigation/recastnavigation.git",
+ "https://github.com/redeclipse/acerspyro.git",
+ "https://github.com/redeclipse/actors.git",
+ "https://github.com/redeclipse/appleflap.git",
+ "https://github.com/redeclipse/blendbrush.git",
+ "https://github.com/redeclipse/caustics.git",
+ "https://github.com/redeclipse/crosshairs.git",
+ "https://github.com/redeclipse/decals.git",
+ "https://github.com/redeclipse/dziq.git",
+ "https://github.com/redeclipse/elyvisions.git",
+ "https://github.com/redeclipse/fonts.git",
+ "https://github.com/redeclipse/freezurbern.git",
+ "https://github.com/redeclipse/john.git",
+ "https://github.com/redeclipse/jojo.git",
+ "https://github.com/redeclipse/jwin.git",
+ "https://github.com/redeclipse/luckystrike.git",
+ "https://github.com/redeclipse/maps.git",
+ "https://github.com/redeclipse/mayhem.git",
+ "https://github.com/redeclipse/mikeplus64.git",
+ "https://github.com/redeclipse/misc.git",
+ "https://github.com/redeclipse/molexted.git",
+ "https://github.com/redeclipse/nieb.git",
+ "https://github.com/redeclipse/nobiax.git",
+ "https://github.com/redeclipse/particles.git",
+ "https://github.com/redeclipse/philipk.git",
+ "https://github.com/redeclipse/projectiles.git",
+ "https://github.com/redeclipse/props.git",
+ "https://github.com/redeclipse/q009.git",
+ "https://github.com/redeclipse/skyboxes.git",
+ "https://github.com/redeclipse/snipergoth.git",
+ "https://github.com/redeclipse/sounds.git",
+ "https://github.com/redeclipse/textures.git",
+ "https://github.com/redeclipse/torley.git",
+ "https://github.com/redeclipse/trak.git",
+ "https://github.com/redeclipse/ulukai.git",
+ "https://github.com/redeclipse/unnamed.git",
+ "https://github.com/redeclipse/vanities.git",
+ "https://github.com/redeclipse/vegetation.git",
+ "https://github.com/redeclipse/weapons.git",
+ "https://github.com/regex.git",
+ "https://github.com/riperiperi/FSOMina.NET.git",
+ "https://github.com/riperiperi/FSOMonoGame.git",
+ "https://github.com/safe_numerics.git",
+ "https://github.com/sakra/cotire.git",
+ "https://github.com/sandsmark/genieutils.git",
+ "https://github.com/satoren/kaguya.git",
+ "https://github.com/schellingb/TinySoundFont.git",
+ "https://github.com/scipy/scipy-sphinx-theme.git",
+ "https://github.com/scope_exit.git",
+ "https://github.com/serialization.git",
+ "https://github.com/sfiera/glfw-gyp.git",
+ "https://github.com/sfiera/gmock-gyp.git",
+ "https://github.com/sfiera/gn-tools.git",
+ "https://github.com/sfiera/libmodplug-gyp.git",
+ "https://github.com/sfiera/libpng-gyp.git",
+ "https://github.com/sfiera/libsfz.git",
+ "https://github.com/sfiera/libsndfile-gyp.git",
+ "https://github.com/sfiera/libzipxx.git",
+ "https://github.com/sfiera/zlib-gn.git",
+ "https://github.com/signals2.git",
+ "https://github.com/singularity/singularity-music-lossless-extended.git",
+ "https://github.com/singularity/singularity-music-lossless-original.git",
+ "https://github.com/singularity/singularity-music.git",
+ "https://github.com/singularity/singularity-osx.git",
+ "https://github.com/singularity/singularity-windows.git",
+ "https://github.com/skaslev/gl3w.git",
+ "https://github.com/skyjake/assimp.git",
+ "https://github.com/smart_ptr.git",
+ "https://github.com/sort.git",
+ "https://github.com/spidermonkey-ff4.git",
+ "https://github.com/spirit.git",
+ "https://github.com/spring/CircuitAI.git",
+ "https://github.com/spring/HughAI.git",
+ "https://github.com/spring/KAIK.git",
+ "https://github.com/spring/Python.git",
+ "https://github.com/spring/Shard.git",
+ "https://github.com/spring/SpringMapConvNG.git",
+ "https://github.com/spring/pr-downloader.git",
+ "https://github.com/spring/pyunitsync.git",
+ "https://github.com/spring1944/submodule-core.git",
+ "https://github.com/spring1944/submodule-customCommands.git",
+ "https://github.com/spring1944/submodule-goals.git",
+ "https://github.com/spring1944/submodule-notAchili.git",
+ "https://github.com/spring1944/submodule-strongpoints.git",
+ "https://github.com/spurious/SDL-mirror.git",
+ "https://github.com/stacktrace.git",
+ "https://github.com/statechart.git",
+ "https://github.com/static_assert.git",
+ "https://github.com/stephank/villain.git",
+ "https://github.com/stepmania/ffmpeg.git",
+ "https://github.com/stepmania/googletest.git",
+ "https://github.com/stepmania/libtomcrypt.git",
+ "https://github.com/stepmania/libtommath.git",
+ "https://github.com/stepmania/ogg.git",
+ "https://github.com/stepmania/vorbis.git",
+ "https://github.com/syoyo/tinyobjloader.git",
+ "https://github.com/system.git",
+ "https://github.com/taisei-project/SDL_GameControllerDB.git",
+ "https://github.com/taisei-project/cglm.git",
+ "https://github.com/team-cube/open-cube-data.git",
+ "https://github.com/teeworlds/teeworlds-maps.git",
+ "https://github.com/teeworlds/teeworlds-translation.git",
+ "https://github.com/test.git",
+ "https://github.com/themanaworld/tmw-music.git",
+ "https://github.com/themanaworld/tmw-tools.git",
+ "https://github.com/thestk/stk.git",
+ "https://github.com/thread.git",
+ "https://github.com/throw_exception.git",
+ "https://github.com/timer.git",
+ "https://github.com/tinygettext/tinygettext.git",
+ "https://github.com/tokenizer.git",
+ "https://github.com/tti.git",
+ "https://github.com/tuple.git",
+ "https://github.com/type_erasure.git",
+ "https://github.com/type_index.git",
+ "https://github.com/type_traits.git",
+ "https://github.com/typeof.git",
+ "https://github.com/ubawurinna/freetype-windows-binaries.git",
+ "https://github.com/ublas.git",
+ "https://github.com/units.git",
+ "https://github.com/unordered.git",
+ "https://github.com/usineur/android-newraw.git",
+ "https://github.com/utility.git",
+ "https://github.com/uuid.git",
+ "https://github.com/variant.git",
+ "https://github.com/variant2.git",
+ "https://github.com/vmd.git",
+ "https://github.com/wave.git",
+ "https://github.com/whoozle/clunk.git",
+ "https://github.com/winapi.git",
+ "https://github.com/wmcbrine/PDCurses.git",
+ "https://github.com/wxWidgets/Catch.git",
+ "https://github.com/wxWidgets/libexpat.git",
+ "https://github.com/wxWidgets/libjpeg-turbo.git",
+ "https://github.com/wxWidgets/libpng.git",
+ "https://github.com/wxWidgets/libtiff.git",
+ "https://github.com/wxWidgets/nanosvg.git",
+ "https://github.com/wxWidgets/zlib.git",
+ "https://github.com/xiph/vorbis.git",
+ "https://github.com/xpressive.git",
+ "https://github.com/yap.git",
+ "https://github.com/zaps166/NFSIISE-ASM.git",
+ "https://github.com/zaps166/NFSIISE-CPP.git",
+ "https://github.com/zeromq/libzmq.git",
+ "https://github.com/zeux/pugixml.git",
+ "https://gitlab.axiodl.com/amuse.git",
+ "https://gitlab.axiodl.com/hecl-gui.git",
+ "https://gitlab.axiodl.com/hecl.git",
+ "https://gitlab.axiodl.com/jbus.git",
+ "https://gitlab.axiodl.com/kabufuda.git",
+ "https://gitlab.axiodl.com/nod.git",
+ "https://gitlab.axiodl.com/specter.git",
+ "https://gitlab.axiodl.com/tinyxml2.git",
+ "https://gitlab.axiodl.com/urde-translations.git",
+ "https://gitlab.com/cyberegoorg/CETech-externals.git",
+ "https://gitlab.com/cyberegoorg/CETech-icons-breeze.git",
+ "https://gitlab.com/hydren/fgeal",
+ "https://gitlab.com/hydren/futil",
+ "https://gitlab.com/hydren/libgeramun",
+ "https://gitlab.com/lierolibre/gvl.git",
+ "https://gitlab.com/lierolibre/lierolibre-data.git",
+ "https://gitlab.com/lierolibre/lierolibre-w32depends.git",
+ "https://gitlab.com/pingus/external/xdg.git",
+ "https://invent.kde.org/kde/rcharactersheet.git",
+ "https://invent.kde.org/kde/rolisteam-common.git",
+ "https://invent.kde.org/kde/rolisteam-diceparser.git",
+ "https://invent.kde.org/kde/rolisteam-plugins.git"
+]
--- a/code/archives.json
+++ b/code/archives.json
--- a/code/backlog.txt
+++ b/code/backlog.txt
@ -0,0 +1,493 @@
+ftp://ftp.tuxpaint.org/unix/ (all of them)
+ftp://ftp.tuxpaint.org/unix/x/
+http://antongerdelan.net/blog/ (other projects besides TestDrive)
+http://cdetect.sourceforge.net/
+http://circularstudios.com/
+http://cyxdown.free.fr/bs/
+http://cyxdown.free.fr/f2b/
+http://dead-code.org/home/
+http://e-adventure.e-ucm.es/login/index.php (games of eAdventure)
+http://ethernet.wasted.ch/
+http://evolonline.org/about
+http://game-editor.com/Main_Page
+http://giderosmobile.com/
+http://haxepunk.com/
+http://hcsoftware.sourceforge.net/jason-rohrer/ (various games there)
+http://hgm.nubati.net/
+http://icculus.org/
+http://icculus.org/asciiroth/
+http://icculus.org/avp/
+http://icculus.org/bitstream/
+http://icculus.org/blackshades/
+http://icculus.org/d2x/
+http://icculus.org/freespace2/
+http://icculus.org/freyja/
+http://icculus.org/hge-unix/ http://hge.relishgames.com/
+http://icculus.org/jugglemaster/
+http://icculus.org/pyddr/
+http://icculus.org/toby/
+http://iichantra.ru/en/
+http://insideastarfilledsky.net/
+http://lazerbears.wixsite.com/lazerbears/cr-editor
+http://libagar.org/agar/index.html.en
+http://libsdl-android.sourceforge.net/
+http://libsdl-android.sourceforge.net/ (Open source games ported to Android)
+http://lukaszjakowski.pl/ (games apart from uMario)
+http://m484games.ucoz.com/
+http://martindrapeau.github.io/backbone-game-engine/
+http://media.pyweek.org/dl/7/ (all of them)
+http://onscripter.osdn.jp/onscripter.html
+http://openapoc.pmprog.co.uk/
+http://pathfinder.wikia.com/wiki/Pathfinder_Roleplaying_Game
+http://playir.com/
+http://pyopengl.sourceforge.net/
+http://pypy.org/ (search for games)
+http://rcbasic.com/
+http://retrospec.sgn.net
+http://roguebasin.roguelikedevelopment.org/index.php?title=Main_Page
+http://sam.zoy.org/monsterz/
+http://senseis.xmp.net/?GoPlayingPrograms
+http://sio2interactive.com/
+http://slick.ninjacave.com/
+http://snowstorm.sourceforge.net/cgi-bin/site.cgi
+http://sol.gfxile.net/ambrose3d/index.html
+http://stage.gamecreation.org/ (all of them)
+http://stellarengine.nongnu.org/games.html (all the games)
+http://storygamecreator.webs.com/
+http://sumwars.org/wiki/Main_Page
+http://svn.assembla.com/svn/gdpl/
+http://turbu-rpg.com/
+http://web.archive.org/web/20071218214256/http://users.tkk.fi/~eye/roguelike/nethack.html
+http://wiki.scummvm.org/index.php/HOWTO-Fangames
+http://ww12.atomicgameengine.com
+http://www.13thmonkey.org/~boris/jgame/
+http://www.abandonia.com/en/games/138/Hidden+Agenda.html
+http://www.abandonia.com/en/games/25660/Crisis+in+the+Kremlin.html
+http://www.andengine.org/
+http://www.astrolog.org/labyrnth/daedalus.htm
+http://www.byond.com/
+http://www.cafu.de/
+http://www.celephais.net/fitzquake/
+http://www.codeskulptor.org/
+http://www.computerchess.org.uk/ccrl/4040/rating_list_all.html
+http://www.dianneandpaul.net/CSBwin/
+http://www.divgo.net/
+http://www.dosbox.com/download.php?main=1
+http://www.drpetter.se/project_sfxr.html
+http://www.dungeoncrawl.org/?d.l
+http://www.finkproject.org/index.php?phpLang=en (do they have games)
+http://www.freemmorpgmaker.com/
+http://www.gameplay3d.io/
+http://www.garagegames.com/products/torque-3d
+http://www.gnu.org/ (search for games)
+http://www.happypenguin.org/show?XKobo (offline)
+http://www.hard-light.net/
+http://www.ibiblio.org/pub/Linux/games/strategy/ (and all others)
+http://www.ifwiki.org/index.php/Main_Page
+http://www.indiedb.com/engines/aqua
+http://www.indiedb.com/engines/bloxel/downloads
+http://www.indiedb.com/engines/dragonsource
+http://www.indiedb.com/engines/gameleon
+http://www.indiedb.com/engines/mirage-legacy-2d-orpg-engine
+http://www.indiedb.com/engines/mugen
+http://www.indiedb.com/engines/prospekt-source
+http://www.indiedb.com/engines/ymir
+http://www.indiedb.com/engines/zweide/downloads
+http://www.isogenicengine.com/
+http://www.j4game.com/
+http://www.kiwijs.org/
+http://www.klinksoftware.com/
+http://www.layabox.com/en/
+http://www.lesfleursdunormal.fr/static/informatique/old/index_en.html
+http://www.linux-games.com/ (all there)
+http://www.linuxdevcenter.com/pub/a/linux/2003/04/24/exult.html
+http://www.mekwars.org/
+http://www.myandroidonline.com/category/games/
+http://www.netgore.com/
+http://www.newbreedsoftware.com/ (all)
+http://www.oletus.fi/games/
+http://www.paulscode.com/games/
+http://www.ph2.net/zugspiel/
+http://www.plasmapong.com/plasma-pong/plasma-pong/
+http://www.roguebasin.com (all)
+http://www.roguebasin.com/index.php?title=Advanced_Rogue
+http://www.roguebasin.com/index.php?title=Category:Roguelike_games
+http://www.roguebasin.com/index.php?title=UltraRogue
+http://www.roguebasin.com/index.php?title=XRogue
+http://www.rpgtoolkit.net/
+http://www.rtsoft.com/novashell/
+http://www.sandboxgamemaker.com/
+http://www.scirra.com/construct-classic
+http://www.siedler25.org/index.php?com=dynamic&mod=1&lang=en&PHPSESSID=0hmj4aug1fqa80jbk15kcafh71
+http://www.silversecond.com/WolfRPGEditor/
+http://www.sourceforge.net/projects/ika
+http://www.sourceforge.net/projects/lgames (all of them)
+http://www.sourceforge.net/projects/stratagus
+http://www.spheredev.org/
+http://www.surfline.ne.jp/hachi/xsoldier.html
+http://www.usgo.org/go-software
+http://www.vbgore.com/Main_Page
+http://www.zeldaroth.fr/us/zroth.php
+http://www.zsnes.com/ (emulator/platform)
+https://001gamecreator.com/
+https://agateau.com/games/
+https://aleph-one-marathon.github.io/
+https://alternativeto.net/ (query with only open source as option)
+https://app.assembla.com/spaces/openrpg/wiki
+https://app.assembla.com/spaces/Project_Valkyrie2/wiki
+https://appimage.github.io/categories/Game
+https://appimage.org/
+https://archive.codeplex.com/?p=turnota
+https://archive.codeplex.com/?p=voxeliq
+https://archive.org/details/Gna_code_hosting (all of them)
+https://blenderartists.org/t/devils-pinball/552785
+https://blends.debian.org/games/tasks/
+https://blends.debian.org/games/tasks/racing
+https://carrot.soulweaver.fi/#links
+https://cocainediesel.fun/
+https://code.google.com/archive/p/galacticthrone/
+https://code.google.com/archive/p/primitivewars/
+https://code.google.com/archive/p/scummgen/
+https://code.google.com/archive/p/tecbattle/
+https://code.launchpad.net/~flosoft/s25rttr/trunk
+https://conquertheworldbycode.wordpress.com/nostalgia-2/
+https://directory.fsf.org/wiki/Collection:GNOME_Games (all of them)
+https://dragengine.rptd.ch/
+https://eblong.com/zarf/twilight/index.html
+https://edu.kde.org/
+https://empiredirectory.net/
+https://empiredirectory.net/index.php/downloads/viewdownload/6-server-software/13-empire-server
+https://en.wikipedia.org/w/index.php?title=GNU_Backgammon&action=edit&redlink=1
+https://en.wikipedia.org/w/index.php?title=Golden_Age_of_Civilizations&action=edit&redlink=1
+https://en.wikipedia.org/w/index.php?title=Kdegames&action=edit&redlink=1
+https://en.wikipedia.org/w/index.php?title=Simon_Tatham%27s_Portable_Puzzle_Collection&action=edit&redlink=1
+https://en.wikipedia.org/w/index.php?title=Simple_Solitaire_Collection&action=edit&redlink=1
+https://en.wikipedia.org/w/index.php?title=Trigger_Rally&action=edit&redlink=1
+https://en.wikipedia.org/wiki/Crystal_Space
+https://en.wikipedia.org/wiki/GNOME_Games_Collection
+https://en.wikipedia.org/wiki/List_of_commercial_video_games_with_available_source_code
+https://en.wikipedia.org/wiki/M.U.G.E.N
+https://en.wikipedia.org/wiki/MUD#Spread (all there)
+https://en.wikipedia.org/wiki/MUD_client (all there)
+https://en.wikipedia.org/wiki/No_Gravity_(video_game)
+https://en.wikipedia.org/wiki/One_Hour_One_Life
+https://en.wikipedia.org/wiki/Passage_(video_game)
+https://en.wikipedia.org/wiki/Rocks%27n%27Diamonds
+https://en.wikipedia.org/wiki/Rogue_(video_game)
+https://en.wikipedia.org/wiki/Teeworlds
+https://enigma-dev.org/about.htm
+https://faq.tuxfamily.org/Games/En
+https://fedoraproject.org/wiki/SIGs/Games#List_of_games_we_will_NOT_package
+https://flathub.org/home (use it for Linux packaging) / https://flathub.org/apps/category/Game
+https://forums.scummvm.org/viewtopic.php?t=13512&highlight=open+source
+https://freegamer.blogspot.com (maybe there is something interesting)
+https://futurepinball.com/
+https://fydo.net/projects/island-rescue (and other projects there)
+https://gamejolt.com/ (search there)
+https://games.kde.org/ (all of them)
+https://games.kde.org/old/kde_arcade.php
+https://gdevelop-app.com/
+https://github.com/00-Evan/shattered-pixel-dungeon
+https://github.com/00-Evan/shattered-pixel-dungeon-gdx
+https://github.com/acedogblast/Project-Uranium-Godot
+https://github.com/AdaDoom3/AdaDoom3
+https://github.com/AdamsLair/duality
+https://github.com/Alzter/TuxBuilder
+https://github.com/amerkoleci/Vortice.Windows
+https://github.com/arturkot/the-house-game
+https://github.com/asweigart/PythonStdioGames
+https://github.com/AtomicGameEngine/AtomicGameEngine
+https://github.com/atphalix/nexuiz
+https://github.com/azhirnov/FrameGraph
+https://github.com/benl23x5/gloss
+https://github.com/bernardosulzbach/dungeon
+https://github.com/bioglaze/aether3d
+https://github.com/bomblik/BlockOut_II_PSVITA
+https://github.com/bsmr-games (also contains copies)
+https://github.com/Calinou/awesome-gamedev
+https://github.com/CatacombGames/
+https://github.com/cflewis/Infinite-Mario-Bros
+https://github.com/Chluverman/android-gltron
+https://github.com/codenamecpp/carnage3d
+https://github.com/collections/game-engines (only OS)
+https://github.com/collections/javascript-game-engines (only OS)
+https://github.com/collections/pixel-art-tools (tools)
+https://github.com/collections/productivity-tools (maybe for statistical purposes)
+https://github.com/collections/software-development-tools (maybe we can apply some of them across the board)
+https://github.com/collections/tools-for-open-source (maybe we can apply some)
+https://github.com/collections/web-games (only OS)
+https://github.com/collinhover/kaiopua
+https://github.com/cookgreen/Yuris-Revenge
+https://github.com/Cortrah/SpaceOperaDesign, https://github.com/Cortrah/SpaceOperaRuby/blob/master/design/turnstyles.md
+https://github.com/cping/LGame
+https://github.com/cymonsgames/CymonsGames (collection)
+https://github.com/DaanVanYperen/artemis-odb-contrib
+https://github.com/DeflatedPickle/FAOSDance
+https://github.com/delaford/game
+https://github.com/Donerkebap13/DonerComponents
+https://github.com/Drasky-Vanderhoff/CommonDrops
+https://github.com/EaW-Team/equestria_dev
+https://github.com/ec-/Quake3e
+https://github.com/EliFUT/android
+https://github.com/elishacloud/Silent-Hill-2-Enhancements
+https://github.com/endlesstravel/Love2dCS
+https://github.com/enduro2d/enduro2d
+https://github.com/enginmanap/limonEngine
+https://github.com/ErikLetson/torso-ninja
+https://github.com/ErikLetson/torso-ninja-2
+https://github.com/ezEngine/ezEngine
+https://github.com/fallahn/xygine
+https://github.com/FaronBracy/RogueSharp
+https://github.com/fegennari/3DWorld
+https://github.com/flathub (all repositories which are games there)
+https://github.com/FUSEEProjectTeam/Fusee
+https://github.com/fynnfluegge/oreon-engine
+https://github.com/Galapix/galapix
+https://github.com/gamearians
+https://github.com/GamedevFramework/gf
+https://github.com/gan74/Yave
+https://github.com/GlPortal/RadixEngine
+https://github.com/gnFur/Monofoxe
+https://github.com/GNOME/quadrapassel
+https://github.com/godot-extended-libraries/godot-next
+https://github.com/godot-mega-man/Mega-Man-Engine
+https://github.com/godotengine/godot-design
+https://github.com/grantjenks/free-python-games (check all)
+https://github.com/H-uru/Plasma
+https://github.com/Hotride/OrionUO
+https://github.com/hparcells/cards-against-humanity
+https://github.com/i42output/neoGFX
+https://github.com/ictrobot/Cubes
+https://github.com/id-Software
+https://github.com/Illation/ETEngine
+https://github.com/Im-dex/xray-162
+https://github.com/jasonrohrer (add gits to his games)
+https://github.com/jatinmandav/Gaming-in-Python
+https://github.com/JohanLi/uncharted-waters-2
+https://github.com/JohnLamontagne/Lunar-Engine
+https://github.com/junkdog/artemis-odb
+https://github.com/jwvhewitt/gearhead-caramel.git
+https://github.com/KDE
+https://github.com/kduske/TrenchBroom
+https://github.com/kimkulling/osre
+https://github.com/klaussilveira/ioquake3.js
+https://github.com/Kosmonaut3d/DeferredEngine
+https://github.com/LgLinus/StrategyGame
+https://github.com/libretro/libretro-chailove
+https://github.com/libretro/libretro-prboom
+https://github.com/ligurio/awesome-ttygames
+https://github.com/luciopanepinto/pacman
+https://github.com/MarcoLizza/tofu-engine
+https://github.com/MarilynDafa/Bulllord-Engine
+https://github.com/MatthewTheGlutton/HideousDestructor
+https://github.com/McKay42/McOsu
+https://github.com/megamarc/Tilengine
+https://github.com/mewo2/terrain
+https://github.com/mofr/Diablerie
+https://github.com/moonwards1/Moonwards-Virtual-Moon
+https://github.com/morganbengtsson/mos
+https://github.com/MrFrenik/Enjon
+https://github.com/MultiCraft/MultiCraft
+https://github.com/MustaphaTR/Romanovs-Vengeance
+https://github.com/ogarcia/opensudoku
+https://github.com/OGRECave/scape
+https://github.com/OpenMandrivaAssociation
+https://github.com/OpenMandrivaAssociation/nexuiz/blob/master/nexuiz.spec
+https://github.com/OpenRA/d2
+https://github.com/OpenRA/OpenRAModSDK
+https://github.com/opensourcedesign
+https://github.com/opentrack/opentrack
+https://github.com/OSSGames
+https://github.com/OSSGames (all there, but we should have them already)
+https://github.com/Patapom/GodComplex
+https://github.com/PavelDoGreat/WebGL-Fluid-Simulation
+https://github.com/perbone/luascript
+https://github.com/pixijs/pixi.js
+https://github.com/pld-linux
+https://github.com/pld-linux/nexuiz/blob/master/nexuiz.spec
+https://github.com/PolygonTek/BlueshiftEngine
+https://github.com/ppizarror/pygame-menu
+https://github.com/prime31/Nez-Samples
+https://github.com/psuong/ig-developer-console
+https://github.com/qiciengine/qiciengine
+https://github.com/Quaver/Wobble
+https://github.com/rakugoteam/Rakugo
+https://github.com/rds1983/Myra
+https://github.com/redomar/JavaGame
+https://github.com/Renanse/Ardor3D
+https://github.com/RetroAchievements/RALibretro
+https://github.com/RetroAchievements/RAWeb
+https://github.com/rizwan3d/MotoGameEngine
+https://github.com/rlguy/FantasyMapGenerator
+https://github.com/rotators/Fo1in2
+https://github.com/RoxasShadow/Sottaceto
+https://github.com/rramsden/ymir
+https://github.com/sabresaurus/SabreCSG
+https://github.com/SadConsole/SadConsole
+https://github.com/salvadorc17/Prince-Monogame
+https://github.com/SanderMertens/flecs
+https://github.com/saniv/free-game-art/blob/master/foss-copyright-infringement-records.md
+https://github.com/scp-fs2open/fs2open.github.com
+https://github.com/search?p=1&q=sunrider&type=Repositories, sunrider
+https://github.com/senior-sigan/WHY_CPP
+https://github.com/septag/glslcc
+https://github.com/septag/rizz
+https://github.com/skypjack/entt
+https://github.com/smlinux/nexuiz
+https://github.com/SPC-Some-Polish-Coders/PopHead
+https://github.com/SPC-Some-Polish-Coders/PopHead/
+https://github.com/stackos/Viry3D
+https://github.com/stolencatkarma/CataclysmLD
+https://github.com/Suprcode/mir3-zircon
+https://github.com/Sygmei/ObEngine
+https://github.com/tainicom/Aether.Physics2D
+https://github.com/tangziwen/Cube-Engine
+https://github.com/the-insulines
+https://github.com/theaigames
+https://github.com/TheGameCreators/GameGuruRepo
+https://github.com/Tinob/Ishiiruka (https://github.com/shiiion/Ishiiruka, https://github.com/SirMangler/PrimeHack-Updater)
+https://github.com/tizian/Cendric2
+https://github.com/TomBebb/awe
+https://github.com/topics/top-down-shooter
+https://github.com/untakenstupidnick/nbsdgames (Blockout II)
+https://github.com/untakenstupidnick/nbsdgames (Cross-platform ncurses/pdcurses based games under active development)
+https://github.com/Venom0us/Emberpoint
+https://github.com/videogamepreservation
+https://github.com/vocollapse/Blockinger
+https://github.com/WagicProject/wagic
+https://github.com/wesnoth/haldric
+https://github.com/WohlSoft/PGE-Project
+https://github.com/xrOxygen/xray-oxygen
+https://github.com/xtreme8000/BetterSpades
+https://github.com/YuriiSalimov/15-puzzle
+https://github.com/Zal0/ZGB
+https://github.com/zurn/zapper (or any other tapper clone)
+https://gitlab.com/LibreGames
+https://gitlab.com/nyov/nyovs-nexuiz
+https://gitlab.com/vgstation/vgstation-old (and vgstation in general)
+https://gitlab.com/xonlegacy/xonlegacy
+https://gitlab.openrsc.com/open-rsc
+https://gitorious.org/index-list.html (are there other interesting games)
+https://gottcode.org/ (all)
+https://itch.io/ (scrape, look for open source)
+https://jcrpg.blogspot.de/
+https://jotd.pagesperso-orange.fr/ (several games)
+https://kde.org/applications/games/ (all of them)
+https://lgdb.org/ (ask on reddit for archived version)
+https://lgdb.org/engine/rawgl
+https://lgdb.org/game/katawa_shoujo
+https://lgdb.org/game/our-personal-space
+https://lgdb.org/game/sunrider
+https://lgdb.org/game/visions_other_side
+https://libregamewiki.org/Alexei:_Part_IX
+https://libregamewiki.org/CameliaGirls
+https://libregamewiki.org/Childish_Cannoneer
+https://libregamewiki.org/index.php?title=Libregamewiki_talk:Community_Portal&oldid=25410#Some_long_term_ideas
+https://libregamewiki.org/Libregamewiki:Suggested_games#Likely_sources_for_more_free_games
+https://lmemsm.dreamwidth.org/8013.html (List of some of my favorite Open Source games)
+https://love2d.org/forums/viewforum.php?f=14 (check them if time)
+https://odr.chalmers.se/handle/20.500.12380/219006
+https://osdn.net/softwaremap/trove_list.php?form_cat=80
+https://packages.debian.org/sid/games/etw
+https://pixeldoctrine.com/index.html (more than just slime volleyball)
+https://play.google.com/store/apps/dev?id=8387589286898375037 (where is the source, not open source otherwise)
+https://playcanvas.com/
+https://pygame.org/project-Dododu-1134-.html
+https://pygame.org/project-Lands+of+Elderlore-294-.html
+https://pygame.org/project-LOF-2296-.html
+https://pygame.org/project-Mines+of+Elderlore-674-.html
+https://pygame.org/project-Monstrosity-2993-.html
+https://pygame.org/project-Phantasy+Star+Rebirth-1704-.html
+https://pygame.org/project-Ransack-2408-.html
+https://pygame.org/project-The+Cabbages-2077-.html
+https://pypi.org/project/CherryPy/
+https://pypi.org/project/neteria/
+https://pypi.org/project/networkx/
+https://pypi.org/project/PyAMF/
+https://pypi.org/project/simplejson/
+https://pypi.org/project/SQLAlchemy/
+https://pypi.org/project/TurboGears/
+https://pyweek.org/4/entries/ (Ascent of Justice)
+https://repology.org/ (for looking up other repositories of games)
+https://revolutionarygamesstudio.com/ Thrive
+https://rpgmaker.net/engines/rpg20xx/
+https://rpgmaker.net/games/2664/
+https://salsa.debian.org/games-team/etw
+https://scorched-moon.github.io/site/
+https://scratch.mit.edu/ (https://en.scratch-wiki.info/wiki/Scratch_Source_Code)
+https://senseis.xmp.net/?GNUGo
+https://sites.google.com/site/syllablesoftware/games (List of games (with screenshots) that have been ported to Syllable OS)
+https://sourceforge.net/directory/games/games/os:windows/
+https://sourceforge.net/projects/dices/
+https://sourceforge.net/projects/dsa-hl.berlios/
+https://sourceforge.net/projects/lgames/files/barrage/
+https://sourceforge.net/projects/moeng/
+https://sourceforge.net/projects/openzelda/
+https://sourceforge.net/projects/palomino-sim/ (http://www.jimbrooks.org/archive/software/palomino/)
+https://sourceforge.net/projects/pipmak/
+https://sourceforge.net/projects/qua/
+https://sourceforge.net/projects/quest/
+https://sourceforge.net/projects/roguelike/ (collection of other rogue-like sources https://sourceforge.net/projects/roguelike/files/, https://web.archive.org/web/20180418191859/http://rogue.rogueforge.net/)
+https://sourceforge.net/projects/rpgstudio/
+https://sourceforge.net/projects/rpgtoolkit/
+https://sourceforge.net/projects/rptools.berlios/
+https://sourceforge.net/projects/solarconflict/
+https://sourceforge.net/projects/spaxehilk/
+https://sourceforge.net/projects/sphere/
+https://sourceforge.net/projects/tbswwengine/
+https://sourceforge.net/projects/terratenebrae/
+https://sourceforge.net/projects/tyrant/
+https://sourceforge.net/projects/untitled-rpg/
+https://sourceforge.net/projects/wesnoth-on-android/
+https://sourceforge.net/projects/wme/
+https://sourceforge.net/projects/wowrl/
+https://sourceforge.net/projects/wwiig/
+https://sourceforge.net/projects/ztab/
+https://stabyourself.net/ (all projects there)
+https://stendhalgame.org/wiki/Marauroa
+https://tangramgames.dk/games/ (all)
+https://threejs.org/
+https://unity.com/
+https://verge-rpg.com/
+https://voltisoft.com/miragelegacy
+https://web.archive.org/web/20060411054656/http://home.gna.org/oomadness/en/index.html
+https://web.archive.org/web/20150526225753/http://sourceforge.net/projects/mameosx/
+https://web.archive.org/web/20150527150705/http://sourceforge.net/p/forge/community-docs/SourceForge%20Games/ (already done)
+https://web.archive.org/web/20170714065728/http://users.olis.net.au/zel/
+https://wiki.debian.org/Games/Unsuitable
+https://www.adventuregamestudio.co.uk/
+https://www.allegro.cc/depot/strategy/ (and all other genres there)
+https://www.b3dgs.com/v7/page.php?lang=en&section=projects
+https://www.bennugd.org/
+https://www.blender.org/
+https://www.bsframework.io/
+https://www.choiceofgames.com/make-your-own-games/choicescript-intro/
+https://www.duality2d.net
+https://www.fandom.com/topics/games (look for media links, example https://creatures.fandom.com/wiki/Openc2e)
+https://www.fosshub.com/about.html
+https://www.fosslinux.com/
+https://www.freegamesutopia.com/
+https://www.frictionalgames.com/forum/forum-28.html
+https://www.glitchthegame.com/ (Glitch the game, by Tiny Speck Inc. public domain)
+https://www.libretro.com/
+https://www.libsdl.org/
+https://www.moddb.com/ (scrape, look for open source)
+https://www.moddb.com/engines/pyrogenesis
+https://www.moddb.com/engines/sage-strategy-action-game-engine
+https://www.moddb.com/mods/ (search for all)
+https://www.musztardasarepska.pl/wgdown/
+https://www.ness-engine.com/
+https://www.openhub.net/ (search for games)
+https://www.phpbb.com/
+https://www.piston.rs/
+https://www.reddit.com/r/opensource/comments/cs6m3y/a_list_of_fossrelated_projects_people_to_follow/
+https://www.reddit.com/r/opensourcegames/
+https://www.reddit.com/r/opensourcegames/comments/celw6c/top_3_open_source_pinball_games_2015/
+https://www.renpy.org/ (also all projects linked there)
+https://www.scirra.com/construct2
+https://www.seul.org/~grumbel/tmp/clanlib/games.html
+https://www.tapatalk.com/groups/imperilist/
+https://www.tuxfamily.org/ (if there are games)
+https://www.wurfelengine.net/
+https://zdoom.org/downloads (gzdoom, lzdoom)
+https://zope.readthedocs.io/en/latest/
--- a/code/conversions/aatraders.json
+++ b/code/conversions/aatraders.json
@ -0,0 +1,18 @@
+["https://sourceforge.net/projects/aatrade/files/AATraders%200.4x/aatrade_0.40_re-release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.4x/aatrade_0.40.0_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.31.0/aatrade_0.31.0_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.3/aatrade_0.30.3_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.2/aatrade_0.30.2_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.1/aatrade_0.30.1_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30/aatrade_0.30_release.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AAtraders%200.21a%20Final%20Patch/AATRade_0.21a_final.zip/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AATraders%200.21%20Release/aatrade-0.21.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AATrade%200.20%20Release/aatrade-0.20.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%200.14.1%20w_Profiles/aatrade-0.14.1.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%200.14%20w_Profiles/aatrade-0.14.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%20v0.13/aatrade-0.13.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Recommended%20Release%20v0.12/aatrade-0.12.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%20v0.11/aatrade-0.11.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Initial%20Release%20v0.10/aatrade-0.10.tar.gz/download",
+"https://sourceforge.net/projects/aatrade/files/OldFiles/aatrade0.21final.zip/download",
+"https://sourceforge.net/projects/aatrade/files/OldFiles/aatrade0.21patched.zip/download"]
--- a/code/conversions/aatraders_source_release_to_git.py
+++ b/code/conversions/aatraders_source_release_to_git.py
@ -0,0 +1,156 @@
+"""
+    Downloads source releases from Sourceforge and puts them into a git repository
+"""
+
+import json
+import datetime
+from utils.utils import *
+
+def special_aatrade_package_extraction(source):
+    """
+    Unpacks "aatrade_packages".
+
+    If the folder `source` contains at least one entry whose name starts with 'aatrade_package', all other
+    entries are deleted, then every remaining entry is extracted into `source` (first tried as tar archive,
+    then as zip archive) and removed afterwards. Folders without such an entry are left untouched.
+
+    :param source: Path of the folder holding an extracted release.
+    """
+    prefix = 'aatrade_package'
+    files = os.listdir(source)
+    if not any(x.startswith(prefix) for x in files):
+        # not the special case, nothing to do
+        return
+    print('aatrade package extraction of {}'.format(source))
+
+    # first delete all, that do not begin with the package name
+    for file in files:
+        if not file.startswith(prefix):
+            os.remove(os.path.join(source, file))
+
+    # second extract all those which are left, removing them too
+    for file in os.listdir(source):
+        package = os.path.join(source, file)
+        try:
+            extract_archive(package, source, 'tar')
+        except Exception:
+            # extraction as tar failed, retry as zip; unlike a bare "except:" this does not
+            # swallow KeyboardInterrupt/SystemExit
+            extract_archive(package, source, 'zip')
+        os.remove(package)
+
+
+if __name__ == '__main__':
+
+    # Overall flow: read the release urls from aatraders.json, download and extract every release, sort them
+    # by the latest modification date of their files and commit them one by one into a new Git repository.
+
+    # base path is the directory containing this file
+    base_path = os.path.abspath(os.path.dirname(__file__))
+    print('base path={}'.format(base_path))
+
+    # create the downloads folder if it does not exist yet
+    archive_path = os.path.join(base_path, 'downloads')
+    if not os.path.exists(archive_path):
+        os.mkdir(archive_path)
+
+    # load source releases urls
+    with open(os.path.join(base_path, 'aatraders.json'), 'r') as f:
+        urls = json.load(f)
+    print('will process {} urls'.format(len(urls)))
+    if len(urls) != len(set(urls)):
+        raise RuntimeError("urls list contains duplicates")
+
+    # determine file archives from urls (the urls end with '<file name>/download', so take the second last part)
+    archives = [x.split('/')[-2] for x in urls]
+    if len(archives) != len(set(archives)):
+        raise RuntimeError("files with duplicate archives, cannot deal with that")
+
+    # determine version from file name
+    versions = [determine_archive_version_generic(x, leading_terms=['aatrade_', 'aatrade-', 'aatrade'], trailing_terms=['.zip', '.tar.gz', '_release']) for x in archives]
+    for version in versions:
+        print(version)
+
+    # extend archives to full paths
+    archives = [os.path.join(archive_path, x) for x in archives]
+
+    # download them
+    print('download source releases')
+    for url, destination in zip(urls, archives):
+        # only if not yet existing
+        if os.path.exists(destination):
+            continue
+        # download
+        print('  download {}'.format(os.path.basename(destination)))
+        download_url(url, destination)
+
+    # extract them (each archive into a folder next to it, named like the archive plus '-extracted')
+    print('extract downloaded archives')
+    extracted_archives = [x + '-extracted' for x in archives]
+    for archive, extracted_archive in zip(archives, extracted_archives):
+        print('  extract {}'.format(os.path.basename(archive)))
+        # only if not yet existing
+        if os.path.exists(extracted_archive):
+            continue
+        os.mkdir(extracted_archive)
+        # extract
+        extract_archive(archive, extracted_archive, detect_archive_type(archive))
+
+    # go up in unzipped archives until the very first non-empty folder
+    extracted_archives = [strip_wrapped_folders(x) for x in extracted_archives]
+
+    # special 'aatrade_packageX' treatment
+    for extracted_archive in extracted_archives:
+        special_aatrade_package_extraction(extracted_archive)
+
+    # calculate size of folder
+    sizes = [folder_size(x) for x in extracted_archives]
+
+    # determine date (dates are passed to fromtimestamp below, dates_strings are formatted as YYYY-MM-DD)
+    dates = [determine_latest_last_modified_date(x) for x in extracted_archives]
+    dates_strings = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d') for x in dates]
+    # if len(dates_strings) != len(set(dates_strings)):
+    #     raise RuntimeError("Some are on the same day, cannot cope with that")
+
+    # gather all important stuff in one list and sort by dates and throw those out where size is not in range
+    # (tuple layout: 0=url, 1=extracted folder, 2=version, 3=date, 4=date string, 5=size)
+    db = list(zip(urls, extracted_archives, versions, dates, dates_strings, sizes))
+    db.sort(key=lambda x:x[3])
+
+    # NOTE(review): the unit of the size limits depends on folder_size, presumably bytes - confirm
+    size_range = [5e6, float("inf")] # set to None if not desired
+    if size_range:
+        db = [x for x in db if size_range[0] <= x[5] <= size_range[1]]
+
+    print('proposed order')
+    for url, _, version, _, date, size in db:
+        print('  date={} version={} size={}'.format(date, version, size))
+
+    # git init (an already existing 'aatrade' folder is deleted first, the repository is always built from scratch)
+    git_path = os.path.join(base_path, 'aatrade')
+    if os.path.exists(git_path):
+        shutil.rmtree(git_path)
+    os.mkdir(git_path)
+    os.chdir(git_path)
+    subprocess_run(['git', 'init'])
+    subprocess_run(['git', 'config', 'user.name', 'Trilarion'])
+    subprocess_run(['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com'])
+
+    # now process revision by revision
+    print('process revisions')
+    git_author = 'akapanamajack, tarnus <akapanamajack_tarnus@user.sourceforge.net>'
+    # note: archive_path is rebound here, inside the loop it is the extracted folder of a single release
+    for url, archive_path, version, _, date, _ in db:
+        print('  process version={}'.format(version))
+
+        # clear git path without deleting .git
+        print('    clear git')
+        for item in os.listdir(git_path):
+            # ignore '.git'
+            if item == '.git':
+                continue
+            item = os.path.join(git_path, item)
+            if os.path.isdir(item):
+                shutil.rmtree(item)
+            else:
+                os.remove(item)
+
+        # copy unpacked source files to git path
+        print('copy to git')
+        copy_tree(archive_path, git_path)
+
+        # update the git index (add unstaged, remove deleted, ...)
+        print('git add')
+        os.chdir(git_path)
+        subprocess_run(['git', 'add', '--all'])
+
+        # perform the commit (original authors as author, date string of the release as commit date)
+        print('git commit')
+        os.chdir(git_path)
+        message = 'version {} ({}) on {}'.format(version, url, date)
+        print('  message "{}"'.format(message))
+        subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(git_author), '--date={}'.format(date)])
--- a/code/conversions/dfend_reloaded_source_releases_to_git.py
+++ b/code/conversions/dfend_reloaded_source_releases_to_git.py
@ -0,0 +1,103 @@
+"""
+Converts the source releases of D-Fend Reloaded to a Git.
+"""
+
+import sys
+import datetime
+import re
+from utils.utils import *
+
+
+def subprocess_run(cmd):
+    """
+    Runs a command, prints its standard output and terminates the script if the command fails.
+
+    :param cmd: Command that is handed unchanged to subprocess.run (a list of program and arguments).
+
+    On a non-zero return code the code, the command and the standard error output are printed and the interpreter
+    is left via sys.exit(-1). Nothing is returned.
+    """
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode:
+        print("error {} in call {}".format(result.returncode, cmd))
+        # errors='replace': a non-ASCII byte in the output must not raise UnicodeDecodeError and hide the real error
+        print(result.stderr.decode('ascii', errors='replace'))
+        sys.exit(-1)
+    else:
+        print('  output: {}'.format(result.stdout.decode('ascii', errors='replace')))
+
+def single_release(zip):
+    """
+    Commits the content of one source release archive as a new Git revision.
+
+    :param zip: File name of the release archive inside source_releases_path (the name shadows the builtin zip, it
+        is kept to leave the signature unchanged).
+    :raises ValueError: If no version can be extracted from the file name.
+
+    Uses the module level names version_regex, source_releases_path, git_path and author, which are set in the main
+    section. The commit date is the latest modification time of all unpacked files.
+    """
+
+    # get version
+    matches = version_regex.findall(zip)
+    if not matches:
+        raise ValueError('cannot extract a version from file name "{}"'.format(zip))
+    version = matches[0]
+    print(' version {}'.format(version))
+    ftp_link = 'https://sourceforge.net/projects/dfendreloaded/files/D-Fend%20Reloaded/D-Fend%20Reloaded%20{}/'.format(version)
+
+    # clear git path without deleting '.git'
+    for item in os.listdir(git_path):
+        # ignore '.git'
+        if item == '.git':
+            continue
+        item = os.path.join(git_path, item)
+        if os.path.isdir(item):
+            shutil.rmtree(item)
+        else:
+            os.remove(item)
+
+    # unpack zip to git path
+    unzip(os.path.join(source_releases_path, zip), git_path)
+
+    # get date from the files (latest of last modified)
+    latest_last_modified = 0
+    for dirpath, dirnames, filenames in os.walk(git_path):
+        if dirpath == git_path and '.git' in dirnames:
+            # prune exactly the repository folder; a prefix test on the path would also skip folders like '.github'
+            dirnames.remove('.git')
+        for filename in filenames:
+            lastmodified = os.path.getmtime(os.path.join(dirpath, filename))
+            latest_last_modified = max(latest_last_modified, lastmodified)
+
+    original_date = datetime.datetime.fromtimestamp(latest_last_modified).strftime('%Y-%m-%d')
+    print(' last modified: {}'.format(original_date))
+
+    # update the git index (add unstaged, remove deleted, ...)
+    print('git add')
+    os.chdir(git_path)
+    subprocess_run(['git', 'add', '--all'])
+
+    # perform the commit (the working directory is still git_path)
+    print('git commit')
+    message = 'version {} from {} ({})'.format(version, original_date, ftp_link)
+    print('  message "{}"'.format(message))
+    subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(author), '--date={}'.format(original_date)])
+
+
+if __name__ == "__main__":
+
+    # general properties
+    author = 'alexanderherzog <alexanderherzog@users.sourceforge.net>'
+    version_regex = re.compile(r"Reloaded-(.*)-", re.MULTILINE)
+
+    # get paths (absolute, because the working directory is changed below and a relative path would break then)
+    source_releases_path = os.path.abspath(sys.argv[1])
+    git_path = os.path.join(source_releases_path, 'git')
+
+    # recreate git path
+    recreate_directory(git_path)
+    os.chdir(git_path)
+    # the command is given as list: without a shell a single string is only split into arguments on Windows
+    subprocess_run(['git', 'init'])
+
+    # get all files in the source releases path and sort them
+    zips = os.listdir(source_releases_path)
+    zips = [file for file in zips if os.path.isfile(os.path.join(source_releases_path, file))]
+    print('found {} source releases'.format(len(zips)))
+    zips.sort()
+
+    # iterate over them and do revisions (progress is counted from 1)
+    for counter, release in enumerate(zips, start=1):
+        print('{}/{}'.format(counter, len(zips)))
+        single_release(release)
--- a/code/conversions/dungeon_crawl_source_releases_to_git.py
+++ b/code/conversions/dungeon_crawl_source_releases_to_git.py
@ -0,0 +1,502 @@
+"""
+Helps me with importing source revisions into Git
+"""
+
+import shutil
+import os
+import subprocess
+import tarfile
+import zipfile
+import distutils.dir_util
+import sys
+import urllib.request
+import tempfile
+import datetime
+
+
+def extract_sources(source_path, type, destination_path):
+    """
+    Extracts a zip or tar archive to a destination path.
+
+    :param source_path: Path of the archive file.
+    :param type: File extension of the archive, either '.tbz2' (read with tarfile) or '.zip' (the name shadows the
+        builtin type, it is kept to leave the signature unchanged).
+    :param destination_path: Folder the content is extracted to.
+    :raises ValueError: If type is none of the supported extensions.
+
+    The current working directory is not changed.
+    """
+    if type == '.tbz2':
+        # the context manager closes the archive again, the target folder is passed instead of changing into it
+        with tarfile.open(source_path, 'r') as archive:
+            archive.extractall(destination_path)
+    elif type == '.zip':
+        with zipfile.ZipFile(source_path, 'r') as archive:
+            archive.extractall(destination_path)
+    else:
+        # extracting nothing silently would later result in a commit of an empty tree
+        raise ValueError('unsupported archive type "{}"'.format(type))
+
+def subprocess_run(cmd):
+    """
+    Runs a command, prints its standard output and terminates the script if the command fails.
+
+    :param cmd: Command that is handed unchanged to subprocess.run (a list of program and arguments).
+
+    On a non-zero return code the code, the command and the standard error output are printed and the interpreter
+    is left via sys.exit(-1). Nothing is returned.
+    """
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode:
+        print("error {} in call {}".format(result.returncode, cmd))
+        # errors='replace': a non-ASCII byte in the output must not raise UnicodeDecodeError and hide the real error
+        print(result.stderr.decode('ascii', errors='replace'))
+        sys.exit(-1)
+    else:
+        print('  output: {}'.format(result.stdout.decode('ascii', errors='replace')))
+
+def single_revision():
+    """
+    Downloads one source archive, unpacks it and commits its content as a new Git revision.
+
+    Works on the module level names ftp_link, version, original_date, author, git_path and temp_path, which are set
+    in the main section. If original_date is None, it is replaced (as a global) by the latest modification date of
+    the unpacked files.
+    """
+    # remove temp path completely and create again
+    print('clear temp')
+    if os.path.isdir(temp_path):
+        shutil.rmtree(temp_path)
+    os.mkdir(temp_path)
+
+    # download archive
+    print('download archive from ftp')
+    with urllib.request.urlopen(ftp_link) as response:
+        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+            shutil.copyfileobj(response, tmp_file)
+
+    # unpack source files and delete archive (also if unpacking fails, the download must not be left behind)
+    print('extract {} to temp'.format(os.path.basename(ftp_link)))
+    try:
+        extract_sources(tmp_file.name, os.path.splitext(ftp_link)[1], temp_path)
+    finally:
+        os.remove(tmp_file.name)
+
+    # we need to go down in temp_path as long as a folder contains nothing but a single sub-folder
+    nonempty_temp_path = temp_path
+    names = os.listdir(nonempty_temp_path)
+    # the isdir test stops at a single file, os.listdir would fail on it
+    while len(names) == 1 and os.path.isdir(os.path.join(nonempty_temp_path, names[0])):
+        nonempty_temp_path = os.path.join(nonempty_temp_path, names[0])
+        names = os.listdir(nonempty_temp_path)
+    print('  working in "{}" relative to temp'.format(os.path.relpath(nonempty_temp_path, temp_path)))
+
+    # if no original date is indicated, get it from the files (latest of last modified)
+    global original_date
+    if original_date is None:
+        latest_last_modified = 0
+        for dirpath, dirnames, filenames in os.walk(nonempty_temp_path):
+            for filename in filenames:
+                filepath = os.path.join(dirpath, filename)
+                lastmodified = os.path.getmtime(filepath)
+                if lastmodified > latest_last_modified:
+                    latest_last_modified = lastmodified
+        original_date = datetime.datetime.fromtimestamp(latest_last_modified).strftime('%Y-%m-%d')
+        print('  extracted original date from files: {}'.format(original_date))
+
+    # clear git path without deleting '.git'
+    print('clear git')
+    for item in os.listdir(git_path):
+        # ignore '.git'
+        if item == '.git':
+            continue
+        item = os.path.join(git_path, item)
+        if os.path.isdir(item):
+            shutil.rmtree(item)
+        else:
+            os.remove(item)
+
+    # copy unpacked source files to git path
+    print('copy to git')
+    distutils.dir_util.copy_tree(nonempty_temp_path, git_path)
+
+    # update the git index (add unstaged, remove deleted, ...)
+    print('git add')
+    os.chdir(git_path)
+    subprocess_run(['git', 'add', '--all'])
+
+    # perform the commit (the working directory is still git_path)
+    print('git commit')
+    message = 'version {} ({}) on {}'.format(version, ftp_link, original_date)
+    print('  message "{}"'.format(message))
+    # subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(author), '--date={}'.format(original_date), '--dry-run'])
+    subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(author), '--date={}'.format(original_date)])
+
+
+if __name__ == "__main__":
+
+    # Paths are built with os.path.join (a literal backslash is only a path separator on Windows) and made absolute,
+    # because the working directory is changed while a revision is processed.
+    git_path = os.path.abspath(os.path.join('..', 'crawl'))  # must be initialized with 'git init' before
+    temp_path = os.path.abspath(os.path.join('..', 'temp'))
+    author = 'Linley Henzell et al 1997-2005 <www.dungeoncrawl.org>'  # is used for all commits
+
+
+    # 1.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/1.1.x/src/dc110f-src.tbz2'
+    # version = '110f'
+    # original_date = '1997-10-04'  # format yyyy-mm-dd, according to versions.txt in version 400b26
+    # single_revision()
+
+    # 2.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/2.7.x/src/dc270f-src.tbz2'
+    # version = '270f'
+    # original_date = '1998-09-22'
+    # single_revision()
+
+    # 3.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/2.7.x/src/dc272f-src.tbz2'
+    # version = '272f'
+    # original_date = '1998-10-02'
+    # single_revision()
+
+    # 4.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/2.8.x/src/dc280f-src.tbz2'
+    # version = '280f'
+    # original_date = '1998-10-18'
+    # single_revision()
+
+    # 5.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/2.8.x/src/dc281f-src.tbz2'
+    # version = '281f'
+    # original_date = '1998-10-20'
+    # single_revision()
+
+    # 6.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/2.8.x/src/dc282f-src.tbz2'
+    # version = '282f'
+    # original_date = '1998-10-24'
+    # single_revision()
+
+    # 7.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/3.0.x/src/dc301f-src.tbz2'
+    # version = '301f'
+    # original_date = '1999-01-01'
+    # single_revision()
+
+    # 8.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/3.0.x/src/dc302f-src.tbz2'
+    # version = '302f'
+    # original_date = '1999-01-04'
+    # single_revision()
+
+    # 9.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/3.2.x/src/dc320f-src.tbz2'
+    # version = '320f'
+    # original_date = '1999-02-09'
+    # single_revision()
+
+    # 10.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/final/3.3.x/src/dc330f-src.tbz2'
+    # version = '330f'
+    # original_date = '1999-03-30'
+    # single_revision()
+
+    # 11.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta01-src.zip'
+    # version = '331beta01'
+    # original_date = '1999-04-09'  # "Date last modified" of every file inside and of that the latest
+    # single_revision()
+
+    # 12.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta02-src.zip'
+    # version = '331beta02'
+    # original_date = '1999-06-18'
+    # single_revision()
+
+    # 13.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta03-src.zip'
+    # version = '331beta03'
+    # original_date = '1999-06-22'
+    # single_revision()
+
+    # 14.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta04-src.zip'
+    # version = '331beta04'
+    # original_date = '1999-08-08'
+    # single_revision()
+
+    # 15.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta05-src.zip'
+    # version = '331beta05'
+    # original_date = '1999-08-27'
+    # single_revision()
+
+    # 16.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta06-src.zip'
+    # version = '331beta06'
+    # original_date = '1999-09-12'
+    # single_revision()
+
+    # 17.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta07-src.zip'
+    # version = '331beta07'
+    # original_date = '1999-09-24'
+    # single_revision()
+
+    # 18.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta08-src.zip'
+    # version = '331beta08'
+    # original_date = '1999-09-28'
+    # single_revision()
+
+    # 19.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/3.3.x/src/cr331beta09-src.zip'
+    # version = '331beta09'
+    # original_date = '1999-10-02'
+    # single_revision()
+
+    # 20.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999oct12src.zip'
+    # version = 'cr1999oct12'
+    # original_date = '1999-10-12'
+    # single_revision()
+
+    # 21.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999oct15src.zip'
+    # version = 'cr1999oct15'
+    # original_date = '1999-10-15'
+    # single_revision()
+
+    # 22.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999nov18src.zip'
+    # version = 'cr1999nov18'
+    # original_date = '1999-11-18'
+    # single_revision()
+
+    # 23.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999nov23src.zip'
+    # version = 'cr1999nov23'
+    # original_date = '1999-11-23'
+    # single_revision()
+
+    # 24.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999dec27src.zip'
+    # version = 'cr1999dec27'
+    # original_date = '1999-12-27'
+    # single_revision()
+
+    # 25.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999dec30src.zip'
+    # version = 'cr1999dec30'
+    # original_date = '1999-12-30'
+    # single_revision()
+
+    # 26.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr1999dec31src.zip'
+    # version = 'cr1999dec31'
+    # original_date = '1999-12-31'
+    # single_revision()
+
+    # 27.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000jan10src.zip'
+    # version = 'cr2000jan10'
+    # original_date = '2000-01-10'
+    # single_revision()
+
+    # 28.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000feb23src.zip'
+    # version = 'cr2000feb23'
+    # original_date = '2000-02-23'
+    # single_revision()
+
+    # 29.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000mar06src.zip'
+    # version = 'cr2000mar06'
+    # original_date = '2000-03-06'
+    # single_revision()
+
+    # 30.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000jun19src.zip'
+    # version = 'cr2000jun19src'
+    # original_date = '2000-06-19'
+    # single_revision()
+
+    # 31.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000jun20src.zip'
+    # version = 'cr2000jun20'
+    # original_date = '2000-06-20'
+    # single_revision()
+
+    # 32.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000jun22src.zip'
+    # version = 'cr2000jun22'
+    # original_date = '2000-06-22'
+    # single_revision()
+
+    # 33.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000jul22src.zip'
+    # version = 'cr2000jul22'
+    # original_date = '2000-07-22'
+    # single_revision()
+
+    # 34.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000aug01src.zip'
+    # version = 'cr2000aug01'
+    # original_date = '2000-08-01'
+    # single_revision()
+
+    # 35.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000aug13src.zip'
+    # version = 'cr2000aug13'
+    # original_date = '2000-08-13'
+    # single_revision()
+
+    # 36.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/orphan/src/cr2000oct30src.zip'
+    # version = 'cr2000oct30'
+    # original_date = '2000-10-30'
+    # single_revision()
+
+    # 37.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta01-src.tbz2'
+    # version = '400beta01'
+    # original_date = None # 2000-12-20
+    # single_revision()
+
+    # 38.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta02-src.tbz2'
+    # version = '400beta02'
+    # original_date = None # 2000-12-22
+    # single_revision()
+
+    # 39.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta03-src.tbz2'
+    # version = '400beta03'
+    # original_date = None # 2000-12-29
+    # single_revision()
+
+    # 40.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta04-src.tbz2'
+    # version = '400beta04'
+    # original_date = None # 2001-01-11
+    # single_revision()
+
+    # 41.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta06-src.tbz2'
+    # version = '400beta06'
+    # original_date = None  # 2001-01-23
+    # single_revision()
+
+    # 42.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta07-src.tbz2'
+    # version = '400beta07'
+    # original_date = None # 2001-01-29
+    # single_revision()
+
+    # 43.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta08-src.tbz2'
+    # version = 'cr400beta08'
+    # original_date = None # 2001-02-20
+    # single_revision()
+
+    # 44.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta09-src.tbz2'
+    # version = 'cr400beta09'
+    # original_date = None # 2001-03-06
+    # single_revision()
+
+    # 45.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta10-src.tbz2'
+    # version = 'cr400beta10'
+    # original_date = None # 2001-03-13
+    # single_revision()
+
+    # 46.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta10b-src.tbz2'
+    # version = 'cr400beta10b'
+    # original_date = None # 2001-03-14
+    # single_revision()
+
+    # 47.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta10c-src.tbz2'
+    # version = 'cr400beta10c'
+    # original_date = None # 2001-03-15
+    # single_revision()
+
+    # 48.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta10d-src.tbz2'
+    # version = '400beta10d'
+    # original_date = None # 2001-03-18
+    # single_revision()
+
+    # 49.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta11-src.tbz2'
+    # version = '400beta11'
+    # original_date = None # 2001-03-21
+    # single_revision()
+
+    # 50.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta12-src.tbz2'
+    # version = '400beta12'
+    # original_date = None # 2001-04-02
+    # single_revision()
+
+    # 51.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta13-src.tbz2'
+    # version = '400beta13'
+    # original_date = None # 2001-04-09
+    # single_revision()
+
+    # 52.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta14-src.tbz2'
+    # version = '400beta14'
+    # original_date = None # 2001-04-20
+    # single_revision()
+
+    # 53.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta15-src.tbz2'
+    # version = '400beta15'
+    # original_date = None # 2001-04-25
+    # single_revision()
+
+    # 54.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta16-src.tbz2'
+    # version = '400beta16'
+    # original_date = None # 2001-05-11
+    # single_revision()
+
+    # 55.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta17-src.tbz2'
+    # version = '400beta17'
+    # original_date = None # 2001-06-01
+    # single_revision()
+
+    # 56.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta18-src.tbz2'
+    # version = '400beta18'
+    # original_date = None # 2001-08-04
+    # single_revision()
+
+    # 57.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta19-src.tbz2'
+    # version = '400beta19'
+    # original_date = None # 2001-08-10
+    # single_revision()
+
+    # 58.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta20-src.tbz2'
+    # version = '400beta20'
+    # original_date = None # 2001-11-05
+    # single_revision()
+
+    # 59.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/cr400beta22-src.tbz2'
+    # version = '400beta22'
+    # original_date = None # 2001-12-21
+    # single_revision()
+
+    # 60.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/dc400b23-src.tbz2'
+    # version = '400b23'
+    # original_date = None # 2002-03-16
+    # single_revision()
+
+    # 61.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/dc400b24-src.tbz2'
+    # version = '400b24'
+    # original_date = '2002-06-03' # taken again from ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/
+    # single_revision()
+
+    # 62.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/dc400b25-src.tbz2'
+    # version = '400b25'
+    # original_date = '2003-03-06'
+    # single_revision()
+
+    # 63.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/dc400a26-src.tbz2'
+    # version = '400a26'
+    # original_date = '2003-03-17'
+    # single_revision()
+
+    # 64.
+    # ftp_link = 'ftp://ftp.dungeoncrawl.org/dev/4.0.x/src/dc400b26-src.tbz2'
+    # version = '400b26'
+    # original_date = '2003-03-24'
+    # single_revision()
--- a/code/conversions/lechemindeladam_svn_to_git.py
+++ b/code/conversions/lechemindeladam_svn_to_git.py
@ -0,0 +1,502 @@
+"""
+The svn is too big to be automatically imported to git (and Github) because there are lots of large binary data components.
+Needs a manual solution.
+
+TODO use git lfs migrate later on the elements
+TODO instead of svn export for every revision, checkout and then update to revision (reduced bandwidth)
+"""
+
+import json
+import sys
+import psutil
+
+from utils.utils import *
+
+
+def remove_folders(base_folder, names):
+    """
+    Deletes sub-folders of base_folder together with their content, names that are not existing folders are skipped.
+
+    :param base_folder: Folder that contains the folders to delete.
+    :param names: A single folder name (str) or an iterable of folder names.
+    """
+    candidates = (names,) if isinstance(names, str) else names
+    for candidate in candidates:
+        path = os.path.join(base_folder, candidate)
+        if not os.path.isdir(path):
+            continue
+        shutil.rmtree(path)
+
+
+def remove_files(base_folder, names):
+    """
+    Deletes files located directly in base_folder, names that are not existing files are skipped.
+
+    :param base_folder: Folder that contains the files to delete.
+    :param names: A single file name (str) or an iterable of file names.
+    """
+    candidates = (names,) if isinstance(names, str) else names
+    for candidate in candidates:
+        path = os.path.join(base_folder, candidate)
+        if not os.path.isfile(path):
+            continue
+        os.remove(path)
+
+
+def special_treatment(destination, revision):
+    """
+    Restructures and cleans up the exported tree of a single svn revision in place.
+
+    :param destination: Folder that contains the export of the revision.
+    :param revision: Revision number; every step below is only applied within a fixed range of revisions.
+
+    The steps are executed in the given order. EmpireOfSteam folders are moved to the module level empire_path.
+    """
+
+    # copy content of trunk to base and remove trunk
+    if 2270 <= revision <= 2420:
+        source = os.path.join(destination, 'trunk')
+        if os.path.isdir(source):
+            copy_tree(source, destination)
+            shutil.rmtree(source)
+
+    # copy all important files from Holyspirit/Holyspirit and delete it
+    if 5 <= revision <= 330:
+        source = os.path.join(destination, 'Holyspirit', 'Holyspirit')
+        if os.path.isdir(source):
+            # the Data folder is only copied from revision 8 on
+            if revision >= 8:
+                shutil.copytree(os.path.join(source, 'Data'), os.path.join(destination, 'Data'))
+            files = [x for x in os.listdir(source) if x.endswith('.txt')]
+            for file in files:
+                shutil.copy(os.path.join(source, file), destination)
+            # remove it (the outer Holyspirit folder including everything that was not copied)
+            shutil.rmtree(os.path.join(destination, 'Holyspirit'))
+
+    # move all important files from Holyspirit (Data folder to base, text/config files to Meta) and delete it
+    if 337 <= revision <= 2268:
+        source = os.path.join(destination, 'Holyspirit')
+        if os.path.isdir(source):
+            data = os.path.join(source, 'Data')
+            if os.path.isdir(data):
+                # shutil.copytree(data, os.path.join(destination, 'Data'))
+                shutil.move(data, destination)
+            target = os.path.join(destination, 'Meta')
+            if not os.path.isdir(target):
+                os.mkdir(target)
+            files = [x for x in os.listdir(source) if x.endswith('.txt') or x.endswith('.conf') or x.endswith('.ini')]
+            for file in files:
+                shutil.move(os.path.join(source, file), target)
+            # remove it
+            shutil.rmtree(source)
+
+    # move Data folder (to DataJE) and text/config files (to MetaJE) from HolyspiritJE and delete it
+    if 2012 <= revision <= 2269:
+        source = os.path.join(destination, 'HolyspiritJE')
+        if os.path.isdir(source):
+            data = os.path.join(source, 'Data')
+            if os.path.isdir(data):
+                shutil.move(data, os.path.join(destination, 'DataJE'))
+            target = os.path.join(destination, 'MetaJE')
+            if not os.path.isdir(target):
+                os.mkdir(target)
+            files = [x for x in os.listdir(source) if x.endswith('.txt') or x.endswith('.conf') or x.endswith('.ini')]
+            for file in files:
+                shutil.move(os.path.join(source, file), target)
+            # remove it
+            shutil.rmtree(source)
+
+    # remove Holyspirit3 folder
+    if 464 <= revision <= 2268:
+        remove_folders(destination, 'Holyspirit3')
+
+    # remove Holyspirit2 folder
+    if 659 <= revision <= 2268:
+        remove_folders(destination, 'Holyspirit2')
+
+    # remove the bin, debug, release and obj folders of Launcher
+    if 413 <= revision <= 2420:
+        source = os.path.join(destination, 'Launcher')
+        remove_folders(source, ('bin', 'debug', 'release', 'obj'))
+
+    # delete all *.dll, *.exe in base folder
+    if 3 <= revision <= 9:
+        files = os.listdir(destination)
+        for file in files:
+            if file.endswith('.exe') or file.endswith('.dll'):
+                os.remove(os.path.join(destination, file))
+
+    # delete "Cross" folder
+    if 42 <= revision <= 43:
+        remove_folders(destination, 'Cross')
+
+    # delete personal photos (located in the base folder first, later in Media)
+    if 374 <= revision <= 2267:
+        remove_folders(destination, 'Photos')
+    if 2268 <= revision <= 2420:
+        source = os.path.join(destination, 'Media')
+        remove_folders(source, 'Photos')
+
+    # move empire of steam out
+    if 1173 <= revision <= 2420:
+        folder = os.path.join(destination, 'EmpireOfSteam')
+        if os.path.isdir(folder):
+            # move to empire path (one folder per revision)
+            empire = os.path.join(empire_path, 'r{:04d}'.format(revision))
+            shutil.move(folder, empire)
+
+    # holy editor cleanup
+    if 1078 <= revision <= 2420:
+        source = os.path.join(destination, 'HolyEditor')
+        remove_folders(source, ('bin', 'release', 'debug', 'obj'))
+        remove_files(source, 'moc.exe')
+
+    # source folder cleanup
+    if 939 <= revision <= 2420:
+        source = os.path.join(destination, 'Source')
+        remove_folders(source, 'HS')
+        remove_files(source, 'HS.zip')
+
+    # sourceM folder cleanup
+    if 2110 <= revision <= 2270:
+        source = os.path.join(destination, 'SourceM')
+        remove_folders(source, 'HS')
+
+    # sourceNewApi cleanup
+    if 2261 <= revision <= 2269:
+        source = os.path.join(destination, 'SourceNewApi')
+        remove_folders(source, 'HS')
+
+    # Autres folder cleanup
+    if 1272 <= revision <= 2267:
+        source = os.path.join(destination, 'Autres')
+        remove_folders(source, ('conf', 'db', 'hooks', 'locks'))
+        remove_files(source, ('format', 'maj.php'))
+    # Media/Other folder cleanup
+    if 2268 <= revision <= 2420:
+        source = os.path.join(destination, 'Media', 'Other')
+        remove_files(source, ('format', 'maj.php'))
+
+    # remove Holyspirit_Demo
+    if 1668 <= revision <= 2268:
+        remove_folders(destination, 'Holyspirit_Demo')
+
+    # remove Debug.rar
+    if 1950 <= revision <= 2420:
+        remove_files(destination, 'Debug.rar')
+
+    # remove 3dparty folder
+    if 2273 <= revision <= 2420:
+        remove_folders(destination, '3dparty')
+
+    # branches cleanup
+    if 2270 <= revision <= 2420:
+        remove_folders(destination, 'branches')
+
+
+def delete_global_excludes(folder):
+    """
+    Deletes every file below folder (recursively) whose file name is contained in the module level global_exclude.
+
+    :param folder: Root of the folder tree that is cleaned.
+    """
+    for dirpath, _, filenames in os.walk(folder):
+        for file in filenames:
+            if file in global_exclude:
+                os.remove(os.path.join(dirpath, file))
+
+
+def delete_empty_directories(folder):
+    """
+    Deletes all directories below folder that are empty or contain nothing but (recursively) empty directories.
+
+    :param folder: Root of the folder tree that is cleaned. The root itself and its parents are never deleted, even
+        if they end up empty.
+    """
+    root = os.fspath(folder)
+    # bottom-up, so the children of a directory are already deleted when the directory itself is inspected
+    for dirpath, _, _ in os.walk(root, topdown=False):
+        if dirpath == root:
+            continue
+        # look at the file system again, the lists delivered by os.walk were created before children were deleted
+        if not os.listdir(dirpath):
+            # os.rmdir instead of os.removedirs, which would continue to delete empty parents above the root
+            os.rmdir(dirpath)
+
+
+def list_large_unwanted_files(folder):
+    """
+    Lists all files below folder that either have an unwanted file extension or are larger than the size limit.
+
+    Extensions and limit are taken from the module level names unwanted_file_extensions and large_file_limit.
+
+    :param folder: Root of the folder tree that is searched.
+    :return: List of strings '<path relative to folder> <size in bytes>', every file is listed at most once.
+    """
+    report = []
+    for current, _, files in os.walk(folder):
+        relative = os.path.relpath(current, folder)
+        for name in files:
+            size = os.path.getsize(os.path.join(current, name))
+            unwanted = any(name.endswith(extension) for extension in unwanted_file_extensions)
+            if unwanted or size > large_file_limit:
+                report.append(os.path.join(relative, name) + ' ' + str(size))
+    return report
+
+
+def checkout(revision_start, revision_end=None):
+    """
+    Exports a range of svn revisions from svn_url, each one into its own folder 'rNNNN' below svn_checkout_path.
+
+    :param revision_start: First revision to export.
+    :param revision_end: Last revision to export (inclusive), if not given only revision_start is exported.
+
+    An already existing folder of a revision is deleted first. A failing export is repeated until it succeeds. The
+    script exits if less than 3e10 bytes are free on the disc of svn_checkout_path.
+    """
+    if not revision_end:
+        revision_end = revision_start
+
+    assert revision_end >= revision_start
+
+    for revision in range(revision_start, revision_end + 1):
+        # check free disc space
+        if psutil.disk_usage(svn_checkout_path).free < 3e10:  # 3e10 bytes = 30 GB
+            print('not enough free disc space, will exit')
+            sys.exit(-1)
+
+        print('checking out revision {}'.format(revision))
+
+        # create destination directory
+        destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
+        if os.path.exists(destination):
+            shutil.rmtree(destination)
+
+        # checkout
+        start_time = time.time()
+        # sometimes checkout fails for reasons like "svn: E000024: Can't open file '/svn/p/lechemindeladam/code/db/revs/1865': Too many open files", we try again and again in these cases
+        while True:
+            try:
+                subprocess_run(['svn', 'export', '-r{}'.format(revision), svn_url, destination])
+                break
+            except (Exception, SystemExit):
+                # KeyboardInterrupt is deliberately not caught, otherwise the endless retry could not be aborted.
+                # NOTE(review): SystemExit is kept in case the imported subprocess_run reports failures via
+                # sys.exit like the local variants in the sibling scripts - confirm against utils.utils.
+                print('problem with export, will try again')
+                if os.path.isdir(destination):
+                    shutil.rmtree(destination)
+
+        print('checkout took {:.1f}s'.format(time.time() - start_time))
+
+
+def fix_revision(revision_start, revision_end=None):
+    """
+    Cleans up the exported folders of a range of revisions and writes two reports about them.
+
+    :param revision_start: First revision to fix.
+    :param revision_end: Last revision to fix (inclusive), if not given only revision_start is fixed.
+    :raises RuntimeError: If the folder of a revision does not exist.
+
+    For every revision the special treatment is applied, globally excluded files are deleted, large or unwanted
+    files are listed, empty directories are deleted and the folder size is measured. The lists and sizes of all
+    revisions of this call are written to 'unwanted_files.json' and 'folder_sizes.json' in svn_checkout_path.
+    """
+    if not revision_end:
+        revision_end = revision_start
+    assert revision_end >= revision_start
+
+    unwanted_files = {}
+    sizes = {}
+
+    for revision in range(revision_start, revision_end + 1):
+        print('fixing revision {}'.format(revision))
+
+        # destination directory
+        destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
+        if not os.path.exists(destination):
+            raise RuntimeError('cannot fix revision {}, directory does not exist'.format(revision))
+
+        # special treatment
+        special_treatment(destination, revision)
+
+        # delete files from global exclude list
+        delete_global_excludes(destination)
+
+        # list unwanted files
+        unwanted_files[revision] = list_large_unwanted_files(destination)
+
+        # delete empty directories
+        delete_empty_directories(destination)
+
+        # size of resulting folder
+        sizes[revision] = folder_size(destination)
+
+    # the file names are fixed (they contain no placeholder, so there is nothing to format)
+    text = json.dumps(unwanted_files, indent=1)
+    write_text(os.path.join(svn_checkout_path, 'unwanted_files.json'), text)
+    text = json.dumps(sizes, indent=1)
+    write_text(os.path.join(svn_checkout_path, 'folder_sizes.json'), text)
+
+
+def initialize_git():
+    """
+    Creates the folder git_path, changes into it and sets up a new Git repository with a fixed user name and email.
+    """
+    os.mkdir(git_path)
+    os.chdir(git_path)
+    # git init followed by the repository-local identity
+    setup_commands = (
+        ['git', 'init'],
+        ['git', 'config', 'user.name', 'Trilarion'],
+        ['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com'],
+    )
+    for command in setup_commands:
+        subprocess_run(command)
+
+
+def combine_log_messages(msg):
+    """
+    Combines the lines of a log message into a single string.
+
+    :param msg: Iterable of lines.
+    :return: The stripped lines joined by carriage return + line feed. Empty lines are dropped before stripping, so
+        a line that only consists of whitespace is kept as an empty line.
+    """
+    kept = []
+    for line in msg:
+        if not line:
+            continue
+        kept.append(line.strip())
+    return "\r\n".join(kept)
+
+
+def read_logs():
+    """
+    Reads the complete svn log of svn_url and splits it into its entries.
+
+    Probably regular expressions would have worked too.
+
+    :return: Tuple (logs, unique_authors). logs is a list of tuples (revision, author, date, message) sorted by
+        revision, unique_authors is the sorted list of all author names.
+    """
+    separator = '\r\n------------------------------------------------------------------------\r\n'
+
+    # read log
+    print('read all log messages')
+    os.chdir(svn_checkout_path)
+    started = time.time()
+    raw = subprocess_run(['svn', 'log', svn_url], display=False)
+    print('read log took {:.1f}s'.format(time.time() - started))
+
+    # split at the separator lines, the last two pieces are not used
+    entries = raw.split(separator)[:-2]
+    print('{} log entries'.format(len(entries)))
+
+    # every entry becomes a list of lines
+    entries = [entry.split('\r\n') for entry in entries]
+
+    # the first entry still starts with an additional separator line
+    entries[0] = entries[0][1:]
+
+    # the first line of an entry holds revision, author and date separated by '|'
+    headers = [entry[0].split('|') for entry in entries]
+    revisions = [int(header[0][1:]) for header in headers]
+    entry_authors = [header[1].strip() for header in headers]
+    unique_authors = sorted(set(entry_authors))
+    dates = [header[2].strip() for header in headers]
+
+    # the message starts at the third line of an entry
+    messages = [combine_log_messages(entry[2:]) for entry in entries]
+
+    logs = sorted(zip(revisions, entry_authors, dates, messages), key=lambda item: item[0])
+    return logs, unique_authors
+
+
+def gitify(revision_start, revision_end):
+    """
+    Adds the checked out SVN revisions from revision_start to revision_end (both included) as commits to the Git
+    repository in git_path.
+
+    For every revision the content of git_path is cleared, the files of the revision folder (below
+    svn_checkout_path) are copied over, all changes are staged and then committed with the message, author and
+    date taken from logs and authors. Revisions for which "git status" reports nothing are skipped.
+
+    :param revision_start: first revision to add
+    :param revision_end: last revision to add, must not be smaller than revision_start
+    :raises RuntimeError: if the folder of a revision does not exist
+    """
+    assert revision_end >= revision_start
+
+    for revision in range(revision_start, revision_end + 1):
+        print('adding revision {} to git'.format(revision))
+
+        # svn folder (named r0001, r0002, ... below the svn checkout path)
+        svn_folder = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
+        if not os.path.exists(svn_folder):
+            raise RuntimeError('cannot add revision {}, directory does not exist'.format(revision))
+
+        # clear git path (retried every second for as long as a PermissionError is raised)
+        print('git clear path')
+        while True:
+            try:
+                git_clear_path(git_path)
+                break
+            except PermissionError as e:
+                print(e)
+                # wait a bit
+                time.sleep(1)
+
+        # copy source files to git path
+        print('copy to git')
+        copy_tree(svn_folder, git_path)
+
+        os.chdir(git_path)
+
+        # update the git index (add unstaged, remove deleted, ...)
+        print('git add')
+        subprocess_run(['git', 'add', '--all'])
+
+        # check if there is something to commit
+        status = subprocess_run(['git', 'status', '--porcelain'])
+        if not status:
+            print(' nothing to commit for revision {}, will skip'.format(revision))
+            continue
+
+        # perform the commit
+        print('git commit')
+        log = logs[revision]  # revision, author, date, message
+        # the svn revision number is appended to the message as an additional line
+        message = log[3] + '\r\nsvn-revision: {}'.format(revision)
+        print('  message "{}"'.format(message))
+        # translate the svn author to the git author (name, email) and format it as "name <email>"
+        author = authors[log[1]]
+        author = '{} <{}>'.format(*author)
+        cmd = ['git', 'commit', '--allow-empty-message', '--message={}'.format(message), '--author={}'.format(author),
+               '--date={}'.format(log[2])]
+        print('  cmd: {}'.format(' '.join(cmd)))
+        subprocess_run(cmd)
+
+
+if __name__ == "__main__":
+
+    # settings (NOTE(review): not used in the visible part of this script, presumably read by the checkout and
+    # fix_revision steps - confirm)
+    global_exclude = ['Thumbs.db']
+    unwanted_file_extensions = ['.exe', '.dll']
+    large_file_limit = 1e6  # in bytes
+
+    # base path is the folder "conversion" inside the directory containing this file
+    base_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'conversion')
+    print('base path={}'.format(base_path))
+
+    # derived paths (created if they do not exist yet)
+    svn_checkout_path = os.path.join(base_path, 'svn')
+    if not os.path.exists(svn_checkout_path):
+        os.mkdir(svn_checkout_path)
+    empire_path = os.path.join(base_path, 'empire')  # empire of steam side project
+    if not os.path.exists(empire_path):
+        os.mkdir(empire_path)
+    # the git repository is initialized only once, when its folder does not exist yet
+    git_path = os.path.join(base_path, 'lechemindeladam')
+    if not os.path.exists(git_path):
+        initialize_git()
+
+    # svn url
+    svn_url = "https://svn.code.sf.net/p/lechemindeladam/code/"
+
+    # read logs
+    # (the commented lines create logs.json and authors.json from the svn log, the lines below them only read
+    # these two files again)
+    # logs, authors = read_logs()
+    # text = json.dumps(logs, indent=1)
+    # write_text(os.path.join(base_path, 'logs.json'), text)
+    # text = json.dumps(authors, indent=1)
+    # write_text(os.path.join(base_path, 'authors.json'), text)
+    text = read_text(os.path.join(base_path, 'logs.json'))
+    logs = json.loads(text)
+    logs = {x[0]: x for x in logs}  # dictionary
+    text = read_text(os.path.join(base_path, 'authors.json'))
+    authors = json.loads(text)  # should be a dictionary: svn-author: [git-author, git-email]
+
+    # the steps
+    # checkout(1, 50)
+    # fix_revision(1, 50)
+    # gitify(4, 50)
+
+    # checkout(51, 100)
+    # checkout(101, 200)
+
+    # fix_revision(51, 200)
+
+    # gitify(51, 200)
+
+    # checkout(201, 400)
+    # fix_revision(201, 400)
+    # gitify(201, 400)
+
+    # checkout(401, 800)
+    # fix_revision(401, 800)
+    # gitify(401, 800)
+
+    # checkout(801, 1200)
+    # fix_revision(801, 1200)
+    # gitify(801, 1200)
+
+    # checkout(1201, 1470)
+    # fix_revision(1201, 1470)
+    # gitify(1201, 1470)
+
+    # checkout(1471, 1700)
+    # fix_revision(1471, 1700)
+    # gitify(1471, 1700)
+
+    # checkout(1701, 1900)
+    # fix_revision(1701, 1900)
+    # gitify(1701, 1900)
+
+    # checkout(1901, 2140)
+    # fix_revision(1901, 2140)
+    # gitify(1901, 2140)
+
+    # checkout(2141, 2388)
+    # fix_revision(2141, 2388)
+    # gitify(2141, 2388)
+
+    # checkout(2389, 2420)
+    # fix_revision(2389, 2420)
+    # gitify(2389, 2420)
+
+    # run the following commands in the git bash
+    # git config credential.useHttpPath true
+    # git lfs install
+    # git lfs migrate import --include-ref=master --include="Zombie_paysan.rs.hs,Witch_monster.rs.hs,WanderingStones.rs.hs,TwoWeapons.rs.hs,TwoHands.rs.hs,TwoHand.rs.hs,Reaper.rs.hs,Peasant_crossbow.rs.hs,Peasant_club.rs.hs,OneHand.rs.hs,Offspring_champion.rs.hs,Mimic.rs.hs,LordSkeleton.rs.hs,Goule.rs.hs,ErrantRoche.rs.hs,DemonicPriest0.rs.hs,DemonicPriest.rs.hs,Brute.rs.hs,20575__dobroide__20060706.night.forest02.wav,31464__offtheline__Morning_Sounds.wav,47989__Luftrum__forestsurroundings.wav,ambiance.wav,Catacombs0.wav,Pluie.wav,Taverne fusion.png,Abbey.ogg,AgrarianLands0.ogg,AgrarianLands1.ogg,Boss0.ogg,Catacombs0.ogg,Catacombs1.ogg,DarkForest.ogg,Forest_ambient0.ogg,Johannes.ogg,OWC.ogg"
+
+    # then add remote and push (done)
--- a/code/conversions/phaos.json
+++ b/code/conversions/phaos.json
@ -0,0 +1,49 @@
+["https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED-6.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED-5.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED-4.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED-3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED-2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82-UPDATED.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.82/phaos-0.9.82.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.81/phaos-0.9.81.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.8/phaos-0.9.8.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.7/phaos-0.9.7.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9.6/phaos-0.9.6.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9/phaos-0.9.4.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9/phaos-0.9.3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.9/phaos-0.9.2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/Phaos%20Online%20RPG%20%28Source%20Code%29/Version%200.89/phaos-0.89.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.9.5.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.9.1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.9.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.88.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.88-rc3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.88-rc2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.88-rc1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.87-1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.87.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.86.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.86-TEST-3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.86-TEST-2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.86-TEST-1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.85.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.85-TEST-3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.85-TEST-2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.85-TEST-1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.84.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.84-TEST2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.84-TEST1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.82.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.82-TEST3.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.82-TEST2.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.82-TEST1.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.8.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.77.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.76.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos-0.74.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/phaos0.72.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.72.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.7develnew.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.7.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.61.zip/download",
+"https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.6.zip/download"]
--- a/code/conversions/phaos_source_release_to_git.py
+++ b/code/conversions/phaos_source_release_to_git.py
@ -0,0 +1,125 @@
+"""
+Downloads source releases from Sourceforge and puts them into a git repository
+"""
+
+import json
+import datetime
+from utils.utils import *
+
+if __name__ == '__main__':
+
+    # https://sourceforge.net/projects/phaosrpg/files/OldFiles/Pv0.7devel.zip/download is a corrupt zip
+
+    # base path is the directory containing this file
+    base_path = os.path.abspath(os.path.dirname(__file__))
+    print('base path={}'.format(base_path))
+
+    # create the archive path (folder for the downloads) if it does not exist yet
+    archive_path = os.path.join(base_path, 'downloads')
+    if not os.path.exists(archive_path):
+        os.mkdir(archive_path)
+
+    # load source releases urls
+    with open(os.path.join(base_path, 'phaos.json'), 'r') as f:
+        urls = json.load(f)
+    print('will process {} urls'.format(len(urls)))
+    if len(urls) != len(set(urls)):
+        raise RuntimeError("urls list contains duplicates")
+
+    # determine file archives from urls (the second to last part of the url is the file name)
+    archives = [x.split('/')[-2] for x in urls]
+    if len(archives) != len(set(archives)):
+        raise RuntimeError("files with duplicate archives, cannot deal with that")
+
+    # determine version from file name
+    versions = [determine_archive_version_generic(x, leading_terms=['phaos-', 'phaos', 'pv'], trailing_terms=['zip']) for x in archives]
+    # for version in versions:
+    #     print(version)
+
+    # extend archives to full paths
+    archives = [os.path.join(archive_path, x) for x in archives]
+
+    # download them
+    print('download source releases')
+    for url, destination in zip(urls, archives):
+        # only if not yet existing
+        if os.path.exists(destination):
+            continue
+        # download
+        print('  download {}'.format(os.path.basename(destination)))
+        with urllib.request.urlopen(url) as response:
+            with open(destination, 'wb') as f:
+                shutil.copyfileobj(response, f)
+                time.sleep(1) # we are nice
+
+    # unzip them
+    print('unzip downloaded archives')
+    unzipped_archives = [x[:-4] for x in archives] # folder is archive name without .zip
+    for archive, unzipped_archive in zip(archives, unzipped_archives):
+        print('  unzip {}'.format(os.path.basename(archive)))
+        # only if not yet existing
+        if os.path.exists(unzipped_archive):
+            continue
+        os.mkdir(unzipped_archive)
+        # unzip
+        unzip_keep_last_modified(archive, unzipped_archive)
+
+    # go up in unzipped archives until the very first non-empty folder
+    unzipped_archives = [strip_wrapped_folders(x) for x in unzipped_archives]
+
+    # determine date (dates are the time stamps, dates_strings the same dates formatted as year-month-day)
+    dates = [determine_latest_last_modified_date(x) for x in unzipped_archives]
+    dates_strings = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d') for x in dates]
+    # if len(dates_strings) != len(set(dates_strings)):
+    #     raise RuntimeError("Some on the same day, cannot cope with that")
+
+    # gather all important stuff in one list and sort by dates
+    db = list(zip(urls, unzipped_archives, versions, dates, dates_strings))
+    db.sort(key=lambda x:x[3])
+    print('proposed order')
+    for url, _, version, _, date in db:
+        print('  date={} version={}'.format(date, version))
+
+    # git init (an already existing repository folder is deleted first, the repository is always built anew)
+    git_path = os.path.join(base_path, 'phaosrpg')
+    if os.path.exists(git_path):
+        shutil.rmtree(git_path)
+    os.mkdir(git_path)
+    os.chdir(git_path)
+    subprocess_run(['git', 'init'])
+    subprocess_run(['git', 'config', 'user.name', 'Trilarion'])
+    subprocess_run(['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com'])
+
+    # now process revision by revision
+    # NOTE: the loop rebinds archive_path (the downloads folder above), here it is the unpacked folder of a release
+    print('process revisions')
+    git_author = 'eproductions3 <eproductions3@user.sourceforge.net>'
+    for url, archive_path, version, _, date in db:
+        print('  process version={}'.format(version))
+
+        # clear git path without deleting .git
+        print('    clear git')
+        for item in os.listdir(git_path):
+            # ignore .git
+            if item == '.git':
+                continue
+            item = os.path.join(git_path, item)
+            if os.path.isdir(item):
+                shutil.rmtree(item)
+            else:
+                os.remove(item)
+
+        # copy unpacked source files to git path
+        print('copy to git')
+        copy_tree(archive_path, git_path)
+
+        # update the git index (add unstaged, remove deleted, ...)
+        print('git add')
+        os.chdir(git_path)
+        subprocess_run(['git', 'add', '--all'])
+
+        # perform the commit (the date string of the release is used as the commit date)
+        print('git commit')
+        os.chdir(git_path)
+        message = 'version {} ({}) on {}'.format(version, url, date)
+        print('  message "{}"'.format(message))
+        subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(git_author), '--date={}'.format(date)])
--- a/code/git_statistics.py
+++ b/code/git_statistics.py
@ -0,0 +1,50 @@
+"""
+takes all gits that we have in the list and checks the master branch out, then collects some statistics:
+- number of distinct committers
+- list of commit dates
+- number of commits
+- language detection and lines of code counting on final state
+
+uses git log --format="%an, %at, %cn, %ct" --all to get commits, committers and times (as unix time stamp)
+"""
+
+import json
+from utils.utils import *
+
+if __name__ == "__main__":
+
+    # paths
+    file_path = os.path.realpath(os.path.dirname(__file__))
+    archives_path = os.path.join(file_path, 'git_repositories.json')
+    temp_path = os.path.join(file_path, 'temp')
+
+    # get git archives
+    text = read_text(archives_path)
+    archives = json.loads(text)
+    print('process {} git archives'.format(len(archives)))
+
+    # loop over them
+    for count, archive in enumerate(archives, 1):
+
+        # print iteration info
+        print('{}/{} - {}'.format(count, len(archives), archive))
+
+        # recreate temp folder
+        recreate_directory(temp_path)
+        os.chdir(temp_path)
+
+        # clone git in temp folder
+        subprocess_run(["git", "clone", "--mirror", archive, temp_path])
+
+        # get commits, etc. info (one line per commit: author name, author time, committer name, committer time)
+        # the command is given as a list of arguments, so the format must not be wrapped in additional quotes,
+        # otherwise the quotes become part of the format and show up in every output line
+        info = subprocess_run(["git", "log", '--format=%an, %at, %cn, %ct'])
+
+        info = info.split('\n')
+        info = info[:-1] # last line is empty
+        number_commits = len(info)
+
+        # the first field of every line (%an, the author name) is used to count distinct people
+        info = [x.split(', ') for x in info]
+        commiters = {x[0] for x in info}
+
+        print(' commits: {}, commiters {}'.format(number_commits, len(commiters)))
+
--- a/code/is_already_included.py
+++ b/code/is_already_included.py
@ -0,0 +1,47 @@
+"""
+Checks a list of game names (comma separated in text file) if they are already included in the database.
+Is fuzzy, i.e. accepts a certain similarity of names.
+"""
+
+import json
+import re
+from difflib import SequenceMatcher
+from utils.utils import *
+
+def similarity(a, b):
+    """
+    Returns the similarity ratio of the two sequences a and b as computed by difflib.SequenceMatcher (without a
+    junk function).
+    """
+    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
+    return matcher.ratio()
+
+if __name__ == "__main__":
+    # names with a similarity above this value are reported as possible matches
+    similarity_threshold = 0.7
+
+    root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+
+    # read docs/data.json
+    data_file = os.path.join(root_path, 'docs', 'data.json')
+    text = read_text(data_file)
+    data = json.loads(text)
+
+    # extract game names (first element of every data row, without additions in parentheses)
+    data = data['data']
+    data = (x[0] for x in data)
+    existing_names = [re.sub(r' \([^)]*\)', '', x) for x in data]
+
+    # read names to test (comma separated), surrounding whitespace like the line break at the end of the file is
+    # removed and empty names are skipped, so they cannot distort the comparison
+    test_file = os.path.join(root_path, 'is_already_included.txt')
+    text = read_text(test_file)
+    test_names = [x.strip() for x in text.split(',')]
+    test_names = [x for x in test_names if x]
+
+    # loop over all test names
+    for test_name in test_names:
+        test_name_lower = test_name.lower()
+        matches = []
+        # loop over all existing names
+        for existing_name in existing_names:
+            s = similarity(test_name_lower, existing_name.lower())
+            if s > similarity_threshold:
+                matches.append('{} ({:.2f})'.format(existing_name, s))
+        # were matches found
+        if matches:
+            print('{} maybe included in {}'.format(test_name, ', '.join(matches)))
+        else:
+            print('{} not included'.format(test_name))
+
--- a/code/libregamewiki_import.py
+++ b/code/libregamewiki_import.py
@ -0,0 +1,376 @@
+"""
+Imports game details from libregamewiki by scraping the website, starting from https://libregamewiki.org/Category:Games
+
+Also parse rejected games (https://libregamewiki.org/Libregamewiki:Rejected_games_list) and maybe https://libregamewiki.org/Libregamewiki:Suggested_games
+
+Unique left column names in the game info boxes:
+['Code license', 'Code licenses', 'Developer', 'Developers', 'Engine', 'Engines', 'Genre', 'Genres', 'Libraries', 'Library', 'Media license', 'Media licenses', 'P. language', 'P. languages', 'Platforms']
+
+TODO there are games on LGW which are not part of the Games category but part of XXX-Games sub-categories, find them
+"""
+
+import os
+import requests
+import json
+import re
+from bs4 import BeautifulSoup
+from utils import constants, utils, osg
+
+
+def download_lgw_content():
+    """
+    Downloads the pages of all games listed in the Games category of libregamewiki and stores each of them as an
+    html file in the import folder.
+
+    :return:
+    """
+
+    # parameters
+    base_url = 'https://libregamewiki.org'
+    destination_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
+    utils.recreate_directory(destination_path)
+
+    # read and process the base url (get all games and categories)
+    # the category listing is spread over several pages, they are followed until there is no "next page" link
+    url = base_url + '/Category:Games'
+    games = []
+    while True:
+        text = requests.get(url).text
+        soup = BeautifulSoup(text, 'html.parser')
+        #categories = soup.find('div', id='mw-subcategories').find_all('li')
+        #categories = [(x.a['href'], x.a.string) for x in categories]
+
+        # game pages (collected as tuples of link target and link text)
+        pages = soup.find('div', id='mw-pages').find_all('li')
+        games.extend(((x.a['href'], x.a.string) for x in pages))
+
+        # next page
+        next_page = soup.find('a', string='next page')
+        if not next_page:
+            break
+        url = base_url + next_page['href']
+
+    # remove all those whose link text starts with 'User:', 'Template:' or 'Bullet'
+    games = [game for game in games if not any(game[1].startswith(x) for x in ('User:', 'Template:', 'Bullet'))]
+
+    print('current number of games in LGW {}'.format(len(games)))
+
+    # download every game page, the file name is derived from the link target without its first character
+    for game in games:
+        print(game[1])
+        url = base_url + game[0]
+        destination_file = os.path.join(destination_path, osg.canonical_entry_name(game[0][1:]) + '.html')
+
+        text = requests.get(url).text
+        utils.write_text(destination_file, text)
+
+
+def parse_lgw_content():
+    """
+    Parses all files in the import folder (except those starting with '_lgw') as html pages of games and extracts
+    for each of them the name, the external links, the meta description, the content of the game info box, the
+    links of the "Available as package" table and the categories. The resulting list of entries (dictionaries) is
+    stored as json in the file '_lgw.json' in the import folder.
+    """
+
+    # paths
+    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
+    entries_file = os.path.join(import_path, '_lgw.json')
+
+    # iterate over all imported files
+    files = os.listdir(import_path)
+    entries = []
+    for file in files:
+        # files starting with _lgw are the output files of the import, not game pages
+        if file.startswith('_lgw'):
+            continue
+
+        text = utils.read_text(os.path.join(import_path, file))
+
+        # parse the html (the text of the first h1 element is the name of the game)
+        soup = BeautifulSoup(text, 'html.parser')
+        title = soup.h1.get_text()
+        print(title)
+        entry = {'name': title}
+
+        # get all external links (those starting with http and not containing one of the ignored domains)
+        ignored_external_links = ('libregamewiki.org', 'freegamedev.net', 'freegamer.blogspot.com', 'opengameart.org', 'gnu.org', 'creativecommons.org', 'freesound.org', 'freecode.com', 'freenode.net')
+        links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
+        links = [x for x in links if x[0].startswith('http') and not any([y in x[0] for y in ignored_external_links])]
+        entry['external links'] = links
+
+        # get meta description
+        description = soup.find('meta', attrs={"name":"description"})
+        entry['description'] = description['content']
+
+        # parse gameinfobox
+        infos = soup.find('div', class_='gameinfobox')
+        if not infos:
+            print(' no gameinfobox')
+        else:
+            infos = infos.find_all('tr')
+            for x in infos:
+                if x.th and x.td:
+                    # row with header (header text is the key, the comma separated cell text the content)
+                    key = x.th.get_text()
+                    content = x.td.get_text()
+                    content = content.split(',')
+                    content = [x.strip() for x in content]
+                    entry[key] = content
+                if not x.th and x.td:
+                    # row without header: contribute section (link text is the key, link target the content)
+                    x = x.find_all('li')
+                    x = [(x.a.string, x.a['href']) for x in x if x.a]
+                    for key, content in x:
+                        entry[key] = content
+
+        # parse "for available as package in" (at most one such table is expected)
+        tables = soup.find_all('table', class_='wikitable')
+        tables = [table for table in tables if table.caption and table.caption.string.startswith('Available as package')]
+        if len(tables) > 0:
+            if len(tables) > 1:
+                raise RuntimeError()
+            table = tables[0]
+            packages = table.find_all('tr')
+            packages = [x.td.a['href'] for x in packages]
+            entry['linux-packages'] = packages
+
+        # categories (at most one category box is expected)
+        categories = soup.find_all('div', id='mw-normal-catlinks')
+        if not categories:
+            print(' no categories')
+            categories = []
+        else:
+            if len(categories) > 1:
+                raise RuntimeError()
+            categories = categories[0]
+            categories = categories.find_all('li')
+            categories = [x.a.string for x in categories]
+            if 'Games' not in categories:
+                print(' "Games" not in categories')
+            else:
+                categories.remove('Games') # should be there
+            # strip games at the end
+            phrase = ' games'
+            categories = [x[:-len(phrase)] if x.endswith(phrase) else x for x in categories]
+            ignored_categories = ['Articles lacking reference', 'Stubs']
+            categories = [x for x in categories if x not in ignored_categories]
+        entry['categories'] = categories
+
+        entries.append(entry)
+
+
+    # save entries
+    text = json.dumps(entries, indent=1)
+    utils.write_text(entries_file, text)
+
+
+def replace_content(entries, fields, replacement, search):
+    """
+    Replaces in the given fields of all entries every value that is contained in search by replacement.
+    Field content that is not a list is converted to a list with a single element.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param fields: a single field name or a tuple of field names
+    :param replacement: the value that is inserted
+    :param search: the values that are replaced
+    :return: the entries
+    """
+    field_names = fields if isinstance(fields, tuple) else (fields, )
+    for position, item in enumerate(entries):
+        for name in field_names:
+            if name not in item:
+                continue
+            values = item[name]
+            if not isinstance(values, list):
+                values = [values]
+            item[name] = [replacement if value in search else value for value in values]
+        # write the entry back
+        entries[position] = item
+    return entries
+
+
+def ignore_content(entries, fields, ignored):
+    """
+    Removes from the given fields of all entries every value that is contained in ignored. A field without any
+    remaining value is deleted from the entry. Field content that is not a list is converted to a list with a
+    single element.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param fields: a single field name or a tuple of field names
+    :param ignored: the values that are removed
+    :return: the entries
+    """
+    field_names = fields if isinstance(fields, tuple) else (fields, )
+    for position, item in enumerate(entries):
+        for name in field_names:
+            if name not in item:
+                continue
+            values = item[name]
+            if not isinstance(values, list):
+                values = [values]
+            kept = [value for value in values if value not in ignored]
+            if not kept:
+                # nothing left, the field is dropped
+                del item[name]
+            else:
+                item[name] = kept
+        # write the entry back
+        entries[position] = item
+    return entries
+
+def remove_prefix_suffix(entries, fields, prefixes, suffixes):
+    """
+    Removes prefixes and suffixes from the values in the given fields of all entries. First all prefixes are
+    tried one after another, then all suffixes, finally surrounding whitespace is stripped. Field content that is
+    not a list is converted to a list with a single element.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param fields: a single field name or a tuple of field names
+    :param prefixes: the prefixes to remove
+    :param suffixes: the suffixes to remove
+    :return: the entries
+    """
+    field_names = fields if isinstance(fields, tuple) else (fields, )
+    for position, item in enumerate(entries):
+        for name in field_names:
+            if name not in item:
+                continue
+            values = item[name]
+            if not isinstance(values, list):
+                values = [values]
+            # cut off the prefixes
+            for prefix in prefixes:
+                values = [value[len(prefix):] if value.startswith(prefix) else value for value in values]
+            # cut off the suffixes
+            for suffix in suffixes:
+                values = [value[:-len(suffix)] if value.endswith(suffix) else value for value in values]
+            item[name] = [value.strip() for value in values]
+        # write the entry back
+        entries[position] = item
+    return entries
+
+
+def lower_case_content(entries, field):
+    """
+    Converts the values in the given field of all entries to lower case (via casefold). Field content that is not
+    a list is converted to a list with a single element.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param field: name of the field
+    :return: the entries
+    """
+    for position, item in enumerate(entries):
+        if field not in item:
+            continue
+        values = item[field]
+        if not isinstance(values, list):
+            values = [values]
+        item[field] = [value.casefold() for value in values]
+        # write the entry back
+        entries[position] = item
+    return entries
+
+
+def remove_parenthized_content(entries, fields):
+    """
+    Removes everything in parentheses (including the parentheses) from the values in the given fields of all
+    entries, strips surrounding whitespace and removes duplicates among the values of a field. The order of the
+    first occurrences is kept, so the result is the same in every run. Field content that is not a list is
+    converted to a list with a single element.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param fields: a single field name or a tuple of field names
+    :return: the entries
+    """
+    if not isinstance(fields, tuple):
+        fields = (fields, )
+    for index, entry in enumerate(entries):
+        for field in fields:
+            if field in entry:
+                content = entry[field]
+                if not isinstance(content, list):
+                    content = [content]
+                content = [re.sub(r'\([^)]*\)', '', c) for c in content] # remove parentheses content
+                content = [x.strip() for x in content]
+                # remove duplicates, a dictionary is used instead of a set because it keeps the order of insertion
+                content = list(dict.fromkeys(content))
+                entry[field] = content
+        entries[index] = entry
+    return entries
+
+
+def ignore_nonnumbers(entries, fields):
+    """
+    Keeps in the given fields of all entries only those values that consist of digits only. Field content that is
+    not a list is converted to a list with a single element. A field is kept even if no value remains.
+
+    :param entries: list of entries (dictionaries), modified in place
+    :param fields: a single field name or a tuple of field names
+    :return: the entries
+    """
+    field_names = fields if isinstance(fields, tuple) else (fields, )
+    for position, item in enumerate(entries):
+        for name in field_names:
+            if name not in item:
+                continue
+            values = item[name]
+            if not isinstance(values, list):
+                values = [values]
+            item[name] = [value for value in values if value.isdigit()]
+        # write the entry back
+        entries[position] = item
+    return entries
+
+
+def clean_lgw_content():
+    """
+    Cleans the entries that were parsed from libregamewiki. Reads the entries from '_lgw.json' in the import
+    folder, renames and removes keys, prints statistics about the field contents, normalizes the field contents
+    (licenses, languages, genres, libraries, ...), prints the statistics again and stores the result as json in
+    '_lgw.cleaned.json' in the import folder.
+    """
+
+    # paths
+    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
+    entries_file = os.path.join(import_path, '_lgw.json')
+    cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')
+
+    # load entries
+    text = utils.read_text(entries_file)
+    entries = json.loads(text)
+
+    # rename keys (pairs of new key and the old keys it replaces, only the first old key found in an entry is
+    # renamed)
+    key_replacements = (('developer', ('Developer', 'Developers')), ('code license', ('Code license', 'Code licenses')), ('engine', ('Engine', 'Engines')), ('genre', ('Genre', 'Genres')),
+                        ('library', ('Library', 'Libraries')), ('assets license', ('Media license', 'Media licenses')), ('code language', ('P. language', 'P. languages')), ('home', ('Homepage',)),
+                        ('platform', ('Platforms', )), ('tracker', ('Bug/Feature Tracker', )), ('repo', ('Source Code', )), ('forum', ('Forum', )), ('chat', ('Chat', )), ('origin', ('Origin', )),
+                        ('dev home', ('Development Project', )), ('last active', ('Release date', )))
+    for index, entry in enumerate(entries):
+        for new_key, old_keys in key_replacements:
+            for key in old_keys:
+                if key in entry:
+                    entry[new_key] = entry[key]
+                    del entry[key]
+                    break
+        entries[index] = entry
+
+    # ignore keys (these are removed from all entries)
+    ignored_keys = ('origin', 'Latest\xa0release')
+    for index, entry in enumerate(entries):
+        for key in ignored_keys:
+            if key in entry:
+                del entry[key]
+        entries[index] = entry
+
+    # check for unique field names
+    unique_fields = set()
+    for entry in entries:
+        unique_fields.update(entry.keys())
+    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))
+
+    # which fields are mandatory (those that are contained in every entry)
+    mandatory_fields = unique_fields.copy()
+    for entry in entries:
+        remove_fields = [field for field in mandatory_fields if field not in entry]
+        mandatory_fields -= set(remove_fields)
+    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))
+
+    # statistics before (for all fields except the listed ones)
+    print('field contents before')
+    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
+    for field in fields:
+        content = [entry[field] for entry in entries if field in entry]
+        # flatten
+        flat_content = []
+        for c in content:
+            if isinstance(c, list):
+                flat_content.extend(c)
+            else:
+                flat_content.append(c)
+        statistics = utils.unique_elements_and_occurrences(flat_content)
+        print('{}: {}'.format(field, ', '.join(statistics)))
+
+    # content replacements (applied one after another, a later step works on the result of the earlier steps)
+    entries = remove_parenthized_content(entries, ('assets license', 'code language', 'code license', 'engine', 'genre', 'last active', 'library'))
+    entries = remove_prefix_suffix(entries, ('code license', 'assets license'), ('"', 'GNU', ), ('"', '[3]', '[2]', '[1]', 'only'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL', ('General Public License', ))
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2.0', ('GPLv2', )) # for LGW GPLv2 would be the correct writing
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2', ('GPLv2', 'GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2', ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3.0', ('GPLv3', )) # for LGW GPLv3 would be the correct writing
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'Public domain', ('public domain', 'Public Domain'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'zlib', ('zlib/libpng license', 'Zlib License'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'BSD', ('Original BSD License', ))
+    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA-3.0', ('Creative Commons Attribution-ShareAlike 3.0 Unported License', 'CC-BY-SA 3.0', 'CC BY-SA 3.0'))
+    entries = replace_content(entries, ('code license', 'assets license'), 'CC-BY-SA', ('CC BY-SA',))
+    entries = replace_content(entries, ('code license', 'assets license'), 'MIT', ('MIT License', 'MIT"'))
+    entries = replace_content(entries, 'platform', 'macOS', ('Mac', ))
+    entries = remove_prefix_suffix(entries, ('code language', 'developer'), (), ('[3]', '[2]', '[1]'))
+    entries = ignore_content(entries, 'code language', ('HTML5', 'HTML', 'English', 'XML', 'WML'))
+    entries = replace_content(entries, 'code language', 'Lua', ('lua', 'LUA'))
+    entries = remove_prefix_suffix(entries, 'genre', (), ('game', 'games'))
+    entries = lower_case_content(entries, 'genre')
+    entries = replace_content(entries, 'genre', 'platform', ('platformer', ))
+    entries = replace_content(entries, 'genre', 'role playing', ('rpg', ))
+    entries = replace_content(entries, 'genre', 'first person, shooter', ('fps', ))
+    entries = replace_content(entries, 'genre', 'real time, strategy', ('rts',))
+    entries = replace_content(entries, 'genre', 'turn based, strategy', ('tbs',))
+    entries = ignore_content(entries, 'categories', ('GPL', 'C++', 'C', 'ECMAScript', 'Python', 'Java', 'CC BY-SA', 'Lua', 'LGPL', 'CC-BY', 'BSD', 'MIT', 'Qt', 'SDL', 'OpenGL', 'Pygame', 'PD', 'GLUT', 'Haskell', 'Allegro', 'Ruby', 'Zlib/libpng', 'OpenAL', 'Perl', 'Free Pascal', 'LÖVE', 'HTML5', 'Id Tech 1'))
+    entries = replace_content(entries, 'library', 'pygame', ('Pygame', ))
+    entries = replace_content(entries, 'library', 'Qt', ('QT', ))
+    entries = ignore_content(entries, 'library', ('C++', 'Lua', 'Mozilla Firefox'))
+    entries = ignore_nonnumbers(entries, 'last active')
+    entries = ignore_content(entries, 'last active', ('2019', ))
+    entries = ignore_content(entries, 'platform', ('DOS', ))
+
+
+    # list for every unique field (same statistics as before, now showing the cleaned content)
+    print('\nfield contents after')
+    fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
+    for field in fields:
+        content = [entry[field] for entry in entries if field in entry]
+        # flatten
+        flat_content = []
+        for c in content:
+            if isinstance(c, list):
+                flat_content.extend(c)
+            else:
+                flat_content.append(c)
+        statistics = utils.unique_elements_and_occurrences(flat_content)
+        print('{}: {}'.format(field, ', '.join(statistics)))
+
+    # save entries
+    text = json.dumps(entries, indent=1)
+    utils.write_text(cleaned_entries_file, text)
+
+
+if __name__ == "__main__":
+
+    # every stage reads the files written by the stage before it, so the stages have to be run in this order
+    # (the stages that are not needed in a run are commented out)
+
+    # stage one (download the game pages)
+    # download_lgw_content()
+
+    # stage two (parse the downloaded pages into _lgw.json)
+    # parse_lgw_content()
+
+    # stage three (clean the parsed entries and write _lgw.cleaned.json)
+    clean_lgw_content()
--- a/code/libregamewiki_synchronization.py
+++ b/code/libregamewiki_synchronization.py
@ -0,0 +1,279 @@
+"""
+Once data from libregamewiki is imported, synchronize with our database, i.e. identify the entries both have in common,
+estimate the differences in the entries both have in common, suggest to add the entries they have not in common to each
+other.
+
+unique imported fields: 'assets license', 'categories', 'code language', 'code license', 'developer', 'engine', 'genre', 'library', 'linux-packages', 'name', 'platform'
+mandatory imported fields: 'categories', 'name'
+
+Mapping lgw -> ours
+assets license -> assets license
+categories -> keywords
+code language -> code language
+code license -> code license
+developer -> free text (info)
+engine -> code dependencies
+genre -> keywords
+library -> code dependencies
+linux-packages -> free text (info)
+name -> name
+platform -> platform
+
+TODO also ignore our rejected entries
+"""
+
+import json
+import os
+from utils import constants, utils, osg
+
+
+# Maps the name of an entry as imported from LGW (key) to the name it is given before the comparison with our
+# entries (value). Keys that no longer occur in the import are reported by the script as "Can un-rename".
+lgw_name_aliases = {'Eat the Whistle': 'Eat The Whistle', 'Scorched 3D': 'Scorched3D', 'Blob Wars Episode 1 : Metal Blob Solid': 'Blobwars: Metal Blob Solid', 'Adventure': 'Colossal Cave Adventure',
+                     'Liquid War 6': 'Liquid War', 'Gusanos': 'GUSANOS', 'Corewars': 'Core War', 'FLARE': 'Flare', 'Vitetris': 'vitetris', 'Powder Toy': 'The Powder Toy', 'Asylum': 'SDL Asylum',
+                     'Atanks': 'Atomic Tanks', 'HeXon': 'heXon', 'Unnethack': 'UnNetHack', 'Nova Pinball': 'NOVA PINBALL', 'Jump n Bump': "Jump'n'Bump", 'Blades of Exile': 'Classic Blades of Exile',
+                    'Colobot': 'Colobot: Gold Edition', 'Dead Justice': 'Cat Mother Dead Justice', 'FreeDink': 'GNU FreeDink', 'FRaBs': 'fRaBs', 'Harmonist': 'Harmonist: Dayoriah Clan Infiltration', 'Iris2 3D Client - for Ultima Online': 'Iris2',
+                    'Java Classic Role Playing Game': 'jClassicRPG', 'Osgg': 'OldSkool Gravity Game', 'PyRacerz': 'pyRacerz', 'Starfighter': 'Project: Starfighter',
+                    'TORCS': 'TORCS, The Open Racing Car Simulator', 'Vertigo (game)': 'Vertigo', 'XInvaders3D': 'XInvaders 3D', 'LambdaRogue': 'LambdaRogue: The Book of Stars', 'Maniadrive': 'ManiaDrive',
+                    'Which Way Is Up': 'Which Way Is Up?'}
+# Names of LGW entries that are dropped right after loading the import. Names that no longer occur in the import
+# are reported by the script as "Can un-ignore".
+lgw_ignored_entries = ['Hetris', '8 Kingdoms', 'Antigravitaattori', 'Arena of Honour', 'Arkhart', 'Ascent of Justice', 'Balazar III', 'Balder3D', 'Barbie Seahorse Adventures', 'Barrage', 'Gnome Batalla Naval', 'Blocks',
+                 'Brickshooter', 'Bweakfwu', 'Cheese Boys', 'Clippers', 'Codewars', 'CRAFT: The Vicious Vikings', 'DQM', 'EmMines', 'Eskimo-run', 'Feuerkraft', 'Fight or Perish', 'Flatland', 'Forest patrol', 'Free Reign', 'GalaxyMage',
+                 'Gloss', 'GRUB Invaders', 'Howitzer Skirmish', 'Imperium: Sticks', 'Interstate Outlaws', 'GNOME Games', 'KDE Games', 'LegacyClone', 'Memonix', 'Ninjapix', 'Neverputt', 'Militia Defense', 'Sudoku86',
+                       'Terminal Overload release history', 'Scions of Darkness', 'Sedtris', 'SilChess', 'SSTPong', 'Tesseract Trainer', 'TunnelWars', 'The Fortress']
+
+# Translates license names as written in the LGW import (key) to the spelling used when a new entry file is
+# created (value); names without a mapping are taken over unchanged.
+licenses_map = {'GPLv2': 'GPL-2.0', 'GPLv2+': 'GPL-2.0', 'GPLv3': 'GPL-3.0', 'GPLv3+': 'GPL-3.0'}
+
+
+def compare_sets(a, b, name, limit=None):
+    """
+
+    :param a:
+    :param b:
+    :param name:
+    :return:
+    """
+    p = ''
+    if not isinstance(a, set):
+        a = set(a)
+    if not isinstance(b, set):
+        b = set(b)
+    d = sorted(list(a - b))
+    if d and limit != 'notus':
+        p += ' {} : us :  {}\n'.format(name, ', '.join(d))
+    d = sorted(list(b - a))
+    if d and limit != 'notthem':
+        p += ' {} : them : {}\n'.format(name, ', '.join(d))
+    return p
+
+
+if __name__ == "__main__":
+
+    # some parameter
+    similarity_threshold = 0.8
+    maximal_newly_created_entries = 40
+
+    # paths
+    lgw_import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
+    lgw_entries_file = os.path.join(lgw_import_path, '_lgw.cleaned.json')
+
+    # import lgw import
+    text = utils.read_text(lgw_entries_file)
+    lgw_entries = json.loads(text)
+
+    # eliminate the ignored entries
+    _ = [x['name'] for x in lgw_entries if x['name'] in lgw_ignored_entries] # those that will be ignored
+    _ = set(lgw_ignored_entries) - set(_) # those that shall be ignored minus those that will be ignored
+    if _:
+        print('Can un-ignore {}'.format(_))
+    lgw_entries = [x for x in lgw_entries if x['name'] not in lgw_ignored_entries]
+
+    # perform name and code language replacements
+    _ = [x['name'] for x in lgw_entries if x['name'] in lgw_name_aliases.keys()] # those that will be renamed
+    _ = set(lgw_name_aliases.keys()) - set(_) # those that shall be renamed minus those that will be renamed
+    if _:
+        print('Can un-rename {}'.format(_))
+    for index, lgw_entry in enumerate(lgw_entries):
+        if lgw_entry['name'] in lgw_name_aliases:
+            lgw_entry['name'] = lgw_name_aliases[lgw_entry['name']]
+        if 'code language' in lgw_entry:
+            languages = lgw_entry['code language']
+            h = []
+            for l in languages:
+                for g in ('/', 'and'):
+                    if g in l:
+                        l = l.split(g)
+                        l = [x.strip() for x in l]
+                if type(l) == str:
+                    l = [l]
+                h.extend(l)
+            languages = h
+            if languages:
+                lgw_entry['code language'] = languages
+            else:
+                del lgw_entry['code language']
+        lgw_entries[index] = lgw_entry
+
+    # check for unique field names
+    unique_fields = set()
+    for lgw_entry in lgw_entries:
+        unique_fields.update(lgw_entry.keys())
+    print('unique lgw fields: {}'.format(sorted(list(unique_fields))))
+
+    # which fields are mandatory
+    mandatory_fields = unique_fields.copy()
+    for lgw_entry in lgw_entries:
+        remove_fields = [field for field in mandatory_fields if field not in lgw_entry]
+        mandatory_fields  -= set(remove_fields)
+    print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields ))))
+
+    # read our database
+    our_entries = osg.assemble_infos()
+    print('{} entries with us'.format(len(our_entries)))
+
+    # just the names
+    lgw_names = set([x['name'] for x in lgw_entries])
+    our_names = set([x['name'] for x in our_entries])
+    common_names = lgw_names & our_names
+    lgw_names -= common_names
+    our_names -= common_names
+    print('{} in both, {} only in LGW, {} only with us'.format(len(common_names), len(lgw_names), len(our_names)))
+
+    # find similar names among the rest
+    print('similar names')
+    for lgw_name in lgw_names:
+        for our_name in our_names:
+            if osg.name_similarity(lgw_name, our_name) > similarity_threshold:
+                print('{} - {}'.format(lgw_name, our_name))
+
+    newly_created_entries = 0
+    # iterate over their entries
+    print('\n')
+    for lgw_entry in lgw_entries:
+        lgw_name = lgw_entry['name']
+        
+        is_included = False
+        for our_entry in our_entries:
+            our_name = our_entry['name']
+
+            # find those that entries in LGW that are also in our database and compare them
+            if lgw_name == our_name:
+                is_included = True
+                # a match, check the fields
+                name = lgw_name
+
+                p = ''
+
+                # platform
+                key = 'platform'
+                p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
+
+                # categories/keywords
+                #p += compare_sets(lgw_entry.get('categories', []), our_entry.get('keywords', []), 'categories/keywords')
+
+                # code language
+                key = 'code language'
+                p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
+
+                # code license (GPLv2)
+                key = 'code license'
+                p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
+
+                # engine, library
+                p += compare_sets(lgw_entry.get('engine', []), our_entry.get('code dependencies', []), 'code dependencies', 'notthem')
+                p += compare_sets(lgw_entry.get('library', []), our_entry.get('code dependencies', []), 'code dependencies', 'notthem')
+                p += compare_sets(lgw_entry.get('engine', [])+lgw_entry.get('library', []), our_entry.get('code dependencies', []), 'engine/library', 'notus')
+
+                # assets license
+                key = 'assets license'
+                p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
+
+                # TODO developer (need to introduce a field with us first)
+
+                if p:
+                    print('{}\n{}'.format(name, p))
+
+        if not is_included:
+            # a new entry, that we have never seen, maybe we should make an entry of our own
+
+            if newly_created_entries >= maximal_newly_created_entries:
+                continue
+
+            # determine file name
+            print('create new entry for {}'.format(lgw_name))
+            file_name = osg.canonical_entry_name(lgw_name) + '.md'
+            target_file = os.path.join(constants.entries_path, file_name)
+            if os.path.isfile(target_file):
+                print('warning: file {} already existing, save under slightly different name'.format(file_name))
+                target_file = os.path.join(constants.entries_path, file_name[:-3] + '-duplicate.md')
+                if os.path.isfile(target_file):
+                    continue # just for safety reasons
+
+            # add name
+            entry = '# {}\n\n'.format(lgw_name)
+
+            # add empty description
+            entry += '_{}_\n\n'.format(lgw_entry['description'])
+
+            # empty home (mandatory on our side)
+            home = lgw_entry.get('home', None)
+            dev_home = lgw_entry.get('dev home', None)
+            entry += '- Home: {}\n'.format(', '.join([x for x in [home, dev_home] if x]))
+
+            # state mandatory on our side
+            entry += '- State: \n'
+
+            # platform, if existing
+            if 'platform' in lgw_entry:
+                entry += '- Platform: {}\n'.format(', '.join(lgw_entry['platform']))
+
+            # keywords (genre) (also mandatory)
+            keywords = lgw_entry.get('genre', [])
+            if 'assets license' in lgw_entry:
+                keywords.append('open content')
+            keywords.sort(key=str.casefold)
+            if keywords:
+                entry += '- Keywords: {}\n'.format(', '.join(keywords))
+
+            # code repository (mandatory but not scraped from lgw)
+            entry += '- Code repository: {}\n'.format(lgw_entry.get('repo', ''))
+
+            # code language, mandatory on our side
+            languages = lgw_entry.get('code language', [])
+            languages.sort(key=str.casefold)
+            entry += '- Code language: {}\n'.format(', '.join(languages))
+
+            # code license, mandatory on our side
+            licenses = lgw_entry.get('code license', [])
+            licenses = [licenses_map[x] if x in licenses_map else x for x in licenses]
+            licenses.sort(key=str.casefold)
+            entry += '- Code license: {}\n'.format(', '.join(licenses))
+
+            # code dependencies (only if existing)
+            code_dependencies = lgw_entry.get('engine', [])
+            code_dependencies.extend(lgw_entry.get('library', []))
+            code_dependencies.sort(key=str.casefold)
+            if code_dependencies:
+                entry += '- Code dependencies: {}\n'.format(', '.join(code_dependencies))
+
+            # assets licenses (only if existing)
+            if 'assets license' in lgw_entry:
+                licenses = lgw_entry.get('assets license', [])
+                licenses = [licenses_map[x] if x in licenses_map else x for x in licenses]
+                licenses.sort(key=str.casefold)
+                entry += '- Assets license: {}\n'.format(', '.join(licenses))
+
+            # external links
+            ext_links = lgw_entry['external links']
+            if ext_links:
+                entry += '\nLinks: {}\n'.format('\n '.join(['{}: {}'.format(x[1], x[0]) for x in ext_links]))
+
+            # free text
+            if 'developer' in lgw_entry:
+                entry += '\nDeveloper: {}\n'.format(', '.join(lgw_entry['developer']))
+            if 'linux-packages' in lgw_entry:
+                entry += '{}\n'.format(lgw_entry['linux-packages'])
+
+            # write ## Building
+            entry += '\n## Building\n'
+
+            # finally write to file
+            utils.write_text(target_file, entry)
+            newly_created_entries += 1
--- a/code/list_python_external_imports.py
+++ b/code/list_python_external_imports.py
@ -0,0 +1,91 @@
+"""
+Where no requirements.txt or setup.py or other information is given for a Python project, get an idea of the external dependencies
+by parsing the Python files and looking for import statements.
+"""
+
+import re
+from utils.utils import *
+
+
+def local_module(module_base, file_path, module):
+    """
+
+    """
+    module = module.split('.')
+    module[-1] += '.py'
+    pathA = os.path.join(module_base, *module)
+    pathB = os.path.join(file_path, *module)
+    return os.path.exists(pathA) or os.path.exists(pathB)
+
+if __name__ == "__main__":
+
+    system_libraries = {'__builtin__', '.', '..', '*', 'argparse', 'array', 'os', 'copy', 'codecs', 'collections', 'ctypes', 'pickle', 'cPickle', 'datetime', 'decimal', 'email', 'functools',
+        'io', 'itertools', 'json', 'httplib', 'glob', 'math', 'cmath', 'heapq', 'md5', 'operator', 'random', 're', 'sha', 'shutil', 'smtplib', 'socket', 'string', 'struct', 'subprocess',
+        'sys', 'thread', 'threading', 'time', 'traceback', 'types', 'urllib', 'urllib2', 'urlparse', 'unittest', 'yaml', 'yaml3', 'zlib', 'zipfile', '__future__'}
+    regex_import = re.compile(r"^\s*import (.*)", re.MULTILINE)
+    regex_from = re.compile(r"^\s*from (.*) import (.*)", re.MULTILINE)
+    regex_comment = re.compile(r"(#.*)$", re.MULTILINE)
+    regex_as = re.compile(r"(as.*)$", re.MULTILINE)
+
+    # modify these locations
+    root_folder = r''
+    module_base = r''
+
+    # get all *.py files below the root_folder
+    python_files = []
+    setup_files = []
+    for dirpath, dirnames, filenames in os.walk(root_folder):
+        for file in ('setup.py', 'requirements.txt'):
+            if file in filenames:
+                setup_files.append(os.path.join(dirpath, file))
+        filenames = [x for x in filenames if x.endswith('.py') or x.endswith('.pyw') or x.endswith('.cry')]
+        if filenames:
+            filenames = [os.path.join(dirpath, x) for x in filenames]
+            python_files.extend(filenames)
+    print('found {} Python files'.format(len(python_files)))
+    if setup_files:
+        print('found setup files: {}'.format(', '.join(setup_files)))
+
+    # iterate over all these files
+    imports = []
+    for file in python_files:
+
+        # get file path
+        file_path = os.path.split(file)[0]
+
+        # read file content
+        content = read_text(file)
+
+        # remove comments
+        content = regex_comment.sub('', content)
+
+        # remove as clauses
+        content = regex_as.sub('', content)
+
+        # search for "import .." statements
+        matches = regex_import.findall(content)
+
+        for match in matches:
+            modules = match.split(',') # split if more
+            for module in modules:
+                module = module.strip()
+                if not local_module(module_base, file_path, module):
+                    imports.append(module)
+
+        # search for "from .. import .." statements
+        matches = regex_from.findall(content)
+
+        for match in matches:
+            module = match[0] # only the from part
+            module = module.strip()
+            if not local_module(module_base, file_path, module):
+                imports.append(module)
+
+    # throw out duplicates
+    imports = list(set(imports) - system_libraries)
+
+    # sort
+    imports.sort()
+
+    # display
+    print('\n'.join(imports))
--- a/code/maintenance.py
+++ b/code/maintenance.py
@ -0,0 +1,996 @@
+"""
+    Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
+    each category as well as creating a statistics file.
+
+    Counts the number of records in each sub-folder and updates the overview.
+    Sorts the entries in the contents files of each sub folder alphabetically.
+
+    This script runs with Python 3; with some minor tweaks it could probably also run with Python 2.
+"""
+
+import urllib.request
+import http.client
+import datetime
+import json
+import textwrap
+import os
+import re
+from utils import constants as c, utils, osg
+
+
+def update_readme_and_tocs(infos):
+    """
+    Recounts entries in sub categories and writes them to the readme.
+    Also updates the _toc files in the categories directories.
+
+    Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
+    on "A collection.."
+
+    Needs to be performed regularly.
+    """
+    print('update readme and toc files')
+
+    # completely delete content of toc path
+    for file in os.listdir(c.tocs_path):
+        os.remove(os.path.join(c.tocs_path, file))
+
+    # read readme
+    readme_file = os.path.join(c.root_path, 'README.md')
+    readme_text = utils.read_text(readme_file)
+
+    # compile regex for identifying the building blocks in the readme
+    regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
+
+    # apply regex
+    matches = regex.findall(readme_text)
+    if len(matches) != 1:
+        raise RuntimeError('readme file has invalid structure')
+    matches = matches[0]
+    start = matches[0]
+    end = matches[2]
+
+    tocs_text = ''
+
+    # split infos
+    infos_games, infos_tools, infos_frameworks, infos_libraries = osg.split_infos(infos)
+
+    # create games, tools, frameworks, libraries tocs
+    title = 'Games'
+    file = '_games.md'
+    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(infos_games))
+    create_toc(title, file, infos_games)
+
+    title = 'Tools'
+    file = '_tools.md'
+    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(infos_tools))
+    create_toc(title, file, infos_tools)
+
+    title = 'Frameworks'
+    file = '_frameworks.md'
+    tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(infos_frameworks))
+    create_toc(title, file, infos_frameworks)
+
+    title = 'Libraries'
+    file = '_libraries.md'
+    tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(infos_libraries))
+    create_toc(title, file, infos_libraries)
+
+    # create by category
+    categories_text = []
+    for keyword in osg.recommended_keywords:
+        infos_filtered = [x for x in infos if keyword in x['keywords']]
+        title = keyword.capitalize()
+        name = keyword.replace(' ', '-')
+        file = '_{}.md'.format(name)
+        categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(infos_filtered)))
+        create_toc(title, file, infos_filtered)
+    categories_text.sort()
+    tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))
+
+    # create by platform
+    platforms_text = []
+    for platform in osg.valid_platforms:
+        infos_filtered = [x for x in infos if platform in x.get('platform', [])]
+        title = platform
+        name = platform.lower()
+        file = '_{}.md'.format(name)
+        platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(infos_filtered)))
+        create_toc(title, file, infos_filtered)
+    tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))
+
+    # insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
+    text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end
+
+    # write to readme
+    utils.write_text(readme_file, text)
+
+
+def create_toc(title, file, entries):
+    """
+
+    """
+    # file path
+    toc_file = os.path.join(c.tocs_path, file)
+
+    # header line
+    text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
+
+    # assemble rows
+    rows = []
+    for entry in entries:
+        rows.append('- **[{}]({})** ({})'.format(entry['name'], '../' + entry['file'], ', '.join(entry['code language'] + entry['code license'] + entry['state'])))
+
+    # sort rows (by title)
+    rows.sort(key=str.casefold)
+
+    # add to text
+    text += '\n'.join(rows)
+
+    # write to toc file
+    utils.write_text(toc_file, text)
+
+
+def check_validity_external_links():
+    """
+    Checks all external links it can find for validity. Prints those with non OK HTTP responses. Does only need to be run
+    from time to time.
+    """
+
+    # TODO check if links are occurring in multiple entries, first go through all entries and find all links, then check links for multiple entries, then check links, follow redirects
+
+    print("check external links (can take a while)")
+
+    # regex for finding urls (can be in <> or in ]() or after a whitespace
+    regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
+    # regex = re.compile(r"[\s\n<(](http://.*?)[\s\n>)]")
+
+    # count
+    number_checked_links = 0
+
+    # ignore the following urls (they give false positives here)
+    ignored_urls = ('https://git.tukaani.org/xz.git')
+
+    # iterate over all entries
+    for _, entry_path, content in osg.entry_iterator():
+
+            # apply regex
+            matches = regex.findall(content)
+
+            # for each match
+            for match in matches:
+
+                # for each possible clause
+                for url in match:
+
+                    # if there was something (and not a sourceforge git url)
+                    if url and not url.startswith('https://git.code.sf.net/p/') and url not in ignored_urls:
+                        try:
+                            # without a special header, frequent 403 responses occur
+                            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'})
+                            urllib.request.urlopen(req)
+                        except urllib.error.HTTPError as e:
+                            print("{}: {} - {}".format(os.path.basename(entry_path), url, e.code))
+                        except urllib.error.URLError as e:
+                            print("{}: {} - {}".format(os.path.basename(entry_path), url, e.reason))
+                        except http.client.RemoteDisconnected:
+                            print("{}: {} - disconnected without response".format(os.path.basename(entry_path), url))
+
+                        number_checked_links += 1
+
+                        if number_checked_links % 50 == 0:
+                            print("{} links checked".format(number_checked_links))
+
+    print("{} links checked".format(number_checked_links))
+
+
+def check_template_leftovers():
+    """
+    Checks for template leftovers.
+
+    Should be run only occasionally.
+    """
+
+    print('check for template leftovers')
+
+    # load template and get all lines
+    text = utils.read_text(os.path.join(c.root_path, 'template.md'))
+    text = text.split('\n')
+    check_strings = [x for x in text if x and not x.startswith('##')]
+
+    # iterate over all entries
+    for _, entry_path, content in osg.entry_iterator():
+
+        for check_string in check_strings:
+            if content.find(check_string) >= 0:
+                raise RuntimeError('{}: found {}'.format(os.path.basename(entry_path), check_string))
+
+
+def fix_entries():
+    """
+    Fixes the keywords, code dependencies, build systems, .. entries, mostly by automatically sorting them.
+    """
+
+    keyword_synonyms = {'RTS': ('real time', 'strategy'), 'realtime': 'real time'}
+
+    # TODO also sort other fields, only read once and then do all, move to separate file
+    # example Javascript to JavaScript and then add whenever the known languages check hits
+
+    print('fix entries')
+
+    # keywords
+    regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)
+
+    # iterate over all entries
+    for entry, entry_path, content in osg.entry_iterator():
+
+        # match with regex
+        matches = regex.findall(content)
+        if len(matches) != 1:
+            raise RuntimeError('Could not find keywords in entry "{}"'.format(entry))
+
+        match = matches[0]
+
+        # get elements out, split, strip, delete duplicates
+        elements = match[1].split(',')
+        elements = [x.strip() for x in elements]
+        elements = list(set(elements))
+
+        # get category out
+        for keyword in osg.recommended_keywords:
+            if keyword in elements:
+                elements.remove(keyword)
+                category = keyword
+                break
+
+        # special treatments here
+        elements = [x if x != 'TBS' and x != 'TB' else 'turn based' for x in elements]
+        elements = [x if x != 'RTS' else 'real time' for x in elements]
+        elements = [x if x != 'MMO' else 'massive multiplayer online' for x in elements]
+        elements = [x if x != 'MMO' else 'multiplayer online' for x in elements]
+        elements = [x if x != 'SP' else 'singleplayer' for x in elements]
+        elements = [x if x != 'MP' else 'multiplayer' for x in elements]
+        elements = [x if x != 'engine' else 'game engine' for x in elements]
+        elements = [x if x != 'rpg' else 'role playing' for x in elements]
+        elements = [x if x != 'turn based' else 'turn-based' for x in elements]
+        for keyword in ('browser', 'misc', 'tools'):
+            if keyword in elements:
+                elements.remove(keyword)
+
+        # sort
+        elements.sort(key=str.casefold)
+
+        # add category
+        elements.insert(0, category)
+
+        keywords = '- Keywords: {}'.format(', '.join(elements))
+
+        new_content = match[0] + keywords + match[2]
+
+        if new_content != content:
+            # write again
+            utils.write_text(entry_path, new_content)
+
+    # code dependencies
+    regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)
+
+    # iterate over all entries
+    for entry, entry_path, content in osg.entry_iterator():
+        # match with regex
+        matches = regex.findall(content)
+
+        if not matches:
+            # no code dependencies given
+            continue
+
+        match = matches[0]
+
+        # get code dependencies out, split, strip, delete duplicates
+        elements = match[1].split(',')
+        elements = [x.strip() for x in elements]
+        elements = list(set(elements))
+
+        # special treatments here
+        elements = [x if x != 'Blender' else 'Blender game engine' for x in elements]
+        elements = [x if x.lower() != 'libgdx' else 'libGDX' for x in elements]
+        elements = [x if x != 'SDL 2' else 'SDL2' for x in elements]
+        elements = [x if x.lower() != "ren'py" else "Ren'Py" for x in elements]
+
+        # sort
+        elements.sort(key=str.casefold)
+
+        code_dependencies = '- Code dependencies: {}'.format(', '.join(elements))
+
+        new_content = match[0] + code_dependencies + match[2]
+
+        if new_content != content:
+            # write again
+            utils.write_text(entry_path, new_content)
+
+    # build systems
+    regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)
+
+    # iterate over all entries
+    for entry, entry_path, content in osg.entry_iterator():
+        # match with regex
+        matches = regex.findall(content)
+
+        if not matches:
+            # no build system given
+            continue
+
+        match = matches[0]
+
+        # get code dependencies out, split, strip, delete duplicates
+        elements = match[1].split(',')
+        elements = [x.strip() for x in elements]
+        elements = list(set(elements))
+
+        # special treatments here
+
+        # sort
+        elements.sort(key=str.casefold)
+
+        build_system = '- Build system: {}'.format(', '.join(elements))
+
+        new_content = match[0] + build_system + match[2]
+
+        if new_content != content:
+            # write again
+            utils.write_text(entry_path, new_content)
+
+
+def update_statistics(infos):
+    """
+    Generates the statistics page.
+
+    Should be done every time the entries change.
+    """
+
+    print('update statistics')
+
+    # start the page
+    statistics_file = os.path.join(c.root_path, 'statistics.md')
+    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
+
+    # total number
+    number_entries = len(infos)
+    rel = lambda x: x / number_entries * 100 # conversion to percent
+
+    statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
+
+    # State (beta, mature, inactive)
+    statistics += '## State\n\n'
+
+    number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
+    number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
+    number_inactive = sum(1 for x in infos if 'inactive' in x)
+    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
+
+    if number_inactive > 0:
+        entries_inactive = [(x['name'], x['inactive']) for x in infos if 'inactive' in x]
+        entries_inactive.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
+        entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
+        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
+        statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'
+
+    # Language
+    statistics += '## Code Languages\n\n'
+    field = 'code language'
+
+    # those without language tag
+    # TODO the language tag is now an essential field, this cannot happen anymore
+    # number_no_language = sum(1 for x in infois if field not in x)
+    # if number_no_language > 0:
+    #     statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
+    #     entries_no_language = [x['name'] for x in infois if field not in x]
+    #     entries_no_language.sort()
+    #     statistics += ', '.join(entries_no_language) + '\n\n'
+
+    # get all languages together
+    languages = []
+    for info in infos:
+        if field in info:
+            languages.extend(info[field])
+
+    unique_languages = set(languages)
+    unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
+    unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
+    unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_languages]
+    statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'
+
+    # Licenses
+    statistics += '## Code licenses\n\n'
+    field = 'code license'
+
+    # those without license
+    number_no_license = sum(1 for x in infos if field not in x)
+    if number_no_license > 0:
+        statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
+        entries_no_license = [x['name'] for x in infos if field not in x]
+        entries_no_license.sort()
+        statistics += ', '.join(entries_no_license) + '\n\n'
+
+    # get all licenses together
+    licenses = []
+    for info in infos:
+        if field in info:
+            licenses.extend(info[field])
+
+    unique_licenses = set(licenses)
+    unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
+    unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
+    unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_licenses]
+    statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'
+
+    # Keywords
+    statistics += '## Keywords\n\n'
+    field = 'keywords'
+
+    # get all keywords together
+    keywords = []
+    for info in infos:
+        if field in info:
+            keywords.extend(info[field])
+    # reduce those starting with "inspired by"
+    keywords = [x if not x.startswith('inspired by') else 'inspired' for x in keywords]
+    # reduce those starting with "multiplayer"
+    keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
+
+    unique_keywords = set(keywords)
+    unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
+    unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
+    unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_keywords]
+    statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'
+
+    # no download or play field
+    statistics += '## Entries without download or play fields\n\n'
+
+    entries = []
+    for info in infos:
+        if 'download' not in info and 'play' not in info:
+            entries.append(info['name'])
+    entries.sort(key=str.casefold)
+    statistics +=  '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
+
+    # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
+    popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
+    statistics += '## Entries with a code repository not on a popular site\n\n'
+
+    entries = []
+    field = 'code repository'
+    for info in infos:
+        if field in info:
+            popular = False
+            for repo in info[field]:
+                for popular_repo in popular_code_repositories:
+                    if popular_repo in repo:
+                        popular = True
+                        break
+            # if there were repositories, but none popular, add them to the list
+            if not popular:
+                entries.append(info['name'])
+                # print(info[field])
+    entries.sort(key=str.casefold)
+    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
+
+    # Code dependencies
+    statistics += '## Code dependencies\n\n'
+    field = 'code dependencies'
+
+    # get all code dependencies together
+    code_dependencies = []
+    entries_with_code_dependency = 0
+    for info in infos:
+        if field in info:
+            code_dependencies.extend(info[field])
+            entries_with_code_dependency += 1
+    statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency, rel(entries_with_code_dependency))
+
+    unique_code_dependencies = set(code_dependencies)
+    unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in unique_code_dependencies]
+    unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
+    unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_code_dependencies]
+    statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'
+
+    # Build systems:
+    statistics += '## Build systems\n\n'
+    field = 'build system'
+
+    # get all build systems together
+    build_systems = []
+    for info in infos:
+        if field in info:
+            build_systems.extend(info[field])
+
+    statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(len(build_systems)))
+
+    unique_build_systems = set(build_systems)
+    unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
+    unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
+    unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_build_systems]
+    statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(unique_build_systems) + '\n\n'
+
+    # C, C++ projects without build system information
+    c_cpp_project_without_build_system = []
+    for info in infos:
+        if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
+            c_cpp_project_without_build_system.append(info['name'])
+    c_cpp_project_without_build_system.sort(key=str.casefold)
+    statistics += '##### C and C++ projects without build system information ({})\n\n'.format(len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'
+
+    # C, C++ projects with build system information but without CMake as build system
+    c_cpp_project_not_cmake = []
+    for info in infos:
+        if field in info and 'CMake' in info[field] and ('C' in info['code language'] or 'C++' in info['code language']):
+            c_cpp_project_not_cmake.append(info['name'])
+    c_cpp_project_not_cmake.sort(key=str.casefold)
+    statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'
+
+    # Platform
+    statistics += '## Platform\n\n'
+    field = 'platform'
+
+    # get all platforms together
+    platforms = []
+    for info in infos:
+        if field in info:
+            platforms.extend(info[field])
+
+    statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))
+
+    unique_platforms = set(platforms)
+    unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
+    unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name
+    unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
+    unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_platforms]
+    statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
+
+    # write to statistics file
+    utils.write_text(statistics_file, statistics)
+
+
+def export_json(infos):
+    """
+    Parses all entries, collects interesting info and stores it in a json file suitable for displaying
+    with a dynamic table in a browser.
+    """
+
+    print('export to json for web display')
+
+    # make database out of it
+    db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}
+
+    entries = []
+    for info in infos:
+
+        # game & description
+        entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['name'], info['home'][0],
+            r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' + info['file']),
+            textwrap.shorten(info['description'], width=60, placeholder='..')]
+
+        # download
+        field = 'download'
+        if field in info and info[field]:
+            entry.append('<a href="{}">Link</a>'.format(info[field][0]))
+        else:
+            entry.append('')
+
+        # state (field state is essential)
+        entry.append('{} / {}'.format(info['state'][0], 'inactive since {}'.format(info['inactive']) if 'inactive' in info else 'active'))
+
+        # keywords
+        field = 'keywords'
+        if field in info and info[field]:
+            entry.append(', '.join(info[field]))
+        else:
+            entry.append('')
+
+        # source
+        text = []
+        field = 'code repository'
+        if field in info and info[field]:
+            text.append('<a href="{}">Source</a>'.format(info[field][0]))
+        field = 'code language'
+        if field in info and info[field]:
+            text.append(', '.join(info[field]))
+        field = 'code license'
+        if field in info and info[field]:
+            text.append(info[field][0])
+        entry.append(' - '.join(text))
+
+        # append to entries
+        entries.append(entry)
+
+    # sort entries by game name
+    entries.sort(key=lambda x: str.casefold(x[0]))
+
+    db['data'] = entries
+
+    # output
+    json_path = os.path.join(c.entries_path, os.path.pardir, 'docs', 'data.json')
+    text = json.dumps(db, indent=1)
+    utils.write_text(json_path, text)
+
+
+def git_repo(repo):
+    """
+        Tests if a repo is a git repo, then returns the repo url, possibly modifying it slightly.
+    """
+
+    # generic (https://*.git) or (http://*.git) ending on git
+    if (repo.startswith('https://') or repo.startswith('http://')) and repo.endswith('.git'):
+        return repo
+
+    # for all others we just check if they start with the typical urls of git services
+    services = ['https://git.tuxfamily.org/', 'http://git.pond.sub.org/', 'https://gitorious.org/', 'https://git.code.sf.net/p/']
+    for service in services:
+        if repo.startswith(service):
+            return repo
+
+    if repo.startswith('git://'):
+        return repo
+
+    # the rest is ignored
+    return None
+
+
+def svn_repo(repo):
+    """
+    
+    """
+    if repo.startswith('https://svn.code.sf.net/p/'):
+        return repo
+
+    if repo.startswith('http://svn.uktrainsim.com/svn/'):
+        return repo
+
+    if repo is 'https://rpg.hamsterrepublic.com/source/wip':
+        return repo
+
+    if repo.startswith('http://svn.savannah.gnu.org/svn/'):
+        return repo
+
+    if repo.startswith('svn://'):
+        return repo
+
+    if repo.startswith('https://svn.icculus.org/') or repo.startswith('http://svn.icculus.org/'):
+        return repo
+    
+    # not svn
+    return None
+
+
+def hg_repo(repo):
+    """
+
+    """
+    if repo.startswith('https://bitbucket.org/') and not repo.endswith('.git'):
+        return repo
+
+    if repo.startswith('http://hg.'):
+        return repo
+
+    # not hg
+    return None
+
+
+def export_primary_code_repositories_json(infos):
+    """
+
+    """
+
+    print('export to json for local repository update')
+
+    primary_repos = {'git': [], 'svn': [], 'hg': []}
+    unconsumed_entries = []
+
+    # for every entry filter those that are known git repositories (add additional repositories)
+    field = 'code repository-raw'
+    for info in infos:
+        # if field 'Code repository' is available
+        if field in info:
+            consumed = False
+            repos = info[field]
+            if repos:
+                # split at comma
+                repos = repos.split(',')
+                # keep the first and all others containing "(+)"
+                additional_repos = [x for x in repos[1:] if "(+)" in x]
+                repos = repos[0:1]
+                repos.extend(additional_repos)
+                for repo in repos:
+                    # remove parenthesis and strip of white spaces
+                    repo = re.sub(r'\([^)]*\)', '', repo)
+                    repo = repo.strip()
+                    url = git_repo(repo)
+                    if url:
+                        primary_repos['git'].append(url)
+                        consumed = True
+                        continue
+                    url = svn_repo(repo)
+                    if url:
+                        primary_repos['svn'].append(url)
+                        consumed = True
+                        continue
+                    url = hg_repo(repo)
+                    if url:
+                        primary_repos['hg'].append(url)
+                        consumed=True
+                        continue
+
+            if not consumed:
+                unconsumed_entries.append([info['name'], info[field]])
+                # print output
+                if 'code repository' in info:
+                    print('Entry "{}" unconsumed repo: {}'.format(info['name'], info[field]))
+
+    # sort them alphabetically (and remove duplicates)
+    for k, v in primary_repos.items():
+        primary_repos[k] = sorted(set(v))
+
+    # statistics of gits
+    git_repos = primary_repos['git']
+    print('{} Git repositories'.format(len(git_repos)))
+    for domain in ('repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily', 'github.com', 'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'):
+        print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain))
+
+    # write them to tools/git
+    json_path = os.path.join(c.root_path, 'tools', 'archives.json')
+    text = json.dumps(primary_repos, indent=1)
+    utils.write_text(json_path, text)
+
+
+def export_git_code_repositories_json():
+    """
+
+    """
+
+    urls = []
+    field = 'code repository'
+
+    # for every entry, get all git
+    for info in infos:
+        # if field 'Code repository' is available
+        if field in info:
+            repos = info[field]
+            if repos:
+                # take the first
+                repo = repos[0]
+                url = git_repo(repo)
+                if url:
+                    urls.append(url)
+
+    # sort them alphabetically (and remove duplicates)
+    urls.sort()
+
+    # write them to tools/git
+    json_path = os.path.join(c.root_path, 'tools', 'git_repositories.json')
+    text = json.dumps(urls, indent=1)
+    utils.write_text(json_path, text)
+
+
+def sort_text_file(file, name):
+    """
+    Reads a text file, splits in lines, removes duplicates, sort, writes back.
+    """
+    text = utils.read_text(file)
+    text = text.split('\n')
+    text = sorted(list(set(text)), key=str.casefold)
+    print('{} contains {} items'.format(name, len(text)))
+    text = '\n'.join(text)
+    utils.write_text(file, text)
+
+
+def clean_backlog(stripped_game_urls):
+
+    # read backlog and split
+    file = os.path.join(c.root_path, 'tools', 'backlog.txt')
+    text = utils.read_text(file)
+    text = text.split('\n')
+
+    # remove those that are in stripped_game_urls
+    text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]
+
+    # remove duplicates and sort
+    text = sorted(list(set(text)), key=str.casefold)
+    print('backlog contains {} items'.format(len(text)))
+
+    # join and save again
+    text = '\n'.join(text)
+    utils.write_text(file, text)
+
+
+def check_validity_backlog():
+    import requests
+
+    # read backlog and split
+    file = os.path.join(c.root_path, 'tools', 'backlog.txt')
+    text = utils.read_text(file)
+    urls = text.split('\n')
+    urls = [x.split(' ')[0] for x in urls]
+
+    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
+    for url in urls:
+        try:
+            r = requests.get(url, headers=headers, timeout=5)
+        except Exception as e:
+            print('{} gave error: {}'.format(url, e))
+        else:
+            if r.status_code != requests.codes.ok:
+                print('{} returned status code: {}'.format(url, r.status_code))
+
+            if r.is_redirect or r.history:
+                print('{} redirected to {}, {}'.format(url, r.url, r.history))
+
+
+def update_inspirations(infos):
+    """
+
+    """
+
+    print('update inspirations')
+
+    inspirations_file = os.path.join(c.root_path, 'inspirations.md')
+    inspirations = '[comment]: # (partly autogenerated content, edit with care, read the manual before)\n# Inspirations\n\n'
+
+    # collect information
+    originals = {}
+    for info in infos:
+        name = info['name']
+        keywords = info['keywords']
+        ins = [x[12:] for x in keywords if x.startswith('inspired by ')]
+        if ins:
+            ins = ins[0].split(' + ')
+            for original in ins:
+                if original in originals:
+                    originals[original].append(name)
+                else:
+                    originals[original] = [name]
+
+    # number of inspirations
+    inspirations += '{} games listed as inspirations.\n\n'.format(len(originals))
+
+    # iterate through originals alphabetically sorted
+    for original, names in sorted(originals.items(), key=lambda x: str.casefold(x[0])):
+        inspirations += '## {} ({})\n\n'.format(original, len(names))
+        inspirations += '- Inspired entries: {}\n\n'.format(', '.join(sorted(names, key=str.casefold)))
+
+    # write to statistics file
+    utils.write_text(inspirations_file, inspirations)
+
+
+def update_developer(infos):
+    """
+
+    """
+
+    print('update developer')
+
+    developer_file = os.path.join(c.root_path, 'developer.md')
+    content = '[comment]: # (partly autogenerated content, edit with care, read the manual before)\n# Developer\n\n'
+
+    # collect information
+    developer = {}
+    for info in infos:
+        if 'developer' in info:
+            name = info['name']
+            devs = info['developer']
+            for dev in devs:
+                if dev in developer:
+                    developer[dev].append(name)
+                else:
+                    developer[dev] = [name]
+
+    # number of developers
+    content += '{} listed developer(s).\n\n'.format(len(developer))
+
+    # iterate through developers alphabetically sorted
+    for dev, names in sorted(developer.items(), key=lambda x: str.casefold(x[0])):
+        content += '## {} ({})\n\n'.format(dev, len(names))
+        content += '- Games: {}\n\n'.format(', '.join(sorted(names, key=str.casefold)))
+
+    # write to statistics file
+    utils.write_text(developer_file, content)
+
+
+def check_code_dependencies(infos):
+    """
+
+    """
+
+    # get all names
+    names = [x['name'] for x in infos]
+
+    # TODO get all names of frameworks and libraries only and use osg.code_dependencies_aliases
+
+    # get all code dependencies
+    dependencies = {}
+    for info in infos:
+        deps = info.get('code dependencies', [])
+        for dependency in deps:
+            if dependency in dependencies:
+                dependencies[dependency] += 1
+            else:
+                dependencies[dependency] = 1
+
+    # delete those that are in names
+    dependencies = [(k, v) for k,v in dependencies.items() if k not in names and k not in osg.code_dependencies_without_entry]
+
+    # sort by number
+    dependencies.sort(key=lambda x: x[1], reverse=True)
+
+    # print out
+    print('Code dependencies not included as entry')
+    for dep in dependencies:
+        print('{} ({})'.format(*dep))
+
+
+# Script entry point: cleans the backlog, then runs the maintenance steps below one after another. The order
+# matters, infos is assembled only after the entries have been checked and fixed.
+if __name__ == "__main__":
+
+    # check_validity_backlog()
+
+    # backlog
+    # start with all links extracted by osg.extract_links()
+    game_urls = osg.extract_links()
+    # add the urls listed in tools/rejected.txt: everything in parentheses that starts with http, several urls
+    # in the same parentheses are separated by comma
+    text = utils.read_text(os.path.join(c.root_path, 'tools', 'rejected.txt'))
+    regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
+    matches = regex.findall(text)
+    rejected_urls = []
+    for match in matches:
+        urls = match.split(',')
+        urls = [x.strip() for x in urls]
+        rejected_urls.extend(urls)
+    game_urls.extend(rejected_urls)
+    # for links to web.archive.org additionally add the archived url, i.e. the part of the link starting at
+    # the next occurrence of "http" (searched from index 5 on to skip the leading "https")
+    more_urls = []
+    for url in game_urls:
+        if url.startswith('https://web.archive.org/web'):
+            # print(url) # sometimes the http is missing in archive links (would need proper parsing)
+            url = url[url.index('http', 5):]
+            more_urls.append(url)
+    game_urls.extend(more_urls)
+    # remove all backlog lines whose stripped url is among the stripped collected urls
+    stripped_game_urls = [utils.strip_url(x) for x in game_urls]
+    clean_backlog(stripped_game_urls)
+
+    # check for unfilled template lines
+    check_template_leftovers()
+
+    # fix entries
+    fix_entries()
+
+    # assemble info
+    infos = osg.assemble_infos()
+
+    # recount and write to readme and to tocs
+    update_readme_and_tocs(infos)
+
+    # generate report
+    update_statistics(infos)
+
+    # update inspirations
+    update_inspirations(infos)
+
+    # update developers
+    update_developer(infos)
+
+    # update database for html table
+    export_json(infos)
+
+    # collect list of primary code repositories
+    export_primary_code_repositories_json(infos)
+
+    # check code dependencies
+    check_code_dependencies(infos)
+
+    # collect list of git code repositories (only one per project) for git_statistics script
+    # export_git_code_repositories_json()
+
+    # check external links (only rarely)
+    # check_validity_external_links()
+
+    # sort backlog and rejected
+    # (the backlog is already sorted by clean_backlog above, so only the rejected list is sorted here)
+    # sort_text_file(os.path.join(c.root_path, 'tools', 'backlog.txt'), 'backlog')
+    sort_text_file(os.path.join(c.root_path, 'tools', 'rejected.txt'), 'rejected games list')
--- a/code/osgameclones_synchronization.py
+++ b/code/osgameclones_synchronization.py
@ -0,0 +1,493 @@
+"""
+
+osgameclones has the following fields:
+'updated', 'video', 'repo', 'license', 'originals', 'status', 'multiplayer', 'info', 'lang', 'feed', 'content', 'images', 'url', 'name', 'framework', 'type', 'development'
+
+mandatory fields are: 'name', 'license', 'type', 'originals'
+
+possible values:
+osgc-development: active(337), complete(32), halted(330), sporadic(129), very active(6)
+osgc-multiplayer: Co-op(5), Competitive(13), Hotseat(3), LAN(17), Local(3), Matchmaking(1), Online(33), Split-screen(7)
+osgc-type: clone(171), remake(684), similar(11), tool(7)
+osgc-status: playable(274), semi-playable(34), unplayable(34)
+osgc-license: ['AFL3', 'AGPL3', 'Apache', 'Artistic', 'As-is', 'BSD', 'BSD2', 'BSD4', 'bzip2', 'CC-BY', 'CC-BY-NC', 'CC-BY-NC-ND', 'CC-BY-NC-SA', 'CC-BY-SA', 'CC0', 'Custom', 'GPL2', 'GPL3', 'IJG', 'ISC', 'JRL', 'LGPL2', 'LGPL3', 'Libpng', 'MAME', 'MIT', 'MPL', 'MS-PL', 'Multiple', 'NGPL', 'PD', 'WTFPL', 'Zlib']
+osgc-content: commercial(104), free(32), open(61), swappable(5)
+
+Mapping osgameclones -> ours
+
+name -> name
+type -> keywords, description
+originals -> keywords
+repo -> code repository
+url -> home
+feed (-> home)
+development -> state
+status -> state
+multiplayer -> keywords
+lang -> code language
+framework -> code dependencies
+license -> code license / assets license
+content -> keywords
+info -> after fields
+updated not used
+images not used
+video: not used
+
+TODO also ignore our rejected entries
+"""
+
+import ruamel_yaml as yaml
+import os
+from utils import constants, utils, osg
+
+# Names of osgameclones entries (keys) and the names they are replaced with (values) before their entries are
+# compared by name with our entries. These names should change on osgameclones.
+osgc_name_aliases = {'4DTris': '4D-TRIS', 'fheroes2': 'Free Heroes 2', 'DrCreep': 'The Castles of Dr. Creep', 'Duke3d_win32': 'Duke3d_w32', 'erampage (EDuke32 fork)': 'erampage', 'GNOME Atomix': 'Atomix', 'Head over Heels 2': 'Head over Heels',
+                     'mewl': 'M.E.W.L.', 'LinWarrior': 'Linwarrior 3D', 'Mice Men Remix': 'Mice Men: Remix', 'OpenApoc': 'Open Apocalypse', 'open-cube': 'Open Cube', 'open-horizon': 'Open Horizon', 'opengl_test_drive_clone': 'OpenGL Test Drive Remake',
+                     'Play Freeciv!': 'Freeciv-web', 'ProjectX': 'Forsaken', 'Siege of Avalon Open Source': 'Siege of Avalon : Open Source', 'ss13remake': 'SS13 Remake', 'shadowgrounds': 'Shadowgrounds', 'RxWars': 'Prescription Wars', 'Super Mario Bros And Level Editor in C#': 'Mario Objects',
+                     'tetris': 'Just another Tetris™ clone', 'twin-e': 'TwinEngine', 'CrossUO: Ultima Online': 'CrossUO', 'Doomsday': 'Doomsday Engine', 'OpMon': 'OPMon'}
+
+# Conversion of license names from the syntax of osgameclones (keys) to our syntax (values). License names
+# that are not contained here are taken over unchanged.
+osgc_licenses_map = {'GPL2': 'GPL-2.0', 'GPL3': 'GPL-3.0', 'AGPL3': 'AGPL-3.0', 'LGPL3': 'LGPL-3.0', 'LGPL2': 'LGPL-2.0 or 2.1?', 'MPL': 'MPL-2.0', 'Apache': 'Apache-2.0', 'Artistic': 'Artistic License', 'Zlib': 'zlib', 'PD': 'Public domain', 'AFL3': 'AFL-3.0', 'BSD2': '2-clause BSD'}
+
+# Names of osgameclones entries that are ignored (for various reasons like unclear license etc.). Entries
+# with these names are removed from the loaded osgameclones entries before the comparison.
+osgc_ignored_entries = ["A Mouse's Vengeance", 'achtungkurve.com', 'AdaDoom3', 'Agendaroids', 'Alien 8', 'Ard-Reil', 'Balloon Fight', 'bladerunner (Engine within SCUMMVM)', 'Block Shooter', 'Bomb Mania Reloaded', 'boulder-dash', 'Cannon Fodder', 'Contra_remake', 'CosmicArk-Advanced', 'Deuteros X', 'datastorm'
+                        , 'div-columns', 'div-pacman2600', 'div-pitfall', 'div-spaceinvaders2600', 'EXILE', 'Free in the Dark', 'Football Manager', 'Fight Or Perish', 'EarthShakerDS', 'Entombed!', 'FreeRails 2', 'Glest Advanced Engine', 'FreedroidClassic', 'FreeFT', 'Future Blocks', 'HeadOverHeels'
+                        , 'Herzog 3D', 'Homeworld SDL', 'imperialism-remake', 'Jumping Jack 2: Worryingly Familiar', 'Jumping Jack: Further Adventures', 'Jumpman', 'legion', 'KZap', 'LastNinja', 'Lemmix', 'LixD', 'luminesk5', 'Manic Miner', 'Meridian 59 Server 105', 'Meridian 59 German Server 112', 'Mining Haze'
+                        , 'OpenGeneral', 'MonoStrategy', 'New RAW', 'OpenDeathValley', 'OpenOutcast', 'openStrato', 'OpenPop', 'pacman', 'Phavon', 'PKMN-FX', 'Project: Xenocide', 'pyspaceinvaders', 'PyTouhou', 'Racer', 'Ruby OMF 2097 Remake', 'Snipes', 'Spaceship Duel', 'Space Station 14', 'Starlane Empire'
+                        , 'Styx', 'Super Mario Bros With SFML in C#', 'thromolusng', 'Tile World 2', 'Tranzam', 'Voxelstein 3D', 'XQuest 2', 'xrick', 'zedragon', 'Uncharted waters 2 remake', 'Desktop Adventures Engine for ScummVM', 'Open Sonic', 'Aladdin_DirectX', 'Alive_Reversing']
+
+def unique_field_contents(entries, field):
+    """
+    """
+    unique_content = set()
+    for entry in entries:
+        if field in entry:
+            field_content = entry[field]
+            if type(field_content) is list:
+                unique_content.update(field_content)
+            else:
+                unique_content.add(field_content)
+    unique_content = sorted(list(unique_content), key=str.casefold)
+    return unique_content
+
+
+def compare_sets(a, b, name, limit=None):
+    """
+
+    :param a:
+    :param b:
+    :param name:
+    :return:
+    """
+    p = ''
+    if not isinstance(a, set):
+        a = set(a)
+    if not isinstance(b, set):
+        b = set(b)
+    d = sorted(list(a - b))
+    if d and limit != 'notus':
+        p += ' {} : us :  {}\n'.format(name, ', '.join(d))
+    d = sorted(list(b - a))
+    if d and limit != 'notthem':
+        p += ' {} : them : {}\n'.format(name, ', '.join(d))
+    return p
+
+
+if __name__ == "__main__":
+
+    # some parameter
+    similarity_threshold = 0.8
+    maximal_newly_created_entries = 40
+
+    # paths
+    root_path  = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+
+    # import the osgameclones data
+    osgc_path = os.path.realpath(os.path.join(root_path, os.path.pardir, '11_osgameclones.git', 'games'))
+    osgc_files = os.listdir(osgc_path)
+
+    # iterate over all yaml files in osgameclones/data folder and load contents
+    osgc_entries = []
+    for file in osgc_files:
+        # read yaml
+        with open(os.path.join(osgc_path, file), 'r', encoding='utf-8') as stream:
+            try:
+                _ = yaml.safe_load(stream)
+            except Exception as exc:
+                print(file)
+                raise exc
+
+        # add to entries
+        osgc_entries.extend(_)
+    print('Currently {} entries in osgameclones'.format(len(osgc_entries)))
+
+    # check: print all git repos with untypical structure
+    for osgc_entry in osgc_entries:
+        name = osgc_entry['name']
+        if 'repo' in osgc_entry:
+            osgc_repos = osgc_entry['repo']
+            if isinstance(osgc_repos, str):
+                osgc_repos = [osgc_repos]
+            for repo in osgc_repos:
+                if 'github' in repo and any((repo.endswith(x) for x in ('/', '.git'))):
+                    print('{} : {}'.format(osgc_entry['name'], repo))
+
+    # which fields do they have
+    osgc_fields = set()
+    for osgc_entry in osgc_entries:
+        osgc_fields.update(osgc_entry.keys())
+    osgc_fields = sorted(list(osgc_fields))
+    print('Unique osgc-fields: {}'.format(', '.join(osgc_fields)))
+
+    for field in osgc_fields:
+        if field in ('video', 'feed', 'url', 'repo', 'info', 'updated', 'images', 'name', 'originals'):
+            continue
+        osgc_content = [entry[field] for entry in osgc_entries if field in entry]
+        # flatten
+        flat_content = []
+        for c in osgc_content:
+            if isinstance(c, list):
+                flat_content.extend(c)
+            else:
+                flat_content.append(c)
+        statistics = utils.unique_elements_and_occurrences(flat_content)
+        statistics.sort(key=str.casefold)
+        print('{}: {}'.format(field, ', '.join(statistics)))
+
+    # eliminate the ignored entries
+    _ = [x['name'] for x in osgc_entries if x['name'] in osgc_ignored_entries] # those that will be ignored
+    _ = set(osgc_ignored_entries) - set(_) # those that shall be ignored minus those that will be ignored
+    if _:
+        print('Can un-ignore {}'.format(_))
+    osgc_entries = [x for x in osgc_entries if x['name'] not in osgc_ignored_entries]
+
+    # fix names and licenses (so they are not longer detected as deviations downstreams)
+    _ = [x['name'] for x in osgc_entries if x['name'] in osgc_name_aliases.keys()] # those that will be renamed
+    _ = set(osgc_name_aliases.keys()) - set(_) # those that shall be renamed minus those that will be renamed
+    if _:
+        print('Can un-rename {}'.format(_))
+    for index, entry in enumerate(osgc_entries):
+        name = entry['name']
+        if name in osgc_name_aliases:
+            entry['name'] = osgc_name_aliases[name]
+        if 'license' in entry:
+            osgc_licenses = entry['license']
+            osgc_licenses = [osgc_licenses_map.get(x, x) for x in osgc_licenses]
+            entry['license'] = osgc_licenses
+        # fix content (add suffix content
+        if 'content' in entry:
+            osgc_content = entry['content']
+            if isinstance(osgc_content, str):
+                osgc_content = [osgc_content]
+            osgc_content = [x + ' content' for x in osgc_content]
+            entry['content'] = osgc_content
+        osgc_entries[index] = entry # TODO is this necessary or is the entry modified anyway?
+
+    # which fields do they have
+    osgc_fields = set()
+    for osgc_entry in osgc_entries:
+        osgc_fields.update(osgc_entry.keys())
+    print('unique osgc-fields: {}'.format(osgc_fields))
+
+    # which fields are mandatory
+    for osgc_entry in osgc_entries:
+        remove_fields = [field for field in osgc_fields if field not in osgc_entry]
+        osgc_fields -= set(remove_fields)
+    print('mandatory osfg-fields: {}'.format(osgc_fields))
+
+    # some field statistics
+    print('osgc-development: {}'.format(unique_field_contents(osgc_entries, 'development')))
+    print('osgc-multiplayer: {}'.format(unique_field_contents(osgc_entries, 'multiplayer')))
+    print('osgc-type: {}'.format(unique_field_contents(osgc_entries, 'type')))
+    print('osgc-languages: {}'.format(unique_field_contents(osgc_entries, 'lang')))
+    print('osgc-licenses: {}'.format(unique_field_contents(osgc_entries, 'license')))
+    print('osgc-status: {}'.format(unique_field_contents(osgc_entries, 'status')))
+    print('osgc-framework: {}'.format(unique_field_contents(osgc_entries, 'framework')))
+    print('osgc-content: {}'.format(unique_field_contents(osgc_entries, 'content')))
+
+    # read our database
+    our_entries = osg.assemble_infos()
+    print('{} entries with us'.format(len(our_entries)))
+
+    # just the names
+    osgc_names = set([x['name'] for x in osgc_entries])
+    our_names = set([x['name'] for x in our_entries])
+    common_names = osgc_names & our_names
+    osgc_names -= common_names
+    our_names -= common_names
+    print('{} in both, {} only in osgameclones, {} only with us'.format(len(common_names), len(osgc_names), len(our_names)))
+
+    # find similar names among the rest
+    #print('look for similar names')
+    #for osgc_name in osgc_names:
+    #    for our_name in our_names:
+    #        if osg.game_name_similarity(osgc_name, our_name) > similarity_threshold:
+    #            print(' {} - {}'.format(osgc_name, our_name))
+
+    newly_created_entries = 0
+    # iterate over their entries
+    for osgc_entry in osgc_entries:
+        osgc_name = osgc_entry['name']
+
+        is_included = False
+        for our_entry in our_entries:
+            our_name = our_entry['name']
+
+            # find those entries in osgameclones that are also in our database and compare them
+            if osgc_name == our_name:
+                is_included = True
+                # a match, check the fields
+                name = osgc_name
+
+                p = ''
+
+                # compare their lang with our code language
+                if 'lang' in osgc_entry:
+                    osgc_languages = osgc_entry['lang']
+                    if type(osgc_languages) == str:
+                        osgc_languages = [osgc_languages]
+                    our_languages = our_entry['code language'] # essential field
+                    p += compare_sets(osgc_languages, our_languages, 'code language')
+
+                # compare their license with our code and assets license
+                if 'license' in osgc_entry:
+                    osgc_licenses = osgc_entry['license']
+                    our_code_licenses = our_entry['code license'] # essential field
+                    our_assets_licenses = our_entry.get('assets license', [])
+                    p += compare_sets(osgc_licenses, our_code_licenses + our_assets_licenses, 'licenses', 'notthem')
+                    p += compare_sets(osgc_licenses, our_code_licenses, 'licenses', 'notus')
+
+                # compare their framework with our code dependencies (capitalization is ignored for now, only starts are compared)
+                our_framework_replacements = {'allegro4': 'allegro'}
+                if 'framework' in osgc_entry:
+                    osgc_frameworks = osgc_entry['framework']
+                    if type(osgc_frameworks) == str:
+                        osgc_frameworks = [osgc_frameworks]
+                    our_frameworks = our_entry.get('code dependencies', [])
+                    our_frameworks = [x.casefold() for x in our_frameworks]
+                    our_frameworks = [x if x not in our_framework_replacements else our_framework_replacements[x] for x in our_frameworks]
+                    osgc_frameworks = [x.casefold() for x in osgc_frameworks]
+                    p += compare_sets(osgc_frameworks, our_frameworks, 'framework/dependencies')
+
+                # compare their repo with our code repository and download
+                if 'repo' in osgc_entry:
+                    osgc_repos = osgc_entry['repo']
+                    if type(osgc_repos) == str:
+                        osgc_repos = [osgc_repos]
+                    osgc_repos = [utils.strip_url(url) for url in osgc_repos]
+                    osgc_repos = [x for x in osgc_repos if not x.startswith('sourceforge.net/projects/')] # we don't need the general sites there
+                    # osgc_repos = [x for x in osgc_repos if not x.startswith('https://sourceforge.net/projects/')] # ignore some
+                    our_repos = our_entry.get('code repository', [])
+                    our_repos = [utils.strip_url(url) for url in our_repos]
+                    our_repos = [x for x in our_repos if not x.startswith('gitlab.com/osgames/')] # we do not yet spread our own deeds (but we will some day)
+                    our_repos = [x for x in our_repos if not 'cvs.sourceforge.net' in x and not 'svn.code.sf.net/p/' in x]  # no cvs or svn anymore
+                    our_downloads = our_entry.get('download', [])
+                    our_downloads = [utils.strip_url(url) for url in our_downloads]
+                    p += compare_sets(osgc_repos, our_repos + our_downloads, 'repo', 'notthem') # if their repos are not in our downloads or repos
+                    p += compare_sets(osgc_repos, our_repos[:1], 'repo', 'notus') # if our main repo is not in their repo
+
+                # compare their url (and feed) to our home (and strip urls)
+                if 'url' in osgc_entry:
+                    osgc_urls = osgc_entry['url']
+                    if type(osgc_urls) == str:
+                        osgc_urls = [osgc_urls]
+                    osgc_urls = [utils.strip_url(url) for url in osgc_urls]
+                    our_urls = our_entry['home']
+                    our_urls = [utils.strip_url(url) for url in our_urls]
+                    our_urls = [url for url in our_urls if not url.startswith('github.com/')] # they don't have them as url
+                    p += compare_sets(osgc_urls, our_urls, 'url/home', 'notthem') # if their urls are not in our urls
+                    p += compare_sets(osgc_urls, our_urls[:1], 'url/home', 'notus') # if our first url is not in their urls
+
+                # compare their status with our state (playable can be beta/mature with us, but not playable must be beta)
+                if 'status' in osgc_entry:
+                    osgc_status = osgc_entry['status']
+                    our_status = our_entry['state'] # essential field
+                    if osgc_status != 'playable' and 'mature' in our_status:
+                        p += ' status : mismatch : them {}, us mature\n'.format(osgc_status)
+
+                # compare their development with our state
+                if 'development' in osgc_entry:
+                    osgc_development = osgc_entry['development']
+                    our_inactive = 'inactive' in our_entry
+                    our_status = our_entry['state']  # essential field
+                    if osgc_development == 'halted' and not our_inactive:
+                        p += ' development : mismatch : them halted - us not inactive\n'
+                    if osgc_development in ['very active', 'active'] and our_inactive:
+                        p += ' development : mismatch : them {}, us inactive\n'.format(osgc_development)
+                    if osgc_development == 'complete' and 'mature' not in our_status:
+                        p += ' development : mismatch : them complete, us not mature\n'
+
+                # compare their originals to our keywords (inspired by)
+                our_keywords = our_entry['keywords']
+                if 'originals' in osgc_entry:
+                    osgc_originals = osgc_entry['originals']
+                    osgc_originals = [x.replace(',', '') for x in osgc_originals] # we cannot have ',' or parts in parentheses in original names
+                    our_originals = [x for x in our_keywords if x.startswith('inspired by ')]
+                    if our_originals:
+                        assert len(our_originals) == 1, '{}: {}'.format(our_name, our_originals)
+                        our_originals = our_originals[0][11:].split('+')
+                        our_originals = [x.strip() for x in our_originals]
+                    our_originals = [x for x in our_originals if x not in ['Doom II']] # ignore same
+                    p += compare_sets(osgc_originals, our_originals, 'originals')
+
+                # compare their multiplayer with our keywords (multiplayer) (only lowercase comparison)
+                if 'multiplayer' in osgc_entry:
+                    osgc_multiplayer = osgc_entry['multiplayer']
+                    if type(osgc_multiplayer) == str:
+                        osgc_multiplayer = [osgc_multiplayer]
+                    osgc_multiplayer = [x.casefold() for x in osgc_multiplayer]
+                    osgc_multiplayer = [x for x in osgc_multiplayer if x not in ['competitive']] # ignored
+                    our_multiplayer = [x for x in our_keywords if x.startswith('multiplayer ')]
+                    if our_multiplayer:
+                        assert len(our_multiplayer) == 1
+                        our_multiplayer = our_multiplayer[0][11:].split('+')
+                        our_multiplayer = [x.strip().casefold() for x in our_multiplayer]
+                    p += compare_sets(osgc_multiplayer, our_multiplayer, 'multiplayer')
+
+                # compare content with keywords
+                if 'content' in osgc_entry:
+                    osgc_content = osgc_entry['content']
+                    if isinstance(osgc_content, str):
+                        osgc_content = [osgc_content]
+                    p += compare_sets(osgc_content, our_keywords, 'content/keywords', 'notthem') # only to us because we have more then them
+
+                # compare their type to our keywords
+                if 'type' in osgc_entry:
+                    game_type = osgc_entry['type']
+                    if isinstance(game_type, str):
+                        game_type = [game_type]
+                    p += compare_sets(game_type, our_keywords, 'type/keywords', 'notthem') # only to us because we have more then them
+
+                if p:
+                    print('{}\n{}'.format(name, p))
+
+        if not is_included:
+            # a new entry, that we have never seen, maybe we should make an entry of our own
+            # continue
+
+            if newly_created_entries >= maximal_newly_created_entries:
+                continue
+
+            game_type = osgc_entry.get('type', None)
+            osgc_status = osgc_entry.get('status', None)
+
+            # we sort some out here (maybe we want to have a closer look at them later)
+            if osgc_status == 'unplayable':
+                # for now not the unplayable ones
+                continue
+            if 'license' not in osgc_entry or 'As-is' in osgc_entry['license']:
+                # for now not the ones without license or with as-is license
+                continue
+
+            # determine file name
+            print('create new entry for {}'.format(osgc_name))
+            file_name = osg.canonical_entry_name(osgc_name) + '.md'
+            target_file = os.path.join(constants.entries_path, file_name)
+            if os.path.isfile(target_file):
+                print('warning: file {} already existing, save under slightly different name'.format(file_name))
+                target_file = os.path.join(constants.entries_path, file_name[:-3] + '-duplicate.md')
+                if os.path.isfile(target_file):
+                    continue # just for safety reasons
+
+            # add name
+            entry = '# {}\n\n'.format(osgc_name)
+
+            # add description
+            description = '{} of {}.'.format(game_type.capitalize(), ', '.join(osgc_entry['originals']))
+            entry += '_{}_\n\n'.format(description)
+
+            # home
+            home = osgc_entry.get('url', None)
+            entry += '- Home: {}\n'.format(home)
+
+            # state
+            entry += '- State: {}'.format(osgc_status)
+            if 'development' in osgc_entry:
+                if osgc_entry['development'] == 'halted':
+                    entry += ', inactive since XX'
+            entry += '\n'
+
+            # language tags
+            lang = osgc_entry.get('lang', [])
+            if type(lang) == str:
+                lang = [lang]
+            # platform 'Web' if language == JavaScript or TypeScript
+            if len(lang) == 1 and lang[0] in ('JavaScript', 'TypeScript'):
+                entry += '- Platform: Web\n'
+
+            # keywords
+            keywords = []
+            if game_type:
+                keywords.append(game_type)
+            if 'originals' in osgc_entry:
+                osgc_originals = osgc_entry['originals']
+                if type(osgc_originals) == str:
+                    osgc_originals = [osgc_originals]
+                keywords.append('inspired by {}'.format(' + '.join(osgc_originals)))
+            if 'multiplayer' in osgc_entry:
+                osgc_multiplayer = osgc_entry['multiplayer']
+                if type(osgc_multiplayer) == str:
+                    osgc_multiplayer = [osgc_multiplayer]
+                keywords.append('multiplayer {}'.format(' + '.join(osgc_multiplayer)))
+            if 'content' in osgc_entry:
+                osgc_content = osgc_entry['content']
+                keywords.append('{} content'.format(osgc_content))
+            if keywords:
+                entry += '- Keywords: {}\n'.format(', '.join(keywords))
+
+            # code repository (mandatory on our side)
+            repo = osgc_entry.get('repo', None)
+            if repo and repo.startswith('https://git') and not repo.endswith('.git'):
+                # we have them with .git on github/gitlab
+                repo += '.git'
+            entry += '- Code repository: {}\n'.format(repo)
+
+            # code language (mandatory on our side)
+            entry += '- Code language: {}\n'.format(', '.join(lang))
+
+            # code license
+            entry += '- Code license: {}\n'.format(', '.join(osgc_entry['license']))
+
+            # code dependencies (if existing)
+            if 'framework' in osgc_entry:
+                osgc_frameworks = osgc_entry['framework']
+                if type(osgc_frameworks) == str:
+                    osgc_frameworks = [osgc_frameworks]
+                entry += '- Code dependencies: {}\n'.format(', '.join(osgc_frameworks))
+
+            # write info (if existing)
+            if 'info' in osgc_entry:
+                entry += '\n{}\n'.format(osgc_entry['info'])
+
+            # write ## Building
+            entry += '\n## Building\n'
+
+            # finally write to file
+            utils.write_text(target_file, entry)
+            newly_created_entries += 1
+
+    # now iterate over our entries and test if we can add anything to them
+    print('entry that could be added to them')
+    for our_entry in our_entries:
+        our_name = our_entry['name']
+
+        # only if it contains a keyword starting with "inspired by" and none of the keywords "tool", "library" or "framework"
+        our_keywords = our_entry['keywords']
+        if not any([x.startswith('inspired by ') for x in our_keywords]):
+            continue
+        if any([x in ['tool', 'library', 'framework'] for x in our_keywords]):
+            continue
+
+        is_included = False
+        for osgc_entry in osgc_entries:
+            osgc_name = osgc_entry['name']
+
+            if osgc_name == our_name:
+                is_included = True
+
+        if not is_included:
+            # that could be added to them
+            print('- [{}]({})'.format(our_name, 'https://github.com/Trilarion/opensourcegames/blob/master/entries/' + our_entry['file']))
+
+
+
--- a/code/rejected.txt
+++ b/code/rejected.txt
@ -0,0 +1,168 @@
+1000 AD (https://sourceforge.net/projects/ad1000/, http://game.1000ad.net/): No license information
+8 Kingdoms (https://sourceforge.net/projects/kralovstvi/, https://svn.code.sf.net/p/kralovstvi/code/): No license found
+Adventure Game Biege (https://sourceforge.net/projects/biege/): No license information next to source code, no contact possibility (https://github.com/Trilarion/opensourcegames/issues/61)
+Aeron (https://sourceforge.net/projects/aeron/, https://svn.code.sf.net/p/aeron/code/): Very early development
+Age of Magic (https://sourceforge.net/projects/ageofmagic/, https://svn.code.sf.net/p/ageofmagic/code/): Only supports outdated Hardware (Symbian)
+Aladdin_DirectX (https://github.com/phanletrunghieu/Aladdin_DirectX.git): Very early development, Inactive, no license information
+Alive_Reversing (https://github.com/AliveTeam/alive_reversing.git): No license information, reverse engineered, likely not licensable ever
+Altera Terra (https://sourceforge.net/projects/alteraterra/, https://svn.code.sf.net/p/alteraterra/code/): Early development stage, no clear license information
+AlterCiv (https://sourceforge.net/projects/alterciv/): Very early development
+android_snake_game (https://github.com/codepath/android_snake_game): No license information found
+Antigravitaattori (http://www.luolamies.org/software/antigrav/, https://packages.debian.org/stable/games/antigravitaattori): Very early development
+Apricot JS (http://apricot-js.tumblr.com/): Difficult to find the source code, no license information, may not be used in games
+Arena of Honour (https://sourceforge.net/projects/aoh/): No clear license with the sources, no license of the assets, early development (alpha), abandoned
+Arkhart (http://gna.org/projects/arkhart/, http://arkhart.nekeme.net/): Sources lost (gna.org)
+Arthur's Knights (http://ainenn.org/KingArthur/, https://sourceforge.net/projects/king-arthur/): Unclear license, author could not be contacted
+Astral Clades (https://sourceforge.net/projects/astralclades/, https://svn.code.sf.net/p/astralclades/code/): No clear license information
+Balazar III (https://web.archive.org/web/20180521213818/http://www.lesfleursdunormal.fr/static/informatique/old/balazar_iii/index_en.html, http://gna.org/svn/?group=balazar): Early development, sources lost (gna.org) - a pity, was fine otherwise
+Balder3D (): Very early development, Balder2D instead
+Barbie Seahorse Adventures (http://www.imitationpickles.org/barbie/, https://pyweek.org/e/toba4/): Source is all rights reserved (the source in svn allegedly has a license, but it is lost)
+BattleChess (https://sourceforge.net/projects/battlechess/): No source code found
+Begin2 (https://sourceforge.net/projects/begin2/, https://svn.code.sf.net/p/begin2/code/): No source available
+Berserk (https://github.com/EgorOrachyov/Berserk): No license information found
+biju-game (https://github.com/fdfragoso/biju-game, http://crocidb.com/labs/biju-game/, https://github.com/CrociDB/biju-game): Very early development, No license added
+Brickshooter (http://bilious.homelinux.org/~paxed/brickshooter/): Homepage offline, sources not archived on the web archive, version very low 0.0.4 (might be too early)
+Bweakfwu (https://github.com/plaimi/bweakfwu, https://secure.plaimi.net/games/bweakfwu): Very early development stage, abandoned
+Camelia Girls (https://web.archive.org/web/20140523011334/http://pigux.com:80/cameliagirls/): Sources not available
+Cheese Boys (http://sourceforge.net/projects/cheeseboys/, http://cheeseboys.keul.it/, https://github.com/keul/Cheese-Boys, http://www.pygame.org/project-Cheese+Boys-692-.html): Very early development, Abandoned, License unclear
+Clippers (http://sourceforge.net/projects/clippers/): No license attached to source code (will contact)
+Codewars (http://sourceforge.net/projects/codewars): Very early development phase (especially client)
+Complete Roll Playing Game Engine (https://sourceforge.net/projects/crpge/): Very early development, long abandoned
+CRAFT: The Vicious Vikings (http://web.archive.org/web/20030402062302/borneo.gmd.de/AS/janus/craft/): Source code not available anymore
+Crowns and Shadows (https://sourceforge.net/projects/crownsandshadow/): No license information, no contact possibility (see also https://github.com/Trilarion/opensourcegames/issues/14)
+Crystal Picnic (https://github.com/Cloudxtreme/crystal-picnic, https://nooskewl.ca/crystal-picnic/): No license of their source code found (or is it zlib?, check again), is commercialized
+Cycle (https://sourceforge.net/projects/meegocycle/): Proprietary license (freeware, source available)
+dart-rpg (https://github.com/brad811/dart-rpg): No license information found
+Desktop Adventures Engine for ScummVM (https://github.com/digitall/scummvm-deskadv.git): GPL-2.0 but early development and inactive since long time
+DeuterosX (https://sourceforge.net/projects/deuterosx/): No license information found, no contact with the author (https://github.com/Trilarion/opensourcegames/issues/25), long abandoned
+Digger (https://github.com/lutzroeder/digger): No license information
+DQM (http://idusale.tk): Homepage redirects to unrelated site
+Drop Shock (http://www.tinywarz.com/index.php, https://sourceforge.net/projects/dropshock/, https://git.code.sf.net/p/dropshock/code): No license found
+EarthShakerDS (http://simianzombie.com/posts/2011/05/10/earth-shaker-ds-released, https://github.com/ant512/EarthShakerDS.git): Target only Nintendo DS as hardware platform
+EconLegacy (https://sourceforge.net/projects/econlegacy/): Very early development
+Eleconics (https://sourceforge.net/projects/eleconics/): No license found
+Element Games Engine (https://github.com/fredwen2008/Element-Games-Engine): No license information
+EmMines (http://eaglemode.sourceforge.net/): Part of a too large application (eaglemode). Could not access Source code. Maybe later.
+Entombed! (http://www.newbreedsoftware.com/entombed/, ftp://ftp.tuxpaint.org/unix/x/entombed/entombed-2007.07.04.tar.gz): Game very small
+Eskimo-run (https://sourceforge.net/projects/eskimo-run): Very early development. Inactive for many years.
+EXILE (http://forums.datarealms.com/viewtopic.php?f=82&t=19266, https://github.com/Geti/EXILE.git): Early development stage
+Feuerkraft (http://www.nongnu.org/feuerkraft, http://savannah.nongnu.org/projects/feuerkraft/): Very early development. Inactive since a long time.
+Fight or Perish (http://www.newbreedsoftware.com/fop/): Very early development. Inactive since a long time.
+Fight Or Perish (http://www.newbreedsoftware.com/fop/, ftp://ftp.tuxpaint.org/unix/x/fop/src/fop-0.5.tar.gz): Early development stage
+Flatland (http://idusale.tk/juegos/Flatland.html): Not available anymore
+fleet0ps (https://sourceforge.net/projects/fleet0ps/): Very early development
+Football Manager (https://github.com/autismuk/Football-Manager.git): Very early development stage
+Forest patrol (http://blazeofglory.org/projects/fp, https://www.pygame.org/project/391): Not available anymore, Internet Archive has not stored the source links
+Free in the Dark (https://sourceforge.net/projects/fitd/): No source code available
+Free Reign (http://sourceforge.net/projects/freereign/, http://freereign.sourceforge.net/index.shtml): Very early development, Inactive since long time
+FreeFT (https://github.com/nadult/FreeFT.git): All rights reserved.
+Frogatto & Friends (https://github.com/frogatto/frogatto, https://frogatto.com/): Only artwork and configuration files for the Anura engine and this is copyrighted
+Fruit War (http://fruitwar.sourceforge.net/, https://sourceforge.net/projects/fruitwar/): Very early development, long abandoned
+Future Blocks (https://github.com/fogleman/FutureBlocks.git): Needs DosBox, simple Tetris clone (nice, but maybe too minimal)
+GalaxyMage (https://www.galaxymage.org/, http://gna.org/projects/tactics): No source code available anymore, not on IA nor on gna project site.
+GNOME Games (): Has been separated into individual packages. Will add them separately.
+Grow (https://sourceforge.net/projects/grow/): Insufficient content
+GRUB Invaders (http://www.erikyyy.de/invaders/): Requires use of the GRUB bootloader (also not really original)
+Heroes: Rebirth (
+Herzog 3D (http://herzog3d.sourceforge.net/, https://sourceforge.net/p/herzog3d/code/HEAD/tree/): Very early development
+HistoryLine (https://github.com/oliverdb/Historyline): Very early development, no license information
+Homeworld SDL (https://github.com/aheadley/homeworld): Not open source compliant license (see https://github.com/aheadley/homeworld/blob/master/README)
+Howitzer Skirmish (http://howski.sourceforge.net/): No source code
+Ikariam (https://github.com/advocaite/ikariam, https://github.com/TheOnly92/Ikariem): No license information found, no assets license information found
+imperialism-remake (http://remake.twelvepm.de/, https://github.com/Trilarion/imperialism-Remake): Too minimal (I must know)
+Imperium: Sticks (http://rtciv.sourceforge.net/, http://sourceforge.net/projects/rtciv): No source code available
+ImperiumAO (https://sourceforge.net/projects/impao, https://www.imperiumao.com.ar/): Only engine is open source, engine is ORE
+Inline-Engine (https://github.com/petiaccja/Inline-Engine): Commercial license
+Interstate Outlaws (http://www.interstateoutlaws.com/): Very early development, No clear indication of license with source code, Abandoned since long time
+Ironclad (https://github.com/mikera/ironclad): No license information, asked but did not get a reply
+Java Fabled Lands (http://flapp.sourceforge.net/, https://sourceforge.net/projects/flapp/): No license information available, content might not be free (see https://github.com/Trilarion/opensourcegames/issues/57)
+Java Isometric Engine (https://sourceforge.net/projects/jisoman/): Early development, no license information with source, long abandoned
+Java Settlers of Catan (https://sourceforge.net/projects/javasettlers/): No license information, very early development
+JBI - Java Battle Isle (https://sourceforge.net/projects/jbiclone/): Very early development (a pity)
+Jumping Jack 2 Worryingly Familiar (http://retrospec.sgn.net/game/jj2, http://retrospec.sgn.net/users/nwalker/jack/jjwf.zip): No compliant license found
+Jumping Jack Further Adventures (http://retrospec.sgn.net/game/jj,  http://retrospec.sgn.net/users/nwalker/jack/jj.zip): No compliant license found
+Jumpman (http://www.oldskool.org/pc/jumpman, http://www.oldskool.org/pc/jumpman/jump_src.zip): Only runs on DOS, License problems (taken without asking for permission)
+KDE Games (): Has been separated into individual packages. Will add them separately.
+Kochol Game Engine (https://sourceforge.net/projects/kge/, http://kge3d.org/): Very early development
+KZap (http://kzap.sourceforge.net/): Very early development
+L5R-RPU:L5R Role-Playing Utilitites (https://sourceforge.net/projects/l5r-rpgu/): Early development, no license information with source, long abandoned (since 2007)
+Last Hope A medieval Web Game (https://sourceforge.net/projects/lasthope/, https://web.archive.org/web/20051030000341/http://www.ano-lasthope.org:80/): Very early development
+LastNinja (http://lastninja.sourceforge.net/, https://sourceforge.net/projects/lastninja/): Early development, Abandoned, Needs original material that is probably not obtainable legally
+LegacyClone (http://sourceforge.net/projects/legacyclone/): Very early development
+legion (https://github.com/mahho/legion.git): No compatible license found
+Lemmix (http://www.ericenzwaan.nl/eric/lemmings/lemmix.htm, http://www.ericenzwaan.nl/eric/lemmings/LemOpenSource.zip): Custom license, requires original content that is probably not obtainable legally
+Lightbringer Engine (https://sourceforge.net/projects/lightbringer2015/): Very early development
+linball (http://linball.sourceforge.net/, https://sourceforge.net/projects/linball/): Very early development stage
+Lixoo 2D adventure game engine (https://sourceforge.net/projects/lixoo/): Very early development, long abandoned (since 2004)
+luminesk5 (https://github.com/kaikai2/luminesk5.git): No clear license information found. Not clear if runnable or if playable.
+MAGE Adventure Game Engine (https://sourceforge.net/projects/mage/, https://web.archive.org/web/20030808135304/http://mage.rulez.org/): Very early development, long abandoned (since 2001)
+Manic Miner (http://retrospec.sgn.net/game/mm): Unclear license (freeware?), probably many from retrospec.sgn.net
+Memonix (https://web.archive.org/web/20100304083057/http://www.viewizard.com/games.php): Source not available anymore, Internet Archive doesn't have it either.
+Migration Simulation Game (https://sourceforge.net/projects/migrationsimulationgame/): Very, very early development
+Militia Defense (https://love2d.org/forums/viewtopic.php?f=5&t=380): Does not exist anymore.
+Mindlence RPG Game Library (https://sourceforge.net/projects/mindlence-game/): Very early development
+Mining Haze (http://sourceforge.net/projects/mininghaze/): Very early development
+Mining Haze (https://sourceforge.net/projects/mininghaze/): Early development
+MonoStrategy (https://monostrategy.codeplex.com/, https://codeplexarchive.blob.core.windows.net/archive/projects/monostrategy/monostrategy.zip): Early development (alpha), Not playable, requires original content, abandoned
+Moopy (https://code.google.com/archive/p/moopy/, https://code.google.com/archive/p/moopy/source/default/source): Early development stage
+MultiGame (https://github.com/whendricso/MultiGame): Commercial
+Neverputt (): Part of Neverball or never really offered separately
+New RAW (http://sourceforge.net/projects/newraw/, http://svn.code.sf.net/p/newraw/code/): Early development, requires original content, abandoned
+Ninjapix (http://pyedpypers.org/pyday/?p=27): Sources lost, Homepage offline, Internet archive hasn't stored sources
+Open General (http://www.open-general.com/, https://sourceforge.net/projects/opengeneral/, https://svn.code.sf.net/p/opengeneral/code/): Proprietary license (see credits.txt)
+Open Sonic (http://opensnc.sourceforge.net/home/index.php): Commercial content, already contained in Open Surge
+OpenDeathValley (https://github.com/OpenDeathValley/OpenDeathValley.git): No instructions, no releases, no website, not much code
+OpenMB (https://github.com/cookgreen/OpenMB): Very early development stage, maybe later
+OpenOutcast (https://sourceforge.net/projects/ocmod/): Very early development, no release, short svn history, abandoned
+OpenPop (http://openpopulous.sourceforge.net/, https://sourceforge.net/projects/openpopulous/, https://svn.code.sf.net/p/openpopulous/code/): Very early development
+openStrato (https://github.com/gerdl/openStrato.git): Not much code, no release or demo site, short code history, abandoned
+pacman (https://github.com/snozza/pacman.git): Playable? We already have so many pacmans
+Phavon (http://phavon.sourceforge.net/, http://phavon.cvs.sourceforge.net/viewvc/phavon/): Very early development
+Phoenix (https://github.com/joulupunikki/Phoenix, https://joulupunikki.github.io/Phoenix/#Copyright): Only waive liability, otherwise all rights reserved
+Pico-8 (https://www.lexaloffle.com/pico-8.php): Commercial
+PKMN-FX (https://github.com/jonasdemoor/PKMN-FX.git): Code not available anymore
+Postludium (https://github.com/Pctg-x8/postludium): Very early development, no license information
+Project Diaspora (http://pdiaspora.sourceforge.net/, https://sourceforge.net/projects/pdiaspora/): Proprietary license (see file headers)
+Project: Xenocide (https://sourceforge.net/projects/xenocide/, http://xenocide.cvs.sourceforge.net/viewvc/xenocide/): Very early development
+Promisance (https://sourceforge.net/projects/promisance/,http://promisance.sourceforge.net/): Went closed source with version >= 4.0
+pyspaceinvaders (https://github.com/map0logo/pyspaceinvaders.git): Too small
+PyTouhou (http://pytouhou.linkmauve.fr/,http://hg.linkmauve.fr/touhou): Sources not available anymore
+Racer (http://racer.sourceforge.net/, http://sourceforge.net/projects/racer/): Very early development, Inactive
+realmforge (https://sourceforge.net/projects/realmforge/): Went commercial
+Robotypo (http://robotypo.appspot.com/index.html, https://code.google.com/archive/p/robotypo/): Very early development, No license, long abandoned
+RPG Paper Maker (http://rpg-paper-maker.com/?i=1, https://github.com/RPG-Paper-Maker/RPG-Paper-Maker.git): Closed, proprietary source, usage free for non-commercial use
+Ruby OMF 2097 Remake (http://rubyomf2097.rubyforge.org/, svn://rubyforge.org/var/svn/rubyomf2097): No source code found, web site offline
+RuneSword II (https://sourceforge.net/projects/runesword/, http://runesword.sourceforge.net/, https://github.com/mattiascibien/Runesword.net, https://github.com/aceofspades19/Runesword-II-OS.Net, http://www.runesword.com/rs2.html): No license found
+Schwertreiter (https://sourceforge.net/projects/schwertreiter/): No license information, early development, long abandoned
+Scions of Darkness (): No link found.
+sCoreForge Pathfinder Character Creator (https://sourceforge.net/projects/scoreforge/): No license found
+Scoreus (https://web.archive.org/web/20120618143653/http://scoreos.org:80/about, https://code.google.com/archive/p/scoreos/, https://github.com/Lozza999/scoreos.git, https://code.google.com/archive/p/scoreos/): Source code not found
+Sedtris (http://uuner.doslash.org/forfun/): Obscure language for a game, not very well documented, there are so many Tetris clones out there already
+SilChess (http://eaglemode.sourceforge.net/): Depends on Eaglemode ZUI which may be a too large dependency, difficult to get the source out by itself (maybe later)
+Snipes (https://www.vogons.org/viewtopic.php?f=7&t=49073, https://github.com/Davidebyzero/Snipes.git): Not open source as in free, copyright remains with the authors
+Sokoban (https://sourceforge.net/projects/sokoban/): Proprietary license
+Solar Empire (https://sourceforge.net/projects/solar-empire/, https://git.code.sf.net/p/solar-empire/code): No license found
+Solaris (https://sourceforge.net/projects/solaristhegame/, https://svn.code.sf.net/p/solaristhegame/code/): Very early development stage
+Space Station 14 (http://www.spacestation14.com/): Offline, contains no clear game
+Spaceship Duel (https://archive.codeplex.com/?p=spaceshipduel, https://spaceshipduel.codeplex.com/SourceControl/latest): Cannot download archive from codeplex
+SSTPong (http://sstpong.sourceforge.net/, https://sourceforge.net/projects/sstpong/): Very early development
+Starlane Empire (https://gna.org/projects/starlane/, http://cvs.gna.org/cvsweb/starlane/?cvsroot=starlane): No sources available, see issue #18, gna.org lost
+Styx (http://retrospec.sgn.net/game/styx, http://retrospec.sgn.net/game-links.php?link=styx): Unclear license information, not open source in our sense, see other entries from retrospec here
+Sudoku86 (http://sudoku86.sourceforge.net/): Runs only under DOS, which is not a preferred OS for this database.
+Super Mario Bros With SFML in C# (https://mario.codeplex.com/, https://mario.codeplex.com/SourceControl/latest): Codeplex cannot access archive, therefore no source, might be just a previous state of Mario Objects
+Sworsorc (https://github.com/cjeffery/sworsorc, https://svn.code.sf.net/p/alteraterra/code/): Proprietary license (copyright headers in files)
+Terra Reconstructed (https://sourceforge.net/projects/terrastrategy/, http://mori.pyco.eu/drupal): No license information, very early development, long abandoned
+Tesseract Trainer (http://www.mushware.com/): Sources lost, license unclear, very early development, long abandoned
+The Fortress (https://sourceforge.net/projects/fortress/, https://fortressgame.wordpress.com/about/): Very early development, long abandoned
+thromolusng (https://github.com/segfaulthunter/thromolusng.git): Very early development, not playable, no build instructions, abandoned since 2010
+Tranzam (http://retrospec.sgn.net/game/tranzam, http://retrospec.sgn.net/download/files/TranzAm.1.1.zip): see other retrospec.sgn.net entries, license unclear, probably not open source as we want it
+TunnelWars (http://www.ratajik.net/TunnelWars/): Platform is OS/2, not interesting for us
+Uncharted waters 2 remake (https://github.com/Jackyjjc/Uncharted-waters-2-remake.git, https://web.archive.org/web/20140504171546/http://jackyjjc.com/category/dk2-dev-journal/): Source got lost
+VegaTrek (https://sourceforge.net/projects/vegatrek/, https://svn.code.sf.net/p/vegatrek/code/): Unclear licensing, copyright of assets questionable
+Visions from the Other Side (http://www.insani.org/mukou.html): Source not available
+Voxelstein 3D (http://voxelstein3d.sourceforge.net/): Very early development
+WarGame (https://archive.codeplex.com/?p=wargame): Unclear license, could not contact authors
+WolfenDoom (https://github.com/Realm667/WolfenDoom): All rights reserved
+XQuest 2 (http://www.swallowtail.org/xquest/, http://www.swallowtail.org/xquest/xquest_1.3_src.tar.gz): License is not really open source (see LICENSE.DOC in https://web.archive.org/web/20170320015957/http://www.swallowtail.org/xquest/xquest_1.3_src.tar.gz)
+xrick (http://www.bigorno.net/xrick): No open source license/unclear license (see file README in http://www.bigorno.net/xrick/xrick-021212.zip)
+zedragon (https://github.com/charlierobson/zedragon.git): License not found, Assembly, not sure which OS is supported, no release, not much guidance
--- a/code/utils/init.py
+++ b/code/utils/init.py
--- a/code/utils/archive.py
+++ b/code/utils/archive.py
@ -0,0 +1,37 @@
+"""
+Helpers that derive local archive folder names from repository URLs.
+"""
+
+
+def derive_folder_name(url, replaces):
+    """
+    Turns a repository url into a flat folder name.
+
+    If the url starts with one of the keys of replaces, that prefix is exchanged for the corresponding value.
+    Otherwise a leading http://, https://, git:// or svn:// is cut off. In both cases every '/' of the result is
+    replaced by '.'. Raises an Exception if neither a key of replaces nor one of the generic schemes matches.
+    """
+    def _flatten(path):
+        return path.replace('/', '.')
+
+    # service specific prefixes take precedence over the generic schemes
+    for prefix, alias in replaces.items():
+        if url.startswith(prefix):
+            return _flatten(alias + url[len(prefix):])
+
+    for scheme in ('http://', 'https://', 'git://', 'svn://'):
+        if url.startswith(scheme):
+            return _flatten(url[len(scheme):])
+
+    raise Exception('malformed url: {}'.format(url))
+
+
+def git_folder_name(url):
+    """
+    Derives the archive folder name of a Git repository url, abbreviating the prefixes of well known hosting
+    services (see derive_folder_name for the details of the transformation).
+    """
+    known_services = (
+        ('https://github.com', 'github'),
+        ('https://git.code.sf.net/p', 'sourceforge'),
+        ('https://git.tuxfamily.org', 'tuxfamily'),
+        ('https://git.savannah.gnu.org/git', 'savannah.gnu'),
+        ('https://gitlab.com', 'gitlab'),
+        ('https://gitorious.org', 'gitorious'),
+        ('https://anongit.', ''),
+        ('https://bitbucket.org', 'bitbucket'),
+        ('https://gitlab.gnome.org', 'gnome'),
+    )
+    return derive_folder_name(url, dict(known_services))
--- a/code/utils/constants.py
+++ b/code/utils/constants.py
@ -0,0 +1,12 @@
+"""
+Paths, properties.
+"""
+
+import os
+
+# paths
+# root_path is the directory two levels above the directory containing this file
+root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
+# folder 'entries' directly below the root
+entries_path = os.path.join(root_path, 'entries')
+# folder 'tocs' inside the entries folder
+tocs_path = os.path.join(entries_path, 'tocs')
+
+# file 'local.properties' directly below the root
+local_properties_file = os.path.join(root_path, 'local.properties')
--- a/code/utils/osg.py
+++ b/code/utils/osg.py
@ -0,0 +1,297 @@
+"""
+Specific functions working on the games.
+"""
+
+import re
+import os
+from difflib import SequenceMatcher
+from utils import utils, constants as c
+
+# fields that parse_entry requires to be present in every entry
+essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
+# all allowed fields; parse_entry also requires the fields of an entry to appear in this order
+valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
+'Code license', 'Code dependencies', 'Assets license', 'Developer', 'Build system', 'Build instructions')
+# all allowed platform tags; parse_entry also requires them to appear in this order
+valid_platforms = ('Windows', 'Linux', 'macOS', 'Android', 'iOS', 'Web')
+# parse_entry requires every entry to have at least one of these keywords
+recommended_keywords = ('action', 'arcade', 'adventure', 'visual novel', 'sports', 'platform', 'puzzle', 'role playing', 'simulation', 'strategy', 'cards', 'board', 'music', 'educational', 'tool', 'game engine', 'framework', 'library', 'remake')
+# parse_entry prints a warning for code languages not contained here
+known_languages = ('AGS Script', 'ActionScript', 'Ada', 'AngelScript', 'Assembly', 'Basic', 'Blender Script', 'BlitzMax', 'C', 'C#', 'C++', 'Clojure', 'CoffeeScript', 'ColdFusion', 'D', 'DM', 'Dart', 'Dia', 'Elm', 'Emacs Lisp', 'F#', 'GDScript', 'Game Maker Script', 'Go', 'Groovy', 'Haskell', 'Haxe', 'Io', 'Java', 'JavaScript', 'Kotlin', 'Lisp', 'Lua', 'MegaGlest Script', 'MoonScript', 'None', 'OCaml', 'Objective-C', 'PHP', 'Pascal', 'Perl', 'Python', 'QuakeC', 'R', "Ren'py", 'Ruby', 'Rust', 'Scala', 'Scheme', 'Script', 'Shell', 'Swift', 'TorqueScript', 'TypeScript', 'Vala', 'Visual Basic', 'XUL', 'ZenScript', 'ooc')
+# parse_entry prints a warning for code licenses not contained here
+known_licenses = ('2-clause BSD', '3-clause BSD', 'AFL-3.0', 'AGPL-3.0', 'Apache-2.0', 'Artistic License-1.0', 'Artistic License-2.0', 'Boost-1.0', 'CC-BY-NC-3.0', 'CC-BY-NC-SA-2.0', 'CC-BY-NC-SA-3.0', 'CC-BY-SA-3.0', 'CC-BY-NC-SA-4.0', 'CC-BY-SA-4.0', 'CC0', 'Custom', 'EPL-2.0', 'GPL-2.0', 'GPL-3.0', 'IJG', 'ISC', 'Java Research License', 'LGPL-2.0', 'LGPL-2.1', 'LGPL-3.0', 'MAME', 'MIT', 'MPL-1.1', 'MPL-2.0', 'MS-PL', 'MS-RL', 'NetHack General Public License', 'None', 'Proprietary', 'Public domain', 'SWIG license', 'Unlicense', 'WTFPL', 'wxWindows license', 'zlib')
+# NOTE(review): not used within this module; presumably the allowed multiplayer tags - confirm against callers
+known_multiplayer_modes = ('competitive', 'co-op', 'hotseat', 'LAN', 'local', 'massive', 'matchmaking', 'online', 'split-screen')
+
+# TODO put the abbreviations directly in the name line (parenthesis maybe), that is more natural
+# NOTE(review): values are either a single alias (str) or a tuple of aliases; not used within this module
+code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': 'SFML', 'Boost (C++ Libraries)': 'Boost', 'SGE Game Engine': 'SGE'}
+# NOTE(review): presumably dependencies that have no entry of their own, mapped to a web site - confirm against callers
+code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/', 'GLUT': 'https://www.opengl.org/resources/libraries/', 'WebGL': 'https://www.khronos.org/webgl/', 'Unity': 'https://unity.com/solutions/game',
+                                   '.NET': 'https://dotnet.microsoft.com/', 'Vulkan': 'https://www.khronos.org/vulkan/', 'KDE Frameworks': 'https://kde.org/products/frameworks/'}
+
+# matches runs of characters that are neither ASCII letters, digits, spaces, '-' nor '+' (used by canonical_entry_name)
+regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
+# matches runs of one or more spaces (used by canonical_entry_name)
+regex_sanitize_name_space_eater = re.compile(r" +")
+
+
+def name_similarity(a, b):
+    """
+    Returns the similarity ratio (as computed by difflib.SequenceMatcher) of two names, ignoring case.
+    """
+    left, right = str.casefold(a), str.casefold(b)
+    matcher = SequenceMatcher(None, left, right)
+    return matcher.ratio()
+
+
+def split_infos(infos):
+    """
+    Splits the infos into games, tools, frameworks and libraries.
+
+    An info counts as a game if its keywords contain none of 'tool', 'framework', 'library'. An info having several
+    of these keywords appears in each of the corresponding lists.
+    """
+    non_game_keywords = ('tool', 'framework', 'library')
+
+    def _having(keyword):
+        return [info for info in infos if keyword in info['keywords']]
+
+    games = [info for info in infos if not any([keyword in info['keywords'] for keyword in non_game_keywords])]
+    tools = _having('tool')
+    frameworks = _having('framework')
+    libraries = _having('library')
+    return games, tools, frameworks, libraries
+
+
+def entry_iterator():
+    """
+    Generator over all entries in the entries folder.
+
+    Yields a tuple (file name, full path, text content) for every item of the entries folder that is not a
+    directory.
+    """
+    for entry in os.listdir(c.entries_path):
+        entry_path = os.path.join(c.entries_path, entry)
+
+        # only files are entries, directories ("tocs" for example) are skipped
+        if not os.path.isdir(entry_path):
+            yield entry, entry_path, utils.read_text(entry_path)
+
+
+def canonical_entry_name(name):
+    """
+    Derives a canonical game name from an actual game name (suitable for file names, ...)
+
+    The name is case folded, the umlauts ö, ä, ü are replaced by o, a, u, all characters not accepted by
+    regex_sanitize_name are removed, runs of spaces become a single underscore and hyphens are tidied up.
+    """
+    canonical = name.casefold()
+    for umlaut, plain in (('ö', 'o'), ('ä', 'a'), ('ü', 'u')):
+        canonical = canonical.replace(umlaut, plain)
+
+    canonical = regex_sanitize_name.sub('', canonical)
+    canonical = regex_sanitize_name_space_eater.sub('_', canonical)
+
+    # " - " has become "_-_" by now
+    canonical = canonical.replace('_-_', '-')
+    # collapse double hyphens (two passes)
+    for _ in range(2):
+        canonical = canonical.replace('--', '-')
+
+    return canonical
+
+
+def parse_entry(content):
+    """
+    Parses the text of a single entry and returns a dictionary of its features.
+
+    The dictionary contains 'name' and 'description' (strings). For every field found in the entry it contains the
+    unprocessed value under '<field>-raw' and, if not empty, the list of the comma separated values (with
+    parenthesised comments removed) under '<field>', field names being lower case. If the state contains an
+    "inactive since ..." tag, the remainder of that tag is stored under 'inactive'.
+
+    Raises RuntimeError when a major error in the structure is detected, prints a warning for minor errors.
+    """
+
+    info = {}
+
+    # read name
+    regex = re.compile(r"^# (.*)") # start of content, starting with "# " and then everything until the end of line
+    matches = regex.findall(content)
+    if len(matches) != 1 or not matches[0]: # name must be there
+        raise RuntimeError('Name not found in entry "{}" : {}'.format(content, matches))
+    info['name'] = matches[0]
+
+    # read description
+    regex = re.compile(r"^.*\n\n_(.*)_\n") # third line from top, everything between underscores
+    matches = regex.findall(content)
+    if len(matches) != 1 or not matches[0]: # description must be there
+        raise RuntimeError('Description not found in entry "{}"'.format(content))
+    info['description'] = matches[0]
+
+    # first read all field names
+    regex = re.compile(r"^- (.*?): ", re.MULTILINE) # start of each line having "- ", then everything until a colon, then ": "
+    fields = regex.findall(content)
+
+    # check that essential fields are there
+    for field in essential_fields:
+        if field not in fields: # essential fields must be there
+            raise RuntimeError('Essential field "{}" missing in entry "{}"'.format(field, info['name']))
+
+    # check that all fields are valid fields and are existing in that order
+    index = 0
+    for field in fields:
+        while index < len(valid_fields) and field != valid_fields[index]:
+            index += 1
+        if index == len(valid_fields): # must be valid fields and must be in the right order
+            raise RuntimeError('Field "{}" in entry "{}" either misspelled or in wrong order'.format(field, info['name']))
+
+    # iterate over found fields
+    for field in fields:
+        # anchored at the start of a line (like the field detection above) and with the field name escaped, so
+        # that neither text inside another line nor regex meta characters in the name can produce matches
+        regex = re.compile(r"^- {}: (.*)".format(re.escape(field)), re.MULTILINE)
+        matches = regex.findall(content)
+        if len(matches) != 1: # every field must be present only once
+            raise RuntimeError('Field "{}" in entry "{}" exist multiple times.'.format(field, info['name']))
+        v = matches[0]
+
+        # first store as is
+        info[field.lower()+'-raw'] = v
+
+        # remove parenthesis with content
+        v = re.sub(r'\([^)]*\)', '', v)
+
+        # split on ', '
+        v = v.split(', ')
+
+        # strip
+        v = [x.strip() for x in v]
+
+        # remove all being false (empty) that were for example just comments
+        v = [x for x in v if x]
+
+        # if entry is of structure <..> remove <> (compare by value, identity of strings is an implementation detail)
+        v = [x[1:-1] if x[0] == '<' and x[-1] == '>' else x for x in v]
+
+        # empty fields will not be stored
+        if not v:
+            continue
+
+        # store in info
+        info[field.lower()] = v
+
+    # check again that essential fields made it through
+    for field in ('home', 'state', 'keywords', 'code language', 'code license'):
+        if field not in info: # essential fields must still be inside
+            raise RuntimeError('Essential field "{}" empty in entry "{}"'.format(field, info['name']))
+
+    # now checks on the content of fields
+
+    # name and description should not have spaces at the begin or end
+    for field in ('name', 'description'):
+        v = info[field]
+        if len(v) != len(v.strip()): # warning about that
+            print('Warning: No leading or trailing spaces in field {} in entry "{}"'.format(field, info['name']))
+
+    # state (essential field) must contain either beta or mature but not both, but at least one
+    v = info['state']
+    for t in v:
+        if t != 'beta' and t != 'mature' and not t.startswith('inactive since '):
+            raise RuntimeError('Unknown state tage "{}" in entry "{}"'.format(t, info['name']))
+    # exactly one of both must be present; written with explicit parentheses because "a in v != b in v" is a
+    # chained comparison that only triggers if both are present
+    if ('beta' in v) == ('mature' in v):
+        raise RuntimeError('State must be one of <"beta", "mature"> in entry "{}"'.format(info['name']))
+
+    # extract inactive year
+    phrase = 'inactive since '
+    inactive_year = [x[len(phrase):] for x in v if x.startswith(phrase)]
+    if len(inactive_year) > 1: # explicit error instead of an assert, which is skipped when running with -O
+        raise RuntimeError('More than one "inactive since" state tag in entry "{}"'.format(info['name']))
+    if inactive_year:
+        info['inactive'] = inactive_year[0]
+
+    # urls in home, download, play and code repositories must start with http or https (or git) and should not contain spaces
+    allowed_schemes = ('http://', 'https://', 'git://', 'svn://', 'ftp://', 'bzr://')
+    for field in ['home', 'download', 'play', 'code repository']:
+        if field in info:
+            for url in info[field]:
+                if not url.startswith(allowed_schemes):
+                    raise RuntimeError('URL "{}" in entry "{}" does not start with http/https/git/svn/ftp/bzr'.format(url, info['name']))
+                if ' ' in url:
+                    raise RuntimeError('URL "{}" in entry "{}" contains a space'.format(url, info['name']))
+
+    # github/gitlab repositories should end on .git and should start with https
+    if 'code repository' in info:
+        for repo in info['code repository']:
+            if any((x in repo for x in ('github', 'gitlab', 'git.tuxfamily', 'git.savannah'))):
+                if not repo.startswith('https://'):
+                    print('Warning: Repo {} in entry "{}" should start with https://'.format(repo, info['name']))
+                if not repo.endswith('.git'):
+                    print('Warning: Repo {} in entry "{}" should end on .git.'.format(repo, info['name']))
+
+    # check that all platform tags are valid tags and are existing in that order
+    if 'platform' in info:
+        index = 0
+        for platform in info['platform']:
+            while index < len(valid_platforms) and platform != valid_platforms[index]:
+                index += 1
+            if index == len(valid_platforms): # must be valid platforms and must be in that order
+                raise RuntimeError('Platform tag "{}" in entry "{}" either misspelled or in wrong order'.format(platform, info['name']))
+
+    # there must be at least one keyword
+    if 'keywords' not in info:
+        raise RuntimeError('Need at least one keyword in entry "{}"'.format(info['name']))
+
+    # check for existence of at least one recommended keywords
+    if not any(keyword in info['keywords'] for keyword in recommended_keywords):
+        raise RuntimeError('Entry "{}" contains no recommended keyword'.format(info['name']))
+
+    # languages should be known
+    for language in info['code language']:
+        if language not in known_languages:
+            print('Warning: Language {} in entry "{}" is not a known language. Misspelled or new?'.format(language, info['name']))
+
+    # licenses should be known
+    for license in info['code license']:
+        if license not in known_licenses:
+            print('Warning: License {} in entry "{}" is not a known license. Misspelled or new?'.format(license, info['name']))
+
+    return info
+
+
+def assemble_infos():
+    """
+    Parses all entries and assembles interesting infos about them.
+
+    Returns a list with one dictionary per entry, as produced by parse_entry, with the file name of the entry
+    added under 'file'. Prints a warning for every entry whose file name differs from the canonical file name
+    derived from its name; the file itself is not renamed.
+    """
+
+    print('assemble game infos')
+
+    # a database of all important infos about the entries
+    infos = []
+
+    # iterate over all entries
+    for entry, _, content in entry_iterator():
+
+        # parse entry
+        info = parse_entry(content)
+
+        # add file information
+        info['file'] = entry
+
+        # check canonical file name
+        canonical_file_name = canonical_entry_name(info['name']) + '.md'
+        # we also allow -X with X =2..9 as possible extension (because of duplicate canonical file names)
+        if canonical_file_name not in (entry, entry[:-5] + '.md'):
+            print('Warning: file {} should be {}'.format(entry, canonical_file_name))
+
+        # add to list
+        infos.append(info)
+
+    return infos
+
+
+def extract_links():
+    """
+    Parses all entries and extracts http(s) links from them.
+
+    Returns the distinct links as a list sorted case-insensitively.
+    """
+
+    # regex for finding urls (can be in <> or in ]() or after a whitespace
+    regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n,]")
+
+    found = set()
+    for _, _, content in entry_iterator():
+        # every match is a tuple with one group per alternative of the regex, only one of them is non-empty
+        for groups in regex.findall(content):
+            found.update(url for url in groups if url)
+
+    return sorted(list(found), key=str.casefold)
--- a/code/utils/osg_github.py
+++ b/code/utils/osg_github.py
@ -0,0 +1,20 @@
+"""
+Everything specific to the Github API (via PyGithub).
+"""
+
+from github import Github
+
+
+def retrieve_repo_info(repos):
+    """
+    For a list of Github repos, retrieves repo information.
+
+    Returns a list with one dictionary per repo holding the attributes archived, description, language,
+    last_modified, open_issues_count, stargazers_count and topics of the repository object and the repo itself.
+    """
+    client = Github()
+    collected = []
+    for repo in repos:
+        remote = client.get_repo(repo)
+        collected.append({
+            'archived': remote.archived,
+            'description': remote.description,
+            'language': remote.language,
+            'last modified': remote.last_modified,
+            'open issues count': remote.open_issues_count,
+            'stars count': remote.stargazers_count,
+            'topics': remote.topics,
+            'repo': repo,
+        })
+    return collected
--- a/code/utils/osg_parse.py
+++ b/code/utils/osg_parse.py
--- a/code/utils/utils.py
+++ b/code/utils/utils.py
@ -0,0 +1,312 @@
+"""
+Utilities for the tools. Only depending on standard Python or third party modules.
+"""
+
+import os
+import shutil
+import subprocess
+import tarfile
+import time
+import urllib.request
+import zipfile
+import stat
+
+
+def read_text(file):
+    """
+    Returns the complete content of a text file, decoded as UTF-8. Bytes that cannot be decoded are ignored.
+    """
+    with open(file, mode='r', encoding='utf-8', errors='ignore') as handle:
+        return handle.read()
+
+
+def read_first_line(file):
+    """
+    Returns only the first line (including its line ending) of a UTF-8 encoded text file. Convenient because
+    only the first line of a category overview is needed.
+    """
+    with open(file, mode='r', encoding='utf-8') as handle:
+        return handle.readline()
+
+
+def write_text(file, text):
+    """
+    Writes text to a file (UTF-8 encoded), replacing any previous content of the file.
+    """
+    with open(file, mode='w', encoding='utf-8') as handle:
+        handle.write(text)
+
+
+def determine_archive_version_generic(name, leading_terms, trailing_terms):
+    """
+    Given an archive file name, tries to get version information. Generic version that can cut off leading and trailing
+    terms and converts to lower case. Give the most special terms first in the list. As many cut offs as possible are
+    performed.
+
+    The terms are compared against the lower case name, so they must be given in lower case. Every term is tried
+    once, in the given order. Empty terms are ignored.
+    """
+    # to lower case
+    name = name.lower()
+
+    # cut leading terms
+    for term in leading_terms:
+        if term and name.startswith(term):
+            name = name[len(term):]
+
+    # cut trailing terms (an empty term must be skipped, name[:-0] would wipe the whole name)
+    for term in trailing_terms:
+        if term and name.endswith(term):
+            name = name[:-len(term)]
+    return name
+
+
+def unzip_keep_last_modified(archive, destination):
+    """
+    Unzips content of a zip file archive into the destination directory keeping the last modified file property as
+    it was in the zip archive.
+
+    Assumes that destination is an existing directory path.
+    """
+    with zipfile.ZipFile(archive, 'r') as zip_file:
+        # zip_file.extractall(destination)  # does not keep the last modified property
+        for zip_entry in zip_file.infolist():
+            # date_time has six elements, mktime needs nine (the -1 lets mktime determine daylight saving time)
+            date_time = time.mktime(zip_entry.date_time + (0, 0, -1))
+            # extract() may sanitize the member name (absolute paths, ".." components), so the time stamp must be
+            # applied to the path it reports instead of to a path built from the raw member name
+            extracted_path = zip_file.extract(zip_entry, destination)
+            os.utime(extracted_path, (date_time, date_time))
+
+
+def detect_archive_type(name):
+    """
+    Tries to guess the type of an archive from the ending of its name.
+
+    Returns 'tar' for names ending on .tbz2 or .tar.gz, 'zip' for names ending on .zip or .jar and None otherwise.
+    """
+    if name.endswith(('.tbz2', '.tar.gz')):
+        return 'tar'
+    if name.endswith(('.zip', '.jar')):
+        return 'zip'
+    # unknown
+    return None
+
+
+def folder_size(path):
+    """
+    Returns the sum of the sizes (in bytes) of all files in a folder and all its sub-folders.
+    """
+    return sum(os.path.getsize(os.path.join(dirpath, file))
+               for dirpath, _, filenames in os.walk(path)
+               for file in filenames)
+
+
+def extract_archive(source, destination, type):
+    """
+    Extracts a zip, tar, ... to a destination path.
+
+    Type may result from detect_archive_type(). For any type other than 'tar' or 'zip' nothing is done.
+    """
+    if type == 'tar':
+        # context manager, so the archive file is closed again even if extraction fails
+        with tarfile.open(source, 'r') as tar:
+            tar.extractall(destination)
+    elif type == 'zip':
+        unzip_keep_last_modified(source, destination)
+
+
+def strip_wrapped_folders(folder):
+    """
+    Descends into a folder for as long as it contains exactly one sub-folder and nothing else and returns the path
+    of the folder where this stops.
+
+    Assumes folder is a directory.
+    """
+    content = list(os.scandir(folder))
+    while len(content) == 1 and content[0].is_dir():
+        folder = content[0].path
+        content = list(os.scandir(folder))
+    return folder
+
+
+def determine_latest_last_modified_date(folder):
+    """
+    Returns the latest "last modified" time stamp of all files in a folder and all its sub-folders, or 0 if there
+    is no file with a time stamp larger than 0.
+    """
+    time_stamps = [os.path.getmtime(os.path.join(dirpath, filename))
+                   for dirpath, _, filenames in os.walk(folder)
+                   for filename in filenames]
+    # the leading 0 is the result for folders without files
+    return max([0] + time_stamps)
+
+
+def subprocess_run(cmd, display=True):
+    """
+    Runs a cmd via subprocess and returns its standard output as text.
+
+    If the command returns a non-zero return code a RuntimeError is raised. If display is set, the standard output
+    is printed in case of success and the return code, standard output and standard error output are printed in
+    case of failure.
+
+    The output is decoded as cp1252. Bytes that are undefined in cp1252 are replaced by the Unicode replacement
+    character instead of aborting with a UnicodeDecodeError.
+    """
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout = result.stdout.decode('cp1252', errors='replace')
+    if result.returncode:
+        if display:
+            print("error {} in call {}".format(result.returncode, cmd))
+            print(stdout)
+            print(result.stderr.decode('cp1252', errors='replace'))
+        raise RuntimeError('call {} failed with return code {}'.format(cmd, result.returncode))
+    if display:
+        print('  output: {}'.format(stdout))
+    return stdout
+
+
+# TODO need move_tree
+def copy_tree(source, destination):
+    """
+    Copies the full content of one directory into another avoiding the use of distutils.dir_util.copy_tree because that
+    can give unwanted errors on Windows (probably related to symlinks).
+
+    The destination directory is created if it does not exist yet. Existing files in the destination are
+    overwritten. Only the content of the files is copied (shutil.copyfile).
+    """
+    # this gave an FileNotFoundError: [Errno 2] No such file or directory: '' on Windows
+    # distutils.dir_util.copy_tree(archive_path, git_path)
+
+    # without this, a source containing only files would be copied into a not existing directory
+    os.makedirs(destination, exist_ok=True)
+
+    for dirpath, dirnames, filenames in os.walk(source):
+        # first create all the directories on destination
+        for dirname in dirnames:
+            relative = os.path.relpath(os.path.join(dirpath, dirname), source)
+            os.makedirs(os.path.join(destination, relative), exist_ok=True)
+        # second copy all the files
+        for filename in filenames:
+            filepath = os.path.join(dirpath, filename)
+            shutil.copyfile(filepath, os.path.join(destination, os.path.relpath(filepath, source)))
+
+
+def download_url(url, destination):
+    """
+    Downloads the content of an url (via urllib.request) and stores it in the file destination.
+
+    Sleeps for one second before the download, trying to be nice.
+    """
+    time.sleep(1)  # we are nice
+    with urllib.request.urlopen(url) as response, open(destination, 'wb') as target:
+        shutil.copyfileobj(response, target)
+
+
+def handleRemoveReadonly(func, path, exc):
+    """
+    Necessary on Windows. See https://stackoverflow.com/questions/1889597/deleting-directory-in-python
+
+    Passed as onerror handler to shutil.rmtree in this module. Sets the write permission on path and calls
+    func(path) again. The argument exc is not used.
+    """
+    os.chmod(path, stat.S_IWRITE)
+    func(path)
+
+
+def git_clear_path(git_path):
+    """
+    Deletes all files and folders directly inside a path, except for an item named '.git'.
+    """
+    for name in os.listdir(git_path):
+        if name != '.git':
+            target = os.path.join(git_path, name)
+            if os.path.isdir(target):
+                shutil.rmtree(target, onerror=handleRemoveReadonly)
+            else:
+                os.remove(target)
+
+
+def recreate_directory(path):
+    """
+    Recreates a directory (deletes it before if it is existing).
+
+    The creation is tried up to ten times with a pause of 0.1 seconds after each PermissionError. If all attempts
+    fail a RuntimeError is raised.
+    """
+    if os.path.isdir(path):
+        shutil.rmtree(path, onerror=handleRemoveReadonly)
+
+    remaining_attempts = 10
+    while remaining_attempts > 0:
+        try:
+            os.mkdir(path)
+        except PermissionError:
+            time.sleep(0.1)
+            remaining_attempts -= 1
+        else:
+            return
+    raise RuntimeError()
+
+
+def unzip(zip_file, destination_directory):
+    """
+    Unzips and keeps the original modified date.
+
+    :param zip_file: path of the zip archive
+    :param destination_directory: directory the content of the archive is extracted into
+    :return: None
+    """
+    # last modified time stamps of the extracted directories, applied after all files have been created
+    directory_times = {}
+
+    with zipfile.ZipFile(zip_file, 'r') as archive:
+        for info in archive.infolist():
+            # extract() may sanitize the member name (absolute paths, ".." components), so work with the path it
+            # reports instead of a path built from the raw member name
+            extracted_path = archive.extract(info, destination_directory)
+
+            # still need to adjust the dt o/w item will have the current dt
+            date_time = time.mktime(info.date_time + (0, 0, -1))
+
+            if os.path.isdir(extracted_path):
+                # changes to dir dt will have no effect right now since files are
+                # being created inside of it; hold the dt and apply it later
+                directory_times[extracted_path] = date_time
+            else:
+                os.utime(extracted_path, (date_time, date_time))
+
+    # done creating files, now update dir dt
+    for directory, date_time in directory_times.items():
+        os.utime(directory, (date_time, date_time))
+
+
+def strip_url(url):
+    """
+    Reduces an url to its core by cutting off the leading terms http://, https://, svn://, www. and the trailing
+    terms /, .git, /en, /index.html. Every term is tried once, in this order.
+    """
+    for prefix in ('http://', 'https://', 'svn://', 'www.'):
+        url = url[len(prefix):] if url.startswith(prefix) else url
+    for suffix in ('/', '.git', '/en', '/index.html'):
+        url = url[:len(url) - len(suffix)] if url.endswith(suffix) else url
+    return url
+
+
+def load_properties(filepath, sep='=', comment_char='#'):
+    """
+    Read the file as a properties file (in Java).
+
+    Returns a dictionary of the keys and values (both stripped of surrounding whitespace). Empty lines and lines
+    starting with comment_char are skipped. A line is split at the first occurrence of sep, so values may contain
+    sep themselves. Raises a ValueError for a line that does not contain sep.
+    """
+    properties = {}
+    with open(filepath, "rt") as file:
+        for line in file:
+            line = line.strip()
+            if not line or line.startswith(comment_char):
+                continue
+            key, separator, value = line.partition(sep)
+            if not separator:
+                raise ValueError('line "{}" in properties file {} does not contain "{}"'.format(line, filepath, sep))
+            properties[key.strip()] = value.strip()
+    return properties
+
+
+def unique_elements_and_occurrences(elements):
+    """
+    Counts how often every element occurs and returns a list of strings 'element(count)', ordered by descending
+    count (elements with equal count keep the order of their first occurrence).
+
+    Elements that cannot be counted (the counting raises an exception) are skipped after printing the exception.
+    """
+    occurrences = {}
+    for element in elements:
+        try:
+            occurrences[element] = occurrences.get(element, 0) + 1
+        except Exception as e:
+            print(e)
+    ranked = sorted(occurrences.items(), key=lambda item: item[1], reverse=True)
+    return ['{}({})'.format(element, count) for element, count in ranked]