synchronized awesome list (https://github.com/radek-sprta/awesome-game-remakes) and sourceforge member pages and some inspiration wikipedia links

2021-09-28 13:42:58 +02:00
parent 0daac9c31e
commit e7ea8fc6ab
67 changed files with 607 additions and 129 deletions
--- a/code/backlog.txt
+++ b/code/backlog.txt
@@ -87,4 +87,6 @@ https://www.seul.org/~grumbel/tmp/clanlib/games.html
 https://www.tapatalk.com/groups/imperilist/
 https://www.wurfelengine.net/
 https://zdoom.org/downloads (gzdoom, lzdoom)
-https://zope.readthedocs.io/en/latest/
+https://zope.readthedocs.io/en/latest/
+https://github.com/jmorton06/Lumos
+http://crazycarscpc.free.fr/
--- a/code/backlog_enhanced.txt
+++ b/code/backlog_enhanced.txt
@@ -1,5 +1,6 @@
 more work

+https://sourceforge.net/p/uhexen2/code/HEAD/tree/ Hexen II: Hammer of Thyrion
 http://hgm.nubati.net/ (winboard)
 http://insideastarfilledsky.net/ (source may be somewhere else)
 https://empiredirectory.net/
@@ -9,6 +10,7 @@ https://github.com/neuromancer/re-private-eye.git (moved to scummvm) and is part
 http://www.hard-light.net/ (Free Space Open, Knossos, Mods, https://github.com/scp-fs2open/fs2open.github.com, https://wiki.hard-light.net/index.php/Getting_started)
 http://svn.assembla.com/svn/gdpl/ (not sure what links there and what kind of game it is, license?)
 https://web.archive.org/web/20161230103538/http://www.linuxdevcenter.com/pub/a/linux/2003/04/24/exult.html
+http://barbarian.1987.free.fr/indexEN.htm (many versions, source license?)

 roguelike

@@ -121,6 +123,10 @@ https://en.wikipedia.org/wiki/List_of_free_and_open-source_Android_applications#
 https://en.wikipedia.org/wiki/MUD#Spread (all there)
 https://en.wikipedia.org/wiki/MUD_client (all there)

+osdn
+
+https://osdn.net/
+
 special

 https://alternativeto.net/ (query with only open source as option)
--- a/code/maintenance_inspirations.py
+++ b/code/maintenance_inspirations.py
@@ -5,6 +5,7 @@ Maintenance of inspirations.md and synchronization with the inspirations in the
 # TODO search fandom
 # TODO which inspirations have wikipedia entries with open source games category but aren't included
 # TODO if update included entries are included, update entries with media too
+# TODO series always with lowercase

 import time
 from utils import osg, osg_ui, osg_wikipedia, constants as c
@@ -76,6 +77,9 @@ class InspirationMaintainer:
        print('missed inspirations checked')

    def check_for_wikipedia_links(self):
+        """
+        Check the inspirations that do not yet have a Wikipedia link in their Media field by searching for them on Wikipedia.
+        """
        if not self.inspirations:
            print('inspirations not yet loaded')
            return
--- a/code/rejected.txt
+++ b/code/rejected.txt
@@ -195,4 +195,6 @@ XQuest 2 (http://www.swallowtail.org/xquest/, http://www.swallowtail.org/xquest/
 xrick (http://www.bigorno.net/xrick): No open source license/unclear license (see file README in http://www.bigorno.net/xrick/xrick-021212.zip)
 Yave (https://github.com/gan74/Yave): General graphics engine, not game centered in any way and experimental
 Yuris Revenge (https://github.com/cookgreen/Yuris-Revenge): Mod to OpenRA
-zedragon (https://github.com/charlierobson/zedragon.git): License not found, Assembly, not sure which OS is supported, no release, not much guidance
+zedragon (https://github.com/charlierobson/zedragon.git): License not found, Assembly, not sure which OS is supported, no release, not much guidance
+rawgl (https://github.com/cyxx/rawgl): All rights reserved by Gregory Montoir. No FOSS license.
+The Great Escape in C (https://github.com/dpt/The-Great-Escape-in-C): All rights reserved by David Thomas. No FOSS license.
--- a/code/synchronization/sourceforge_import.py
+++ b/code/synchronization/sourceforge_import.py
@@ -1,6 +1,8 @@
 """
 Scrapes Sourceforge project sites and adds (mostly developer) information to our database.
-""" # TODO sourceforge sites that are not existing anymore but we have an archive link, also scrape
+"""
+
+# TODO sourceforge sites that are not existing anymore but we have an archive link, also scrape

 import os
 import json
@@ -12,17 +14,22 @@ sf_entries_file = os.path.join(c.code_path, 'sourceforge_entries.txt')
 prefix = 'https://sourceforge.net/projects/'

 # author names in SF that aren't the author names how we have them
-SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'baris yuksel': 'Baris Yuksel',
+SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray',
+                 'baris yuksel': 'Baris Yuksel',
                 'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic',
-                 'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen', 'Louens Veen': 'Lourens Veen',
-                 'linley_henzell': 'Linley Henzell', 'Patrice DUHAMEL': 'Patrice Duhamel', 'Etienne SOBOLE': 'Etienne Sobole',
+                 'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen',
+                 'Louens Veen': 'Lourens Veen',
+                 'linley_henzell': 'Linley Henzell', 'Patrice DUHAMEL': 'Patrice Duhamel',
+                 'Etienne SOBOLE': 'Etienne Sobole',
                 'L. H.    [Lubomír]': 'L. H. Lubomír', 'davidjoffe': 'David Joffe', 'EugeneLoza': 'Eugene Loza',
-                 'Kenneth Gangsto': 'Kenneth Gangstø', 'Lucas GAUTHERON': 'Lucas Gautheron', 'Per I Mathisen': 'Per Inge Mathisen',
+                 'Kenneth Gangsto': 'Kenneth Gangstø', 'Lucas GAUTHERON': 'Lucas Gautheron',
+                 'Per I Mathisen': 'Per Inge Mathisen',
                 'wrtlprnft': 'Wrzlprnft', 'daniel_santos': 'Daniel Santos', 'Dark_Sylinc': 'darksylinc',
                 'Don Llopis': 'Don E. Llopis', 'dwachs': 'Dwachs', 'Pierre-Loup Griffais': 'Pierre-Loup A. Griffais',
                 'Richard Gobeille': 'Richard C. Gobeille', 'timfelgentreff': 'Tim Felgentreff',
                 'Dr. Martin Brumm': 'Martin Brumm', 'Dr. Wolf-Dieter Beelitz': 'Wolf-Dieter Beelitz'}

+# authors to be ignored
 SF_ignore_list = ('', 'Arianne Integration Bot')


@@ -49,28 +56,32 @@ def collect_sourceforge_entries():

 def sourceforge_import():
    """
-
-    :return:
+    Scrapes Sourceforge project sites and adds developer information to the entries.
    """
+
+    # read entries that have sourceforge projects
    files = json.loads(utils.read_text(sf_entries_file))

+    # read developer information
    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    # all exceptions that happen will be eaten (but will end the execution)
    try:
-        # loop over each entry
+        # loop over each entry with a sourceforge project
        for index, file in enumerate(files):
-            print(' process {}'.format(file))
+            print(' process {} ({})'.format(file, index))

-            # read entry
+            # read full entry
            entry = osg.read_entry(file)
            developers = entry.get('Developer', [])
            urls = [x.value for x in entry['Home'] if x.startswith('https://sourceforge.net/projects/')]

+            # do we need to save it again
            entry_changed = False

+            # for all sourceforge project urls in this entry
            for url in urls:
                print('  sf project {}'.format(url))

@@ -78,8 +89,11 @@ def sourceforge_import():
                    print('error: sf project does not end with slash')
                    url += '/'

-                # members
-                url_members = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
+                # read and parse members page
+                project_name = url[len(prefix):-1]
+                if 'berlios' in project_name:  # berlios projects never have member pages
+                    continue
+                url_members = 'https://sourceforge.net/p/' + project_name + '/_members/'
                response = requests.get(url_members)
                if response.status_code != 200:
                    print('error: url {} not accessible, status {}'.format(url_members, response.status_code))
@@ -88,48 +102,54 @@ def sourceforge_import():
                authors = soup.find('div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [author[1].a['href'] for author in authors if len(author) == 3]
+                # for every author in the list of scraped authors
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url_author = 'https://sourceforge.net' + author if not author.startswith('http') else author
+                    # get the personal author page from sourceforge
                    response = requests.get(url_author)
                    if response.status_code != 200 and author not in ('/u/favorito/',):
                        print('error: url {} not accessible, status {}'.format(url_author, response.status_code))
                        raise RuntimeError()
-                    url_author = response.url  # could be different now
+                    url_author = response.url  # could be different now (redirect)
                    if 'auth/?return_to' in url_author or response.status_code != 200:
                        # for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
                        author_name = author[3:-1]
                        nickname = author_name
                    else:
+                        # this is the typical case
                        soup = BeautifulSoup(response.text, 'html.parser')
-                        author_name = soup.h1.get_text()
+                        author_name = soup.h1.get_text().strip()  # lately they have a newline at the end, need to strip that
                        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
-                    nickname += '@SF' # our indication of the platform to search for
-                    author_name = author_name.strip() # names can still have white spaces before or after
+                    nickname += '@SF'  # our indication of the platform to search for
+                    author_name = author_name.strip()  # names could still have white spaces before or after

+                    # some authors we ignore
                    if author_name in SF_ignore_list:
                        continue

-                    # look author up in entry developers
+                    # look author up in entry developers field, if not existing add
                    if author_name not in developers:
                        print('   dev "{}" added to entry {}'.format(author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(author_name)]
                        entry_changed = True
-                        developers = entry.get('Developer', [])
+                        developers = entry.get('Developer', [])  # update developers

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
+                        # get existing developer information
                        dev = all_developers[author_name]
-                        if not nickname in dev.get('Contact', []):
+                        if nickname not in dev.get('Contact', []):
                            print(' existing dev "{}" added nickname ({}) to developer database'.format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(x.endswith('@SF') for x in dev.get('Contact', [])):
-                                print('warning: already SF contact')
+                                print('warning: already different SF contact existing')
                            all_developers[author_name]['Contact'] = dev.get('Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
+                        # new developer entry in the developers data base
                        print('   dev "{}" ({}) added to developer database'.format(author_name, nickname))
                        all_developers[author_name] = {'Name': author_name, 'Contact': [nickname], 'Games': [entry['Title']]}
                        all_developers_changed = True
@@ -156,9 +176,8 @@ def sourceforge_import():


 if __name__ == "__main__":
-
    # collect entries
-    collect_sourceforge_entries()
+    # collect_sourceforge_entries()

    # import information from sf
-    # sourceforge_import()
+    sourceforge_import()
--- a/code/synchronization/synchronize_awesome_lists.py
+++ b/code/synchronization/synchronize_awesome_lists.py
@@ -6,6 +6,10 @@ import re
 import requests
 from utils import osg, osg_rejected

+AWESOME_LIST = 'https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md'
+# Probably could fix them within the awesome lists
+IGNORED = ('2006rebotted', 'raw(gl)', 'fheroes2', 'FS2OPEN', 'Barbarian', 'Hexen II: Hammer of Thyrion')
+
 matcher = re.compile(r'\[(.*)?\]\((.*?)\) - (.*)')  # general structure: - [title](link) - description

 if __name__ == "__main__":
@@ -14,10 +18,8 @@ if __name__ == "__main__":
    rejected = osg_rejected.read_rejected_file()

    # read awesome list
-    awesome_list = 'https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md'
-    print('read {}'.format(awesome_list))
-    r = requests.get(awesome_list, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20,
-                     allow_redirects=True)
+    print('read {}'.format(AWESOME_LIST))
+    r = requests.get(AWESOME_LIST, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True)
    if r.status_code != requests.codes.ok:
        raise RuntimeError('Cannot download awesome list.')
    text = r.text
@@ -34,6 +36,14 @@ if __name__ == "__main__":
            description = matches[2]
            entries.append({'Title': title, 'URL': url, 'Description': description, 'Category': category})

+    # remove those from the ignored list
+    entries = [entry for entry in entries if not any(entry['Title'] == x for x in IGNORED)]
+
+    # remove those that are in our rejected list
+    rejected_titles = [x['Title'] for x in rejected]
+    entries = [entry for entry in entries if entry['Title'] not in rejected_titles]
+    print('after filtering for rejected entries {}'.format(len(entries)))
+
    # a bit of statistics about this awesome list
    print('contains {} entries in {} categories'.format(len(entries), len(text)))
    n = [0, 0]
@@ -44,21 +54,18 @@ if __name__ == "__main__":
            n[1] += 1
    print('{} links to Github, {} links not to Github'.format(*n))

-    # remove those that are in our rejected list
-    rejected_titles = [x['Title'] for x in rejected]
-    entries = [entry for entry in entries if entry['Title'] not in rejected_titles]
-
    # read our database
    our_entries = osg.read_entries()
    print('{} entries read (osgl)'.format(len(our_entries)))

    # go through this awesome list entries one by one and compare to our list
    for entry in entries:
+        title = entry['Title']
+        url = entry['URL']
        # go through our entries
        similar_entries = []
        for our_entry in our_entries:
-            title_equal = entry['Title'] == our_entry['Title']
-            url = entry['URL']
+            title_equal = title == our_entry['Title']
            url_present = any(url in x for x in our_entry['Home']) or any(url in x for x in our_entry.get('Code repository', []))
            if title_equal or url_present:
                similar_entries.append(our_entry)
--- a/code/utils/osg_rejected.py
+++ b/code/utils/osg_rejected.py
@@ -18,7 +18,7 @@ def read_rejected_file():
    text = u.read_text(rejected_file)
    rejected = []
    for line in text.split('\n'):
-        print(line)
+        # print(line)
        matches = matcher.findall(line)[0]  # we know there will be exactly one match on every line
        name = matches[0].strip()
        links = matches[1].split(',')