cleanup of keywords and code dependencies

This commit is contained in:
Trilarion
2019-11-21 13:06:00 +01:00
parent 523c0ca55a
commit 7b3c31160a
241 changed files with 601 additions and 595 deletions

View File

@@ -494,6 +494,7 @@
"https://github.com/cthielen/Epiar.git",
"https://github.com/cubei/FlappyCow.git",
"https://github.com/cubosphere/cubosphere-code.git",
"https://github.com/cubosphere/cubosphere-data.git",
"https://github.com/cxong/FallingTime.git",
"https://github.com/cxong/cdogs-sdl.git",
"https://github.com/cyberegoorg/cetech.git",
@@ -1178,7 +1179,7 @@
"https://src.fedoraproject.org/rpms/blobwars.git"
],
"svn": [
"http://svn.uktrainsim.com/svn/openrails/trunk",
"http://svn.uktrainsim.com/svn/openrails",
"https://svn.code.sf.net/p/balder/code/",
"https://svn.code.sf.net/p/blobby/code/",
"https://svn.code.sf.net/p/bombic2/code/",
@@ -1254,6 +1255,6 @@
"https://code.launchpad.net/heroesofwesnoth",
"https://code.launchpad.net/openhomm",
"https://code.launchpad.net/~dnax88/gweled/trunk",
"https://code.launchpad.net/~silwol/freenukum/trunk"
"https://code.launchpad.net/~silwol/freenukum"
]
}

View File

@@ -98,6 +98,7 @@ http://www.myandroidonline.com/category/games/
http://www.netgore.com/
http://www.newbreedsoftware.com/ (all)
http://www.oletus.fi/games/
http://www.paulscode.com/games/
http://www.ph2.net/zugspiel/
http://www.plasmapong.com/plasma-pong/plasma-pong/
http://www.roguebasin.com (all)
@@ -189,6 +190,7 @@ https://github.com/atphalix/nexuiz
https://github.com/azhirnov/FrameGraph
https://github.com/benl23x5/gloss
https://github.com/bioglaze/aether3d
https://github.com/bomblik/BlockOut_II_PSVITA
https://github.com/bsmr-games (also contains copies)
https://github.com/CatacombGames/
https://github.com/cflewis/Infinite-Mario-Bros
@@ -205,6 +207,7 @@ https://github.com/collinhover/kaiopua
https://github.com/cookgreen/Yuris-Revenge
https://github.com/Cortrah/SpaceOperaDesign, https://github.com/Cortrah/SpaceOperaRuby/blob/master/design/turnstyles.md
https://github.com/cping/LGame
https://github.com/cymonsgames/CymonsGames (collection)
https://github.com/DaanVanYperen/artemis-odb-contrib
https://github.com/DeflatedPickle/FAOSDance
https://github.com/delaford/game
@@ -254,7 +257,9 @@ https://github.com/ligurio/awesome-ttygames
https://github.com/MarcoLizza/tofu-engine
https://github.com/MatthewTheGlutton/HideousDestructor
https://github.com/McKay42/McOsu
https://github.com/megamarc/Tilengine
https://github.com/mewo2/terrain
https://github.com/mofr/Diablerie
https://github.com/moonwards1/Moonwards-Virtual-Moon
https://github.com/morganbengtsson/mos
https://github.com/MrFrenik/Enjon

View File

@@ -71,7 +71,6 @@ def parse_lgw_content():
files = os.listdir(import_path)
entries = []
for file in files:
file = files[56]
if file.startswith('_lgw'):
continue
@@ -294,14 +293,31 @@ def clean_lgw_content():
mandatory_fields -= set(remove_fields)
print('mandatory lgw fields: {}'.format(sorted(list(mandatory_fields))))
# statistics before
print('field contents before')
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
for field in fields:
content = [entry[field] for entry in entries if field in entry]
# flatten
flat_content = []
for c in content:
if isinstance(c, list):
flat_content.extend(c)
else:
flat_content.append(c)
statistics = utils.unique_elements_and_occurrences(flat_content)
print('{}: {}'.format(field, ', '.join(statistics)))
# content replacements
entries = remove_parenthized_content(entries, ('assets license', 'code language', 'code license', 'engine', 'genre', 'last active', 'library'))
entries = remove_prefix_suffix(entries, ('code license', 'assets license'), ('"', 'GNU', ), ('"', '[3]', '[2]', '[1]', 'only'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPL', ('General Public License', ))
entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2', ('GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPLv2+', ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPLv3+', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2.0', ('GPLv2', )) # for LGW GPLv2 would be the correct writing
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2', ('GPLv2', 'GPL v2', 'GPL version 2.0', 'GPL 2.0', 'General Public License v2', 'GPL version 2', 'Gplv2', 'GPL 2'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-2', ('GPL v2 or later', 'GPL 2+', 'GPL v2+', 'GPL version 2 or later'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3.0', ('GPLv3', )) # for LGW GPLv3 would be the correct writing
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3', ('GPL v3', 'GNU GPL v3', 'GPL 3'))
entries = replace_content(entries, ('code license', 'assets license'), 'GPL-3', ('GPL v3+', 'GPL v.3 or later', 'GPL v3 or later'))
entries = replace_content(entries, ('code license', 'assets license'), 'Public domain', ('public domain', 'Public Domain'))
entries = replace_content(entries, ('code license', 'assets license'), 'zlib', ('zlib/libpng license', 'Zlib License'))
entries = replace_content(entries, ('code license', 'assets license'), 'BSD', ('Original BSD License', ))
@@ -329,7 +345,7 @@ def clean_lgw_content():
# list for every unique field
# fields = sorted(list(unique_fields))
print('\nfield contents after')
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
for field in fields:
content = [entry[field] for entry in entries if field in entry]
@@ -341,7 +357,7 @@ def clean_lgw_content():
else:
flat_content.append(c)
statistics = utils.unique_elements_and_occurrences(flat_content)
print('\n{}: {}'.format(field, ', '.join(statistics)))
print('{}: {}'.format(field, ', '.join(statistics)))
# save entries
text = json.dumps(entries, indent=1)
@@ -354,7 +370,7 @@ if __name__ == "__main__":
# download_lgw_content()
# stage two
parse_lgw_content()
# parse_lgw_content()
# stage three
# clean_lgw_content()
clean_lgw_content()

View File

@@ -27,27 +27,40 @@ import os
from utils import constants, utils, osg
name_replacements = {'Eat the Whistle': 'Eat The Whistle', 'Scorched 3D': 'Scorched3D', 'Silver Tree': 'SilverTree', 'Blob Wars Episode 1 : Metal Blob Solid': 'Blobwars: Metal Blob Solid', 'Adventure': 'Colossal Cave Adventure',
'Fall Of Imiryn': 'Fall of Imiryn', 'Liquid War 6': 'Liquid War', 'Gusanos': 'GUSANOS', 'Corewars': 'Core War', 'FLARE': 'Flare', 'Vitetris': 'vitetris', 'Powder Toy': 'The Powder Toy', 'Asylum': 'SDL Asylum',
'Atanks': 'Atomic Tanks', 'HeXon': 'heXon', 'Unnethack': 'UnNetHack', 'Nova Pinball': 'NOVA PINBALL', 'Jump n Bump': "Jump'n'Bump", 'Blades of Exile': 'Classic Blades of Exile', 'BlinKen': 'Blinken',
'Colobot': 'Colobot: Gold Edition', 'Dead Justice': 'Cat Mother Dead Justice', 'FreeDink': 'GNU FreeDink', 'FRaBs': 'fRaBs', 'Harmonist': 'Harmonist: Dayoriah Clan Infiltration', 'Iris2 3D Client - for Ultima Online': 'Iris2',
'Java Classic Role Playing Game': 'jClassicRPG', 'LambdaRouge': 'LambdaRogue: The Book of Stars', 'Osgg': 'OldSkool Gravity Game', 'PyRacerz': 'pyRacerz', 'Starfighter': 'Project: Starfighter',
'TORCS': 'TORCS, The Open Racing Car Simulator', 'Vertigo (game)': 'Vertigo', 'XInvaders3D': 'XInvaders 3D'}
ignored_names = ['Hetris', '8 Kingdoms', 'Antigravitaattori', 'Arena of Honour', 'Arkhart', 'Ascent of Justice', 'Balazar III', 'Balder3D', 'Barbie Seahorse Adventures', 'Barrage', 'Gnome Batalla Naval', 'User:AVRS/sandbox', 'Blocks',
lgw_name_aliases = {'Eat the Whistle': 'Eat The Whistle', 'Scorched 3D': 'Scorched3D', 'Blob Wars Episode 1 : Metal Blob Solid': 'Blobwars: Metal Blob Solid', 'Adventure': 'Colossal Cave Adventure',
'Liquid War 6': 'Liquid War', 'Gusanos': 'GUSANOS', 'Corewars': 'Core War', 'FLARE': 'Flare', 'Vitetris': 'vitetris', 'Powder Toy': 'The Powder Toy', 'Asylum': 'SDL Asylum',
'Atanks': 'Atomic Tanks', 'HeXon': 'heXon', 'Unnethack': 'UnNetHack', 'Nova Pinball': 'NOVA PINBALL', 'Jump n Bump': "Jump'n'Bump", 'Blades of Exile': 'Classic Blades of Exile',
'Colobot': 'Colobot: Gold Edition', 'Dead Justice': 'Cat Mother Dead Justice', 'FreeDink': 'GNU FreeDink', 'FRaBs': 'fRaBs', 'Harmonist': 'Harmonist: Dayoriah Clan Infiltration', 'Iris2 3D Client - for Ultima Online': 'Iris2',
'Java Classic Role Playing Game': 'jClassicRPG', 'Osgg': 'OldSkool Gravity Game', 'PyRacerz': 'pyRacerz', 'Starfighter': 'Project: Starfighter',
'TORCS': 'TORCS, The Open Racing Car Simulator', 'Vertigo (game)': 'Vertigo', 'XInvaders3D': 'XInvaders 3D', 'LambdaRogue': 'LambdaRogue: The Book of Stars', 'Maniadrive': 'ManiaDrive',
'Which Way Is Up': 'Which Way Is Up?'}
lgw_ignored_entries = ['Hetris', '8 Kingdoms', 'Antigravitaattori', 'Arena of Honour', 'Arkhart', 'Ascent of Justice', 'Balazar III', 'Balder3D', 'Barbie Seahorse Adventures', 'Barrage', 'Gnome Batalla Naval', 'Blocks',
'Brickshooter', 'Bweakfwu', 'Cheese Boys', 'Clippers', 'Codewars', 'CRAFT: The Vicious Vikings', 'DQM', 'EmMines', 'Eskimo-run', 'Feuerkraft', 'Fight or Perish', 'Flatland', 'Forest patrol', 'Free Reign', 'GalaxyMage',
'Gloss', 'GRUB Invaders', 'Howitzer Skirmish', 'Imperium: Sticks', 'Interstate Outlaws', 'GNOME Games', 'KDE Games', 'LegacyClone', 'Memonix', 'Ninjapix', 'Neverputt', 'Militia Defense', 'Sudoku86',
'Terminal Overload release history', 'Scions of Darkness', 'Sedtris', 'SilChess', 'SSTPong', 'Tesseract Trainer', 'TunnelWars']
'Terminal Overload release history', 'Scions of Darkness', 'Sedtris', 'SilChess', 'SSTPong', 'Tesseract Trainer', 'TunnelWars']
licenses_map = {'GPLv2': 'GPL-2.0', 'GPLv2+': 'GPL-2.0', 'GPLv3': 'GPL-3.0', 'GPLv3+': 'GPL-3.0'}
def list_compare(a, b, k):
def compare_sets(a, b, name, limit=None):
"""
:param a:
:param b:
:param name:
:return:
"""
x = [x for x in a if x not in b]
p = ''
for x in x:
p += ' {} {} missing\n'.format(k, x)
if not isinstance(a, set):
a = set(a)
if not isinstance(b, set):
b = set(b)
d = sorted(list(a - b))
if d and limit != 'notus':
p += ' {} : us : {}\n'.format(name, ', '.join(d))
d = sorted(list(b - a))
if d and limit != 'notthem':
p += ' {} : them : {}\n'.format(name, ', '.join(d))
return p
@@ -58,18 +71,28 @@ if __name__ == "__main__":
maximal_newly_created_entries = 40
# paths
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
lgw_entries_file = os.path.join(import_path, '_lgw.cleaned.json')
lgw_import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
lgw_entries_file = os.path.join(lgw_import_path, '_lgw.cleaned.json')
# import lgw import
text = utils.read_text(lgw_entries_file)
lgw_entries = json.loads(text)
# perform name replacements
lgw_entries = [x for x in lgw_entries if x['name'] not in ignored_names]
# eliminate the ignored entries
_ = [x['name'] for x in lgw_entries if x['name'] in lgw_ignored_entries] # those that will be ignored
_ = set(lgw_ignored_entries) - set(_) # those that shall be ignored minus those that will be ignored
if _:
print('Can un-ignore {}'.format(_))
lgw_entries = [x for x in lgw_entries if x['name'] not in lgw_ignored_entries]
# perform name and code language replacements
_ = [x['name'] for x in lgw_entries if x['name'] in lgw_name_aliases.keys()] # those that will be renamed
_ = set(lgw_name_aliases.keys()) - set(_) # those that shall be renamed minus those that will be renamed
if _:
print('Can un-rename {}'.format(_))
for index, lgw_entry in enumerate(lgw_entries):
if lgw_entry['name'] in name_replacements:
lgw_entry['name'] = name_replacements[lgw_entry['name']]
if lgw_entry['name'] in lgw_name_aliases:
lgw_entry['name'] = lgw_name_aliases[lgw_entry['name']]
if 'code language' in lgw_entry:
languages = lgw_entry['code language']
h = []
@@ -114,11 +137,11 @@ if __name__ == "__main__":
print('{} in both, {} only in LGW, {} only with us'.format(len(common_names), len(lgw_names), len(our_names)))
# find similar names among the rest
#print('similar names')
#for lgw_name in lgw_names:
# for our_name in our_names:
# if osg.game_name_similarity(lgw_name, our_name) > similarity_threshold:
# print('{} - {}'.format(lgw_name, our_name))
print('similar names')
for lgw_name in lgw_names:
for our_name in our_names:
if osg.game_name_similarity(lgw_name, our_name) > similarity_threshold:
print('{} - {}'.format(lgw_name, our_name))
newly_created_entries = 0
# iterate over their entries
@@ -140,11 +163,29 @@ if __name__ == "__main__":
# platform
key = 'platform'
p += list_compare(lgw_entry.get(key, []), our_entry.get(key, []), key)
p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
# categories/keywords
#p += compare_sets(lgw_entry.get('categories', []), our_entry.get('keywords', []), 'categories/keywords')
# code language
key = 'code language'
p += list_compare(lgw_entry.get(key, []), our_entry.get(key, []), key)
p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
# code license (GPLv2)
key = 'code license'
p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
# engine, library
p += compare_sets(lgw_entry.get('engine', []), our_entry.get('code dependencies', []), 'code dependencies', 'notthem')
p += compare_sets(lgw_entry.get('library', []), our_entry.get('code dependencies', []), 'code dependencies', 'notthem')
p += compare_sets(lgw_entry.get('engine', [])+lgw_entry.get('library', []), our_entry.get('code dependencies', []), 'engine/library', 'notus')
# assets license
key = 'assets license'
p += compare_sets(lgw_entry.get(key, []), our_entry.get(key, []), key)
# TODO developer (need to introduce a field with us first)
if p:
print('{}\n{}'.format(name, p))

View File

@@ -104,11 +104,11 @@ if __name__ == "__main__":
# import the osgameclones data
osgc_path = os.path.realpath(os.path.join(root_path, os.path.pardir, '11_osgameclones.git', 'games'))
files = os.listdir(osgc_path)
osgc_files = os.listdir(osgc_path)
# iterate over all yaml files in osgameclones/data folder and load contents
osgc_entries = []
for file in files:
for file in osgc_files:
# read yaml
with open(os.path.join(osgc_path, file), 'r', encoding='utf-8') as stream:
try:

View File

@@ -68,7 +68,7 @@ def parse_entry(content):
regex = re.compile(r"^# (.*)") # start of content, starting with "# " and then everything until the end of line
matches = regex.findall(content)
if len(matches) != 1 or not matches[0]:
raise RuntimeError('Name not found in entry "{}"'.format(content))
raise RuntimeError('Name not found in entry "{}" : {}'.format(content, matches))
info['name'] = matches[0]
# read description
@@ -160,8 +160,8 @@ def parse_entry(content):
for field in ['home', 'download', 'play', 'code repository']:
if field in info:
for url in info[field]:
if not any([url.startswith(x) for x in ['http://', 'https://', 'git://', 'svn://', 'ftp://']]):
raise RuntimeError('URL "{}" in entry "{}" does not start with http/https/git/svn/ftp'.format(url, info['name']))
if not any([url.startswith(x) for x in ['http://', 'https://', 'git://', 'svn://', 'ftp://', 'bzr://']]):
raise RuntimeError('URL "{}" in entry "{}" does not start with http/https/git/svn/ftp/bzr'.format(url, info['name']))
if ' ' in url:
raise RuntimeError('URL "{}" in entry "{}" contains a space'.format(url, info['name']))