imports from osgameclones
This commit is contained in:
@ -21,6 +21,8 @@
|
||||
"https://git.code.sf.net/p/chromium-bsu/code",
|
||||
"https://git.code.sf.net/p/dangerdeep/git",
|
||||
"https://git.code.sf.net/p/dnt/code",
|
||||
"https://git.code.sf.net/p/doomlegacy/legacy2",
|
||||
"https://git.code.sf.net/p/doomlegacy/masterserver",
|
||||
"https://git.code.sf.net/p/dunedynasty/dunedynasty",
|
||||
"https://git.code.sf.net/p/dunelegacy/code",
|
||||
"https://git.code.sf.net/p/epicheroes/code",
|
||||
@ -103,6 +105,8 @@
|
||||
"https://github.com/Illarion-eV/Illarion-Content.git",
|
||||
"https://github.com/Illarion-eV/Illarion-Java.git",
|
||||
"https://github.com/Illarion-eV/Illarion-Server.git",
|
||||
"https://github.com/Interkarma/daggerfall-unity.git",
|
||||
"https://github.com/Interrupt/delverengine.git",
|
||||
"https://github.com/Kromster80/kam_remake.git",
|
||||
"https://github.com/LWJGL/lwjgl3.git",
|
||||
"https://github.com/Leejjon/Battleround.git",
|
||||
@ -190,6 +194,7 @@
|
||||
"https://github.com/angband/angband.git",
|
||||
"https://github.com/antionio/game-off-2013.git",
|
||||
"https://github.com/anttisalonen/kingdoms.git",
|
||||
"https://github.com/aperture-software/colditz-escape.git",
|
||||
"https://github.com/arescentral/antares.git",
|
||||
"https://github.com/arx/ArxLibertatis.git",
|
||||
"https://github.com/atrinik/atrinik.git",
|
||||
@ -217,9 +222,13 @@
|
||||
"https://github.com/craftworkgames/infiniminer.git",
|
||||
"https://github.com/crawl/crawl.git",
|
||||
"https://github.com/cthielen/Epiar.git",
|
||||
"https://github.com/cubosphere/cubosphere-code.git",
|
||||
"https://github.com/cxong/cdogs-sdl.git",
|
||||
"https://github.com/darklegion/tremulous.git",
|
||||
"https://github.com/davidjoffe/dave_gnukem.git",
|
||||
"https://github.com/delight-im/OpenSoccer.git",
|
||||
"https://github.com/dgengin/DGEngine.git",
|
||||
"https://github.com/dhewm/dhewm3.git",
|
||||
"https://github.com/djyt/cannonball.git",
|
||||
"https://github.com/dmecke/OpenSoccerStar.git",
|
||||
"https://github.com/doxygen/doxygen.git",
|
||||
@ -229,6 +238,8 @@
|
||||
"https://github.com/dungeons-of-moria/umoria.git",
|
||||
"https://github.com/ec429/harris.git",
|
||||
"https://github.com/egoboo/egoboo.git",
|
||||
"https://github.com/ellisonleao/clumsy-bird.git",
|
||||
"https://github.com/emezeske/digbuild.git",
|
||||
"https://github.com/endless-sky/endless-sky.git",
|
||||
"https://github.com/enigma-dev/enigma-dev.git",
|
||||
"https://github.com/exult/exult.git",
|
||||
@ -237,6 +248,7 @@
|
||||
"https://github.com/fariazz/World-of-Heroes.git",
|
||||
"https://github.com/farmboy0/slashem.git",
|
||||
"https://github.com/fastrgv/AdaVenture.git",
|
||||
"https://github.com/fogleman/Craft.git",
|
||||
"https://github.com/freeciv/freeciv-web.git",
|
||||
"https://github.com/freeciv/freeciv.git",
|
||||
"https://github.com/freedoom/freedoom.git",
|
||||
@ -245,6 +257,7 @@
|
||||
"https://github.com/freeors/War-Of-Kingdom.git",
|
||||
"https://github.com/freeserf/freeserf.git",
|
||||
"https://github.com/gabrielecirulli/2048.git",
|
||||
"https://github.com/galaxyhaxz/devilution.git",
|
||||
"https://github.com/gemrb/gemrb.git",
|
||||
"https://github.com/glennrp/libpng.git",
|
||||
"https://github.com/goblinhack/goblinhack.git",
|
||||
@ -258,7 +271,9 @@
|
||||
"https://github.com/guillaume-gouchon/dungeonquest.git",
|
||||
"https://github.com/guillaume-gouchon/smash.js.git",
|
||||
"https://github.com/hackcraft-de/linwarrior.git",
|
||||
"https://github.com/haleymt/CrystalQuest.git",
|
||||
"https://github.com/harfbuzz/harfbuzz.git",
|
||||
"https://github.com/haroldo-ok/datastorm.git",
|
||||
"https://github.com/henkboom/pax-britannica.git",
|
||||
"https://github.com/hhirsch/ardentryst.git",
|
||||
"https://github.com/hinogi/eternalwinterwars.git",
|
||||
@ -341,12 +356,15 @@
|
||||
"https://github.com/red-eclipse/base.git",
|
||||
"https://github.com/richardjs/Maelstrom.git",
|
||||
"https://github.com/riksweeney/edgar.git",
|
||||
"https://github.com/rohit-n/Clonepoint.git",
|
||||
"https://github.com/sabetts/bratwurst.git",
|
||||
"https://github.com/sago007/annchienta.git",
|
||||
"https://github.com/samcv/brainworkshop.git",
|
||||
"https://github.com/scottschiller/ArmorAlley.git",
|
||||
"https://github.com/scummvm/scummvm.git",
|
||||
"https://github.com/shinyquagsire23/DesktopAdventures.git",
|
||||
"https://github.com/silverweed/lifish.git",
|
||||
"https://github.com/simeonpilgrim/coab.git",
|
||||
"https://github.com/singularity/singularity.git",
|
||||
"https://github.com/snauts/game-lv.git",
|
||||
"https://github.com/spring/spring.git",
|
||||
@ -357,6 +375,7 @@
|
||||
"https://github.com/superpowers/superpowers-core.git",
|
||||
"https://github.com/supertuxkart/stk-code.git",
|
||||
"https://github.com/suprafun/aiwars.git",
|
||||
"https://github.com/svkaiser/Doom64EX.git",
|
||||
"https://github.com/swig/swig.git",
|
||||
"https://github.com/tales/sourceoftales.git",
|
||||
"https://github.com/tales/tales-client.git",
|
||||
@ -375,6 +394,7 @@
|
||||
"https://github.com/unnethack/unnethack.git",
|
||||
"https://github.com/urho3d/Urho3D.git",
|
||||
"https://github.com/valeriansaliou/boulder-dash.git",
|
||||
"https://github.com/varunpant/CrappyBird.git",
|
||||
"https://github.com/vcmi/vcmi.git",
|
||||
"https://github.com/vcosta/derclou.git",
|
||||
"https://github.com/vegastrike/Vega-Strike-Engine-Source.git",
|
||||
@ -394,6 +414,7 @@
|
||||
"https://github.com/zaki/irrlicht.git",
|
||||
"https://github.com/zenorogue/hyperrogue.git",
|
||||
"https://github.com/zombieman1041/BlakedAwesomenaughts.git",
|
||||
"https://gitlab.com/Dringgstein/Commander-Genius.git",
|
||||
"https://gitlab.com/KilgoreTroutMaskReplicant/1oom.git",
|
||||
"https://gitlab.com/drummyfish/Bombman.git",
|
||||
"https://gitlab.com/evol/evol-all.git",
|
||||
|
@ -2,16 +2,203 @@
|
||||
Imports game details from libregamewiki by scraping the website, starting from https://libregamewiki.org/Category:Games
|
||||
|
||||
TODO: also parse the rejected games (https://libregamewiki.org/Libregamewiki:Rejected_games_list) and maybe the suggested games (https://libregamewiki.org/Libregamewiki:Suggested_games)
|
||||
|
||||
Unique left column names in the game info boxes:
|
||||
['Code license', 'Code licenses', 'Developer', 'Developers', 'Engine', 'Engines', 'Genre', 'Genres', 'Libraries', 'Library', 'Media license', 'Media licenses', 'P. language', 'P. languages', 'Platforms']
|
||||
"""
|
||||
|
||||
import requests
|
||||
import re
|
||||
import json
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
def key_selection_gameinfobox(a, b):
    """
    Selects which one of the two alternative keys in a is contained in b.

    a must hold exactly two alternatives, b is any container supporting membership tests (for example the keys of a
    parsed game info box).

    Returns the tuple (key, index) with the alternative that was found and its position in a, or (None, None) if
    neither of them is in b.
    Raises RuntimeError if a does not hold exactly two elements or if both alternatives are in b.
    """
    if len(a) != 2:
        raise RuntimeError('expected exactly two alternative keys, got {}'.format(len(a)))
    # remember every alternative that is present together with its position in a
    present = [(key, index) for index, key in enumerate(a) if key in b]
    if len(present) == 2:
        raise RuntimeError('both alternative keys {} are present'.format(tuple(a)))
    if not present:
        return None, None
    return present[0]
|
||||
|
||||
|
||||
def extract_field_content(key, idx, info):
    """
    Extracts the comma separated values of one field of a game info box.

    key is the name of the field, info maps field names to objects offering get_text() and idx tells which spelling
    of the field name was found (0: singular, 1: plural).

    Returns the list of stripped, non-empty values with trailing footnote markers like [1] removed. Prints a warning
    if the number of values does not fit the singular/plural spelling of the field name.
    Raises RuntimeError if the field does not contain any value.
    """
    content = info[key].get_text().split(',')
    # remove trailing footnote markers ([1], [2], ...), also several in a row
    content = [re.sub(r'(\s*\[\d+\])+$', '', x.strip()).strip() for x in content]
    # ''.split(',') gives [''], so empty items must be dropped before the emptiness check can work
    content = [x for x in content if x]
    if not content:
        raise RuntimeError('game info field {} is empty'.format(key))
    if (len(content) > 1 and idx == 0) or (len(content) == 1 and idx == 1):
        print(' warning: {} Sg./Pl. mismatch'.format(key))
    return content
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Scrapes all game pages listed in the Games category of libregamewiki, reads the game info box, the
    # "Available as package" table and the categories of every page and stores the result as lgw_import.json
    # next to this script.

    regex_games = re.compile(r"<li><a href=\"\/(.+?)\".*?>(.+?)<\/a><\/li>")  # url part, name

    # parameters
    base_url = 'https://libregamewiki.org'
    ignored_gameinfos = ['Contribute', 'Origin', 'Release date', 'Latest release']

    # read and process the base url (get all games and categories)
    url = base_url + '/Category:Games'
    games = []
    while True:
        text = requests.get(url).text
        soup = BeautifulSoup(text, 'html.parser')
        #categories = soup.find('div', id='mw-subcategories').find_all('li')
        #categories = [(x.a['href'], x.a.string) for x in categories]

        # game pages (link target, link text)
        pages = soup.find('div', id='mw-pages').find_all('li')
        games.extend(((x.a['href'], x.a.string) for x in pages))

        # next page, the listing is finished if there is no 'next page' link anymore
        next_page = soup.find('a', string='next page')
        if not next_page:
            break
        url = base_url + next_page['href']

    print('current number of games in LGW {}'.format(len(games)))

    # parse games
    counter = 0
    unique_gameinfo_fields = set()
    entries = []
    for game in games:
        url = base_url + game[0]
        text = requests.get(url).text
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.string
        print(title)
        entry = {'name': title}

        # parse gameinfobox
        info = soup.find('div', class_='gameinfobox')
        if not info:
            print(' no gameinfobox')
        else:
            # map the header cell text of every table row to its data cell
            info = info.find_all('tr')
            info = [(x.th.string, x.td) for x in info if x.th and x.th.string]
            info = [x for x in info if x[0] not in ignored_gameinfos]
            info = dict(info)
            unique_gameinfo_fields.update(info.keys())

            # consume fields of gameinfobox (every consumed field is deleted from info)
            # genre
            key, idx = key_selection_gameinfobox(('Genre', 'Genres'), info.keys())
            if key:
                # the genres must be stored, a bare entry['genre'] lookup would only raise a KeyError
                entry['genre'] = extract_field_content(key, idx, info)
                del info[key]

            # platforms (only the plural spelling is looked up, therefore idx is fixed to 1)
            key = 'Platforms'
            if key in info:
                platforms = extract_field_content(key, 1, info)
                # platforms = [x if x != 'Mac' else 'macOS' for x in platforms]  # replace Mac with macOS
                entry['platform'] = platforms
                del info[key]

            # developer
            key, idx = key_selection_gameinfobox(('Developer', 'Developers'), info.keys())
            if key:
                entry['developer'] = extract_field_content(key, idx, info)
                del info[key]

            # code license
            key, idx = key_selection_gameinfobox(('Code license', 'Code licenses'), info.keys())
            if key:
                entry['code license'] = extract_field_content(key, idx, info)
                del info[key]

            # media license
            key, idx = key_selection_gameinfobox(('Media license', 'Media licenses'), info.keys())
            if key:
                entry['assets license'] = extract_field_content(key, idx, info)
                del info[key]

            # engine
            key, idx = key_selection_gameinfobox(('Engine', 'Engines'), info.keys())
            if key:
                entry['engine'] = extract_field_content(key, idx, info)
                del info[key]

            # library
            key, idx = key_selection_gameinfobox(('Library', 'Libraries'), info.keys())
            if key:
                entry['library'] = extract_field_content(key, idx, info)
                del info[key]

            # programming language
            key, idx = key_selection_gameinfobox(('P. language', 'P. languages'), info.keys())
            if key:
                languages = extract_field_content(key, idx, info)
                languages = [x for x in languages if x != 'HTML5']  # ignore HTML5
                entry['code language'] = languages
                del info[key]

            # unconsumed, any field left over at this point is not known to the importer yet
            if info:
                print('unconsumed gameinfo keys {}'.format(info.keys()))
                raise RuntimeError()

        # parse "for available as package in"
        tables = soup.find_all('table', class_='wikitable')
        tables = [table for table in tables if table.caption and table.caption.string.startswith('Available as package')]
        if len(tables) > 0:
            if len(tables) > 1:
                raise RuntimeError()
            table = tables[0]
            packages = table.find_all('tr')
            packages = [x.td.a['href'] for x in packages]
            entry['linux-packages'] = packages

        # categories
        categories = soup.find_all('div', id='mw-normal-catlinks')
        if not categories:
            print(' no categories')
            categories = []
        else:
            if len(categories) > 1:
                raise RuntimeError()
            categories = categories[0]
            categories = categories.find_all('li')
            categories = [x.a.string for x in categories]
            if 'Games' not in categories:
                print(' "Games" not in categories')
            else:
                categories.remove('Games')  # should be there
            # strip games at the end
            phrase = ' games'
            categories = [x[:-len(phrase)] if x.endswith(phrase) else x for x in categories]
            ignored_categories = ['Articles lacking reference', 'Stubs']
            categories = [x for x in categories if x not in ignored_categories]
        entry['categories'] = categories

        entries.append(entry)
        # print(entry)

        # disabled limit on the number of parsed games (enable the break for quick test runs)
        counter += 1
        if counter > 20:
            # break
            pass

    unique_gameinfo_fields = sorted(list(unique_gameinfo_fields))
    print('unique gameinfo fields: {}'.format(unique_gameinfo_fields))

    # save entries
    json_path = os.path.join(os.path.dirname(__file__), 'lgw_import.json')
    text = json.dumps(entries, indent=1)
    write_text(json_path, text)
|
||||
|
||||
# read base url
|
||||
base_url = 'https://libregamewiki.org/Category:Games'
|
||||
text = requests.get(base_url).text
|
||||
print(text)
|
||||
|
@ -2,4 +2,46 @@
|
||||
Once data from libregamewiki is imported, synchronize with our database, i.e. identify the entries both have in common,
|
||||
estimate the differences between the entries both have in common, and suggest adding the entries they do not have in common to each
|
||||
other.
|
||||
"""
|
||||
|
||||
unique imported fields: 'assets license', 'categories', 'code language', 'code license', 'developer', 'engine', 'genre', 'library', 'linux-packages', 'name', 'platform'
|
||||
"""
|
||||
|
||||
import json
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
def get_unique_field_content(field, entries):
    """
    Collects the distinct values of a field over all entries.

    entries is an iterable of dictionaries, entries without the field are skipped. The value of the field is
    expected to be an iterable of values (for example a list of strings), a plain string value is counted as one
    single value.

    Returns the sorted list of the distinct values.
    """
    unique_content = set()
    for entry in entries:
        if field not in entry:
            continue
        value = entry[field]
        if isinstance(value, str):
            # set.update() on a string would add its individual characters
            unique_content.add(value)
        else:
            unique_content.update(value)
    return sorted(unique_content)
|
||||
|
||||
platform_replacements = {'Mac': 'macOS'}
|
||||
|
||||
if __name__ == "__main__":
    # Loads the entries imported from libregamewiki and prints the field names and the distinct values of the
    # list valued fields.

    # import lgw import
    json_path = os.path.join(os.path.dirname(__file__), 'lgw_import.json')
    text = read_text(json_path)
    lgw_entries = json.loads(text)

    # check for unique field names
    unique_fields = set()
    for lgw_entry in lgw_entries:
        for field_name in lgw_entry.keys():
            unique_fields.add(field_name)
    unique_fields = sorted(unique_fields)
    print('unique lgw fields: {}'.format(unique_fields))

    # unique contents, printed in this fixed order
    inspected_fields = (
        'platform',
        'code language',
        'categories',
        'genre',
        'library',
        'code license',
        'assets license',
        'engine',
    )
    for inspected_field in inspected_fields:
        print('{}: {}'.format(inspected_field, get_unique_field_content(inspected_field, lgw_entries)))
|
@ -45,7 +45,8 @@ osgc_name_aliases = {}
|
||||
osgc_licenses_map = {'GPL2': 'GPL-2.0', 'GPL3': 'GPL-3.0', 'AGPL3': 'AGPL-3.0', 'LGPL3': 'LGPL-3.0', 'LGPL2': 'LGPL-2.1', 'MPL': 'MPL-2.0', 'Apache': 'Apache-2.0', 'Artistic': 'Artistic License'}
|
||||
|
||||
# ignore osgc entries (for various reasons like unclear license etc.)
|
||||
osgc_ignored_entries = ["A Mouse's Vengeance", 'achtungkurve.com', 'AdaDoom3', 'Agendaroids', 'Alien 8', 'Ard-Reil', 'Balloon Fight', 'bladerunner (Engine within SCUMMVM)', 'Block Shooter', 'Bomb Mania Reloaded', 'boulder-dash', 'Cannon Fodder']
|
||||
osgc_ignored_entries = ["A Mouse's Vengeance", 'achtungkurve.com', 'AdaDoom3', 'Agendaroids', 'Alien 8', 'Ard-Reil', 'Balloon Fight', 'bladerunner (Engine within SCUMMVM)', 'Block Shooter', 'Bomb Mania Reloaded', 'boulder-dash', 'Cannon Fodder', 'Contra_remake', 'CosmicArk-Advanced', 'Deuteros X', 'datastorm'
|
||||
,'div-columns', 'div-pacman2600', 'div-pitfall', 'div-spaceinvaders2600']
|
||||
|
||||
def similarity(a, b):
    """
    Returns the SequenceMatcher ratio of the two strings a and b after case folding both of them.
    """
    folded_a = str.casefold(a)
    folded_b = str.casefold(b)
    matcher = SequenceMatcher(None, folded_a, folded_b)
    return matcher.ratio()
|
||||
@ -325,12 +326,12 @@ if __name__ == "__main__":
|
||||
originals = osgc_entry['originals']
|
||||
if type(originals) == str:
|
||||
originals = [originals]
|
||||
keywords.append('inspired by {}'.format(' + '.join(original)))
|
||||
keywords.append('inspired by {}'.format(' + '.join(originals)))
|
||||
if 'multiplayer' in osgc_entry:
|
||||
multiplayer = osgc_entry['multiplayer']
|
||||
if type(multiplayer) == str:
|
||||
multiplayer = [multiplayer]
|
||||
keywords.extend('multiplayer {}'.format(' + '.join(multiplayer)))
|
||||
keywords.append('multiplayer {}'.format(' + '.join(multiplayer)))
|
||||
if 'content' in osgc_entry:
|
||||
content = osgc_entry['content']
|
||||
keywords.append('{} content'.format(content))
|
||||
|
Reference in New Issue
Block a user