a few additions from backlog and working on scripts
@@ -282,7 +282,9 @@
"https://github.com/aburch/simutrans.git",
"https://github.com/acmepjz/meandmyshadow.git",
"https://github.com/adamenkov/jet-story.git",
"https://github.com/afritz1/OpenTESArena.git",
"https://github.com/ahmetkasif/KittenMaxit.git",
"https://github.com/ajweeks/FlexEngine.git",
"https://github.com/ajxs/jsFO.git",
"https://github.com/akarnokd/open-ig.git",
"https://github.com/albertz/openlierox.git",
@@ -290,6 +292,7 @@
"https://github.com/alexdantas/www.git",
"https://github.com/alexknvl/fonline.git",
"https://github.com/alphaonex86/CatchChallenger.git",
"https://github.com/amroibrahim/DIYDoom.git",
"https://github.com/anael-seghezzi/Maratis-4.git",
"https://github.com/andrettin/wyrmsun.git",
"https://github.com/andrewfenn/Hardwar.git",
@@ -87,11 +87,8 @@ https://fedoraproject.org/wiki/SIGs/Games#List_of_games_we_will_NOT_package
https://flathub.org/home (use it for Linux packaging) / https://flathub.org/apps/category/Game
https://freegamer.blogspot.com/2015/11/top-3-open-source-pinball-games.html
https://futurepinball.com/
https://github.com/afritz1/OpenTESArena
https://github.com/ajweeks/FlexEngine
https://github.com/amerkoleci/alimer
https://github.com/amerkoleci/vortice
https://github.com/amroibrahim/DIYDoom
https://github.com/Anthonymcqueen21/Pygame---Alien-Invasion
https://github.com/ao-libre/ao-cliente
https://github.com/apsillers/lords-of-the-fey
@@ -123,6 +120,7 @@ https://github.com/cubei/FlappyCow
https://github.com/cyberegoorg/cetech
https://github.com/DaemonEngine/Daemon
https://github.com/Dariasteam/TowerJumper
https://github.com/DeflatedPickle/FAOSDance
https://github.com/demonixis/C3DE
https://github.com/digitall/scummvm-deskadv
https://github.com/DigitalPulseSoftware/NazaraEngine
@@ -141,6 +139,7 @@ https://github.com/freeboardgame/FreeBoardGame.org
https://github.com/FreeCol/freecol
https://github.com/gamearians
https://github.com/GentenStudios/quartz-engine
https://github.com/grantjenks/free-python-games (check all)
https://github.com/GunshipPenguin/open_flood
https://github.com/hedgewars/hw
https://github.com/hparcells/cards-against-humanity
@@ -172,6 +171,7 @@ https://github.com/pelya/commandergenius
https://github.com/pld-linux
https://github.com/ptitSeb/gl4es
https://github.com/raysan5/rfxgen
https://github.com/Realm667/WolfenDoom
https://github.com/romlok/godot-gdhexgrid
https://github.com/RonenNess/GeonBit.UI
https://github.com/RPG-Paper-Maker/RPG-Paper-Maker
@@ -220,7 +220,6 @@ https://pyweek.org/4/entries/ (Ascent of Justice)
https://revolutionarygamesstudio.com/ Thrive
https://salsa.debian.org/games-team/etw
https://scratch.mit.edu/ (https://en.scratch-wiki.info/wiki/Scratch_Source_Code)
https://secretchronicles.org/en/
https://sourceforge.net/projects/actiongame/
https://sourceforge.net/projects/deng/
https://sourceforge.net/projects/ettu/
@@ -7,48 +7,23 @@ Unique left column names in the game info boxes:
['Code license', 'Code licenses', 'Developer', 'Developers', 'Engine', 'Engines', 'Genre', 'Genres', 'Libraries', 'Library', 'Media license', 'Media licenses', 'P. language', 'P. languages', 'Platforms']
"""

import os
import requests
import json
from bs4 import BeautifulSoup, NavigableString
from utils.utils import *
from bs4 import BeautifulSoup
from utils import constants, utils, osg


def key_selection_gameinfobox(a, b):
def download_lgw_content():
    """
    Checks which of the two elements in a is in b or none, but not both

    :return:
    """
    if len(a) != 2:
        raise RuntimeError()
    c = [x in b for x in a]
    if all(c):
        raise RuntimeError
    if not any(c):
        return None, None
    d = [(k, i) for (i, k) in enumerate(a) if c[i]]
    return d[0]


def extract_field_content(key, idx, info):
    """
    From a game info field.
    """
    content = info[key].get_text()
    content = content.split(',')
    content = [x.strip() for x in content]
    content = [x if not (x.endswith('[1]') or x.endswith('[2]')) else x[:-3] for x in content]  # remove trailing [1], [2]
    content = [x.strip() for x in content]
    if not content:
        raise RuntimeError
    if (len(content) > 1 and idx == 0) or (len(content) == 1 and idx == 1):
        print(' warning: {} Sg./Pl. mismatch'.format(key))
    return content


if __name__ == "__main__":

    # parameters
    base_url = 'https://libregamewiki.org'
    ignored_gameinfos = ['Contribute', 'Origin', 'Release date', 'Latest release']
    destination_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    utils.recreate_directory(destination_path)

    # read and process the base url (get all games and categories)
    url = base_url + '/Category:Games'
@@ -69,89 +44,70 @@ if __name__ == "__main__":
            break
        url = base_url + next_page['href']

    # remove all those that start with user
    games = [game for game in games if not any(game[1].startswith(x) for x in ('User:', 'Template:', 'Bullet'))]

    print('current number of games in LGW {}'.format(len(games)))

    # parse games
    counter = 0
    unique_gameinfo_fields = set()
    entries = []
    for game in games:
        print(game[1])
        url = base_url + game[0]
        destination_file = os.path.join(destination_path, osg.canonical_game_name(game[0][1:]) + '.html')

        text = requests.get(url).text
        utils.write_text(destination_file, text)


def parse_lgw_content():

    # paths
    import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
    entries_file = os.path.join(import_path, '_lgw.json')

    # iterate over all imported files
    files = os.listdir(import_path)
    entries = []
    for file in files:
        if file == '_lgw.json':
            continue

        text = utils.read_text(os.path.join(import_path, file))

        # parse the html
        soup = BeautifulSoup(text, 'html.parser')
        title = soup.h1.string
        title = soup.h1.get_text()
        print(title)
        entry = {'name': title}

        # get all external links
        links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
        links = [x for x in links if x[0].startswith('http') and not x[0].startswith('https://libregamewiki.org/')]
        entry['external links'] = links

        # get meta description
        description = soup.find('meta', attrs={"name": "description"})
        entry['description'] = description['content']

        # parse gameinfobox
        info = soup.find('div', class_='gameinfobox')
        if not info:
        infos = soup.find('div', class_='gameinfobox')
        if not infos:
            print(' no gameinfobox')
        else:
            info = info.find_all('tr')
            info = [(x.th.string, x.td) for x in info if x.th and x.th.string]
            info = [x for x in info if x[0] not in ignored_gameinfos]
            info = dict(info)
            unique_gameinfo_fields.update(info.keys())

            # consume fields of gameinfobox
            # genre
            key, idx = key_selection_gameinfobox(('Genre', 'Genres'), info.keys())
            if key:
                genres = extract_field_content(key, idx, info)
                entry['genre'] = genres  # fixed: the original line read a value without assigning it
                del info[key]

            # platforms
            key = 'Platforms'
            if key in info:
                platforms = extract_field_content(key, 1, info)
                # platforms = [x if x != 'Mac' else 'macOS' for x in platforms]  # replace Mac with macOS
                entry['platform'] = platforms
                del info[key]

            # developer
            key, idx = key_selection_gameinfobox(('Developer', 'Developers'), info.keys())
            if key:
                entry['developer'] = extract_field_content(key, idx, info)
                del info[key]

            # code license
            key, idx = key_selection_gameinfobox(('Code license', 'Code licenses'), info.keys())
            if key:
                entry['code license'] = extract_field_content(key, idx, info)
                del info[key]

            # media license
            key, idx = key_selection_gameinfobox(('Media license', 'Media licenses'), info.keys())
            if key:
                entry['assets license'] = extract_field_content(key, idx, info)
                del info[key]

            # engine
            key, idx = key_selection_gameinfobox(('Engine', 'Engines'), info.keys())
            if key:
                entry['engine'] = extract_field_content(key, idx, info)
                del info[key]

            # library
            key, idx = key_selection_gameinfobox(('Library', 'Libraries'), info.keys())
            if key:
                entry['library'] = extract_field_content(key, idx, info)
                del info[key]

            # programming language
            key, idx = key_selection_gameinfobox(('P. language', 'P. languages'), info.keys())
            if key:
                languages = extract_field_content(key, idx, info)
                languages = [x for x in languages if x != 'HTML5']  # ignore HTML5
                entry['code language'] = languages
                del info[key]

            # unconsumed
            if info:
                print('unconsumed gameinfo keys {}'.format(info.keys()))
                raise RuntimeError()
            infos = infos.find_all('tr')
            for x in infos:
                if x.th and x.td:
                    # row with header
                    key = x.th.get_text()
                    content = x.td.get_text()
                    content = content.split(',')
                    content = [x.strip() for x in content]
                    entry[key] = content
                if not x.th and x.td:
                    # row without header: contribute section
                    x = x.find_all('li')
                    x = [(x.a.string, x.a['href']) for x in x if x.a]
                    for key, content in x:
                        entry[key] = content

# parse "for available as package in"
|
||||
tables = soup.find_all('table', class_='wikitable')
|
||||
@ -187,18 +143,56 @@ if __name__ == "__main__":
|
||||
entry['categories'] = categories
|
||||
|
||||
entries.append(entry)
|
||||
# print(entry)
|
||||
|
||||
counter += 1
|
||||
if counter > 20:
|
||||
# break
|
||||
pass
|
||||
|
||||
unique_gameinfo_fields = sorted(list(unique_gameinfo_fields))
|
||||
print('unique gameinfo fields: {}'.format(unique_gameinfo_fields))
|
||||
|
||||
# save entries
|
||||
json_path = os.path.join(os.path.dirname(__file__), 'lgw_import.json')
|
||||
text = json.dumps(entries, indent=1)
|
||||
write_text(json_path, text)
|
||||
utils.write_text(entries_file, text)
|
||||
|
||||
|
||||
def clean_lgw_content():
|
||||
|
||||
# paths
|
||||
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
|
||||
entries_file = os.path.join(import_path, '_lgw.json')
|
||||
cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')
|
||||
|
||||
# load entries
|
||||
text = utils.read_text(entries_file)
|
||||
entries = json.loads(text)
|
||||
|
||||
# rename keys
|
||||
key_replacements = (('developer', ('Developer', 'Developers')), ('code license', ('Code license', 'Code licenses')), ('engine', ('Engine', 'Engines')), ('genre', ('Genre', 'Genres')))
|
||||
for index, entry in enumerate(entries):
|
||||
for new_key, old_keys in key_replacements:
|
||||
for key in old_keys:
|
||||
if key in entry:
|
||||
entry[new_key] = entry[key]
|
||||
del entry[key]
|
||||
break
|
||||
|
||||
entries[index] = entry
|
||||
|
||||
# check for unique field names
|
||||
unique_fields = set()
|
||||
for entry in entries:
|
||||
unique_fields.update(entry.keys())
|
||||
print('unique lgw fields: {}'.format(sorted(list(unique_fields))))
|
||||
|
||||
# which fields are mandatory
|
||||
for entry in entries:
|
||||
remove_fields = [field for field in unique_fields if field not in entry]
|
||||
unique_fields -= set(remove_fields)
|
||||
print('mandatory lgw fields: {}'.format(sorted(list(unique_fields))))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# stage one
|
||||
# download_lgw_content()
|
||||
|
||||
# stage two
|
||||
# parse_lgw_content()
|
||||
|
||||
# stage three
|
||||
clean_lgw_content()
|
@ -243,7 +243,7 @@ if __name__ == "__main__":
|
||||
|
||||
# determine file name
|
||||
print('create new entry for {}'.format(lgw_name))
|
||||
file_name = derive_canonical_file_name(lgw_name)
|
||||
file_name = canonical_game_name(lgw_name) + '.md'
|
||||
target_file = os.path.join(games_path, file_name)
|
||||
if os.path.isfile(target_file):
|
||||
print('warning: file {} already existing, save under slightly different name'.format(file_name))
|
||||
|
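Not part of the commit: a sketch of the kind of infobox markup that the tr loop in parse_lgw_content above consumes. The HTML fragment and field values are made up for illustration.

from bs4 import BeautifulSoup

html = ('<div class="gameinfobox"><table>'
        '<tr><th>Genres</th><td>Platformer, Puzzle</td></tr>'
        '<tr><td><ul><li><a href="/wiki/Contribute">Contribute</a></li></ul></td></tr>'
        '</table></div>')
soup = BeautifulSoup(html, 'html.parser')
entry = {}
for row in soup.find('div', class_='gameinfobox').find_all('tr'):
    if row.th and row.td:
        # header row: comma-separated cell content becomes a list
        entry[row.th.get_text()] = [x.strip() for x in row.td.get_text().split(',')]
    if not row.th and row.td:
        # header-less row (contribute section): link text maps to link target
        for li in row.find_all('li'):
            if li.a:
                entry[li.a.string] = li.a['href']
print(entry)  # {'Genres': ['Platformer', 'Puzzle'], 'Contribute': '/wiki/Contribute'}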
@@ -13,7 +13,9 @@ import http.client
import datetime
import json
import textwrap
from utils.osg import *
import os
import re
from utils import constants as c, utils, osg


def update_readme_and_tocs(infos):
@@ -29,12 +31,12 @@ def update_readme_and_tocs(infos):
    print('update readme and toc files')

    # delete content of toc path
    for file in os.listdir(tocs_path):
        os.remove(os.path.join(tocs_path, file))
    for file in os.listdir(c.tocs_path):
        os.remove(os.path.join(c.tocs_path, file))

    # read readme
    readme_file = os.path.join(root_path, 'README.md')
    readme_text = read_text(readme_file)
    readme_file = os.path.join(c.root_path, 'README.md')
    readme_text = utils.read_text(readme_file)

    # compile regex for identifying the building blocks in the readme
    regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
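Not part of the commit: a tiny demonstration of how the marker regex above carves a README into (before, autogenerated block, after). The miniature README string is made up.

import re

regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
readme_text = ('# Intro\n'
               '[comment]: # (start of autogenerated content, do not edit)\n'
               'old toc\n'
               '[comment]: # (end of autogenerated content)\n'
               'rest of readme\n')
start, generated, end = regex.findall(readme_text)[0]
# start == '# Intro\n', end == '\nrest of readme\n';
# only the middle group is regenerated before the three pieces are concatenated again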
@@ -55,7 +57,7 @@ def update_readme_and_tocs(infos):

    # create by category
    categories_text = []
    for keyword in recommended_keywords:
    for keyword in osg.recommended_keywords:
        infos_filtered = [x for x in infos if keyword in x['keywords']]
        title = keyword.capitalize()
        name = keyword.replace(' ', '-')
@@ -67,7 +69,7 @@ def update_readme_and_tocs(infos):

    # create by platform
    platforms_text = []
    for platform in valid_platforms:
    for platform in osg.valid_platforms:
        infos_filtered = [x for x in infos if platform in x.get('platform', [])]
        title = platform
        name = platform.lower()
@@ -80,7 +82,7 @@ def update_readme_and_tocs(infos):
    text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end

    # write to readme
    write_text(readme_file, text)
    utils.write_text(readme_file, text)


def create_toc(title, file, entries):
@@ -88,7 +90,7 @@ def create_toc(title, file, entries):

    """
    # file path
    toc_file = os.path.join(tocs_path, file)
    toc_file = os.path.join(c.tocs_path, file)

    # header line
    text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
@@ -105,7 +107,7 @@ def create_toc(title, file, entries):
    text += '\n'.join(rows)

    # write to toc file
    write_text(toc_file, text)
    utils.write_text(toc_file, text)


def check_validity_external_links():
@@ -127,7 +129,7 @@ def check_validity_external_links():
    ignored_urls = ('https://git.tukaani.org/xz.git',)  # trailing comma so this stays a tuple, not a plain string

    # iterate over all entries
    for _, entry_path, content in entry_iterator(games_path):
    for _, entry_path, content in osg.entry_iterator():

        # apply regex
        matches = regex.findall(content)
@@ -169,12 +171,12 @@ def check_template_leftovers():
    print('check for template leftovers')

    # load template and get all lines
    text = read_text(os.path.join(root_path, 'template.md'))
    text = utils.read_text(os.path.join(c.root_path, 'template.md'))
    text = text.split('\n')
    check_strings = [x for x in text if x and not x.startswith('##')]

    # iterate over all entries
    for _, entry_path, content in entry_iterator(games_path):
    for _, entry_path, content in osg.entry_iterator():

        for check_string in check_strings:
            if content.find(check_string) >= 0:
@@ -196,7 +198,7 @@ def fix_entries():
    regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in entry_iterator(games_path):
    for entry, entry_path, content in osg.entry_iterator():

        # match with regex
        matches = regex.findall(content)
@@ -211,7 +213,7 @@ def fix_entries():
        elements = list(set(elements))

        # get category out
        for keyword in recommended_keywords:
        for keyword in osg.recommended_keywords:
            if keyword in elements:
                elements.remove(keyword)
                category = keyword
@@ -243,13 +245,13 @@ def fix_entries():

        if new_content != content:
            # write again
            write_text(entry_path, new_content)
            utils.write_text(entry_path, new_content)

    # code dependencies
    regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in entry_iterator(games_path):
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

@@ -279,13 +281,13 @@ def fix_entries():

        if new_content != content:
            # write again
            write_text(entry_path, new_content)
            utils.write_text(entry_path, new_content)

    # build systems
    regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in entry_iterator(games_path):
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

@@ -311,7 +313,7 @@ def fix_entries():

        if new_content != content:
            # write again
            write_text(entry_path, new_content)
            utils.write_text(entry_path, new_content)


def update_statistics(infos):
@@ -324,7 +326,7 @@ def update_statistics(infos):
    print('update statistics')

    # start the page
    statistics_file = os.path.join(root_path, 'statistics.md')
    statistics_file = os.path.join(c.root_path, 'statistics.md')
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # total number
@@ -524,7 +526,7 @@ def update_statistics(infos):
    statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'

    # write to statistics file
    write_text(statistics_file, statistics)
    utils.write_text(statistics_file, statistics)


def export_json(infos):
@@ -585,9 +587,9 @@ def export_json(infos):
    db['data'] = entries

    # output
    json_path = os.path.join(games_path, os.path.pardir, 'docs', 'data.json')
    json_path = os.path.join(c.games_path, os.path.pardir, 'docs', 'data.json')
    text = json.dumps(db, indent=1)
    write_text(json_path, text)
    utils.write_text(json_path, text)


def git_repo(repo):
@@ -710,9 +712,9 @@ def export_primary_code_repositories_json():
    primary_repos[k] = sorted(set(v))

    # write them to tools/git
    json_path = os.path.join(root_path, 'tools', 'archives.json')
    json_path = os.path.join(c.root_path, 'tools', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    write_text(json_path, text)
    utils.write_text(json_path, text)


def export_git_code_repositories_json():
@@ -739,40 +741,31 @@ def export_git_code_repositories_json():
    urls.sort()

    # write them to tools/git
    json_path = os.path.join(root_path, 'tools', 'git_repositories.json')
    json_path = os.path.join(c.root_path, 'tools', 'git_repositories.json')
    text = json.dumps(urls, indent=1)
    write_text(json_path, text)
    utils.write_text(json_path, text)


def sort_text_file(file, name):
    """
    Reads a text file, splits it into lines, removes duplicates, sorts and writes back.
    """
    text = read_text(file)
    text = utils.read_text(file)
    text = text.split('\n')
    text = sorted(list(set(text)), key=str.casefold)
    print('{} contains {} items'.format(name, len(text)))
    text = '\n'.join(text)
    write_text(file, text)

def strip_url(url):
    for prefix in ('http://', 'https://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    for suffix in ('/', '.git'):
        if url.endswith(suffix):
            url = url[:-len(suffix)]
    return url
    utils.write_text(file, text)

def clean_backlog(stripped_game_urls):

    # read backlog and split
    file = os.path.join(root_path, 'tools', 'backlog.txt')
    text = read_text(file)
    file = os.path.join(c.root_path, 'tools', 'backlog.txt')
    text = utils.read_text(file)
    text = text.split('\n')

    # remove those that are in stripped_game_urls
    text = [x for x in text if strip_url(x) not in stripped_game_urls]
    text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]

    # remove duplicates and sort
    text = sorted(list(set(text)), key=str.casefold)
@@ -780,18 +773,14 @@ def clean_backlog(stripped_game_urls):

    # join and save again
    text = '\n'.join(text)
    write_text(file, text)
    utils.write_text(file, text)


if __name__ == "__main__":

    # paths
    root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
    games_path = os.path.join(root_path, 'games')
    tocs_path = os.path.join(games_path, 'tocs')

    # backlog
    game_urls = extract_links(games_path)
    stripped_game_urls = [strip_url(x) for x in game_urls]
    game_urls = osg.extract_links()
    stripped_game_urls = [utils.strip_url(x) for x in game_urls]
    clean_backlog(stripped_game_urls)


@@ -802,7 +791,7 @@ if __name__ == "__main__":
    fix_entries()

    # assemble info
    infos = assemble_infos(games_path)
    infos = osg.assemble_infos()

    # recount and write to readme and to tocs
    update_readme_and_tocs(infos)
@@ -823,5 +812,5 @@ if __name__ == "__main__":
    # check_validity_external_links()

    # sort backlog and rejected
    # sort_text_file(os.path.join(root_path, 'tools', 'backlog.txt'), 'backlog')
    sort_text_file(os.path.join(root_path, 'tools', 'rejected.txt'), 'rejected games list')
    # sort_text_file(os.path.join(c.root_path, 'tools', 'backlog.txt'), 'backlog')
    sort_text_file(os.path.join(c.root_path, 'tools', 'rejected.txt'), 'rejected games list')
@@ -229,9 +229,9 @@ if __name__ == "__main__":
        urls = osgc_entry['url']
        if type(urls) == str:
            urls = [urls]
        urls = [strip_url(url) for url in urls]
        our_urls = our_entry['home']
        our_urls = [x.replace('http://', '').replace('https://', '') for x in our_urls]
        urls = [x.replace('http://', '').replace('https://', '') for x in urls]
        our_urls = [strip_url(url) for url in our_urls]
        for url in urls:
            if url not in our_urls:
                p += ' home url {} missing\n'.format(url)
@@ -309,7 +309,7 @@ if __name__ == "__main__":

    # determine file name
    print('create new entry for {}'.format(osgc_name))
    file_name = derive_canonical_file_name(osgc_name)
    file_name = canonical_game_name(osgc_name) + '.md'
    target_file = os.path.join(games_path, file_name)
    if os.path.isfile(target_file):
        print('warning: file {} already exists, saving under a slightly different name'.format(file_name))
tools/utils/constants.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""
Paths, properties.
"""

import os

# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')

local_properties_file = os.path.join(root_path, 'local.properties')
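Not part of the commit: a sketch of how the new constants module is consumed; the aliasing follows the "from utils import constants as c" style used elsewhere in this commit.

# hypothetical snippet inside a script under tools/
from utils import constants as c

print(c.root_path)   # repository root, resolved relative to utils/constants.py
print(c.games_path)  # <root>/games
print(c.tocs_path)   # <root>/games/tocs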
@@ -3,8 +3,9 @@ Specific functions working on the games.
"""

import re
import os
from difflib import SequenceMatcher
from utils.utils import *
from utils import utils, constants as c

essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
@@ -19,38 +20,37 @@ def game_name_similarity(a, b):
    return SequenceMatcher(None, str.casefold(a), str.casefold(b)).ratio()


def entry_iterator(games_path):
def entry_iterator():
    """

    """

    # get all entries (ignore everything starting with underscore)
    entries = os.listdir(games_path)
    entries = os.listdir(c.games_path)

    # iterate over all entries
    for entry in entries:
        entry_path = os.path.join(games_path, entry)
        entry_path = os.path.join(c.games_path, entry)

        # ignore directories ("tocs" for example)
        if os.path.isdir(entry_path):
            continue

        # read entry
        content = read_text(entry_path)
        content = utils.read_text(entry_path)

        # yield
        yield entry, entry_path, content
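Not part of the commit: a usage sketch for the refactored entry_iterator, which now needs no games_path argument; the keyword check is just an example.

# hypothetical caller; the games directory is resolved via utils.constants
from utils import osg

for entry, entry_path, content in osg.entry_iterator():
    if '- Keywords:' not in content:
        print('{} has no keywords line'.format(entry))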


def derive_canonical_file_name(name):
def canonical_game_name(name):
    """
    Derives a canonical file name from a game name
    Derives a canonical game name from an actual game name (suitable for file names, ...)
    """
    name = regex_sanitize_name.sub('', name)
    name = regex_sanitize_name_space_eater.sub('_', name)
    name = name.replace('_-_', '-')
    name = name.casefold()
    name = name + '.md'
    return name

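Not part of the commit: a sketch of what canonical_game_name computes. The two sanitizing regexes live elsewhere in osg.py and are not shown in this diff; the definitions below are assumptions for illustration only.

import re

regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-]+")  # assumed: drop special characters
regex_sanitize_name_space_eater = re.compile(r" +")   # assumed: collapse runs of spaces

def canonical_game_name(name):
    name = regex_sanitize_name.sub('', name)
    name = regex_sanitize_name_space_eater.sub('_', name)
    name = name.replace('_-_', '-')
    name = name.casefold()
    return name

print(canonical_game_name('Me and My Shadow'))  # me_and_my_shadow
print(canonical_game_name('Jet Story'))         # jet_story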

@@ -193,7 +193,7 @@ def parse_entry(content):
    return info


def assemble_infos(games_path):
def assemble_infos():
    """
    Parses all entries and assembles interesting infos about them.
    """
@@ -204,7 +204,7 @@ def assemble_infos(games_path):
    infos = []

    # iterate over all entries
    for entry, _, content in entry_iterator(games_path):
    for entry, _, content in entry_iterator():

        # parse entry
        info = parse_entry(content)
@@ -213,12 +213,12 @@ def assemble_infos(games_path):
        info['file'] = entry

        # check canonical file name
        canonical_file_name = derive_canonical_file_name(info['name'])
        canonical_file_name = canonical_game_name(info['name']) + '.md'
        # we also allow -X with X = 2..9 as possible extension (because of duplicate canonical file names)
        if canonical_file_name != entry and canonical_file_name != entry[:-5] + '.md':
            print('file {} should be {}'.format(entry, canonical_file_name))
            source_file = os.path.join(games_path, entry)
            target_file = os.path.join(games_path, canonical_file_name)
            source_file = os.path.join(c.games_path, entry)
            target_file = os.path.join(c.games_path, canonical_file_name)
            if not os.path.isfile(target_file):
                pass
                # os.rename(source_file, target_file)
@@ -228,7 +228,8 @@ def assemble_infos(games_path):

    return infos


def extract_links(games_path):

def extract_links():
    """
    Parses all entries and extracts http(s) links from them
    """
@@ -238,7 +239,7 @@ def extract_links(games_path):

    # iterate over all entries
    urls = set()
    for _, _, content in entry_iterator(games_path):
    for _, _, content in entry_iterator():

        # apply regex
        matches = regex.findall(content)
tools/utils/osg_github.py (new file, 5 lines)
@@ -0,0 +1,5 @@
"""
Everything specific to the Github API (via PyGithub).
"""

from github import Github
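Not part of the commit: a minimal sketch of what the new PyGithub-based module can do; the repository name is just the standard example repo.

from github import Github

g = Github()  # anonymous access; pass a personal access token for higher rate limits
repo = g.get_repo('octocat/Hello-World')
print(repo.stargazers_count, repo.default_branch)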
@@ -9,7 +9,6 @@ import tarfile
import time
import urllib.request
import zipfile
import errno
import stat


@@ -266,4 +265,34 @@ def unzip(zip_file, destination_directory):
    # done creating files, now update dir dt
    for name in dirs:
        date_time = dirs[name]
        os.utime(name, (date_time, date_time))
        os.utime(name, (date_time, date_time))


def strip_url(url):
    for prefix in ('http://', 'https://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    for prefix in ('www.',):  # one-element tuple; a bare ('www') would iterate over single characters
        if url.startswith(prefix):
            url = url[len(prefix):]
    for suffix in ('/', '.git', '/en', '/index.html'):
        if url.endswith(suffix):
            url = url[:-len(suffix)]
    return url

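Not part of the commit: a few examples of what strip_url yields; this normalization is what makes the backlog/entry URL matching in the maintenance script work.

print(strip_url('https://www.example.com/'))        # example.com
print(strip_url('https://github.com/foo/bar.git'))  # github.com/foo/bar
print(strip_url('http://example.org/index.html'))   # example.org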

def load_properties(filepath, sep='=', comment_char='#'):
    """
    Reads the file as a Java-style properties file.
    """
    properties = {}
    with open(filepath, "rt") as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith(comment_char):  # skip blank lines and comments
                line = line.split(sep)
                assert len(line) == 2
                key = line[0].strip()
                value = line[1].strip()
                properties[key] = value
    return properties
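Not part of the commit: a minimal usage sketch for load_properties; the file name and its contents are made up.

# contents of local.properties (hypothetical):
#   # private settings
#   github.token = abc123
props = load_properties('local.properties')
print(props['github.token'])  # abc123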