further improvement of maintenance scripts

Trilarion
2020-09-09 13:01:44 +02:00
parent c985780dc2
commit 60f9523906
77 changed files with 424 additions and 478 deletions

View File

@@ -566,6 +566,7 @@
"https://github.com/dmecke/OpenSoccerStar.git",
"https://github.com/dogballs/cattle-bity.git",
"https://github.com/dorkster/freeblocks.git",
"https://github.com/dougmencken/HeadOverHeels.git",
"https://github.com/doxygen/doxygen.git",
"https://github.com/dreamchess/dreamchess.git",
"https://github.com/dsallen7/ransack-python.git",
@@ -666,6 +667,7 @@
"https://github.com/ihofmann/open-websoccer.git",
"https://github.com/indyjo/Thunder-Lightning.git",
"https://github.com/inexorgame/inexor-core.git",
"https://github.com/inexorgame/vulkan-renderer.git",
"https://github.com/infidel-/cult.git",
"https://github.com/inolen/quakejs.git",
"https://github.com/instead-hub/instead.git",

View File

@@ -1,298 +0,0 @@
import json
import textwrap
import os
import re
import utils.constants
from utils import constants as c, utils
def export_json(infos):
"""
Parses all entries, collects interesting info and stores it in a json file suitable for displaying
with a dynamic table in a browser.
"""
print('export to json for web display')
# make database out of it
db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}
entries = []
for info in infos:
# game & description
entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['Name'], info['home'][0],
r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' +
info['file']),
textwrap.shorten(info['description'], width=60, placeholder='..')]
# download
field = 'download'
if field in info and info[field]:
entry.append('<a href="{}">Link</a>'.format(info[field][0]))
else:
entry.append('')
# state (field state is essential)
entry.append('{} / {}'.format(info['state'][0],
'inactive since {}'.format(info['inactive']) if 'inactive' in info else 'active'))
# keywords
field = 'keywords'
if field in info and info[field]:
entry.append(', '.join(info[field]))
else:
entry.append('')
# source
text = []
field = 'code repository'
if field in info and info[field]:
text.append('<a href="{}">Source</a>'.format(info[field][0]))
field = 'code language'
if field in info and info[field]:
text.append(', '.join(info[field]))
field = 'code license'
if field in info and info[field]:
text.append(info[field][0])
entry.append(' - '.join(text))
# append to entries
entries.append(entry)
# sort entries by game name
entries.sort(key=lambda x: str.casefold(x[0]))
db['data'] = entries
# output
json_path = os.path.join(c.entries_path, os.path.pardir, 'docs', 'data.json')
text = json.dumps(db, indent=1)
utils.write_text(json_path, text)
def git_repo(repo):
"""
Tests if a repo is a git repo, then returns the repo url.
"""
# generic https://*.git or http://*.git urls ending in .git
if (repo.startswith('https://') or repo.startswith('http://')) and repo.endswith('.git'):
return repo
# for all others we just check if they start with the typical urls of git services
services = ['https://git.tuxfamily.org/', 'http://git.pond.sub.org/', 'https://gitorious.org/',
'https://git.code.sf.net/p/']
for service in services:
if repo.startswith(service):
return repo
if repo.startswith('git://'):
return repo
# the rest is ignored
return None
def svn_repo(repo):
"""
"""
if repo.startswith('https://svn.code.sf.net/p/'):
return repo
if repo.startswith('http://svn.uktrainsim.com/svn/'):
return repo
if repo == 'https://rpg.hamsterrepublic.com/source/wip':
return repo
if repo.startswith('http://svn.savannah.gnu.org/svn/'):
return repo
if repo.startswith('svn://'):
return repo
if repo.startswith('https://svn.icculus.org/') or repo.startswith('http://svn.icculus.org/'):
return repo
# not svn
return None
def hg_repo(repo):
"""
"""
if repo.startswith('https://bitbucket.org/') and not repo.endswith('.git'):
return repo
if repo.startswith('http://hg.'):
return repo
# not hg
return None
def export_primary_code_repositories_json(infos):
"""
"""
print('export to json for local repository update')
primary_repos = {'git': [], 'svn': [], 'hg': []}
unconsumed_entries = []
# for every entry filter those that are known git repositories (add additional repositories)
field = 'code repository-raw'
for info in infos:
# if field 'Code repository' is available
if field in info:
consumed = False
repos = info[field]
if repos:
# split at comma
repos = repos.split(',')
# keep the first and all others containing "(+)"
additional_repos = [x for x in repos[1:] if "(+)" in x]
repos = repos[0:1]
repos.extend(additional_repos)
for repo in repos:
# remove parentheses and strip whitespace
repo = re.sub(r'\([^)]*\)', '', repo)
repo = repo.strip()
url = git_repo(repo)
if url:
primary_repos['git'].append(url)
consumed = True
continue
url = svn_repo(repo)
if url:
primary_repos['svn'].append(url)
consumed = True
continue
url = hg_repo(repo)
if url:
primary_repos['hg'].append(url)
consumed = True
continue
if not consumed:
unconsumed_entries.append([info['Name'], info[field]])
# print output
if 'code repository' in info:
print('Entry "{}" unconsumed repo: {}'.format(info['Name'], info[field]))
# sort them alphabetically (and remove duplicates)
for k, v in primary_repos.items():
primary_repos[k] = sorted(set(v))
# statistics of gits
git_repos = primary_repos['git']
print('{} Git repositories'.format(len(git_repos)))
for domain in (
'repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily',
'github.com',
'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'):
print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain))
# write them to code/git
json_path = os.path.join(c.root_path, 'code', 'archives.json')
text = json.dumps(primary_repos, indent=1)
utils.write_text(json_path, text)
def export_git_code_repositories_json(infos):
"""
Exports the first code repository of every entry to a json file if it is a git repository.
"""
urls = []
field = 'code repository'
# for every entry, check whether the first listed repository is a git repository
for info in infos:
# if field 'Code repository' is available
if field in info:
repos = info[field]
if repos:
# take the first
repo = repos[0]
url = git_repo(repo)
if url:
urls.append(url)
# sort them alphabetically
urls.sort()
# write them to code/git
json_path = os.path.join(c.root_path, 'code', 'git_repositories.json')
text = json.dumps(urls, indent=1)
utils.write_text(json_path, text)
def check_validity_backlog():
import requests
# read backlog and split
file = os.path.join(c.root_path, 'code', 'backlog.txt')
text = utils.read_text(file)
urls = text.split('\n')
urls = [x.split(' ')[0] for x in urls]
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
for url in urls:
try:
r = requests.get(url, headers=headers, timeout=5)
except Exception as e:
print('{} gave error: {}'.format(url, e))
else:
if r.status_code != requests.codes.ok:
print('{} returned status code: {}'.format(url, r.status_code))
if r.is_redirect or r.history:
print('{} redirected to {}, {}'.format(url, r.url, r.history))
def check_code_dependencies(infos):
"""
"""
# get all names of frameworks and libraries, also using constants.code_dependencies_aliases
valid_dependencies = list(utils.constants.general_code_dependencies_without_entry.keys())
for info in infos:
if any((x in ('framework', 'library', 'game engine') for x in info['keywords'])):
name = info['Name']
if name in utils.constants.code_dependencies_aliases:
valid_dependencies.extend(utils.constants.code_dependencies_aliases[name])
else:
valid_dependencies.append(name)
# get all referenced code dependencies
referenced_dependencies = {}
for info in infos:
deps = info.get('code dependencies', [])
for dependency in deps:
if dependency in referenced_dependencies:
referenced_dependencies[dependency] += 1
else:
referenced_dependencies[dependency] = 1
# delete those that are valid dependencies
referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies]
# sort by number
referenced_dependencies.sort(key=lambda x: x[1], reverse=True)
# print out
print('Code dependencies not included as entry')
for dep in referenced_dependencies:
print('{} ({})'.format(*dep))

View File

@@ -1,17 +1,40 @@
"""
Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
each category as well as creating a statistics file.
Counts the number of records in each sub-folder and updates the overview.
Sorts the entries in the contents files of each sub-folder alphabetically.
"""
import os
import re
import datetime
import json
import textwrap
from utils import osg, osg_ui, utils, constants as c
import requests
def check_validity_backlog():
# read backlog and split
file = os.path.join(c.root_path, 'code', 'backlog.txt')
text = utils.read_text(file)
urls = text.split('\n')
urls = [x.split(' ')[0] for x in urls]
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
for url in urls:
try:
r = requests.get(url, headers=headers, timeout=5)
except Exception as e:
print('{} gave error: {}'.format(url, e))
else:
if r.status_code != requests.codes.ok:
print('{} returned status code: {}'.format(url, r.status_code))
if r.is_redirect or r.history:
print('{} redirected to {}, {}'.format(url, r.url, r.history))
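# Illustration (not part of the original commit): check_validity_backlog() only uses the first
# space-separated token of each line in code/backlog.txt as the URL, so lines may carry free-form
# notes after the URL. The lines below are made-up examples of that format:
#
#   https://example.org/some-game.html  found via forum post, license unclear
#   https://github.com/someone/some-game.git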
def create_toc(title, file, entries):
"""
@@ -89,6 +112,78 @@ class EntriesMaintainer:
print('{}: found {}'.format(os.path.basename(entry_path), check_string))
print('checked for template leftovers')
def check_inconsistencies(self):
"""
:return:
"""
if not self.entries:
print('entries not yet loaded')
return
# get all keywords and print similar keywords
keywords = []
for entry in self.entries:
keywords.extend(entry['Keywords'])
# flag entries that use the Unicode hyphen (U+2010) variant of 'first-person'
if b'first\xe2\x80\x90person'.decode() in entry['Keywords']:
print(entry['File'])
keywords = [x.value for x in keywords]
# reduce those starting with "multiplayer"
keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
# check unique keywords
unique_keywords = list(set(keywords))
unique_keywords_counts = [keywords.count(l) for l in unique_keywords]
for index, name in enumerate(unique_keywords):
for other_index in range(index+1, len(unique_keywords)):
other_name = unique_keywords[other_index]
if osg.name_similarity(name, other_name) > 0.8:
print(' Keywords {} ({}) - {} ({}) are similar'.format(name, unique_keywords_counts[index], other_name, unique_keywords_counts[other_index]))
# get all names of frameworks and libraries, also using c.code_dependencies_aliases
valid_dependencies = list(c.general_code_dependencies_without_entry.keys())
for entry in self.entries:
if any((x in ('framework', 'library', 'game engine') for x in entry['Keywords'])):
name = entry['Title']
if name in c.code_dependencies_aliases:
valid_dependencies.extend(c.code_dependencies_aliases[name])
else:
valid_dependencies.append(name)
# get all referenced code dependencies
referenced_dependencies = {}
for entry in self.entries:
deps = entry.get('Code dependencies', [])
for dependency in deps:
dependency = dependency.value
if dependency in referenced_dependencies:
referenced_dependencies[dependency] += 1
else:
referenced_dependencies[dependency] = 1
# delete those that are valid dependencies
referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies]
# sort by number
referenced_dependencies.sort(key=lambda x: x[1], reverse=True)
# print out
print('Code dependencies not included as entry')
for dep in referenced_dependencies:
print('{} ({})'.format(*dep))
# if there is the "Play" field, it should have "Web" as Platform
for entry in self.entries:
name = entry['File']
if 'Play' in entry:
if 'Platform' not in entry:
print('Entry "{}" has "Play" field but not "Platform" field, add it with "Web"'.format(name))
elif 'Web' not in entry['Platform']:
print('Entry "{}" has "Play" field but not "Web" in "Platform" field'.format(name))
# javascript/typescript as language but not web as platform?
# if there is a @see-download there should be download fields...
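# Note (illustration only): osg.name_similarity is defined elsewhere and not shown in this diff;
# a standard-library ratio such as difflib.SequenceMatcher behaves comparably for the 0.8
# threshold used above, e.g.
#   from difflib import SequenceMatcher
#   SequenceMatcher(None, 'first-person', 'first person').ratio()  # ~0.92, would be reported as similar
#   SequenceMatcher(None, 'strategy', 'puzzle').ratio()            # ~0.14, would not be reported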
def clean_rejected(self):
"""
@@ -548,10 +643,171 @@ class EntriesMaintainer:
print('statistics updated')
def update_html(self):
"""
Parses all entries, collects interesting info and stores it in a json file suitable for displaying
with a dynamic table in a browser.
"""
if not self.entries:
print('entries not yet loaded')
return
# make database out of it
db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}
entries = []
for info in self.entries:
# game & description
entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['Title'], info['Home'][0],
r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' +
info['File']),
textwrap.shorten(info.get('Note', ''), width=60, placeholder='..')]
# download
field = 'Download'
if field in info and info[field]:
entry.append('<a href="{}">Link</a>'.format(info[field][0]))
else:
entry.append('')
# state (field state is essential)
entry.append('{} / {}'.format(info['State'][0],
'inactive since {}'.format(osg.extract_inactive_year(info)) if osg.is_inactive(info) else 'active'))
# keywords
keywords = info['Keywords']
keywords = [x.value for x in keywords]
entry.append(', '.join(keywords))
# source
text = []
field = 'Code repository'
if field in info and info[field]:
text.append('<a href="{}">Source</a>'.format(info[field][0].value))
languages = info['Code language']
languages = [x.value for x in languages]
text.append(', '.join(languages))
licenses = info['Code license']
licenses = [x.value for x in licenses]
text.append(', '.join(licenses))
entry.append(' - '.join(text))
# append to entries
entries.append(entry)
# sort entries by game name
entries.sort(key=lambda x: str.casefold(x[0]))
db['data'] = entries
# output
text = json.dumps(db, indent=1)
utils.write_text(c.json_db_file, text)
print('HTML updated')
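# Shape of the generated docs/data.json (sketch only; the row shown is made up, real rows are
# built from the entries above):
#   {
#    "headings": ["Game", "Description", "Download", "State", "Keywords", "Source"],
#    "data": [
#     ["SomeGame (<a href=...>home</a>, <a href=...>entry</a>)", "Short description..",
#      "<a href=...>Link</a>", "mature / active", "strategy, turn-based",
#      "<a href=...>Source</a> - C++ - GPL-3.0"]
#    ]
#   }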
def update_repos(self):
"""
Exports the primary code repositories (git, svn, hg) of all entries to a json file for the local repository update.
"""
if not self.entries:
print('entries not yet loaded')
return
primary_repos = {'git': [], 'svn': [], 'hg': []}
unconsumed_entries = []
# for every entry filter those that are known git repositories (add additional repositories)
for entry in self.entries:
repos = entry['Code repository']
repos = [x.value for x in repos]
# keep the first and all others containing @add
if not repos:
continue
repos = [repos[0]] + [x for x in repos[1:] if "@add" in x]
for repo in repos:
consumed = False
repo = repo.split(' ')[0].strip()
url = osg.git_repo(repo)
if url:
primary_repos['git'].append(url)
consumed = True
continue
url = osg.svn_repo(repo)
if url:
primary_repos['svn'].append(url)
consumed = True
continue
url = osg.hg_repo(repo)
if url:
primary_repos['hg'].append(url)
consumed = True
continue
if not consumed:
unconsumed_entries.append([entry['Title'], repo])
print('Entry "{}" unconsumed repo: {}'.format(entry['File'], repo))
# sort them alphabetically (and remove duplicates)
for k, v in primary_repos.items():
primary_repos[k] = sorted(set(v))
# statistics of gits
git_repos = primary_repos['git']
print('{} Git repositories'.format(len(git_repos)))
for domain in (
'repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily',
'github.com',
'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'):
print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain))
# write them to code/git
json_path = os.path.join(c.root_path, 'code', 'archives.json')
text = json.dumps(primary_repos, indent=1)
utils.write_text(json_path, text)
print('Repositories updated')
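# Shape of the generated code/archives.json (sketch; only one URL per list is shown and the svn/hg
# URLs are made up):
#   {
#    "git": ["https://github.com/dreamchess/dreamchess.git", "..."],
#    "svn": ["https://svn.code.sf.net/p/some-project/code/", "..."],
#    "hg": ["https://bitbucket.org/some-user/some-game", "..."]
#   }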
def collect_git_repos(self):
"""
For every entry, collects all git code repositories and writes them to code/git_repositories.json.
:return:
"""
git_repos = []
for entry in self.entries:
repos = entry['Code repository']
repos = [x.value for x in repos]
for repo in repos:
repo = repo.split(' ')[0].strip()
url = osg.git_repo(repo)
if url:
git_repos.append(repo)
# sort them alphabetically (and remove duplicates)
git_repos = sorted(list(set(git_repos)), key=str.casefold)
# write them to code/git
json_path = os.path.join(c.root_path, 'code', 'git_repositories.json')
text = json.dumps(git_repos, indent=1)
utils.write_text(json_path, text)
def special_ops(self):
"""
For special operations that are one-time and may change.
:return:
"""
if not self.entries:
print('entries not yet loaded')
return
# remove all downloads that only have a single entry with @see-home (this is the default anyway)
field = 'Download'
for entry in self.entries:
if field in entry:
content = entry[field]
if len(content) == 1 and content[0].value == '@see-home' and not content[0].comment:
del entry[field]
print('special ops finished')
def complete_run(self):
pass
@@ -566,6 +822,7 @@ if __name__ == "__main__":
'Write entries': m.write_entries,
'Check template leftovers': m.check_template_leftovers,
'Check external links': m.check_external_links,
'Check inconsistencies': m.check_inconsistencies,
'Check rejected entries': m.clean_rejected,
'Check external links (takes quite long)': m.check_external_links,
'Clean backlog': m.clean_backlog,
@@ -573,6 +830,7 @@ if __name__ == "__main__":
'Update statistics': m.update_statistics,
'Update HTML': m.update_html,
'Update repository list': m.update_repos,
'Special': m.special_ops,
'Complete run': m.complete_run
}

View File

@@ -17,6 +17,7 @@ developer_file = os.path.join(root_path, 'developers.md')
backlog_file = os.path.join(code_path, 'backlog.txt')
rejected_file = os.path.join(code_path, 'rejected.txt')
statistics_file = os.path.join(root_path, 'statistics.md')
json_db_file = os.path.join(root_path, 'docs', 'data.json')
# local config
local_config_file = os.path.join(root_path, 'local-config.ini')

View File

@@ -504,4 +504,55 @@ def all_urls(entries):
subvalue = subvalue.strip()
if is_url(subvalue):
urls[subvalue] = urls.get(subvalue, []) + [file]
return urls
def git_repo(repo):
"""
Tests if a repo URL is a git repo, then returns the repo url.
"""
# everything that starts with 'git://'
if repo.startswith('git://'):
return repo
# generic https://*.git or http://*.git urls ending in .git
if (repo.startswith('https://') or repo.startswith('http://')) and repo.endswith('.git'):
return repo
# for all others we just check if they start with the typical urls of git services
services = ['https://git.tuxfamily.org/', 'http://git.pond.sub.org/', 'https://gitorious.org/',
'https://git.code.sf.net/p/']
if any(repo.startswith(service) for service in services):
return repo
# the rest is not recognized as a git url
return None
def svn_repo(repo):
"""
Tests if a repo URL is a svn repo, then returns the repo url.
"""
# we can just go for known providers of svn
services = ('svn://', 'https://svn.code.sf.net/p/', 'http://svn.savannah.gnu.org/svn/', 'https://svn.icculus.org/', 'http://svn.icculus.org/', 'http://svn.uktrainsim.com/svn/', 'https://rpg.hamsterrepublic.com/source/wip')
if any(repo.startswith(service) for service in services):
return repo
# not svn
return None
def hg_repo(repo):
"""
Tests if a repo URL is a hg repo, then returns the repo url.
"""
if repo.startswith('https://bitbucket.org/') and not repo.endswith('.git'):
return repo
if repo.startswith('http://hg.'):
return repo
# not hg
return None
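# Minimal usage sketch (not part of the commit): classify a few repository URLs with the three
# helpers above. All URLs except the dreamchess one are made up for illustration.
if __name__ == '__main__':
    for candidate in ('https://github.com/dreamchess/dreamchess.git',  # git: generic https://*.git
                      'https://svn.code.sf.net/p/some-project/code/',  # svn: known provider prefix
                      'https://bitbucket.org/some-user/some-game',  # hg: bitbucket without .git
                      'https://example.org/downloads/game.zip'):  # none of the three
        kind = 'git' if git_repo(candidate) else 'svn' if svn_repo(candidate) else 'hg' if hg_repo(candidate) else None
        print('{}: {}'.format(candidate, kind))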