maintenance scripts improved

Trilarion 2020-09-08 22:37:05 +02:00
parent 82af77b017
commit c985780dc2
6 changed files with 590 additions and 533 deletions


@@ -1,497 +1,10 @@
"""
Runs a series of maintenance operations on the collection of entry files, updating the table of contents files for
each category as well as creating a statistics file.
Counts the number of records in each sub-folder and updates the overview.
Sorts the entries in the contents files of each sub-folder alphabetically.
This script runs with Python 3; it could probably also run with Python 2 with some minor tweaks.
"""
import requests
import datetime
import json
import textwrap
import os
import re
import utils.constants
from utils import constants as c, utils, osg
def extract_links():
"""
Parses all entries and extracts http(s) links from them
"""
# regex for finding urls (can be in <> or in ]() or after a whitespace)
regex = re.compile(r"[\s\n]<(http.+?)>|]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n,]")
# iterate over all entries
urls = set()
for _, _, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
# for each match
for match in matches:
# for each possible clause
for url in match:
# if the group matched something
if url:
urls.add(url)
urls = sorted(list(urls), key=str.casefold)
return urls
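A quick sanity check for this regex (sample text and URLs are made up for illustration) shows how findall returns one 3-tuple per match with exactly one non-empty group, which is why the loop above iterates over the clauses of each match:

import re

regex = re.compile(r"[\s\n]<(http.+?)>|]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n,]")
sample = " <http://example.org/a> see [home](http://example.org/b) or http://example.org/c \n"
for match in regex.findall(sample):
    print([url for url in match if url])
# prints ['http://example.org/a'], then ['http://example.org/b'], then ['http://example.org/c']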
def check_validity_external_links():
"""
Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
from time to time.
"""
# regex for finding urls (can be in <> or in ]() or after a whitespace)
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
# ignore the following patterns (they give false positives here)
ignored_urls = ('https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/', 'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/', 'http://wiki.srb2.org/')
# some do redirect, but we nevertheless want the original URL in the database
redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download')
# extract all links from entries
import urllib3
urllib3.disable_warnings() # otherwise we cannot verify those with SSL errors without getting warnings
urls = {}
for entry, _, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
# for each match
for match in matches:
for url in match:
if url and not any((url.startswith(x) for x in ignored_urls)):
# ignore bzr.sourceforge, no web address found
if 'bzr.sourceforge.net/bzrroot/' in url:
continue
# add "/" at the end
if any((url.startswith(x) for x in ('https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/', 'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))):
url += '/'
if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
url = url[:-4] + '/commits/'
if url.startswith('https://svn.code.sf.net/p/'):
url = 'http' + url[5:] + '/'
if url.startswith('http://cvs.savannah.nongnu.org:/sources/'):
url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/'
if url.startswith('http://cvs.savannah.gnu.org:/sources/'):
url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/'
# generally ".git" at the end is not working well, except sometimes
if url.endswith('.git') and not any((url.startswith(x) for x in ('https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))):
url = url[:-4]
if url in urls:
urls[url].add(entry)
else:
urls[url] = {entry}
print('found {} unique links'.format(len(urls)))
print("start checking external links (can take a while)")
# now iterate over all urls
for url, names in urls.items():
names = list(names) # was a set
if len(names) == 1:
names = names[0]
try:
verify = True
# some have an expired certificate but otherwise still work
if any((url.startswith(x) for x in ('https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/', 'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/', 'https://www.opmon-game.ga/'))):
verify = False
r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify)
if r.status_code == 405: # head method not supported, try get
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify)
# check for bad status
if r.status_code != requests.codes.ok:
print('{}: {} - {}'.format(names, url, r.status_code))
# check for redirect
if r.history and url not in redirect_okay:
# only / added or http->https sometimes
redirected_url = r.url
if redirected_url == url + '/':
output = '{}: {} -> {} - redirect "/" at end '
elif redirected_url == 'https' + url[4:]:
output = '{}: {} -> {} - redirect "https" at start'
else:
output = '{}: {} -> {} - redirect '
print(output.format(names, url, redirected_url))
except Exception as e:
error_name = type(e).__name__
if error_name == 'SSLError' and any((url.startswith(x) for x in ('https://gitorious.org/', 'https://www.freedroid.org/download/'))):
continue # even though verify is False, these errors still get through
print('{}: {} - exception {}'.format(names, url, error_name))
def fix_entries():
"""
Fixes the keywords, code dependencies, build systems etc. entries, mostly by automatically sorting them.
"""
keyword_synonyms = {'RTS': ('real time', 'strategy'), 'realtime': 'real time'}
# TODO also sort other fields, only read once and then do all, move to separate file
# example Javascript to JavaScript and then add whenever the known languages check hits
print('fix entries')
# keywords
regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
if len(matches) != 1:
raise RuntimeError('Could not find keywords in entry "{}"'.format(entry))
match = matches[0]
# get elements out, split, strip, delete duplicates
elements = match[1].split(',')
elements = [x.strip() for x in elements]
elements = list(set(elements))
# get category out
for keyword in utils.constants.recommended_keywords:
if keyword in elements:
elements.remove(keyword)
category = keyword
break
# special treatments here
elements = [x if x != 'TBS' and x != 'TB' else 'turn based' for x in elements]
elements = [x if x != 'RTS' else 'real time' for x in elements]
elements = [x if x != 'MMO' else 'massive multiplayer online' for x in elements]
elements = [x if x != 'SP' else 'singleplayer' for x in elements]
elements = [x if x != 'MP' else 'multiplayer' for x in elements]
elements = [x if x != 'engine' else 'game engine' for x in elements]
elements = [x if x != 'rpg' else 'role playing' for x in elements]
elements = [x if x != 'turn based' else 'turn-based' for x in elements]
for keyword in ('browser', 'misc', 'tools'):
if keyword in elements:
elements.remove(keyword)
# sort
elements.sort(key=str.casefold)
# add category
elements.insert(0, category)
keywords = '- Keywords: {}'.format(', '.join(elements))
new_content = match[0] + keywords + match[2]
if new_content != content:
# write again
utils.write_text(entry_path, new_content)
# code dependencies
regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
if not matches:
# no code dependencies given
continue
match = matches[0]
# get code dependencies out, split, strip, delete duplicates
elements = match[1].split(',')
elements = [x.strip() for x in elements]
elements = list(set(elements))
# special treatments here
elements = [x if x != 'Blender' else 'Blender game engine' for x in elements]
elements = [x if x.lower() != 'libgdx' else 'libGDX' for x in elements]
elements = [x if x != 'SDL 2' else 'SDL2' for x in elements]
elements = [x if x.lower() != "ren'py" else "Ren'Py" for x in elements]
# sort
elements.sort(key=str.casefold)
code_dependencies = '- Code dependencies: {}'.format(', '.join(elements))
new_content = match[0] + code_dependencies + match[2]
if new_content != content:
# write again
utils.write_text(entry_path, new_content)
# build systems
regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
if not matches:
# no build system given
continue
match = matches[0]
# get build systems out, split, strip, delete duplicates
elements = match[1].split(',')
elements = [x.strip() for x in elements]
elements = list(set(elements))
# special treatments here
# sort
elements.sort(key=str.casefold)
build_system = '- Build system: {}'.format(', '.join(elements))
new_content = match[0] + build_system + match[2]
if new_content != content:
# write again
utils.write_text(entry_path, new_content)
def update_statistics(infos):
"""
Generates the statistics page.
Should be done every time the entries change.
"""
print('update statistics')
# start the page
statistics_file = os.path.join(c.root_path, 'statistics.md')
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
number_entries = len(infos)
rel = lambda x: x / number_entries * 100 # conversion to percent
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries,
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# State (beta, mature, inactive)
statistics += '## State\n\n'
number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
number_inactive = sum(1 for x in infos if 'inactive' in x)
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive,
rel(number_inactive))
if number_inactive > 0:
entries_inactive = [(x['Name'], x['inactive']) for x in infos if 'inactive' in x]
entries_inactive.sort(key=lambda x: str.casefold(x[0])) # first sort by name
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'
# Language
statistics += '## Code Languages\n\n'
field = 'code language'
# those without language tag
# TODO the language tag is now an essential field, this cannot happen anymore
# number_no_language = sum(1 for x in infos if field not in x)
# if number_no_language > 0:
# statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
# entries_no_language = [x['Name'] for x in infos if field not in x]
# entries_no_language.sort()
# statistics += ', '.join(entries_no_language) + '\n\n'
# get all languages together
languages = []
for info in infos:
if field in info:
languages.extend(info[field])
unique_languages = set(languages)
unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'
# Licenses
statistics += '## Code licenses\n\n'
field = 'code license'
# those without license
number_no_license = sum(1 for x in infos if field not in x)
if number_no_license > 0:
statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
entries_no_license = [x['Name'] for x in infos if field not in x]
entries_no_license.sort()
statistics += ', '.join(entries_no_license) + '\n\n'
# get all licenses together
licenses = []
for info in infos:
if field in info:
licenses.extend(info[field])
unique_licenses = set(licenses)
unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'
# Keywords
statistics += '## Keywords\n\n'
field = 'keywords'
# get all keywords together
keywords = []
for info in infos:
if field in info:
keywords.extend(info[field])
# reduce those starting with "inspired by"
keywords = [x if not x.startswith('inspired by') else 'inspired' for x in keywords]
# reduce those starting with "multiplayer"
keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
unique_keywords = set(keywords)
unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords]
statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'
# no download or play field
statistics += '## Entries without download or play fields\n\n'
entries = []
for info in infos:
if 'download' not in info and 'play' not in info:
entries.append(info['Name'])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
statistics += '## Entries with a code repository not on a popular site\n\n'
entries = []
field = 'code repository'
for info in infos:
if field in info:
popular = False
for repo in info[field]:
for popular_repo in popular_code_repositories:
if popular_repo in repo:
popular = True
break
# if there were repositories, but none popular, add them to the list
if not popular:
entries.append(info['Name'])
# print(info[field])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# Code dependencies
statistics += '## Code dependencies\n\n'
field = 'code dependencies'
# get all code dependencies together
code_dependencies = []
entries_with_code_dependency = 0
for info in infos:
if field in info:
code_dependencies.extend(info[field])
entries_with_code_dependency += 1
statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency,
rel(entries_with_code_dependency))
unique_code_dependencies = set(code_dependencies)
unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in
unique_code_dependencies]
unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies]
statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'
# Build systems:
statistics += '## Build systems\n\n'
field = 'build system'
# get all build systems together
build_systems = []
for info in infos:
if field in info:
build_systems.extend(info[field])
statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(len(build_systems)))
unique_build_systems = set(build_systems)
unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems]
statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(
unique_build_systems) + '\n\n'
# C, C++ projects without build system information
c_cpp_project_without_build_system = []
for info in infos:
if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
c_cpp_project_without_build_system.append(info['Name'])
c_cpp_project_without_build_system.sort(key=str.casefold)
statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'
# C, C++ projects with build system information but without CMake as build system
c_cpp_project_not_cmake = []
for info in infos:
if field in info and 'CMake' not in info[field] and (
'C' in info['code language'] or 'C++' in info['code language']):
c_cpp_project_not_cmake.append(info['Name'])
c_cpp_project_not_cmake.sort(key=str.casefold)
statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'
# Platform
statistics += '## Platform\n\n'
field = 'platform'
# get all platforms together
platforms = []
for info in infos:
if field in info:
platforms.extend(info[field])
statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))
unique_platforms = set(platforms)
unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms]
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
# write to statistics file
utils.write_text(statistics_file, statistics)
def export_json(infos):
@@ -720,18 +233,6 @@ def export_git_code_repositories_json():
utils.write_text(json_path, text)
def sort_text_file(file, name):
"""
Reads a text file, splits in lines, removes duplicates, sort, writes back.
"""
text = utils.read_text(file)
text = text.split('\n')
text = sorted(list(set(text)), key=str.casefold)
print('{} contains {} items'.format(name, len(text)))
text = '\n'.join(text)
utils.write_text(file, text)
def check_validity_backlog():
import requests
@@ -792,36 +293,6 @@ def check_code_dependencies(infos):
print('{} ({})'.format(*dep))
if __name__ == "__main__":
check_validity_backlog()
# fix entries
fix_entries()
# recount and write to readme and to tocs
update_readme_and_tocs(infos)
# generate report
update_statistics(infos)
# update database for html table
export_json(infos)
# collect list of primary code repositories
export_primary_code_repositories_json(infos)
# check code dependencies
check_code_dependencies(infos)
# collect list of git code repositories (only one per project) for git_statistics script
export_git_code_repositories_json()
# check external links (only rarely)
# check_validity_external_links()
# sort rejected games list file
sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')

581 code/maintenance_entries.py Normal file

@@ -0,0 +1,581 @@
"""
Runs a series of maintenance operations on the collection of entry files, updating the table of contents files for
each category as well as creating a statistics file.
Counts the number of records in each sub-folder and updates the overview.
Sorts the entries in the contents files of each sub-folder alphabetically.
"""
import os
import re
import datetime
from utils import osg, osg_ui, utils, constants as c
import requests
def create_toc(title, file, entries):
"""
"""
# file path
toc_file = os.path.join(c.tocs_path, file)
# header line
text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
# assemble rows
rows = []
for entry in entries:
info = entry['Code language'] + entry['Code license'] + entry['State']
info = [x.value for x in info]
rows.append('- **[{}]({})** ({})'.format(entry['Title'], '../' + entry['File'], ', '.join(info)))
# sort rows (by title)
rows.sort(key=str.casefold)
# add to text
text += '\n'.join(rows)
# write to toc file
utils.write_text(toc_file, text)
print('Readme and TOCs updated')
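For illustration, a single row produced by the format string above would look like this (title, file and info values are hypothetical):

- **[Some Game](../some_game.md)** (C++, GPL-3.0, mature)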
def sort_text_file(file, name):
"""
Reads a text file, splits in lines, removes duplicates, sort, writes back.
"""
text = utils.read_text(file)
text = text.split('\n')
text = sorted(list(set(text)), key=str.casefold)
print('{} contains {} items'.format(name, len(text)))
text = '\n'.join(text)
utils.write_text(file, text)
class EntriesMaintainer:
def __init__(self):
self.entries = None
def read_entries(self):
self.entries = osg.read_entries()
print('{} entries read'.format(len(self.entries)))
def write_entries(self):
if not self.entries:
print('entries not yet loaded')
return
osg.write_entries(self.entries)
print('entries written')
def check_template_leftovers(self):
"""
Checks for template leftovers.
Should be run only occasionally.
"""
# load template and get all lines
text = utils.read_text(os.path.join(c.root_path, 'template.md'))
text = text.split('\n')
check_strings = [x for x in text if x and not x.startswith('##')]
# iterate over all entries
for _, entry_path, content in osg.entry_iterator():
for check_string in check_strings:
if content.find(check_string) >= 0:
print('{}: found {}'.format(os.path.basename(entry_path), check_string))
print('checked for template leftovers')
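For example, if template.md contained a placeholder line such as 'Add a description here' (hypothetical), every entry file still carrying that exact string would be reported together with its file name.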
def clean_rejected(self):
"""
:return:
"""
# sort rejected games list file
sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')
def clean_backlog(self):
"""
:return:
"""
if not self.entries:
print('entries not yet loaded')
return
# get urls from entries
included_urls = osg.all_urls(self.entries)
included_urls = list(included_urls.keys()) # only need the URLs here
# get urls from rejected file
text = utils.read_text(c.rejected_file)
regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
matches = regex.findall(text)
rejected_urls = []
for match in matches:
urls = match.split(',')
urls = [x.strip() for x in urls]
rejected_urls.extend(urls)
included_urls.extend(rejected_urls)
# for those that only have a web archive version, also include the original URL
more_urls = []
for url in included_urls:
if url.startswith('https://web.archive.org/web'):
# print(url) # sometimes the http is missing in archive links (would need proper parsing)
url = url[url.index('http', 5):]
more_urls.append(url)
included_urls.extend(more_urls)
# now we strip the urls
stripped_urls = [utils.strip_url(x) for x in included_urls]
stripped_urls = set(stripped_urls) # removes duplicates for performance
# read backlog and get urls from there
text = utils.read_text(c.backlog_file)
text = text.split('\n')
# remove those that are in stripped_urls
text = [x for x in text if utils.strip_url(x) not in stripped_urls]
# remove duplicates and sort
text = sorted(list(set(text)), key=str.casefold)
print('backlog contains {} items'.format(len(text)))
# join and save again
text = '\n'.join(text)
utils.write_text(c.backlog_file, text)
print('backlog cleaned')
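The web archive handling above can be illustrated with the Iron Seed download link that appears later in this commit; url.index('http', 5) locates the original URL embedded in the archive URL:

url = 'https://web.archive.org/web/20150802151352/http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip'
print(url[url.index('http', 5):])
# http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip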
def check_external_links(self):
"""
Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
from time to time.
"""
# regex for finding urls (can be in <> or in ]() or after a whitespace)
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
# ignore the following patterns (they give false positives here)
ignored_urls = (
'https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/',
'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/',
'http://wiki.srb2.org/')
# some do redirect, but we nevertheless want the original URL in the database
redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download')
# extract all links from entries
import urllib3
urllib3.disable_warnings() # otherwise we cannot verify those with SSL errors without getting warnings
urls = {}
for entry, _, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
# for each match
for match in matches:
for url in match:
if url and not any((url.startswith(x) for x in ignored_urls)):
# ignore bzr.sourceforge, no web address found
if 'bzr.sourceforge.net/bzrroot/' in url:
continue
# add "/" at the end
if any((url.startswith(x) for x in (
'https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/',
'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))):
url += '/'
if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
url = url[:-4] + '/commits/'
if url.startswith('https://svn.code.sf.net/p/'):
url = 'http' + url[5:] + '/'
if url.startswith('http://cvs.savannah.nongnu.org:/sources/'):
url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/'
if url.startswith('http://cvs.savannah.gnu.org:/sources/'):
url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/'
# generally ".git" at the end is not working well, except sometimes
if url.endswith('.git') and not any((url.startswith(x) for x in (
'https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))):
url = url[:-4]
if url in urls:
urls[url].add(entry)
else:
urls[url] = {entry}
print('found {} unique links'.format(len(urls)))
print("start checking external links (can take a while)")
# now iterate over all urls
for url, names in urls.items():
names = list(names) # was a set
if len(names) == 1:
names = names[0]
try:
verify = True
# some have an expired certificate but otherwise still work
if any((url.startswith(x) for x in (
'https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/',
'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/',
'https://www.opmon-game.ga/'))):
verify = False
r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20,
allow_redirects=True, verify=verify)
if r.status_code == 405: # head method not supported, try get
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'},
timeout=20, allow_redirects=True, verify=verify)
# check for bad status
if r.status_code != requests.codes.ok:
print('{}: {} - {}'.format(names, url, r.status_code))
# check for redirect
if r.history and url not in redirect_okay:
# only / added or http->https sometimes
redirected_url = r.url
if redirected_url == url + '/':
output = '{}: {} -> {} - redirect "/" at end '
elif redirected_url == 'https' + url[4:]:
output = '{}: {} -> {} - redirect "https" at start'
else:
output = '{}: {} -> {} - redirect '
print(output.format(names, url, redirected_url))
except Exception as e:
error_name = type(e).__name__
if error_name == 'SSLError' and any((url.startswith(x) for x in (
'https://gitorious.org/', 'https://www.freedroid.org/download/'))):
continue # even though verify is False, these errors still get through
print('{}: {} - exception {}'.format(names, url, error_name))
def update_readme_tocs(self):
"""
Recounts entries in sub-categories and writes them to the readme.
Also updates the _toc files in the categories directories.
Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
with "A collection..".
Needs to be performed regularly.
"""
# completely delete content of toc path
for file in os.listdir(c.tocs_path):
os.remove(os.path.join(c.tocs_path, file))
# read readme
readme_file = os.path.join(c.root_path, 'README.md')
readme_text = utils.read_text(readme_file)
# compile regex for identifying the building blocks in the readme
regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
# apply regex
matches = regex.findall(readme_text)
if len(matches) != 1:
raise RuntimeError('readme file has invalid structure')
matches = matches[0]
start = matches[0]
end = matches[2]
tocs_text = ''
# split into games, tools, frameworks, libraries
games = [x for x in self.entries if not any([y in x['Keywords'] for y in ('tool', 'framework', 'library')])]
tools = [x for x in self.entries if 'tool' in x['Keywords']]
frameworks = [x for x in self.entries if 'framework' in x['Keywords']]
libraries = [x for x in self.entries if 'library' in x['Keywords']]
# create games, tools, frameworks, libraries tocs
title = 'Games'
file = '_games.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(games))
create_toc(title, file, games)
title = 'Tools'
file = '_tools.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(tools))
create_toc(title, file, tools)
title = 'Frameworks'
file = '_frameworks.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(frameworks))
create_toc(title, file, frameworks)
title = 'Libraries'
file = '_libraries.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(libraries))
create_toc(title, file, libraries)
# create by category
categories_text = []
for keyword in c.recommended_keywords:
filtered = [x for x in self.entries if keyword in x['Keywords']]
title = keyword.capitalize()
name = keyword.replace(' ', '-')
file = '_{}.md'.format(name)
categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
create_toc(title, file, filtered)
categories_text.sort()
tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))
# create by platform
platforms_text = []
for platform in c.valid_platforms:
filtered = [x for x in self.entries if platform in x.get('Platform', [])]
title = platform
name = platform.lower()
file = '_{}.md'.format(name)
platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
create_toc(title, file, filtered)
tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))
# insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end
# write to readme
utils.write_text(readme_file, text)
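For orientation, a minimal sketch of the README shape that the regex above expects (counts and link text are illustrative, not actual values):

# Open Source Games

[comment]: # (start of autogenerated content, do not edit)
**[Games](entries/tocs/_games.md#Games)** (42) - ...
[comment]: # (end of autogenerated content)

A collection..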
def update_statistics(self):
"""
Generates the statistics page.
Should be done every time the entries change.
"""
if not self.entries:
print('entries not yet loaded')
return
# start the page
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
number_entries = len(self.entries)
rel = lambda x: x / number_entries * 100 # conversion to percent
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries,
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# State (beta, mature, inactive)
statistics += '## State\n\n'
number_state_beta = sum(1 for x in self.entries if 'beta' in x['State'])
number_state_mature = sum(1 for x in self.entries if 'mature' in x['State'])
number_inactive = sum(1 for x in self.entries if osg.is_inactive(x))
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive,
rel(number_inactive))
if number_inactive > 0:
entries_inactive = [(x['Title'], osg.extract_inactive_year(x)) for x in self.entries if osg.is_inactive(x)]
entries_inactive.sort(key=lambda x: str.casefold(x[0])) # first sort by name
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'
# Language
statistics += '## Code Languages\n\n'
field = 'Code language'
# get all languages together
languages = []
for entry in self.entries:
languages.extend(entry[field])
languages = [x.value for x in languages]
unique_languages = set(languages)
unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'
# Licenses
statistics += '## Code licenses\n\n'
field = 'Code license'
# get all licenses together
licenses = []
for entry in self.entries:
licenses.extend(entry[field])
licenses = [x.value for x in licenses]
unique_licenses = set(licenses)
unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'
# Keywords
statistics += '## Keywords\n\n'
field = 'Keywords'
# get all keywords together
keywords = []
for entry in self.entries:
keywords.extend(entry[field])
keywords = [x.value for x in keywords]
# reduce those starting with "multiplayer"
keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
unique_keywords = set(keywords)
unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords]
statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'
# no download or play field
statistics += '## Entries without download or play fields\n\n'
entries = []
for entry in self.entries:
if 'Download' not in entry and 'Play' not in entry:
entries.append(entry['Title'])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
statistics += '## Entries with a code repository not on a popular site\n\n'
entries = []
field = 'Code repository'
for entry in self.entries:
popular = False
for repo in entry[field]:
for popular_repo in popular_code_repositories:
if popular_repo in repo.value:
popular = True
break
# if there were repositories, but none popular, add them to the list
if not popular:
entries.append(entry['Title'])
# print(entry[field])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# Code dependencies
statistics += '## Code dependencies\n\n'
field = 'Code dependencies'
# get all code dependencies together
code_dependencies = []
entries_with_code_dependency = 0
for entry in self.entries:
if field in entry:
code_dependencies.extend(entry[field])
entries_with_code_dependency += 1
code_dependencies = [x.value for x in code_dependencies]
statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency,
rel(entries_with_code_dependency))
unique_code_dependencies = set(code_dependencies)
unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in
unique_code_dependencies]
unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies]
statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'
# Build systems:
statistics += '## Build systems\n\n'
field = 'Build system'
# get all build systems together
build_systems = []
for entry in self.entries:
if field in entry['Building']:
build_systems.extend(entry['Building'][field])
build_systems = [x.value for x in build_systems]
statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(
rel(len(build_systems)))
unique_build_systems = set(build_systems)
unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems]
statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(
unique_build_systems) + '\n\n'
# C, C++ projects without build system information
c_cpp_project_without_build_system = []
for entry in self.entries:
if field not in entry['Building'] and ('C' in entry['Code language'] or 'C++' in entry['Code language']):
c_cpp_project_without_build_system.append(entry['Title'])
c_cpp_project_without_build_system.sort(key=str.casefold)
statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'
# C, C++ projects with build system information but without CMake as build system
c_cpp_project_not_cmake = []
for entry in self.entries:
if field in entry['Building'] and 'CMake' not in entry['Building'][field] and (
'C' in entry['Code language'] or 'C++' in entry['Code language']):
c_cpp_project_not_cmake.append(entry['Title'])
c_cpp_project_not_cmake.sort(key=str.casefold)
statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'
# Platform
statistics += '## Platform\n\n'
field = 'Platform'
# get all platforms together
platforms = []
for entry in self.entries:
if field in entry:
platforms.extend(entry[field])
platforms = [x.value for x in platforms]
statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))
unique_platforms = set(platforms)
unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms]
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
# write to statistics file
utils.write_text(c.statistics_file, statistics)
print('statistics updated')
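The name/share frequency pattern above repeats for languages, licenses, keywords, code dependencies, build systems and platforms; a collections.Counter based helper could produce the same sorted '- name (x.x%)' lines in a single pass. A possible refactoring sketch (helper name is hypothetical):

from collections import Counter

def frequency_lines(values):
    # values: flat list of strings collected over all entries
    counts = Counter(values)
    total = len(values)
    items = sorted(counts.items(), key=lambda x: str.casefold(x[0]))  # first sort by name
    items.sort(key=lambda x: -x[1])  # then by occurrence (highest first)
    return ['- {} ({:.1f}%)'.format(name, count / total * 100) for name, count in items]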
def update_html(self):
pass
def update_repos(self):
pass
def complete_run(self):
pass
if __name__ == "__main__":
m = EntriesMaintainer()
actions = {
'Read entries': m.read_entries,
'Write entries': m.write_entries,
'Check template leftovers': m.check_template_leftovers,
'Clean rejected list': m.clean_rejected,
'Check external links (takes quite long)': m.check_external_links,
'Clean backlog': m.clean_backlog,
'Update Readme and TOCs': m.update_readme_tocs,
'Update statistics': m.update_statistics,
'Update HTML': m.update_html,
'Update repository list': m.update_repos,
'Complete run': m.complete_run
}
osg_ui.run_simple_button_app('Entries developer', actions)


@@ -16,6 +16,7 @@ developer_file = os.path.join(root_path, 'developers.md')
backlog_file = os.path.join(code_path, 'backlog.txt')
rejected_file = os.path.join(code_path, 'rejected.txt')
statistics_file = os.path.join(root_path, 'statistics.md')
# local config
local_config_file = os.path.join(root_path, 'local-config.ini')


@@ -370,11 +370,16 @@ def check_and_process_entry(entry):
return entry
def is_inactive(entry):
state = entry['State']
phrase = 'inactive since '
return any(x.startswith(phrase) for x in state)
def extract_inactive_year(entry):
state = entry['State']
phrase = 'inactive since '
inactive_year = [x.value[len(phrase):] for x in state if x.startswith(phrase)]
assert len(inactive_year) <= 1
if inactive_year:
return inactive_year[0]
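A worked example, assuming the osg value wrapper exposes startswith and .value as the code above implies: for a State field containing 'mature' and 'inactive since 2013' (as in the Iron Seed entry below), is_inactive returns True and extract_inactive_year returns '2013'.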


@@ -13,7 +13,6 @@
- Code dependencies: SDL
Puzzle game based on Oxyd.
Inspired by Oxyd.
## Building


@@ -5,7 +5,7 @@
- Inspirations: Iron Seed
- State: mature, inactive since 2013
- Download: https://web.archive.org/web/20150802151352/http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip
- Keywords: remake
- Code repository: @see-download
- Code language: Pascal
- Code license: GPL-3.0 (not with the source code)