maintenance scripts improved
This commit is contained in:
parent
82af77b017
commit
c985780dc2
@@ -1,497 +1,10 @@
"""
Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
each category as well as creating a statistics file.

Counts the number of records in each sub-folder and updates the overview.
Sorts the entries in the contents files of each sub-folder alphabetically.

This script runs with Python 3; it could probably also run with Python 2 with some minor tweaks.
"""

import requests
import datetime
import json
import textwrap
import os
import re

import utils.constants
from utils import constants as c, utils, osg
def extract_links():
    """
    Parses all entries and extracts http(s) links from them.
    """

    # regex for finding urls (can be in <> or in ]() or after a whitespace)
    regex = re.compile(r"[\s\n]<(http.+?)>|]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n,]")

    # iterate over all entries
    urls = set()
    for _, _, content in osg.entry_iterator():

        # apply regex
        matches = regex.findall(content)

        # for each match
        for match in matches:

            # for each possible clause
            for url in match:

                # if there was something (and not a sourceforge git url)
                if url:
                    urls.add(url)
    urls = sorted(list(urls), key=str.casefold)
    return urls
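# Illustration only (example snippet is made up, not from the entries): each findall match is a
# 3-tuple with exactly one non-empty slot, depending on which alternative of the regex fired, e.g.
#   sample = "See <https://example.org> and [home](https://example.org/page) or https://example.org/raw .\n"
#   regex.findall(sample)
#   # -> roughly [('https://example.org', '', ''), ('', 'https://example.org/page', ''), ('', '', 'https://example.org/raw')]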
def check_validity_external_links():
    """
    Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
    from time to time.
    """

    # regex for finding urls (can be in <> or in ]() or after a whitespace)
    regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")

    # ignore the following patterns (they give false positives here)
    ignored_urls = ('https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/', 'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/', 'http://wiki.srb2.org/')

    # some do redirect, but we nevertheless want the original URL in the database
    redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download')

    # extract all links from entries
    import urllib3
    urllib3.disable_warnings()  # otherwise we cannot verify those with SSL errors without getting warnings
    urls = {}
    for entry, _, content in osg.entry_iterator():
        # apply regex
        matches = regex.findall(content)
        # for each match
        for match in matches:
            for url in match:
                if url and not any((url.startswith(x) for x in ignored_urls)):
                    # ignore bzr.sourceforge, no web address found
                    if 'bzr.sourceforge.net/bzrroot/' in url:
                        continue

                    # add "/" at the end
                    if any((url.startswith(x) for x in ('https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/', 'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))):
                        url += '/'

                    if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
                        url = url[:-4] + '/commits/'
                    if url.startswith('https://svn.code.sf.net/p/'):
                        url = 'http' + url[5:] + '/'
                    if url.startswith('http://cvs.savannah.nongnu.org:/sources/'):
                        url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/'
                    if url.startswith('http://cvs.savannah.gnu.org:/sources/'):
                        url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/'

                    # generally ".git" at the end is not working well, except sometimes
                    if url.endswith('.git') and not any((url.startswith(x) for x in ('https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))):
                        url = url[:-4]

                    if url in urls:
                        urls[url].add(entry)
                    else:
                        urls[url] = {entry}
    print('found {} unique links'.format(len(urls)))
    print("start checking external links (can take a while)")

    # now iterate over all urls
    for url, names in urls.items():
        names = list(names)  # was a set
        if len(names) == 1:
            names = names[0]
        try:
            verify = True
            # some have an expired certificate but otherwise still work
            if any((url.startswith(x) for x in ('https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/', 'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/', 'https://www.opmon-game.ga/'))):
                verify = False
            r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify)
            if r.status_code == 405:  # head method not supported, try get
                r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify)
            # check for bad status
            if r.status_code != requests.codes.ok:
                print('{}: {} - {}'.format(names, url, r.status_code))
            # check for redirect
            if r.history and url not in redirect_okay:
                # only / added or http->https sometimes
                redirected_url = r.url
                if redirected_url == url + '/':
                    output = '{}: {} -> {} - redirect "/" at end '
                elif redirected_url == 'https' + url[4:]:
                    output = '{}: {} -> {} - redirect "https" at start'
                else:
                    output = '{}: {} -> {} - redirect '
                print(output.format(names, url, redirected_url))
        except Exception as e:
            error_name = type(e).__name__
            if error_name == 'SSLError' and any((url.startswith(x) for x in ('https://gitorious.org/', 'https://www.freedroid.org/download/'))):
                continue  # even though verify is False, these errors still get through
            print('{}: {} - exception {}'.format(names, url, error_name))
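# Illustration only (hypothetical repository paths, not taken from the entries): the URL
# normalization above rewrites, for example,
#   'https://svn.code.sf.net/p/someproject/code'          -> 'http://svn.code.sf.net/p/someproject/code/'
#   'https://bitbucket.org/someuser/somerepo.git'         -> 'https://bitbucket.org/someuser/somerepo/commits/'
#   'http://cvs.savannah.nongnu.org:/sources/someproject' -> 'http://cvs.savannah.nongnu.org/viewvc/someproject/'
# before the HEAD/GET requests are issued.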
def fix_entries():
    """
    Fixes the keywords, code dependencies, build systems, .. entries, mostly by automatically sorting them.
    """

    keyword_synonyms = {'RTS': ('real time', 'strategy'), 'realtime': 'real time'}

    # TODO also sort other fields, only read once and then do all, move to separate file
    # example Javascript to JavaScript and then add whenever the known languages check hits

    print('fix entries')

    # keywords
    regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():

        # match with regex
        matches = regex.findall(content)
        if len(matches) != 1:
            raise RuntimeError('Could not find keywords in entry "{}"'.format(entry))

        match = matches[0]

        # get elements out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # get category out
        for keyword in utils.constants.recommended_keywords:
            if keyword in elements:
                elements.remove(keyword)
                category = keyword
                break

        # special treatments here
        elements = [x if x != 'TBS' and x != 'TB' else 'turn based' for x in elements]
        elements = [x if x != 'RTS' else 'real time' for x in elements]
        elements = [x if x != 'MMO' else 'massive multiplayer online' for x in elements]
        elements = [x if x != 'MMO' else 'multiplayer online' for x in elements]
        elements = [x if x != 'SP' else 'singleplayer' for x in elements]
        elements = [x if x != 'MP' else 'multiplayer' for x in elements]
        elements = [x if x != 'engine' else 'game engine' for x in elements]
        elements = [x if x != 'rpg' else 'role playing' for x in elements]
        elements = [x if x != 'turn based' else 'turn-based' for x in elements]
        for keyword in ('browser', 'misc', 'tools'):
            if keyword in elements:
                elements.remove(keyword)

        # sort
        elements.sort(key=str.casefold)

        # add category
        elements.insert(0, category)

        keywords = '- Keywords: {}'.format(', '.join(elements))

        new_content = match[0] + keywords + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # code dependencies
    regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no code dependencies given
            continue

        match = matches[0]

        # get code dependencies out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # special treatments here
        elements = [x if x != 'Blender' else 'Blender game engine' for x in elements]
        elements = [x if x.lower() != 'libgdx' else 'libGDX' for x in elements]
        elements = [x if x != 'SDL 2' else 'SDL2' for x in elements]
        elements = [x if x.lower() != "ren'py" else "Ren'Py" for x in elements]

        # sort
        elements.sort(key=str.casefold)

        code_dependencies = '- Code dependencies: {}'.format(', '.join(elements))

        new_content = match[0] + code_dependencies + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)

    # build systems
    regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)

    # iterate over all entries
    for entry, entry_path, content in osg.entry_iterator():
        # match with regex
        matches = regex.findall(content)

        if not matches:
            # no build system given
            continue

        match = matches[0]

        # get build systems out, split, strip, delete duplicates
        elements = match[1].split(',')
        elements = [x.strip() for x in elements]
        elements = list(set(elements))

        # special treatments here

        # sort
        elements.sort(key=str.casefold)

        build_system = '- Build system: {}'.format(', '.join(elements))

        new_content = match[0] + build_system + match[2]

        if new_content != content:
            # write again
            utils.write_text(entry_path, new_content)
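# Worked example (hypothetical entry line, for illustration; assumes 'strategy' is one of the
# recommended category keywords): a line such as
#   - Keywords: strategy, RTS, MP, misc
# would be rewritten by the rules above to
#   - Keywords: strategy, multiplayer, real time
# (the category keyword is moved to the front, abbreviations are expanded, and
#  'browser'/'misc'/'tools' are dropped).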
def update_statistics(infos):
    """
    Generates the statistics page.

    Should be done every time the entries change.
    """

    print('update statistics')

    # start the page
    statistics_file = os.path.join(c.root_path, 'statistics.md')
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # total number
    number_entries = len(infos)
    rel = lambda x: x / number_entries * 100  # conversion to percent

    statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # State (beta, mature, inactive)
    statistics += '## State\n\n'

    number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
    number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
    number_inactive = sum(1 for x in infos if 'inactive' in x)
    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
        number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive,
        rel(number_inactive))

    if number_inactive > 0:
        entries_inactive = [(x['Name'], x['inactive']) for x in infos if 'inactive' in x]
        entries_inactive.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        entries_inactive.sort(key=lambda x: x[1], reverse=True)  # then sort by inactive year (more recent first)
        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
        statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

    # Language
    statistics += '## Code Languages\n\n'
    field = 'code language'

    # those without language tag
    # TODO the language tag is now an essential field, this cannot happen anymore
    # number_no_language = sum(1 for x in infos if field not in x)
    # if number_no_language > 0:
    #     statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
    #     entries_no_language = [x['Name'] for x in infos if field not in x]
    #     entries_no_language.sort()
    #     statistics += ', '.join(entries_no_language) + '\n\n'

    # get all languages together
    languages = []
    for info in infos:
        if field in info:
            languages.extend(info[field])

    unique_languages = set(languages)
    unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
    unique_languages.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_languages.sort(key=lambda x: x[1], reverse=True)  # then sort by occurrence (highest occurrence first)
    unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
    statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

    # Licenses
    statistics += '## Code licenses\n\n'
    field = 'code license'

    # those without license
    number_no_license = sum(1 for x in infos if field not in x)
    if number_no_license > 0:
        statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
        entries_no_license = [x['Name'] for x in infos if field not in x]
        entries_no_license.sort()
        statistics += ', '.join(entries_no_license) + '\n\n'

    # get all licenses together
    licenses = []
    for info in infos:
        if field in info:
            licenses.extend(info[field])

    unique_licenses = set(licenses)
    unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
    unique_licenses.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_licenses.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
    unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
    statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

    # Keywords
    statistics += '## Keywords\n\n'
    field = 'keywords'

    # get all keywords together
    keywords = []
    for info in infos:
        if field in info:
            keywords.extend(info[field])
    # reduce those starting with "inspired by"
    keywords = [x if not x.startswith('inspired by') else 'inspired' for x in keywords]
    # reduce those starting with "multiplayer"
    keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]

    unique_keywords = set(keywords)
    unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
    unique_keywords.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_keywords.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
    unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords]
    statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'

    # no download or play field
    statistics += '## Entries without download or play fields\n\n'

    entries = []
    for info in infos:
        if 'download' not in info and 'play' not in info:
            entries.append(info['Name'])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
    popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
    statistics += '## Entries with a code repository not on a popular site\n\n'

    entries = []
    field = 'code repository'
    for info in infos:
        if field in info:
            popular = False
            for repo in info[field]:
                for popular_repo in popular_code_repositories:
                    if popular_repo in repo:
                        popular = True
                        break
            # if there were repositories, but none popular, add them to the list
            if not popular:
                entries.append(info['Name'])
                # print(info[field])
    entries.sort(key=str.casefold)
    statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

    # Code dependencies
    statistics += '## Code dependencies\n\n'
    field = 'code dependencies'

    # get all code dependencies together
    code_dependencies = []
    entries_with_code_dependency = 0
    for info in infos:
        if field in info:
            code_dependencies.extend(info[field])
            entries_with_code_dependency += 1
    statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency,
                                                                       rel(entries_with_code_dependency))

    unique_code_dependencies = set(code_dependencies)
    unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in
                                unique_code_dependencies]
    unique_code_dependencies.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_code_dependencies.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
    unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies]
    statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'

    # Build systems:
    statistics += '## Build systems\n\n'
    field = 'build system'

    # get all build systems together
    build_systems = []
    for info in infos:
        if field in info:
            build_systems.extend(info[field])

    statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(len(build_systems)))

    unique_build_systems = set(build_systems)
    unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
    unique_build_systems.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_build_systems.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
    unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems]
    statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(
        unique_build_systems) + '\n\n'

    # C, C++ projects without build system information
    c_cpp_project_without_build_system = []
    for info in infos:
        if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_without_build_system.append(info['Name'])
    c_cpp_project_without_build_system.sort(key=str.casefold)
    statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
        len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'

    # C, C++ projects with build system information but without CMake as build system
    c_cpp_project_not_cmake = []
    for info in infos:
        if field in info and 'CMake' in info[field] and (
                'C' in info['code language'] or 'C++' in info['code language']):
            c_cpp_project_not_cmake.append(info['Name'])
    c_cpp_project_not_cmake.sort(key=str.casefold)
    statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
        len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'

    # Platform
    statistics += '## Platform\n\n'
    field = 'platform'

    # get all platforms together
    platforms = []
    for info in infos:
        if field in info:
            platforms.extend(info[field])

    statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))

    unique_platforms = set(platforms)
    unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
    unique_platforms.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
    unique_platforms.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
    unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms]
    statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'

    # write to statistics file
    utils.write_text(statistics_file, statistics)
def export_json(infos):

@@ -720,18 +233,6 @@ def export_git_code_repositories_json():
    utils.write_text(json_path, text)


def sort_text_file(file, name):
    """
    Reads a text file, splits it into lines, removes duplicates, sorts and writes back.
    """
    text = utils.read_text(file)
    text = text.split('\n')
    text = sorted(list(set(text)), key=str.casefold)
    print('{} contains {} items'.format(name, len(text)))
    text = '\n'.join(text)
    utils.write_text(file, text)


def check_validity_backlog():
    import requests
@@ -792,36 +293,6 @@ def check_code_dependencies(infos):
        print('{} ({})'.format(*dep))


if __name__ == "__main__":

    check_validity_backlog()

    # fix entries
    fix_entries()

    # recount and write to readme and to tocs
    update_readme_and_tocs(infos)

    # generate report
    update_statistics(infos)

    # update database for html table
    export_json(infos)

    # collect list of primary code repositories
    export_primary_code_repositories_json(infos)

    # check code dependencies
    check_code_dependencies(infos)

    # collect list of git code repositories (only one per project) for git_statistics script
    export_git_code_repositories_json()

    # check external links (only rarely)
    # check_validity_external_links()

    # sort rejected games list file
    sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')
581 code/maintenance_entries.py Normal file
@@ -0,0 +1,581 @@
"""
|
||||||
|
Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
|
||||||
|
each category as well as creating a statistics file.
|
||||||
|
|
||||||
|
Counts the number of records each sub-folder and updates the overview.
|
||||||
|
Sorts the entries in the contents files of each sub folder alphabetically.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
from utils import osg, osg_ui, utils, constants as c
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def create_toc(title, file, entries):
    """
    Creates a table of contents file for a list of entries.
    """
    # file path
    toc_file = os.path.join(c.tocs_path, file)

    # header line
    text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)

    # assemble rows
    rows = []
    for entry in entries:
        info = entry['Code language'] + entry['Code license'] + entry['State']
        info = [x.value for x in info]
        rows.append('- **[{}]({})** ({})'.format(entry['Title'], '../' + entry['File'], ', '.join(info)))

    # sort rows (by title)
    rows.sort(key=str.casefold)

    # add to text
    text += '\n'.join(rows)

    # write to toc file
    utils.write_text(toc_file, text)

    print('Readme and TOCs updated')
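# For illustration only (hypothetical entry, not from the database): an entry with
# Title 'Some Game', File 'some_game.md', Code language [C++], Code license [GPL-3.0] and
# State [mature] would produce the TOC row
#   - **[Some Game](../some_game.md)** (C++, GPL-3.0, mature)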
def sort_text_file(file, name):
    """
    Reads a text file, splits it into lines, removes duplicates, sorts and writes back.
    """
    text = utils.read_text(file)
    text = text.split('\n')
    text = sorted(list(set(text)), key=str.casefold)
    print('{} contains {} items'.format(name, len(text)))
    text = '\n'.join(text)
    utils.write_text(file, text)
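# Typical use (as done by clean_rejected below): deduplicate and sort the rejected list in place, e.g.
#   sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')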
class EntriesMaintainer:

    def __init__(self):
        self.entries = None

    def read_entries(self):
        self.entries = osg.read_entries()
        print('{} entries read'.format(len(self.entries)))

    def write_entries(self):
        if not self.entries:
            print('entries not yet loaded')
            return
        osg.write_entries(self.entries)
        print('entries written')

    def check_template_leftovers(self):
        """
        Checks for template leftovers.
        Should be run only occasionally.
        """
        # load template and get all lines
        text = utils.read_text(os.path.join(c.root_path, 'template.md'))
        text = text.split('\n')
        check_strings = [x for x in text if x and not x.startswith('##')]

        # iterate over all entries
        for _, entry_path, content in osg.entry_iterator():

            for check_string in check_strings:
                if content.find(check_string) >= 0:
                    print('{}: found {}'.format(os.path.basename(entry_path), check_string))
        print('checked for template leftovers')

    def clean_rejected(self):
        """
        Sorts the rejected games list file.

        :return:
        """
        # sort rejected games list file
        sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')

    def clean_backlog(self):
        """
        Removes URLs from the backlog file that are already included in entries or rejected.

        :return:
        """
        if not self.entries:
            print('entries not yet loaded')
            return
        # get urls from entries
        included_urls = osg.all_urls(self.entries)
        included_urls = list(included_urls.keys())  # only need the URLs here

        # get urls from rejected file
        text = utils.read_text(c.rejected_file)
        regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
        matches = regex.findall(text)
        rejected_urls = []
        for match in matches:
            urls = match.split(',')
            urls = [x.strip() for x in urls]
            rejected_urls.extend(urls)
        included_urls.extend(rejected_urls)

        # those that only have a web archive version, also get the original version
        more_urls = []
        for url in included_urls:
            if url.startswith('https://web.archive.org/web'):
                # print(url) # sometimes the http is missing in archive links (would need proper parsing)
                url = url[url.index('http', 5):]
                more_urls.append(url)
        included_urls.extend(more_urls)

        # now we strip the urls
        stripped_urls = [utils.strip_url(x) for x in included_urls]
        stripped_urls = set(stripped_urls)  # removes duplicates for performance

        # read backlog and get urls from there
        text = utils.read_text(c.backlog_file)
        text = text.split('\n')

        # remove those that are in stripped_game_urls
        text = [x for x in text if utils.strip_url(x) not in stripped_urls]

        # remove duplicates and sort
        text = sorted(list(set(text)), key=str.casefold)
        print('backlog contains {} items'.format(len(text)))

        # join and save again
        text = '\n'.join(text)
        utils.write_text(c.backlog_file, text)

        print('backlog cleaned')
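    # Example of the web archive handling above (URL as in the Iron Seed entry further down):
    #   'https://web.archive.org/web/20150802151352/http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip'
    # additionally contributes the embedded original URL
    #   'http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip'
    # to included_urls before stripping and deduplication.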
    def check_external_links(self):
        """
        Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
        from time to time.
        """

        # regex for finding urls (can be in <> or in ]() or after a whitespace)
        regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")

        # ignore the following patterns (they give false positives here)
        ignored_urls = (
            'https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/',
            'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/',
            'http://wiki.srb2.org/')

        # some do redirect, but we nevertheless want the original URL in the database
        redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download')

        # extract all links from entries
        import urllib3
        urllib3.disable_warnings()  # otherwise we cannot verify those with SSL errors without getting warnings
        urls = {}
        for entry, _, content in osg.entry_iterator():
            # apply regex
            matches = regex.findall(content)
            # for each match
            for match in matches:
                for url in match:
                    if url and not any((url.startswith(x) for x in ignored_urls)):
                        # ignore bzr.sourceforge, no web address found
                        if 'bzr.sourceforge.net/bzrroot/' in url:
                            continue

                        # add "/" at the end
                        if any((url.startswith(x) for x in (
                                'https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/',
                                'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))):
                            url += '/'

                        if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
                            url = url[:-4] + '/commits/'
                        if url.startswith('https://svn.code.sf.net/p/'):
                            url = 'http' + url[5:] + '/'
                        if url.startswith('http://cvs.savannah.nongnu.org:/sources/'):
                            url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/'
                        if url.startswith('http://cvs.savannah.gnu.org:/sources/'):
                            url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/'

                        # generally ".git" at the end is not working well, except sometimes
                        if url.endswith('.git') and not any((url.startswith(x) for x in (
                                'https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))):
                            url = url[:-4]

                        if url in urls:
                            urls[url].add(entry)
                        else:
                            urls[url] = {entry}
        print('found {} unique links'.format(len(urls)))
        print("start checking external links (can take a while)")

        # now iterate over all urls
        for url, names in urls.items():
            names = list(names)  # was a set
            if len(names) == 1:
                names = names[0]
            try:
                verify = True
                # some have an expired certificate but otherwise still work
                if any((url.startswith(x) for x in (
                        'https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/',
                        'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/',
                        'https://www.opmon-game.ga/'))):
                    verify = False
                r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20,
                                  allow_redirects=True, verify=verify)
                if r.status_code == 405:  # head method not supported, try get
                    r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'},
                                     timeout=20, allow_redirects=True, verify=verify)
                # check for bad status
                if r.status_code != requests.codes.ok:
                    print('{}: {} - {}'.format(names, url, r.status_code))
                # check for redirect
                if r.history and url not in redirect_okay:
                    # only / added or http->https sometimes
                    redirected_url = r.url
                    if redirected_url == url + '/':
                        output = '{}: {} -> {} - redirect "/" at end '
                    elif redirected_url == 'https' + url[4:]:
                        output = '{}: {} -> {} - redirect "https" at start'
                    else:
                        output = '{}: {} -> {} - redirect '
                    print(output.format(names, url, redirected_url))
            except Exception as e:
                error_name = type(e).__name__
                if error_name == 'SSLError' and any((url.startswith(x) for x in (
                        'https://gitorious.org/', 'https://www.freedroid.org/download/'))):
                    continue  # even though verify is False, these errors still get through
                print('{}: {} - exception {}'.format(names, url, error_name))
    def update_readme_tocs(self):
        """
        Recounts entries in sub categories and writes them to the readme.
        Also updates the _toc files in the categories directories.

        Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
        on "A collection.."

        Needs to be performed regularly.
        """

        # completely delete content of toc path
        for file in os.listdir(c.tocs_path):
            os.remove(os.path.join(c.tocs_path, file))

        # read readme
        readme_file = os.path.join(c.root_path, 'README.md')
        readme_text = utils.read_text(readme_file)

        # compile regex for identifying the building blocks in the readme
        regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)

        # apply regex
        matches = regex.findall(readme_text)
        if len(matches) != 1:
            raise RuntimeError('readme file has invalid structure')
        matches = matches[0]
        start = matches[0]
        end = matches[2]

        tocs_text = ''

        # split into games, tools, frameworks, libraries
        games = [x for x in self.entries if not any([y in x['Keywords'] for y in ('tool', 'framework', 'library')])]
        tools = [x for x in self.entries if 'tool' in x['Keywords']]
        frameworks = [x for x in self.entries if 'framework' in x['Keywords']]
        libraries = [x for x in self.entries if 'library' in x['Keywords']]

        # create games, tools, frameworks, libraries tocs
        title = 'Games'
        file = '_games.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(games))
        create_toc(title, file, games)

        title = 'Tools'
        file = '_tools.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(tools))
        create_toc(title, file, tools)

        title = 'Frameworks'
        file = '_frameworks.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(frameworks))
        create_toc(title, file, frameworks)

        title = 'Libraries'
        file = '_libraries.md'
        tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(libraries))
        create_toc(title, file, libraries)

        # create by category
        categories_text = []
        for keyword in c.recommended_keywords:
            filtered = [x for x in self.entries if keyword in x['Keywords']]
            title = keyword.capitalize()
            name = keyword.replace(' ', '-')
            file = '_{}.md'.format(name)
            categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        categories_text.sort()
        tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))

        # create by platform
        platforms_text = []
        for platform in c.valid_platforms:
            filtered = [x for x in self.entries if platform in x.get('Platform', [])]
            title = platform
            name = platform.lower()
            file = '_{}.md'.format(name)
            platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
            create_toc(title, file, filtered)
        tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))

        # insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
        text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end

        # write to readme
        utils.write_text(readme_file, text)
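    # The regex above expects the README to already contain an autogenerated block delimited by
    #   [comment]: # (start of autogenerated content, do not edit)
    #   ...
    #   [comment]: # (end of autogenerated content)
    # which update_readme_tocs replaces wholesale with the freshly built TOC overview.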
    def update_statistics(self):
        """
        Generates the statistics page.

        Should be done every time the entries change.
        """
        if not self.entries:
            print('entries not yet loaded')
            return

        # start the page
        statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

        # total number
        number_entries = len(self.entries)
        rel = lambda x: x / number_entries * 100  # conversion to percent

        statistics += 'analyzed {} entries on {}\n\n'.format(number_entries,
                                                             datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # State (beta, mature, inactive)
        statistics += '## State\n\n'

        number_state_beta = sum(1 for x in self.entries if 'beta' in x['State'])
        number_state_mature = sum(1 for x in self.entries if 'mature' in x['State'])
        number_inactive = sum(1 for x in self.entries if osg.is_inactive(x))
        statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
            number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive,
            rel(number_inactive))

        if number_inactive > 0:
            entries_inactive = [(x['Title'], osg.extract_inactive_year(x)) for x in self.entries if osg.is_inactive(x)]
            entries_inactive.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
            entries_inactive.sort(key=lambda x: x[1], reverse=True)  # then sort by inactive year (more recent first)
            entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
            statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

        # Language
        statistics += '## Code Languages\n\n'
        field = 'Code language'

        # get all languages together
        languages = []
        for entry in self.entries:
            languages.extend(entry[field])
        languages = [x.value for x in languages]

        unique_languages = set(languages)
        unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
        unique_languages.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_languages.sort(key=lambda x: x[1], reverse=True)  # then sort by occurrence (highest occurrence first)
        unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
        statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

        # Licenses
        statistics += '## Code licenses\n\n'
        field = 'Code license'

        # get all licenses together
        licenses = []
        for entry in self.entries:
            licenses.extend(entry[field])
        licenses = [x.value for x in licenses]

        unique_licenses = set(licenses)
        unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
        unique_licenses.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_licenses.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
        unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
        statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

        # Keywords
        statistics += '## Keywords\n\n'
        field = 'Keywords'

        # get all keywords together
        keywords = []
        for entry in self.entries:
            keywords.extend(entry[field])
        keywords = [x.value for x in keywords]

        # reduce those starting with "multiplayer"
        keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]

        unique_keywords = set(keywords)
        unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
        unique_keywords.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_keywords.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
        unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords]
        statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'

        # no download or play field
        statistics += '## Entries without download or play fields\n\n'

        entries = []
        for entry in self.entries:
            if 'Download' not in entry and 'Play' not in entry:
                entries.append(entry['Title'])
        entries.sort(key=str.casefold)
        statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

        # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
        popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
        statistics += '## Entries with a code repository not on a popular site\n\n'

        entries = []
        field = 'Code repository'
        for entry in self.entries:
            popular = False
            for repo in entry[field]:
                for popular_repo in popular_code_repositories:
                    if popular_repo in repo.value:
                        popular = True
                        break
            # if there were repositories, but none popular, add them to the list
            if not popular:
                entries.append(entry['Title'])
                # print(info[field])
        entries.sort(key=str.casefold)
        statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'

        # Code dependencies
        statistics += '## Code dependencies\n\n'
        field = 'Code dependencies'

        # get all code dependencies together
        code_dependencies = []
        entries_with_code_dependency = 0
        for entry in self.entries:
            if field in entry:
                code_dependencies.extend(entry[field])
                entries_with_code_dependency += 1
        code_dependencies = [x.value for x in code_dependencies]
        statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency,
                                                                           rel(entries_with_code_dependency))

        unique_code_dependencies = set(code_dependencies)
        unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in
                                    unique_code_dependencies]
        unique_code_dependencies.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_code_dependencies.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
        unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies]
        statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'

        # Build systems:
        statistics += '## Build systems\n\n'
        field = 'Build system'

        # get all build systems together
        build_systems = []
        for entry in self.entries:
            if field in entry['Building']:
                build_systems.extend(entry['Building'][field])
        build_systems = [x.value for x in build_systems]

        statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(
            rel(len(build_systems)))

        unique_build_systems = set(build_systems)
        unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
        unique_build_systems.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_build_systems.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
        unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems]
        statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(
            unique_build_systems) + '\n\n'

        # C, C++ projects without build system information
        c_cpp_project_without_build_system = []
        for entry in self.entries:
            if field not in entry and ('C' in entry['Code language'] or 'C++' in entry['Code language']):
                c_cpp_project_without_build_system.append(entry['Title'])
        c_cpp_project_without_build_system.sort(key=str.casefold)
        statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
            len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'

        # C, C++ projects with build system information but without CMake as build system
        c_cpp_project_not_cmake = []
        for entry in self.entries:
            if field in entry and 'CMake' in entry[field] and (
                    'C' in entry['Code language'] or 'C++' in entry['Code language']):
                c_cpp_project_not_cmake.append(entry['Title'])
        c_cpp_project_not_cmake.sort(key=str.casefold)
        statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
            len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'

        # Platform
        statistics += '## Platform\n\n'
        field = 'Platform'

        # get all platforms together
        platforms = []
        for entry in self.entries:
            if field in entry:
                platforms.extend(entry[field])
        platforms = [x.value for x in platforms]

        statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))

        unique_platforms = set(platforms)
        unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
        unique_platforms.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
        unique_platforms.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
        unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms]
        statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'

        # write to statistics file
        utils.write_text(c.statistics_file, statistics)

        print('statistics updated')

    def update_html(self):
        pass

    def update_repos(self):
        pass

    def complete_run(self):
        pass
if __name__ == "__main__":

    m = EntriesMaintainer()

    actions = {
        'Read entries': m.read_entries,
        'Write entries': m.write_entries,
        'Check template leftovers': m.check_template_leftovers,
        'Check external links': m.check_external_links,
        'Check rejected entries': m.clean_rejected,
        'Check external links (takes quite long)': m.check_external_links,
        'Clean backlog': m.clean_backlog,
        'Update Readme and TOCs': m.update_readme_tocs,
        'Update statistics': m.update_statistics,
        'Update HTML': m.update_html,
        'Update repository list': m.update_repos,
        'Complete run': m.complete_run
    }

    osg_ui.run_simple_button_app('Entries developer', actions)
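# Usage sketch (an assumption, not stated in the commit): the script is meant to be started
# directly and the actions above are then picked from the simple button UI, e.g.
#   python maintenance_entries.py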
@@ -16,6 +16,7 @@ developer_file = os.path.join(root_path, 'developers.md')
backlog_file = os.path.join(code_path, 'backlog.txt')
rejected_file = os.path.join(code_path, 'rejected.txt')
statistics_file = os.path.join(root_path, 'statistics.md')

# local config
local_config_file = os.path.join(root_path, 'local-config.ini')
@@ -370,11 +370,16 @@ def check_and_process_entry(entry):
    return entry


def is_inactive(entry):
    state = entry['State']
    phrase = 'inactive since '
    return any(x.startswith(phrase) for x in state)


def extract_inactive_year(entry):
    state = entry['State']
    phrase = 'inactive since '
    inactive_year = [x[len(phrase):] for x in state if x.startswith(phrase)]
    inactive_year = [x.value[len(phrase):] for x in state if x.startswith(phrase)]
    assert len(inactive_year) <= 1
    if inactive_year:
        return inactive_year[0]
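# Worked example (state values as in the Iron Seed entry below; assumes the State items behave
# like the shown strings): for an entry with
#   State: mature, inactive since 2013
# is_inactive(entry) returns True and extract_inactive_year(entry) returns '2013'.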
@@ -13,7 +13,6 @@
- Code dependencies: SDL

Puzzle game based on Oxyd.
Inspired by Oxyd.

## Building
@@ -5,7 +5,7 @@
- Inspirations: Iron Seed
- State: mature, inactive since 2013
- Download: https://web.archive.org/web/20150802151352/http://www.ironseed.com/ironseed-v1.20.0016-2013-03-17.zip
- Keywords: remake, inspired by Iron Seed
- Keywords: remake
- Code repository: @see-download
- Code language: Pascal
- Code license: GPL-3.0 (not with the source code)