"""
Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
each category as well as creating a statistics file.
Counts the number of records each sub-folder and updates the overview.
Sorts the entries in the contents files of each sub folder alphabetically.
"""
import os
import re
import datetime
import json
import textwrap
from utils import osg, osg_ui, utils, constants as c
import requests
def check_validity_backlog():
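    """
    Checks the URLs in the backlog file for reachability; prints errors, bad status codes and redirects.
    """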
# read backlog and split
file = os.path.join(c.root_path, 'code', 'backlog.txt')
text = utils.read_text(file)
urls = text.split('\n')
urls = [x.split(' ')[0] for x in urls]
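    # each backlog line is assumed to be a URL optionally followed by free-form notes; only the URL is kept, e.g.
    #   https://github.com/someuser/somegame maybe worth an entry   (hypothetical example)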
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}
for url in urls:
try:
r = requests.get(url, headers=headers, timeout=5)
except Exception as e:
print('{} gave error: {}'.format(url, e))
else:
if r.status_code != requests.codes.ok:
print('{} returned status code: {}'.format(url, r.status_code))
if r.is_redirect or r.history:
print('{} redirected to {}, {}'.format(url, r.url, r.history))
def create_toc(title, file, entries):
"""
"""
# file path
toc_file = os.path.join(c.tocs_path, file)
# header line
text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
# assemble rows
rows = []
for entry in entries:
info = entry['Code language'] + entry['Code license'] + entry['State']
info = [x.value for x in info]
rows.append('- **[{}]({})** ({})'.format(entry['Title'], '../' + entry['File'], ', '.join(info)))
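        # each row looks roughly like '- **[Some Game](../some_game.md)** (C++, GPL-3.0, mature)' (illustrative values)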
# sort rows (by title)
rows.sort(key=str.casefold)
# add to text
text += '\n'.join(rows)
# write to toc file
utils.write_text(toc_file, text)
def sort_text_file(file, name):
"""
    Reads a text file, splits it into lines, removes duplicates, sorts them and writes the result back.
"""
text = utils.read_text(file)
text = text.split('\n')
text = sorted(list(set(text)), key=str.casefold)
print('{} contains {} items'.format(name, len(text)))
text = '\n'.join(text)
utils.write_text(file, text)
class EntriesMaintainer:
def __init__(self):
self.entries = None
def read_entries(self):
self.entries = osg.read_entries()
print('{} entries read'.format(len(self.entries)))
def write_entries(self):
if not self.entries:
print('entries not yet loaded')
return
osg.write_entries(self.entries)
print('entries written')
def check_template_leftovers(self):
"""
Checks for template leftovers.
Should be run only occasionally.
"""
# load template and get all lines
text = utils.read_text(os.path.join(c.root_path, 'template.md'))
text = text.split('\n')
check_strings = [x for x in text if x and not x.startswith('##')]
# iterate over all entries
for _, entry_path, content in osg.entry_iterator():
for check_string in check_strings:
if content.find(check_string) >= 0:
print('{}: found {}'.format(os.path.basename(entry_path), check_string))
print('checked for template leftovers')
def check_inconsistencies(self):
"""
        Checks the entries for inconsistencies: similar keywords, code dependencies without an own entry,
        and "Play" fields without a matching "Web" platform.
"""
if not self.entries:
print('entries not yet loaded')
return
# get all keywords and print similar keywords
keywords = []
for entry in self.entries:
keywords.extend(entry['Keyword'])
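            # flag entries that use the Unicode hyphen (U+2010) in "first‐person" instead of a plain "-"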
if b'first\xe2\x80\x90person'.decode() in entry['Keyword']:
print(entry['File'])
keywords = [x.value for x in keywords]
# reduce those starting with "multiplayer"
keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
# check unique keywords
unique_keywords = list(set(keywords))
unique_keywords_counts = [keywords.count(l) for l in unique_keywords]
for index, name in enumerate(unique_keywords):
for other_index in range(index+1, len(unique_keywords)):
other_name = unique_keywords[other_index]
if osg.name_similarity(name, other_name) > 0.8:
print(' Keywords {} ({}) - {} ({}) are similar'.format(name, unique_keywords_counts[index], other_name, unique_keywords_counts[other_index]))
        # get all names of frameworks and libraries, also using c.code_dependencies_aliases
valid_dependencies = list(c.general_code_dependencies_without_entry.keys())
for entry in self.entries:
if any((x in ('framework', 'library', 'game engine') for x in entry['Keyword'])):
name = entry['Title']
if name in c.code_dependencies_aliases:
valid_dependencies.extend(c.code_dependencies_aliases[name])
else:
valid_dependencies.append(name)
# get all referenced code dependencies
referenced_dependencies = {}
for entry in self.entries:
deps = entry.get('Code dependency', [])
for dependency in deps:
dependency = dependency.value
if dependency in referenced_dependencies:
referenced_dependencies[dependency] += 1
else:
referenced_dependencies[dependency] = 1
# delete those that are valid dependencies
referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies]
# sort by number
referenced_dependencies.sort(key=lambda x: x[1], reverse=True)
# print out
print('Code dependencies not included as entry')
for dep in referenced_dependencies:
print('{} ({})'.format(*dep))
# if there is the "Play" field, it should have "Web" as Platform
for entry in self.entries:
name = entry['File']
if 'Play' in entry:
                if 'Platform' not in entry:
                    print('Entry "{}" has "Play" field but not "Platform" field, add it with "Web"'.format(name))
                elif 'Web' not in entry['Platform']:
print('Entry "{}" has "Play" field but not "Web" in "Platform" field'.format(name))
# javascript/typescript as language but not web as platform?
# if there is a @see-download there should be download fields...
def clean_rejected(self):
"""
        Sorts the rejected games list file alphabetically and removes duplicates.
"""
# sort rejected games list file
sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')
def clean_backlog(self):
"""
        Removes all URLs from the backlog that are already included in entries or in the rejected list,
        then sorts the backlog and removes duplicates.
"""
if not self.entries:
print('entries not yet loaded')
return
# get urls from entries
included_urls = osg.all_urls(self.entries)
included_urls = list(included_urls.keys()) # only need the URLs here
# get urls from rejected file
text = utils.read_text(c.rejected_file)
regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
matches = regex.findall(text)
rejected_urls = []
for match in matches:
urls = match.split(',')
urls = [x.strip() for x in urls]
rejected_urls.extend(urls)
included_urls.extend(rejected_urls)
# those that only have a web archive version, also get the original version
more_urls = []
for url in included_urls:
if url.startswith('https://web.archive.org/web'):
# print(url) # sometimes the http is missing in archive links (would need proper parsing)
url = url[url.index('http', 5):]
more_urls.append(url)
included_urls.extend(more_urls)
# now we strip the urls
stripped_urls = [utils.strip_url(x) for x in included_urls]
stripped_urls = set(stripped_urls) # removes duplicates for performance
# read backlog and get urls from there
text = utils.read_text(c.backlog_file)
text = text.split('\n')
        # remove those whose stripped URL is already in stripped_urls
text = [x for x in text if utils.strip_url(x) not in stripped_urls]
# remove duplicates and sort
text = sorted(list(set(text)), key=str.casefold)
print('backlog contains {} items'.format(len(text)))
# join and save again
text = '\n'.join(text)
utils.write_text(c.backlog_file, text)
print('backlog cleaned')
def check_external_links(self):
"""
        Checks all external links it can find for validity. Prints those with non-OK HTTP responses.
        Only needs to be run from time to time.
"""
        # regex for finding urls (can be in <>, in ]() or after whitespace)
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
# ignore the following patterns (they give false positives here)
ignored_urls = (
'https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/',
'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/',
'http://wiki.srb2.org/')
        # some do redirect, but we nevertheless want the original URL in the database
redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download')
# extract all links from entries
import urllib3
urllib3.disable_warnings() # otherwise we cannot verify those with SSL errors without getting warnings
urls = {}
for entry, _, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
# for each match
for match in matches:
for url in match:
if url and not any((url.startswith(x) for x in ignored_urls)):
# ignore bzr.sourceforge, no web address found
if 'bzr.sourceforge.net/bzrroot/' in url:
continue
# add "/" at the end
if any((url.startswith(x) for x in (
'https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/',
'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))):
url += '/'
if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
url = url[:-4] + '/commits/'
if url.startswith('https://svn.code.sf.net/p/'):
url = 'http' + url[5:] + '/'
if url.startswith('http://cvs.savannah.nongnu.org:/sources/'):
url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/'
if url.startswith('http://cvs.savannah.gnu.org:/sources/'):
url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/'
# generally ".git" at the end is not working well, except sometimes
if url.endswith('.git') and not any((url.startswith(x) for x in (
'https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))):
url = url[:-4]
if url in urls:
urls[url].add(entry)
else:
urls[url] = {entry}
print('found {} unique links'.format(len(urls)))
print("start checking external links (can take a while)")
# now iterate over all urls
for url, names in urls.items():
names = list(names) # was a set
if len(names) == 1:
names = names[0]
try:
verify = True
# some have an expired certificate but otherwise still work
if any((url.startswith(x) for x in (
'https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/',
'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/',
'https://www.opmon-game.ga/'))):
verify = False
r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20,
allow_redirects=True, verify=verify)
if r.status_code == 405: # head method not supported, try get
r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'},
timeout=20, allow_redirects=True, verify=verify)
# check for bad status
if r.status_code != requests.codes.ok:
print('{}: {} - {}'.format(names, url, r.status_code))
# check for redirect
if r.history and url not in redirect_okay:
# only / added or http->https sometimes
redirected_url = r.url
if redirected_url == url + '/':
output = '{}: {} -> {} - redirect "/" at end '
elif redirected_url == 'https' + url[4:]:
output = '{}: {} -> {} - redirect "https" at start'
else:
output = '{}: {} -> {} - redirect '
print(output.format(names, url, redirected_url))
except Exception as e:
error_name = type(e).__name__
if error_name == 'SSLError' and any((url.startswith(x) for x in (
'https://gitorious.org/', 'https://www.freedroid.org/download/'))):
continue # even though verify is False, these errors still get through
print('{}: {} - exception {}'.format(names, url, error_name))
def update_readme_tocs(self):
"""
Recounts entries in sub categories and writes them to the readme.
Also updates the _toc files in the categories directories.
        Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and
        ending with "A collection..".
Needs to be performed regularly.
"""
# completely delete content of toc path
for file in os.listdir(c.tocs_path):
os.remove(os.path.join(c.tocs_path, file))
# read readme
readme_file = os.path.join(c.root_path, 'README.md')
readme_text = utils.read_text(readme_file)
# compile regex for identifying the building blocks in the readme
regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
# apply regex
matches = regex.findall(readme_text)
if len(matches) != 1:
raise RuntimeError('readme file has invalid structure')
matches = matches[0]
start = matches[0]
end = matches[2]
tocs_text = ''
# split into games, tools, frameworks, libraries
games = [x for x in self.entries if not any([y in x['Keyword'] for y in ('tool', 'framework', 'library')])]
tools = [x for x in self.entries if 'tool' in x['Keyword']]
frameworks = [x for x in self.entries if 'framework' in x['Keyword']]
libraries = [x for x in self.entries if 'library' in x['Keyword']]
# create games, tools, frameworks, libraries tocs
title = 'Games'
file = '_games.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(games))
create_toc(title, file, games)
title = 'Tools'
file = '_tools.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(tools))
create_toc(title, file, tools)
title = 'Frameworks'
file = '_frameworks.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(frameworks))
create_toc(title, file, frameworks)
title = 'Libraries'
file = '_libraries.md'
tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(libraries))
create_toc(title, file, libraries)
# create by category
categories_text = []
for keyword in c.recommended_keywords:
filtered = [x for x in self.entries if keyword in x['Keyword']]
title = keyword.capitalize()
name = keyword.replace(' ', '-')
file = '_{}.md'.format(name)
categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
create_toc(title, file, filtered)
categories_text.sort()
tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text))
# create by platform
platforms_text = []
for platform in c.valid_platforms:
filtered = [x for x in self.entries if platform in x.get('Platform', [])]
title = platform
name = platform.lower()
file = '_{}.md'.format(name)
platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered)))
create_toc(title, file, filtered)
tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text))
# insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list)
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end
# write to readme
utils.write_text(readme_file, text)
print('Readme and TOCs updated')
def update_statistics(self):
"""
Generates the statistics page.
Should be done every time the entries change.
"""
if not self.entries:
print('entries not yet loaded')
return
# start the page
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
number_entries = len(self.entries)
rel = lambda x: x / number_entries * 100 # conversion to percent
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries,
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# State (beta, mature, inactive)
statistics += '## State\n\n'
number_state_beta = sum(1 for x in self.entries if 'beta' in x['State'])
number_state_mature = sum(1 for x in self.entries if 'mature' in x['State'])
number_inactive = sum(1 for x in self.entries if osg.is_inactive(x))
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(
number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive,
rel(number_inactive))
if number_inactive > 0:
entries_inactive = [(x['Title'], osg.extract_inactive_year(x)) for x in self.entries if osg.is_inactive(x)]
entries_inactive.sort(key=lambda x: str.casefold(x[0])) # first sort by name
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'
# Language
statistics += '## Code Languages\n\n'
field = 'Code language'
# get all languages together
languages = []
for entry in self.entries:
languages.extend(entry[field])
languages = [x.value for x in languages]
unique_languages = set(languages)
unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'
# Licenses
statistics += '## Code licenses\n\n'
field = 'Code license'
# get all licenses together
licenses = []
for entry in self.entries:
licenses.extend(entry[field])
licenses = [x.value for x in licenses]
unique_licenses = set(licenses)
unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'
# Keywords
statistics += '## Keywords\n\n'
field = 'Keyword'
# get all keywords together
keywords = []
for entry in self.entries:
keywords.extend(entry[field])
keywords = [x.value for x in keywords]
# reduce those starting with "multiplayer"
keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords]
unique_keywords = set(keywords)
unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords]
statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n'
# no download or play field
statistics += '## Entries without download or play fields\n\n'
entries = []
for entry in self.entries:
if 'Download' not in entry and 'Play' not in entry:
entries.append(entry['Title'])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# code hosted not on github, gitlab, bitbucket, launchpad, sourceforge
popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net')
statistics += '## Entries with a code repository not on a popular site\n\n'
entries = []
field = 'Code repository'
for entry in self.entries:
popular = False
for repo in entry[field]:
for popular_repo in popular_code_repositories:
if popular_repo in repo.value:
popular = True
break
# if there were repositories, but none popular, add them to the list
if not popular:
entries.append(entry['Title'])
# print(info[field])
entries.sort(key=str.casefold)
statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n'
# Code dependencies
statistics += '## Code dependencies\n\n'
field = 'Code dependency'
# get all code dependencies together
code_dependencies = []
entries_with_code_dependency = 0
for entry in self.entries:
if field in entry:
code_dependencies.extend(entry[field])
entries_with_code_dependency += 1
code_dependencies = [x.value for x in code_dependencies]
statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency,
rel(entries_with_code_dependency))
unique_code_dependencies = set(code_dependencies)
unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in
unique_code_dependencies]
unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies]
statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n'
# Build systems:
statistics += '## Build systems\n\n'
field = 'Build system'
# get all build systems together
        build_systems = []
        entries_with_build_system = 0
        for entry in self.entries:
            if field in entry['Building']:
                build_systems.extend(entry['Building'][field])
                entries_with_build_system += 1
        build_systems = [x.value for x in build_systems]
        statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(
            rel(entries_with_build_system))
unique_build_systems = set(build_systems)
unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems]
statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join(
unique_build_systems) + '\n\n'
# C, C++ projects without build system information
c_cpp_project_without_build_system = []
for entry in self.entries:
            if field not in entry['Building'] and ('C' in entry['Code language'] or 'C++' in entry['Code language']):
c_cpp_project_without_build_system.append(entry['Title'])
c_cpp_project_without_build_system.sort(key=str.casefold)
statistics += '##### C and C++ projects without build system information ({})\n\n'.format(
len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n'
# C, C++ projects with build system information but without CMake as build system
c_cpp_project_not_cmake = []
        for entry in self.entries:
            if field in entry['Building'] and 'CMake' not in entry['Building'][field] and (
                    'C' in entry['Code language'] or 'C++' in entry['Code language']):
                c_cpp_project_not_cmake.append(entry['Title'])
c_cpp_project_not_cmake.sort(key=str.casefold)
statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format(
len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n'
# Platform
statistics += '## Platform\n\n'
field = 'Platform'
# get all platforms together
        platforms = []
        entries_with_platform = 0
        for entry in self.entries:
            if field in entry:
                platforms.extend(entry[field])
                entries_with_platform += 1
        platforms = [x.value for x in platforms]
        statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(
            rel(entries_with_platform))
unique_platforms = set(platforms)
unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name
unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms]
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
# write to statistics file
utils.write_text(c.statistics_file, statistics)
print('statistics updated')
def update_html(self):
"""
Parses all entries, collects interesting info and stores it in a json file suitable for displaying
with a dynamic table in a browser.
"""
if not self.entries:
print('entries not yet loaded')
return
# make database out of it
db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keyword', 'Source']}
entries = []
for info in self.entries:
# game & description
entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['Title'], info['Home'][0],
r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' +
info['File']),
textwrap.shorten(info.get('Note', ''), width=60, placeholder='..')]
# download
field = 'Download'
if field in info and info[field]:
entry.append('<a href="{}">Link</a>'.format(info[field][0]))
else:
entry.append('')
# state (field state is essential)
entry.append('{} / {}'.format(info['State'][0],
'inactive since {}'.format(osg.extract_inactive_year(info)) if osg.is_inactive(info) else 'active'))
# keywords
keywords = info['Keyword']
keywords = [x.value for x in keywords]
entry.append(', '.join(keywords))
# source
text = []
field = 'Code repository'
if field in info and info[field]:
text.append('<a href="{}">Source</a>'.format(info[field][0].value))
languages = info['Code language']
languages = [x.value for x in languages]
text.append(', '.join(languages))
licenses = info['Code license']
licenses = [x.value for x in licenses]
text.append(', '.join(licenses))
entry.append(' - '.join(text))
# append to entries
entries.append(entry)
# sort entries by game name
entries.sort(key=lambda x: str.casefold(x[0]))
db['data'] = entries
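        # resulting JSON shape (illustrative): {"headings": ["Game", ...], "data": [["<game cell>", "<description>", ...], ...]}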
# output
text = json.dumps(db, indent=1)
utils.write_text(c.json_db_file, text)
print('HTML updated')
def update_repos(self):
"""
        Exports the primary repositories (git/svn/hg) of all entries to a JSON file for local repository updates.
"""
if not self.entries:
print('entries not yet loaded')
return
primary_repos = {'git': [], 'svn': [], 'hg': []}
unconsumed_entries = []
# for every entry filter those that are known git repositories (add additional repositories)
for entry in self.entries:
repos = entry['Code repository']
repos = [x.value for x in repos]
# keep the first and all others containing @add
if not repos:
continue
repos = [repos[0]] + [x for x in repos[1:] if "@add" in x]
for repo in repos:
consumed = False
repo = repo.split(' ')[0].strip()
url = osg.git_repo(repo)
if url:
primary_repos['git'].append(url)
consumed = True
continue
url = osg.svn_repo(repo)
if url:
primary_repos['svn'].append(url)
consumed = True
continue
url = osg.hg_repo(repo)
if url:
primary_repos['hg'].append(url)
consumed = True
continue
if not consumed:
unconsumed_entries.append([entry['Title'], repo])
print('Entry "{}" unconsumed repo: {}'.format(entry['File'], repo))
# sort them alphabetically (and remove duplicates)
for k, v in primary_repos.items():
primary_repos[k] = sorted(set(v))
# statistics of gits
git_repos = primary_repos['git']
print('{} Git repositories'.format(len(git_repos)))
for domain in (
'repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily',
'github.com',
'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'):
print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain))
        # write them to code/archives.json
json_path = os.path.join(c.root_path, 'code', 'archives.json')
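        # archives.json shape (illustrative): {"git": ["https://...", ...], "svn": [...], "hg": [...]}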
text = json.dumps(primary_repos, indent=1)
utils.write_text(json_path, text)
print('Repositories updated')
def collect_git_repos(self):
"""
        For every entry, collects all Git repository URLs and writes them to code/git_repositories.json.
"""
git_repos = []
for entry in self.entries:
repos = entry['Code repository']
repos = [x.value for x in repos]
for repo in repos:
repo = repo.split(' ')[0].strip()
url = osg.git_repo(repo)
if url:
git_repos.append(repo)
# sort them alphabetically (and remove duplicates)
git_repos = sorted(list(set(git_repos)), key=str.casefold)
        # write them to code/git_repositories.json
json_path = os.path.join(c.root_path, 'code', 'git_repositories.json')
text = json.dumps(git_repos, indent=1)
utils.write_text(json_path, text)
def special_ops(self):
"""
For special operations that are one-time and may change.
"""
if not self.entries:
print('entries not yet loaded')
return
# remove all downloads that only have a single entry with @see-home (this is the default anyway)
field = 'Download'
for entry in self.entries:
if field in entry:
content = entry[field]
if len(content) == 1 and content[0].value == '@see-home' and not content[0].comment:
del entry[field]
print('special ops finished')
def complete_run(self):
pass
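        # A full pipeline might chain the individual steps roughly as follows (sketch, order is an assumption):
        #   self.read_entries()
        #   self.check_template_leftovers()
        #   self.check_inconsistencies()
        #   self.clean_rejected()
        #   self.clean_backlog()
        #   self.update_readme_tocs()
        #   self.update_statistics()
        #   self.update_html()
        #   self.update_repos()
        #   self.write_entries()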
if __name__ == "__main__":
m = EntriesMaintainer()
actions = {
'Read entries': m.read_entries,
'Write entries': m.write_entries,
'Check template leftovers': m.check_template_leftovers,
'Check inconsistencies': m.check_inconsistencies,
'Check rejected entries': m.clean_rejected,
'Check external links (takes quite long)': m.check_external_links,
'Clean backlog': m.clean_backlog,
'Update Readme and TOCs': m.update_readme_tocs,
'Update statistics': m.update_statistics,
'Update HTML': m.update_html,
'Update repository list': m.update_repos,
'Special': m.special_ops,
'Complete run': m.complete_run
}
osg_ui.run_simple_button_app('Entries developer', actions)