Adapted generation of statistics and JSON results of the maintenance script to the new layout
This commit is contained in:
@ -22,29 +22,25 @@ valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywo
|
||||
valid_platforms = ('Windows', 'Linux', 'macOS', 'Android', 'Browser')
|
||||
recommended_keywords = ('action', 'arcade', 'adventure', 'visual novel', 'sports', 'platform', 'puzzle', 'role playing', 'simulation', 'strategy', 'card game', 'board game', 'music', 'educational', 'tool', 'game engine', 'framework', 'library')
|
||||
|
||||
def extract_overview_for_toc(file):
|
||||
|
||||
def entry_iterator():
|
||||
"""
|
||||
Parses a file for some interesting fields and concatenates the content.
|
||||
|
||||
To be displayed after the game name in the category TOCs.
|
||||
|
||||
"""
|
||||
info = infos[file]
|
||||
|
||||
output = []
|
||||
# get all entries (ignore everything starting with underscore)
|
||||
entries = os.listdir(games_path)
|
||||
entries = (x for x in entries if not x.startswith('_'))
|
||||
|
||||
if 'code language' in info:
|
||||
output.extend(info['code language'])
|
||||
# iterate over all entries
|
||||
for entry in entries:
|
||||
entry_path = os.path.join(games_path, entry)
|
||||
|
||||
if 'code license' in info:
|
||||
output.extend(info['code license'])
|
||||
# read entry
|
||||
content = read_text(entry_path)
|
||||
|
||||
# state
|
||||
if 'state' in info:
|
||||
output.extend(info['state'])
|
||||
|
||||
output = ", ".join(output)
|
||||
|
||||
return output
|
||||
# yield
|
||||
yield entry, entry_path, content
|
||||
|
||||
|
||||
def update_readme_and_tocs(infos):
|
||||
@ -57,7 +53,7 @@ def update_readme_and_tocs(infos):
|
||||
|
||||
Needs to be performed regularly.
|
||||
"""
|
||||
print('update readme file')
|
||||
print('update readme and toc files')
|
||||
|
||||
# delete all toc files
|
||||
entries = os.listdir(games_path)
|
||||
@ -66,6 +62,7 @@ def update_readme_and_tocs(infos):
|
||||
os.remove(os.path.join(games_path, entry))
|
||||
|
||||
# read readme
|
||||
readme_file = os.path.join(root_path, 'README.md')
|
||||
readme_text = read_text(readme_file)
|
||||
|
||||
# compile regex for identifying the building blocks
|
||||
@ -82,14 +79,14 @@ def update_readme_and_tocs(infos):
|
||||
# create the 'All' toc and its readme entry
|
||||
title = 'All'
|
||||
file = '_all.md'
|
||||
update = ['- **[{}](games/{})** ({})\n'.format(title, file, len(infos))]
|
||||
update = ['- **[{}](games/{}#{})** ({})\n'.format(title, file, title, len(infos))]
|
||||
create_toc(title, file, infos)
|
||||
|
||||
for keyword in recommended_keywords:
|
||||
infos_filtered = [x for x in infos if keyword in x['keywords']]
|
||||
title = keyword.capitalize()
|
||||
file = '_{}.md'.format(keyword)
|
||||
update.append('- **[{}](games/{})** ({})\n'.format(title, file, len(infos_filtered)))
|
||||
update.append('- **[{}](games/{}#{})** ({})\n'.format(title, file, title, len(infos_filtered)))
|
||||
create_toc(title, file, infos_filtered)
|
||||
update = ''.join(update)
|
||||
|
||||
@ -121,79 +118,26 @@ def create_toc(title, file, entries):
|
||||
# add to text
|
||||
text += '\n'.join(rows)
|
||||
|
||||
# write to toc file
|
||||
write_text(toc_file, text)
|
||||
|
||||
|
||||
|
||||
def update_category_tocs():
|
||||
"""
|
||||
Lists all entries in all sub folders and generates the list in the toc file.
|
||||
|
||||
Needs to be performed regularly.
|
||||
"""
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
print('generate toc for {}'.format(os.path.basename(category_path)))
|
||||
|
||||
# read toc header line
|
||||
toc_file = os.path.join(category_path, TOC)
|
||||
toc_header = read_first_line(toc_file) # stays as is
|
||||
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
# get titles (discarding first two ("# ") and last ("\n") characters)
|
||||
titles = [read_first_line(path)[2:-1] for path in entry_paths]
|
||||
|
||||
# get more interesting info
|
||||
more = [extract_overview_for_toc(path) for path in entry_paths]
|
||||
|
||||
# combine name, file name and more info
|
||||
info = zip(titles, [os.path.basename(path) for path in entry_paths], more)
|
||||
|
||||
# sort according to entry title (should be unique)
|
||||
info = sorted(info, key=lambda x:x[0])
|
||||
|
||||
# assemble output
|
||||
update = ['- **[{}]({})** ({})\n'.format(*entry) for entry in info]
|
||||
update = "".join(update)
|
||||
|
||||
# combine with toc header
|
||||
text = toc_header + '\n' + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "\n[comment]: # (end of autogenerated content)"
|
||||
|
||||
# write to toc file
|
||||
with open(toc_file, mode='w', encoding='utf-8') as f:
|
||||
f.write(text)
|
||||
|
||||
|
||||
def check_validity_external_links():
|
||||
"""
|
||||
Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
|
||||
from time to time.
|
||||
"""
|
||||
|
||||
print("check external links (can take a while)")
|
||||
|
||||
# regex for finding urls (can be enclosed in <>, in the () of a markdown link, or preceded by whitespace)
|
||||
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+)")
|
||||
|
||||
# count
|
||||
number_checked_links = 0
|
||||
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
print('check links for {}'.format(os.path.basename(category_path)))
|
||||
|
||||
# get entry paths
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
# for each entry
|
||||
for entry_path in entry_paths:
|
||||
# read entry
|
||||
content = read_text(entry_path)
|
||||
# iterate over all entries
|
||||
for _, entry_path, content in entry_iterator():
|
||||
|
||||
# apply regex
|
||||
matches = regex.findall(content)
|
||||
@ -232,43 +176,36 @@ def check_template_leftovers():
|
||||
Should be run only occasionally.
|
||||
"""
|
||||
|
||||
print('check for template leftovers')
|
||||
|
||||
# load template and get all lines
|
||||
text = read_text(os.path.join(games_path, 'template.md'))
|
||||
text = read_text(os.path.join(root_path, 'template.md'))
|
||||
text = text.split('\n')
|
||||
check_strings = [x for x in text if x and not x.startswith('##')]
|
||||
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
# iterate over all entries
|
||||
for _, entry_path, content in entry_iterator():
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
for check_string in check_strings:
|
||||
if content.find(check_string) >= 0:
|
||||
raise RuntimeError('{}: found {}'.format(os.path.basename(entry_path), check_string))
|
||||
|
||||
for entry_path in entry_paths:
|
||||
# read it line by line
|
||||
content = read_text(entry_path)
|
||||
|
||||
for check_string in check_strings:
|
||||
if content.find(check_string) >= 0:
|
||||
print('{}: found {}'.format(os.path.basename(entry_path), check_string))
|
||||
|
||||
def fix_keywords():
|
||||
"""
|
||||
Fixes the keywords.
|
||||
"""
|
||||
|
||||
print('fix keywords')
|
||||
|
||||
regex = re.compile(r"(.*)(- Keywords:.*)(- Code repository: .*)", re.DOTALL)
|
||||
|
||||
# get all entries
|
||||
# get all entries (ignore everything starting with underscore)
|
||||
entries = os.listdir(games_path)
|
||||
entries = (x for x in entries if not x.startswith('_'))
|
||||
|
||||
# iterate over all entries
|
||||
for entry in entries:
|
||||
entry_path = os.path.join(games_path, entry)
|
||||
|
||||
# read entry
|
||||
content = read_text(entry_path)
|
||||
for entry, entry_path, content in entry_iterator():
|
||||
|
||||
# match with regex
|
||||
matches = regex.findall(content)
|
||||
@ -308,6 +245,7 @@ def fix_keywords():
|
||||
# write again
|
||||
write_text(entry_path, new_content)
|
||||
|
||||
|
||||
def parse_entry(content):
|
||||
"""
|
||||
Returns a dictionary of the features of the content
|
||||
@ -442,19 +380,13 @@ def assemble_infos():
|
||||
Parses all entries and assembles interesting infos about them.
|
||||
"""
|
||||
|
||||
print('assemble game infos')
|
||||
|
||||
# a database of all important infos about the entries
|
||||
infos = []
|
||||
|
||||
# get all entries (ignore everything starting with underscore)
|
||||
entries = os.listdir(games_path)
|
||||
entries = (x for x in entries if not x.startswith('_'))
|
||||
|
||||
# iterate over all entries
|
||||
for entry in entries:
|
||||
entry_path = os.path.join(games_path, entry)
|
||||
|
||||
# read entry
|
||||
content = read_text(entry_path)
|
||||
for entry, _, content in entry_iterator():
|
||||
|
||||
# parse entry
|
||||
info = parse_entry(content)
|
||||
@ -468,35 +400,35 @@ def assemble_infos():
|
||||
return infos
|
||||
|
||||
|
||||
def generate_statistics():
|
||||
def update_statistics(infos):
|
||||
"""
|
||||
Generates the statistics page.
|
||||
|
||||
Should be done every time the entries change.
|
||||
"""
|
||||
|
||||
# for this function replace infos with infos.values
|
||||
infois = infos.values()
|
||||
print('update statistics')
|
||||
|
||||
# start the page
|
||||
statistics_path = os.path.join(games_path, 'statistics.md')
|
||||
statistics_file = os.path.join(root_path, 'statistics.md')
|
||||
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
|
||||
|
||||
# total number
|
||||
number_entries = len(infois)
|
||||
number_entries = len(infos)
|
||||
rel = lambda x: x / number_entries * 100 # conversion to percent
|
||||
|
||||
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# State (beta, mature, inactive)
|
||||
statistics += '## State\n\n'
|
||||
|
||||
number_state_beta = sum(1 for x in infois if 'beta' in x['state'])
|
||||
number_state_mature = sum(1 for x in infois if 'mature' in x['state'])
|
||||
number_inactive = sum(1 for x in infois if 'inactive' in x)
|
||||
number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
|
||||
number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
|
||||
number_inactive = sum(1 for x in infos if 'inactive' in x)
|
||||
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
|
||||
|
||||
if number_inactive > 0:
|
||||
entries_inactive = [(x['title'], x['inactive']) for x in infois if 'inactive' in x]
|
||||
entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
|
||||
entries_inactive.sort(key=lambda x: x[0]) # first sort by name
|
||||
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
|
||||
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
|
||||
@ -517,7 +449,7 @@ def generate_statistics():
|
||||
|
||||
# get all languages together
|
||||
languages = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
languages.extend(info[field])
|
||||
|
||||
@ -533,16 +465,16 @@ def generate_statistics():
|
||||
field = 'code license'
|
||||
|
||||
# those without license
|
||||
number_no_license = sum(1 for x in infois if field not in x)
|
||||
number_no_license = sum(1 for x in infos if field not in x)
|
||||
if number_no_license > 0:
|
||||
statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
|
||||
entries_no_license = [x['title'] for x in infois if field not in x]
|
||||
entries_no_license = [x['title'] for x in infos if field not in x]
|
||||
entries_no_license.sort()
|
||||
statistics += ', '.join(entries_no_license) + '\n\n'
|
||||
|
||||
# get all licenses together
|
||||
licenses = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
licenses.extend(info[field])
|
||||
|
||||
@ -559,7 +491,7 @@ def generate_statistics():
|
||||
|
||||
# get all keywords together
|
||||
keywords = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
keywords.extend(info[field])
|
||||
|
||||
@ -574,7 +506,7 @@ def generate_statistics():
|
||||
statistics += '## Entries without download or play fields\n\n'
|
||||
|
||||
entries = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if 'download' not in info and 'play' not in info:
|
||||
entries.append(info['title'])
|
||||
entries.sort()
|
||||
@ -586,7 +518,7 @@ def generate_statistics():
|
||||
|
||||
entries = []
|
||||
field = 'code repository'
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
popular = False
|
||||
for repo in info[field]:
|
||||
@ -607,11 +539,11 @@ def generate_statistics():
|
||||
|
||||
# get all build systems together
|
||||
build_systems = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
build_systems.extend(info[field])
|
||||
|
||||
statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(len(build_systems) / len(infois) * 100)
|
||||
statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format(rel(len(build_systems)))
|
||||
|
||||
unique_build_systems = set(build_systems)
|
||||
unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems]
|
||||
@ -622,7 +554,7 @@ def generate_statistics():
|
||||
|
||||
# C, C++ projects without build system information
|
||||
c_cpp_project_without_build_system = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field not in info and ('C' in info['code language'] or 'C++' in info['code language']):
|
||||
c_cpp_project_without_build_system.append(info['title'])
|
||||
c_cpp_project_without_build_system.sort()
|
||||
@ -630,7 +562,7 @@ def generate_statistics():
|
||||
|
||||
# C, C++ projects with build system information but without CMake as build system
|
||||
c_cpp_project_not_cmake = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info and 'CMake' in info[field] and ('C' in info['code language'] or 'C++' in info['code language']):
|
||||
c_cpp_project_not_cmake.append(info['title'])
|
||||
c_cpp_project_not_cmake.sort()
|
||||
@ -642,11 +574,11 @@ def generate_statistics():
|
||||
|
||||
# get all platforms together
|
||||
platforms = []
|
||||
for info in infois:
|
||||
for info in infos:
|
||||
if field in info:
|
||||
platforms.extend(info[field])
|
||||
|
||||
statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(len(platforms) / len(infois) * 100)
|
||||
statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms)))
|
||||
|
||||
unique_platforms = set(platforms)
|
||||
unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms]
|
||||
@ -655,25 +587,27 @@ def generate_statistics():
|
||||
unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_platforms]
|
||||
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
|
||||
|
||||
with open(statistics_path, mode='w', encoding='utf-8') as f:
|
||||
f.write(statistics)
|
||||
# write to statistics file
|
||||
write_text(statistics_file, statistics)
|
||||
|
||||
|
||||
def export_json():
|
||||
def export_json(infos):
|
||||
"""
|
||||
Parses all entries, collects interesting info and stores it in a json file suitable for displaying
|
||||
with a dynamic table in a browser.
|
||||
"""
|
||||
|
||||
print('export to json for web display')
|
||||
|
||||
# make database out of it
|
||||
db = {'headings': ['Game', 'Description', 'Download', 'Category', 'State', 'Keywords', 'Source']}
|
||||
db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keywords', 'Source']}
|
||||
|
||||
entries = []
|
||||
for info in infos.values():
|
||||
for info in infos:
|
||||
|
||||
# game & description
|
||||
entry = ['{} (<a href="{}">home</a>, <a href="{}">entry</a>)'.format(info['title'], info['home'][0],
|
||||
r'https://github.com/Trilarion/opensourcegames/blob/master/games/' + info['path']),
|
||||
r'https://github.com/Trilarion/opensourcegames/blob/master/games/' + info['file']),
|
||||
textwrap.shorten(info['description'], width=60, placeholder='..')]
|
||||
|
||||
# download
|
||||
@ -683,9 +617,6 @@ def export_json():
|
||||
else:
|
||||
entry.append('')
|
||||
|
||||
# category
|
||||
entry.append(info['category'])
|
||||
|
||||
# state (field state is essential)
|
||||
entry.append('{} / {}'.format(info['state'][0], 'inactive since {}'.format(info['inactive']) if 'inactive' in info else 'active'))
|
||||
|
||||
@ -780,13 +711,13 @@ def bzr_repo(repo):
|
||||
return None
|
||||
|
||||
|
||||
def update_primary_code_repositories():
|
||||
def export_primary_code_repositories_json():
|
||||
|
||||
primary_repos = {'git':[],'svn':[],'hg':[],'bzr':[]}
|
||||
unconsumed_entries = []
|
||||
|
||||
# for every entry filter those that are known git repositories (add additional repositories)
|
||||
for info in infos.values():
|
||||
for info in infos:
|
||||
field = 'code repository-raw'
|
||||
# if field 'Code repository' is available
|
||||
if field in info:
|
||||
@ -837,36 +768,37 @@ def update_primary_code_repositories():
|
||||
primary_repos[k] = sorted(set(v))
|
||||
|
||||
# write them to tools/archives.json
|
||||
json_path = os.path.join(games_path, os.path.pardir, 'tools', 'archives.json')
|
||||
json_path = os.path.join(root_path, 'tools', 'archives.json')
|
||||
text = json.dumps(primary_repos, indent=1)
|
||||
write_text(json_path, text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# paths
|
||||
games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
|
||||
readme_file = os.path.realpath(os.path.join(games_path, os.pardir, 'README.md'))
|
||||
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
||||
games_path = os.path.join(root_path, 'games')
|
||||
|
||||
# check for unfilled template lines
|
||||
check_template_leftovers()
|
||||
|
||||
# fix keywords
|
||||
fix_keywords()
|
||||
|
||||
# assemble info
|
||||
infos = assemble_infos()
|
||||
|
||||
# recount and wriite to readme and to tocs
|
||||
# recount and write to readme and to tocs
|
||||
update_readme_and_tocs(infos)
|
||||
|
||||
# generate report
|
||||
#generate_statistics()
|
||||
update_statistics(infos)
|
||||
|
||||
# update database for html table
|
||||
#export_json()
|
||||
export_json(infos)
|
||||
|
||||
# check for unfilled template lines
|
||||
#check_template_leftovers()
|
||||
|
||||
# fix keywords
|
||||
# fix_keywords()
|
||||
# collect list of primary code repositories
|
||||
export_primary_code_repositories_json()
|
||||
|
||||
# check external links (only rarely)
|
||||
# check_validity_external_links()
|
||||
|
||||
# collect list of primary code repositories
|
||||
#update_primary_code_repositories()
|
||||
|
Reference in New Issue
Block a user