Generate statistics again and fix inconsistencies across the entries; HTML output with vanilla-datatable included

This commit is contained in:
Trilarion
2018-06-08 14:53:51 +02:00
parent 569c8388ef
commit 8220365691
86 changed files with 2921 additions and 231 deletions

View File

@@ -310,23 +310,24 @@ def parse_entry(content):
# checks
# essential fields
essential_fields = ['home', 'state']
essential_fields = ['home', 'state', 'code repository']
for field in essential_fields:
if field not in info:
print('Essential field "{}" missing in entry {}'.format(field, info['title']))
print('Essential field "{}" missing in entry "{}"'.format(field, info['title']))
return info # so that the rest can run through
# state must contain either beta or mature but not both
v = info['state']
if 'beta' in v != 'mature' in v:
printf('State must be one of <beta, mature> in entry {}'.format(info['title']))
printf('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
return info # so that the rest can run through
# extract inactive
phrase = 'inactive since '
inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
assert len(inactive_year) <= 1
if inactive_year:
info['inactive'] = inactive_year
info['inactive'] = inactive_year[0]
return info
@@ -367,6 +368,7 @@ def assemble_infos():
return infos
def generate_statistics():
"""
Generates the statistics page.
@@ -378,9 +380,6 @@ def generate_statistics():
statistics_path = os.path.join(games_path, 'statistics.md')
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# assemble infos
infos = assemble_infos()
# total number
number_entries = len(infos)
rel = lambda x: x / number_entries * 100 # conversion to percent
@@ -395,53 +394,54 @@ def generate_statistics():
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
if number_inactive > 0:
entries_inactive = [(x['file'], x['inactive']) for x in infos if 'inactive' in x]
entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
entries_inactive.sort(key=lambda x: x[0]) # first sort by name
entries_inactive.sort(key=lambda x: -x[1]) # then sort by inactive year (more recently first)
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'
entries_no_state = [x['file'] for x in infos if 'state' not in x]
if entries_no_state:
entries_no_state.sort()
statistics += '##### Without state tag ({})\n\n'.format(len(entries_no_state)) + ', '.join(entries_no_state) + '\n\n'
# Language
statistics += '## Languages\n\n'
number_no_language = sum(1 for x in infos if 'language' not in x)
statistics += '## Code Languages\n\n'
field = 'code language'
# those without language tag
number_no_language = sum(1 for x in infos if field not in x)
if number_no_language > 0:
statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
entries_no_language = [x['file'] for x in infos if 'language' not in x]
entries_no_language = [x['title'] for x in infos if field not in x]
entries_no_language.sort()
statistics += ', '.join(entries_no_language) + '\n\n'
# get all languages together
languages = []
for info in infos:
if 'language' in info:
languages.extend(info['language'])
if field in info:
languages.extend(info[field])
unique_languages = set(languages)
unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
unique_languages.sort(key=lambda x: x[0]) # first sort by name
unique_languages.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first)
unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_languages]
statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'
# Licenses
statistics += '## Code licenses\n\n'
number_no_license = sum(1 for x in infos if 'license' not in x)
field = 'code license'
# those without license
number_no_license = sum(1 for x in infos if field not in x)
if number_no_license > 0:
statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
entries_no_license = [x['file'] for x in infos if 'license' not in x]
entries_no_license = [x['title'] for x in infos if field not in x]
entries_no_license.sort()
statistics += ', '.join(entries_no_license) + '\n\n'
# get all licenses together
licenses = []
for info in infos:
if 'license' in info:
licenses.append(info['license'])
if field in info:
licenses.extend(info[field])
unique_licenses = set(licenses)
unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
@@ -450,6 +450,23 @@ def generate_statistics():
unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_licenses]
statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'
# Keywords
statistics += '## Keywords\n\n'
field = 'keywords'
# get all keywords together
keywords = []
for info in infos:
if field in info:
keywords.extend(info[field])
unique_keywords = set(keywords)
unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
unique_keywords.sort(key=lambda x: x[0]) # first sort by name
unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first)
unique_keywords = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_keywords]
statistics += '##### Keywords frequency\n\n' + ''.join(unique_keywords) + '\n'
with open(statistics_path, mode='w', encoding='utf-8') as f:
f.write(statistics)
@@ -460,9 +477,6 @@ def export_json():
with a dynamic table in a browser.
"""
# assemble info
infos = assemble_infos()
# make database out of it
db = {}
db['headings'] = ['Name', 'Download']
@@ -489,20 +503,23 @@ if __name__ == "__main__":
games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
readme_file = os.path.realpath(os.path.join(games_path, os.pardir, 'README.md'))
# assemble info
infos = assemble_infos()
# recount and write to readme
#update_readme()
update_readme()
# generate list in toc files
#update_category_tocs()
update_category_tocs()
# generate report
#generate_statistics()
generate_statistics()
# update database for html table
export_json()
# check for unfilled template lines
# check_template_leftovers()
check_template_leftovers()
# check external links (only rarely)
# check_validity_external_links()