generate statistics again and fix inconsistencies across the entries; html output with vanilla-datatable included
@@ -310,23 +310,24 @@ def parse_entry(content):
     # checks

     # essential fields
-    essential_fields = ['home', 'state']
+    essential_fields = ['home', 'state', 'code repository']
     for field in essential_fields:
         if field not in info:
-            print('Essential field "{}" missing in entry {}'.format(field, info['title']))
+            print('Essential field "{}" missing in entry "{}"'.format(field, info['title']))
            return info  # so that the rest can run through

     # state must contain either beta or mature but not both
     v = info['state']
     if 'beta' in v != 'mature' in v:
-        printf('State must be one of <beta, mature> in entry {}'.format(info['title']))
+        printf('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
         return info  # so that the rest can run through

     # extract inactive
     phrase = 'inactive since '
     inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
     assert len(inactive_year) <= 1
     if inactive_year:
-        info['inactive'] = inactive_year
+        info['inactive'] = inactive_year[0]

     return info

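Two pre-existing quirks pass through this hunk unchanged: printf is not a Python built-in (the branch would raise NameError if ever taken), and 'beta' in v != 'mature' in v chains comparison operators, evaluating as ('beta' in v) and (v != 'mature') and ('mature' in v), so it flags entries with both states but never those with neither. A minimal sketch of what the comment seems to intend:

    has_beta, has_mature = 'beta' in v, 'mature' in v
    if has_beta == has_mature:  # both present, or both absent
        print('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
        return info  # so that the rest can run through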
@@ -367,6 +368,7 @@ def assemble_infos():

     return infos

+
 def generate_statistics():
     """
     Generates the statistics page.
@@ -378,9 +380,6 @@ def generate_statistics():
     statistics_path = os.path.join(games_path, 'statistics.md')
     statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

-    # assemble infos
-    infos = assemble_infos()
-
     # total number
     number_entries = len(infos)
     rel = lambda x: x / number_entries * 100  # conversion to percent
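Dropping the in-function assembly works because, as of this commit, __main__ assembles infos once (see the last hunk) and generate_statistics reads the module-level name. A toy sketch of that shared-global pattern, with hypothetical data:

    def assemble_infos():
        return [{'title': 'Example entry', 'state': ['beta']}]  # hypothetical data

    def generate_statistics():
        number_entries = len(infos)  # resolves to the module-level name
        rel = lambda x: x / number_entries * 100
        print(rel(1))  # 100.0 for the single entry

    if __name__ == "__main__":
        infos = assemble_infos()
        generate_statistics()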
@@ -395,53 +394,54 @@ def generate_statistics():
     statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))

     if number_inactive > 0:
-        entries_inactive = [(x['file'], x['inactive']) for x in infos if 'inactive' in x]
+        entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
         entries_inactive.sort(key=lambda x: x[0])  # first sort by name
-        entries_inactive.sort(key=lambda x: -x[1])  # then sort by inactive year (more recently first)
+        entries_inactive.sort(key=lambda x: x[1], reverse=True)  # then sort by inactive year (more recently first)
         entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
         statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

     entries_no_state = [x['file'] for x in infos if 'state' not in x]
     if entries_no_state:
         entries_no_state.sort()
         statistics += '##### Without state tag ({})\n\n'.format(len(entries_no_state)) + ', '.join(entries_no_state) + '\n\n'

     # Language
-    statistics += '## Languages\n\n'
-    number_no_language = sum(1 for x in infos if 'language' not in x)
+    statistics += '## Code Languages\n\n'
+    field = 'code language'
+
+    # those without language tag
+    number_no_language = sum(1 for x in infos if field not in x)
     if number_no_language > 0:
         statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
-        entries_no_language = [x['file'] for x in infos if 'language' not in x]
+        entries_no_language = [x['title'] for x in infos if field not in x]
         entries_no_language.sort()
         statistics += ', '.join(entries_no_language) + '\n\n'

     # get all languages together
     languages = []
     for info in infos:
-        if 'language' in info:
-            languages.extend(info['language'])
+        if field in info:
+            languages.extend(info[field])

     unique_languages = set(languages)
     unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
     unique_languages.sort(key=lambda x: x[0])  # first sort by name
-    unique_languages.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
+    unique_languages.sort(key=lambda x: x[1], reverse=True)  # then sort by occurrence (highest occurrence first)
     unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_languages]
     statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

     # Licenses
     statistics += '## Code licenses\n\n'
-    number_no_license = sum(1 for x in infos if 'license' not in x)
+    field = 'code license'
+
+    # those without license
+    number_no_license = sum(1 for x in infos if field not in x)
     if number_no_license > 0:
         statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
-        entries_no_license = [x['file'] for x in infos if 'license' not in x]
+        entries_no_license = [x['title'] for x in infos if field not in x]
         entries_no_license.sort()
         statistics += ', '.join(entries_no_license) + '\n\n'

     # get all licenses together
     licenses = []
     for info in infos:
-        if 'license' in info:
-            licenses.append(info['license'])
+        if field in info:
+            licenses.extend(info[field])

     unique_licenses = set(licenses)
     unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
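The move from key=lambda x: -x[1] to reverse=True is more than style: the inactive year is stored as text sliced out of the state field (inactive_year[0] in the first hunk), and unary minus on a string raises TypeError, while reverse=True orders any comparable type. The two consecutive sorts rely on list.sort being stable; a toy illustration:

    entries = [('b', '2016'), ('a', '2017'), ('c', '2016')]
    entries.sort(key=lambda x: x[0])                # first by name
    entries.sort(key=lambda x: x[1], reverse=True)  # then by year, newest first
    # result: [('a', '2017'), ('b', '2016'), ('c', '2016')]
    # the removed key=lambda x: -x[1] would raise TypeError on string years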
@@ -450,6 +450,23 @@ def generate_statistics():
     unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_licenses]
     statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

+    # Keywords
+    statistics += '## Keywords\n\n'
+    field = 'keywords'
+
+    # get all keywords together
+    keywords = []
+    for info in infos:
+        if field in info:
+            keywords.extend(info[field])
+
+    unique_keywords = set(keywords)
+    unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
+    unique_keywords.sort(key=lambda x: x[0])  # first sort by name
+    unique_keywords.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
+    unique_keywords = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_keywords]
+    statistics += '##### Keywords frequency\n\n' + ''.join(unique_keywords) + '\n'
+
     with open(statistics_path, mode='w', encoding='utf-8') as f:
         f.write(statistics)
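The new Keywords block repeats the set/count tally used for languages and licenses (and keeps the -x[1] sort key, which is harmless here because the frequencies are floats). The same tally could be written more compactly with collections.Counter; a sketch with made-up keywords, not what the script does:

    from collections import Counter

    keywords = ['strategy', 'rpg', 'strategy', 'puzzle']
    total = len(keywords)
    unique_keywords = [(k, n / total) for k, n in sorted(Counter(keywords).items())]
    unique_keywords.sort(key=lambda x: x[1], reverse=True)
    # [('strategy', 0.5), ('puzzle', 0.25), ('rpg', 0.25)]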
@@ -460,9 +477,6 @@ def export_json():
     with a dynamic table in a browser.
     """

-    # assemble info
-    infos = assemble_infos()
-
     # make database out of it
     db = {}
     db['headings'] = ['Name', 'Download']
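With the in-function assembly gone, export_json also reads the module-level infos. The headings line mirrors the object shape that Vanilla-DataTables can be fed ({headings: [...], data: [[...]]}); a sketch of the resulting JSON, with a hypothetical row and output filename:

    import json

    db = {'headings': ['Name', 'Download'],
          'data': [['Example Game', 'https://example.org/download']]}  # hypothetical row
    with open('games.json', mode='w', encoding='utf-8') as f:  # filename assumed
        json.dump(db, f, indent=1)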
@@ -489,20 +503,23 @@ if __name__ == "__main__":
     games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
     readme_file = os.path.realpath(os.path.join(games_path, os.pardir, 'README.md'))

+    # assemble info
+    infos = assemble_infos()
+
     # recount and write to readme
-    #update_readme()
+    update_readme()

     # generate list in toc files
-    #update_category_tocs()
+    update_category_tocs()

     # generate report
-    #generate_statistics()
+    generate_statistics()

     # update database for html table
     export_json()

     # check for unfilled template lines
-    # check_template_leftovers()
+    check_template_leftovers()

     # check external links (only rarely)
     # check_validity_external_links()