generate statistics again and fix inconsistencies across the entries; html output with vanilla-datatable included
@@ -310,23 +310,24 @@ def parse_entry(content):
     # checks

     # essential fields
-    essential_fields = ['home', 'state']
+    essential_fields = ['home', 'state', 'code repository']
     for field in essential_fields:
         if field not in info:
-            print('Essential field "{}" missing in entry {}'.format(field, info['title']))
+            print('Essential field "{}" missing in entry "{}"'.format(field, info['title']))
            return info  # so that the rest can run through

     # state must contain either beta or mature but not both
     v = info['state']
     if 'beta' in v != 'mature' in v:
-        printf('State must be one of <beta, mature> in entry {}'.format(info['title']))
+        printf('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
         return info  # so that the rest can run through

     # extract inactive
     phrase = 'inactive since '
     inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
     assert len(inactive_year) <= 1
     if inactive_year:
-        info['inactive'] = inactive_year
+        info['inactive'] = inactive_year[0]

     return info

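Two pre-existing quirks pass through this hunk unchanged: printf is not a Python built-in (the branch would raise NameError if ever taken), and 'beta' in v != 'mature' in v chains comparison operators, evaluating as ('beta' in v) and (v != 'mature') and ('mature' in v), so it flags entries with both states but never those with neither. A minimal sketch of what the comment seems to intend:

    has_beta, has_mature = 'beta' in v, 'mature' in v
    if has_beta == has_mature:  # both present, or both absent
        print('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
        return info  # so that the rest can run through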
@@ -367,6 +368,7 @@ def assemble_infos():

     return infos

+
 def generate_statistics():
     """
     Generates the statistics page.
@@ -378,9 +380,6 @@ def generate_statistics():
     statistics_path = os.path.join(games_path, 'statistics.md')
     statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

-    # assemble infos
-    infos = assemble_infos()
-
     # total number
     number_entries = len(infos)
     rel = lambda x: x / number_entries * 100  # conversion to percent
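Dropping the in-function assembly works because, as of this commit, __main__ assembles infos once (see the last hunk) and generate_statistics reads the module-level name. A toy sketch of that shared-global pattern, with hypothetical data:

    def assemble_infos():
        return [{'title': 'Example entry', 'state': ['beta']}]  # hypothetical data

    def generate_statistics():
        number_entries = len(infos)  # resolves to the module-level name
        rel = lambda x: x / number_entries * 100
        print(rel(1))  # 100.0 for the single entry

    if __name__ == "__main__":
        infos = assemble_infos()
        generate_statistics()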
@@ -395,53 +394,54 @@ def generate_statistics():
     statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))

     if number_inactive > 0:
-        entries_inactive = [(x['file'], x['inactive']) for x in infos if 'inactive' in x]
+        entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
         entries_inactive.sort(key=lambda x: x[0])  # first sort by name
-        entries_inactive.sort(key=lambda x: -x[1])  # then sort by inactive year (more recently first)
+        entries_inactive.sort(key=lambda x: x[1], reverse=True)  # then sort by inactive year (more recently first)
         entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
         statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

     entries_no_state = [x['file'] for x in infos if 'state' not in x]
     if entries_no_state:
         entries_no_state.sort()
         statistics += '##### Without state tag ({})\n\n'.format(len(entries_no_state)) + ', '.join(entries_no_state) + '\n\n'

     # Language
-    statistics += '## Languages\n\n'
-    number_no_language = sum(1 for x in infos if 'language' not in x)
+    statistics += '## Code Languages\n\n'
+    field = 'code language'
+
+    # those without language tag
+    number_no_language = sum(1 for x in infos if field not in x)
     if number_no_language > 0:
         statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
-        entries_no_language = [x['file'] for x in infos if 'language' not in x]
+        entries_no_language = [x['title'] for x in infos if field not in x]
         entries_no_language.sort()
         statistics += ', '.join(entries_no_language) + '\n\n'

     # get all languages together
     languages = []
     for info in infos:
-        if 'language' in info:
-            languages.extend(info['language'])
+        if field in info:
+            languages.extend(info[field])

     unique_languages = set(languages)
     unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
     unique_languages.sort(key=lambda x: x[0])  # first sort by name
-    unique_languages.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
+    unique_languages.sort(key=lambda x: x[1], reverse=True)  # then sort by occurrence (highest occurrence first)
     unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_languages]
     statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

     # Licenses
     statistics += '## Code licenses\n\n'
-    number_no_license = sum(1 for x in infos if 'license' not in x)
+    field = 'code license'
+
+    # those without license
+    number_no_license = sum(1 for x in infos if field not in x)
     if number_no_license > 0:
         statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
-        entries_no_license = [x['file'] for x in infos if 'license' not in x]
+        entries_no_license = [x['title'] for x in infos if field not in x]
         entries_no_license.sort()
         statistics += ', '.join(entries_no_license) + '\n\n'

     # get all licenses together
     licenses = []
     for info in infos:
-        if 'license' in info:
-            licenses.append(info['license'])
+        if field in info:
+            licenses.extend(info[field])

     unique_licenses = set(licenses)
     unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
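The move from key=lambda x: -x[1] to reverse=True is more than style: the inactive year is stored as text sliced out of the state field (inactive_year[0] in the first hunk), and unary minus on a string raises TypeError, while reverse=True orders any comparable type. The two consecutive sorts rely on list.sort being stable; a toy illustration:

    entries = [('b', '2016'), ('a', '2017'), ('c', '2016')]
    entries.sort(key=lambda x: x[0])                # first by name
    entries.sort(key=lambda x: x[1], reverse=True)  # then by year, newest first
    # result: [('a', '2017'), ('b', '2016'), ('c', '2016')]
    # the removed key=lambda x: -x[1] would raise TypeError on string years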
@@ -450,6 +450,23 @@ def generate_statistics():
     unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_licenses]
     statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n'

+    # Keywords
+    statistics += '## Keywords\n\n'
+    field = 'keywords'
+
+    # get all keywords together
+    keywords = []
+    for info in infos:
+        if field in info:
+            keywords.extend(info[field])
+
+    unique_keywords = set(keywords)
+    unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords]
+    unique_keywords.sort(key=lambda x: x[0])  # first sort by name
+    unique_keywords.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
+    unique_keywords = ['- {} ({:.1f}%)\n'.format(x[0], x[1]*100) for x in unique_keywords]
+    statistics += '##### Keywords frequency\n\n' + ''.join(unique_keywords) + '\n'
+
     with open(statistics_path, mode='w', encoding='utf-8') as f:
         f.write(statistics)
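The new Keywords block repeats the set/count tally used for languages and licenses (and keeps the -x[1] sort key, which is harmless here because the frequencies are floats). The same tally could be written more compactly with collections.Counter; a sketch with made-up keywords, not what the script does:

    from collections import Counter

    keywords = ['strategy', 'rpg', 'strategy', 'puzzle']
    total = len(keywords)
    unique_keywords = [(k, n / total) for k, n in sorted(Counter(keywords).items())]
    unique_keywords.sort(key=lambda x: x[1], reverse=True)
    # [('strategy', 0.5), ('puzzle', 0.25), ('rpg', 0.25)]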
@@ -460,9 +477,6 @@ def export_json():
     with a dynamic table in a browser.
     """

-    # assemble info
-    infos = assemble_infos()
-
     # make database out of it
     db = {}
     db['headings'] = ['Name', 'Download']
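With the in-function assembly gone, export_json also reads the module-level infos. The headings line mirrors the object shape that Vanilla-DataTables can be fed ({headings: [...], data: [[...]]}); a sketch of the resulting JSON, with a hypothetical row and output filename:

    import json

    db = {'headings': ['Name', 'Download'],
          'data': [['Example Game', 'https://example.org/download']]}  # hypothetical row
    with open('games.json', mode='w', encoding='utf-8') as f:  # filename assumed
        json.dump(db, f, indent=1)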
@@ -489,20 +503,23 @@ if __name__ == "__main__":
     games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
     readme_file = os.path.realpath(os.path.join(games_path, os.pardir, 'README.md'))

+    # assemble info
+    infos = assemble_infos()
+
     # recount and write to readme
-    #update_readme()
+    update_readme()

     # generate list in toc files
-    #update_category_tocs()
+    update_category_tocs()

     # generate report
-    #generate_statistics()
+    generate_statistics()

     # update database for html table
     export_json()

     # check for unfilled template lines
-    # check_template_leftovers()
+    check_template_leftovers()

     # check external links (only rarely)
     # check_validity_external_links()