more statistics

2017-12-11 14:39:21 +01:00
parent 3cbf42e011
commit 3617df6c6d
3 changed files with 60 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -22,7 +22,7 @@ or fork this repository and submit a pull request.

 Help: [MarkDown Help](https://help.github.com/articles/github-flavored-markdown), [Markdown Cheatsheet](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet)

-See also the [statistics](games/statistics.md) page showing interesting infos about code repositories, usage of licenses etc.
+See also the **[statistics](games/statistics.md)** page showing interesting information about code repositories, usage of licenses, etc.

 ## Background

--- a/games/maintenance.py
+++ b/games/maintenance.py
@ -298,6 +298,24 @@ def check_template_leftovers():
                if content.find(check_string) >= 0:
                    print('{}: found {}'.format(os.path.basename(entry_path), check_string))

+def parse_entry(content):
+    """
+    Return a dict of features parsed from an entry's text; 'Language' (a list of str) is set only if a "- Language(s): ..." line is found.
+    """
+
+    info = {}
+
+    # language: comma-separated names after "- Language(s): "; only the first matching line is used
+    regex = re.compile(r"- Language\(s\): (.*)")
+    matches = regex.findall(content)
+    if matches:
+        languages = matches[0].split(',')
+        languages = [x.strip() for x in languages]  # trim whitespace around each name
+        info['Language'] = languages
+
+    return info
+
+
 def generate_statistics():
    """

@ -309,6 +327,7 @@ def generate_statistics():
    category_paths = get_category_paths()

    # for each category
+    infos = []
    for category_path in category_paths:
        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)
@ -318,6 +337,37 @@ def generate_statistics():
            with open(entry_path) as f:
                content = f.read()

+            info = parse_entry(content)
+            info['file'] = os.path.basename(entry_path)
+            infos.append(info)
+
+    # total number
+    number_entries = len(infos)
+    statistics += 'analyzed {} entries\n\n'.format(number_entries)
+
+
+    # Language
+    statistics += '## Languages\n\n'
+    number_no_language = sum(1 for x in infos if 'Language' not in x)
+    if number_no_language > 0:
+        statistics += '{} ({:.1f}%) have no language tag\n'.format(number_no_language, number_no_language / number_entries * 100)
+        entries_no_language = [x['file'][:-3] for x in infos if 'Language' not in x] # [:-3] to cut off the .md
+        entries_no_language.sort()
+        statistics += '  ' + ', '.join(entries_no_language) + '\n\n'
+
+    # get all languages together
+    languages = []
+    for info in infos:
+        if 'Language' in info:
+            languages.extend(info['Language'])
+
+    unique_languages = set(languages)
+    unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
+    unique_languages.sort(key=lambda x: -x[1])
+    unique_languages = ['{} ({:.1f}%)'.format(x[0], x[1]*100) for x in unique_languages]
+    statistics += ', '.join(unique_languages) + '\n\n'
+
+
    with open(statistics_path, 'w') as f:
        f.write(statistics)

--- a/games/statistics.md
+++ b/games/statistics.md
@ -1,3 +1,12 @@
 [comment]: # (autogenerated content, do not edit)
 # Statistics

+analyzed 180 entries
+
+## Languages
+
+15 (8.3%) have no language tag
+  crimson_fields, dragon_history, evol_online, land_of_fire, meritous, murder_in_the_public_domain, open_general, openal, openrpgmaker, parpg, solaris, space_station_13, star_maiden_rio, vegatrek, xconq
+
+C++ (42.3%), C (24.5%), Java (7.2%), Python (6.7%), Lua (3.8%), Ren'py (1.9%), C# (1.4%), Javascript (1.4%), Haxe (1.0%), HTML (1.0%), PHP (1.0%), D (0.5%), Kotlin (0.5%), Clojure (0.5%), Pascal (0.5%), Delphi Pascal (0.5%), GDScript (0.5%), Objective C (0.5%), Typescript (0.5%), Java 8 (0.5%), PHO (0.5%), Ada (0.5%), Basic (0.5%), Delphi (0.5%), Angelscript (0.5%), VB (0.5%), ActionScript (0.5%)
+