change ValueWithComment from composition to Inheritance (simplifies code a lot)

2021-10-06 13:42:21 +02:00
parent b291102272
commit 32907d0498
17 changed files with 277 additions and 238 deletions
--- a/code/utils/constants.py
+++ b/code/utils/constants.py
@ -68,9 +68,10 @@ fields_without_comments = ('Inspiration', 'Play', 'Download', 'Platform', 'Code
 recommended_keywords = (
    'action', 'arcade', 'adventure', 'visual novel', 'sports', 'platform', 'puzzle', 'role playing', 'simulation',
    'strategy', 'cards', 'board', 'music', 'educational', 'tool', 'game engine', 'framework', 'library', 'remake')
+# TODO remove 'remake' from the recommended keywords (should maybe be treated the same as 'clone'), i.e. add another recommended keyword if only 'remake' is in there

-# non game keywords take precedence
-non_game_keywords = ('framework', 'library', 'tool')
+# non game keywords take precedence over other (game) recommended keywords, at most one of them per entry
+non_game_keywords = ('framework', 'game engine', 'library', 'tool')

 # known programming languages, anything else will result in a warning during a maintenance operation
 # only these will be used when gathering statistics
--- a/code/utils/osg.py
+++ b/code/utils/osg.py
@ -340,11 +340,11 @@ def check_and_process_entry(entry):
    if canonical_file_name != file and canonical_file_name != file[:-5] + '.md':
        message += 'file name should be {}\n'.format(canonical_file_name)

-    # check that fields without comments have no comments, set to field without comment
+    # check that fields without comments have no comments (i.e. contain no Value instances)
    for field in c.fields_without_comments:
        if field in entry:
            content = entry[field]
-            if any(item.has_comment() for item in content):
+            if any(isinstance(item, osg_parse.Value) for item in content):
                message += 'field without comments {} has comment\n'.format(field)

    # state must contain either beta or mature but not both
@ -359,8 +359,8 @@ def check_and_process_entry(entry):
    for field in c.url_fields:
        values = entry.get(field, [])
        for value in values:
-            if value.value.startswith('<') and value.value.endswith('>'):
-                value.value = value.value[1:-1]
+            if value.startswith('<') and value.endswith('>'):
+                value = value[1:-1]
            if not any(value.startswith(x) for x in c.valid_url_prefixes):
                message += 'URL "{}" in field "{}" does not start with a valid prefix'.format(value, field)

@ -368,7 +368,7 @@ def check_and_process_entry(entry):
    for repo in entry.get('Code repository', []):
        if any(repo.startswith(x) for x in ('@', '?')):
            continue
-        repo = repo.value.split(' ')[0].strip()
+        repo = repo.split(' ')[0].strip()
        if any((x in repo for x in ('github', 'gitlab', 'git.tuxfamily', 'git.savannah'))):
                if not repo.startswith('https://'):
                    message += 'Repo "{}" should start with https://'.format(repo)
@ -420,7 +420,7 @@ def is_inactive(entry):
 def extract_inactive_year(entry):
    state = entry['State']
    phrase = 'inactive since '
-    inactive_year = [x.value[len(phrase):] for x in state if x.startswith(phrase)]
+    inactive_year = [x[len(phrase):] for x in state if x.startswith(phrase)]
    assert len(inactive_year) <= 1
    if inactive_year:
        return int(inactive_year[0])
@ -457,9 +457,29 @@ def write_entry(entry):
    utils.write_text(entry_path, content)


-def create_entry_content(entry):
+def render_value(value):
    """

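+    :param value: a plain string or an osg_parse.Value (a string that carries a comment)
+    :return: the value (quoted if it contains ',' or ' ('), followed by its comment in parentheses if it has one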
+    """
+    if isinstance(value, osg_parse.Value):
+        comment = value.comment
+    else:
+        comment = None
+    if any(x in value for x in (',', ' (')):
+        value = '"{}"'.format(value)
+    if comment:
+        return '{} ({})'.format(value, comment)
+    else:
+        return value
+
+
+def create_entry_content(entry):
+    """
+    Creates the entry content from an internal representation as dictionary with fields to a text file representation
+    that can be stored in the md files. It should be compatible with the grammar, and reading a file and re-creating the
+    content should not change the content. Importantly, the comments of the values have to be added here.
    :param entry:
    :return:
    """
@ -468,7 +488,7 @@ def create_entry_content(entry):
    content = '# {}\n\n'.format(entry['Title'])

    # we automatically sort some fields
-    sort_fun = lambda x: str.casefold(x.value)
+    sort_fun = lambda x: str.casefold(x)
    for field in ('Media', 'Inspiration', 'Code Language', 'Developer', 'Build system'):
        if field in entry:
            values = entry[field]
@ -479,12 +499,11 @@ def create_entry_content(entry):
    b = [x for x in keywords if x not in c.recommended_keywords]
    entry['Keyword'] = sorted(a, key=sort_fun) + sorted(b, key=sort_fun)

-    # now all properties in the recommended order
+    # now all properties are in the recommended order
    for field in c.valid_properties:
        if field in entry:
            e = entry[field]
-            e = ['"{}"'.format(x) if any(y in x.value for y in (',', ' (')) else x for x in e]
-            e = [str(x) for x in e]
+            e = [render_value(x) for x in e]
            e = list(dict.fromkeys(e))  # this removes duplicates while keeping the sorting order
            content += '- {}: {}\n'.format(field, ', '.join(e))
    content += '\n'
@ -504,8 +523,8 @@ def create_entry_content(entry):
                has_properties = True
                content += '\n'
            e = entry['Building'][field]
-            e = ['"{}"'.format(x) if ',' in x else x for x in e]
-            e = [str(x) for x in e]
+            e = [render_value(x) for x in e]
+            e = list(dict.fromkeys(e))  # this removes duplicates while keeping the sorting order
            content += '- {}: {}\n'.format(field, ', '.join(e))

    # if there is a note, insert it
@ -533,16 +552,14 @@ def all_urls(entries):
    :param entries: 
    :return: 
    """
+    # TODO URLs also occur in fields other than c.url_fields and in comments, maybe just regex on the whole content
+    # TODO this might be part of the external link check or it might not, check for duplicate code
    urls = {}
    # iterate over entries
    for entry in entries:
        file = entry['File']
-        for field in c.url_fields:  # TODO there are other fields, maybe just regex on the whole content
+        for field in c.url_fields:
            for value in entry.get(field, []):
-                if value.comment:
-                    value = value.value + ' ' + value.comment
-                else:
-                    value = value.value
                for subvalue in value.split(' '):
                    subvalue = subvalue.strip()
                    if is_url(subvalue):
--- a/code/utils/osg_parse.py
+++ b/code/utils/osg_parse.py
@ -14,10 +14,10 @@ class ListingTransformer(lark.Transformer):
    """

    def unquoted_value(self, x):
-        return x[0].value.strip()
+        return x[0].strip()

    def quoted_value(self, x):
-        return x[0].value[1:-1].strip()  # remove quotation marks and strip whitespaces
+        return x[0][1:-1].strip()  # remove quotation marks and strip whitespaces

    def property(self, x):
        """
@ -25,7 +25,7 @@ class ListingTransformer(lark.Transformer):
        :param x:
        :return:
        """
-        return x[0].value, x[1:]
+        return x[0], x[1:]

    def name(self, x):
        """
@ -33,7 +33,7 @@ class ListingTransformer(lark.Transformer):
        :param x:
        :return:
        """
-        return 'Name', x[0].value.strip()
+        return 'Name', x[0].strip()

    def entry(self, x):
        """
@ -56,19 +56,25 @@ class ListingTransformer(lark.Transformer):
 class EntryTransformer(lark.Transformer):

    def unquoted_value(self, x):
-        return x[0].value.strip()
+        return x[0].strip()

    def quoted_value(self, x):
-        return x[0].value[1:-1].strip()  # remove quotation marks
+        return x[0][1:-1].strip()  # remove quotation marks

    def comment_value(self, x):
-        return x[0].value[1:-1].strip()  # remove parenthesis
+        return x[0][1:-1].strip()  # remove parenthesis

    def value(self, x):
+        """
+        This also stores the comment if needed.
+
+        :param x:
+        :return:
+        """
        if len(x) == 1:
-            v = ValueWithComment(value=x[0])
+            v = x[0]
        else:
-            v = ValueWithComment(value=x[0], comment=x[1])
+            v = Value(*x)
        return v

    def property(self, x):
@ -77,10 +83,10 @@ class EntryTransformer(lark.Transformer):
        :param x:
        :return:
        """
-        return x[0].value.strip(), x[1:]
+        return x[0].strip(), x[1:]

    def title(self, x):
-        return 'Title', x[0].value.strip()
+        return 'Title', x[0].strip()

    def note(self, x):
        """
@ -90,7 +96,7 @@ class EntryTransformer(lark.Transformer):
        """
        if not x:
            raise lark.Discard
-        return 'Note', ''.join((x.value for x in x))
+        return 'Note', ''.join(x)

    def building(self, x):
        return 'Building', x
@ -98,40 +104,16 @@ class EntryTransformer(lark.Transformer):
    def start(self, x):
        return x

-# TODO turns out ValueWithComment does not really solve problem but actually creates even some, are there alternatives like inheriting from string?
-class ValueWithComment:
+
+class Value(str):
    """
-      All our property values can have (optional) comments. This is the class that represents them to us and implements
-      equality and 'in' operator functionality purely on the value.
+    A value is a string with some additional meta object (a comment) but mostly behaves as a string.
    """

-    def __init__(self, value, comment=None):
-        self.value = value
-        self.comment = comment
-
-    def is_empty(self):
-        return self.value == ''
-
-    def has_comment(self):
-        return self.comment is not None
-
-    def startswith(self, str):
-        return self.value.startswith(str)
-
-    def __contains__(self, item):
-        return item in self.value
-
-    def __eq__(self, other):
-        return self.value == other
-
-    def __repr__(self):
-        if self.comment:
-            return '{} ({})'.format(self.value, self.comment)
-        else:
-            return '{}'.format(self.value)
-
-    def __hash__(self):
-        return hash(self.value)
+    def __new__(cls, value, comment):
+        obj = str.__new__(cls, value)
+        obj.comment = comment
+        return obj

 def parse(parser, transformer, content):
    tree = parser.parse(content)
--- a/code/utils/osg_statistics.py
+++ b/code/utils/osg_statistics.py
@ -1,4 +1,23 @@
 """
+Central place to calculate statistics about the entries. Used for updating the statistics.md file and the statistics page
+of the website.
+"""

-"""

+def get_build_systems(entries):
+    """
+    Given a list of entries, calculates statistics about the used build systems and returns the statistics as
+    a list of elements (build-system-name, occurrence), sorted by occurrence (highest first) and then by name.
+    "n/a" is used if no build system was specified.
+    """
+    build_systems = []
+    for entry in entries:
+        build_systems.extend(entry['Building'].get('Build system', ['n/a']))
+
+    unique_build_systems = set(build_systems)
+
+    build_systems_stat = [(l, build_systems.count(l)) for l in unique_build_systems]
+    build_systems_stat.sort(key=lambda x: str.casefold(x[0]))  # first sort by name
+    build_systems_stat.sort(key=lambda x: -x[1])  # then sort by occurrence (highest occurrence first)
+
+    return build_systems_stat