tested grammar for reading entries (parsing quite slow)

This commit is contained in:
Trilarion
2020-09-02 14:45:25 +02:00
parent a1ce1809f3
commit 60fd9ed93e
21 changed files with 118 additions and 50 deletions

View File

@ -1,15 +1,21 @@
start: title description property+ _E note? _E? building
start: title description property+ (_E note)? _E building
title: "# " /(?! ).+(?<! )/ "\n" _E // not starting or ending with a space
title: "#" /(?! ).+(?<! )/ "\n" _E // not starting or ending with a space
description: "_" /(?! ).+(?<![ _])/ "_\n" _E // single line, not starting with a space and not ending with a space or underscore
property: "- " _key ": " _value "\n" // a property on a single line "- key: value"
_key: /(?! ).+?(?=:)/ // key: everything until next ":", not starting with a space
_value: /.+(?<! )/ // everything until the end of the line, not ending with a space
building: "## Building\n" _E property+ _E? note // the "building" section
note: /(?![\-#]).*\n/* // unstructured text: zero or more lines, none starting with - or #
_E: /^$\n/m // empty new line
building: "## Building\n" (_E property+)? (_E note)? // the "building" section
property: "-" _key ":" _values "\n"? // a property on a single line "- key: value"
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_values: [_value ("," _value)*] // an optional comma separated list of values
_value: quoted_value | unquoted_value // quoted or unquoted values
quoted_value: /\".+?\"/ // with quotation marks, can contain commas
unquoted_value: /(?![ \"])[^,\n]+(?<![ ])/ // cannot contain commas or newlines, cannot start with a space or quotation mark, cannot end with a space
_E: /^$\n/m // empty new line
%import common.WS
%ignore WS

View File

@ -28,11 +28,12 @@ if __name__ == "__main__":
print('{} inspirations in the inspirations database'.format(len(inspirations)))
osg.write_inspirations_info(inspirations) # write again just to check integrity
osg_ui.run_simple_button_app('Maintenance inspirations', (('Duplicate check', duplicate_check),))
#osg_ui.run_simple_button_app('Maintenance inspirations', (('Duplicate check', duplicate_check),))
# assemble info
entries = osg.read_entries()
entries = osg.assemble_infos()
# assemble inspirations info from entries

View File

@ -58,11 +58,19 @@ class ListingTransformer(lark.Transformer):
# transformer
class EntryTransformer(lark.Transformer):
def start(self, x):
    """
    Collects the (key, value) pairs of all children into one dictionary.

    A key that occurs more than once keeps the value of its last occurrence.

    :param x: iterable of (key, value) pairs
    :return: dictionary built from the pairs
    """
    return {name: content for name, content in x}
def unquoted_value(self, x):
    """
    Returns the text of an unquoted value.

    :param x: children of the rule, the first one carries the text in its value attribute
    :return: the value attribute of the first child
    """
    token = x[0]
    return token.value
def quoted_value(self, x):
    """
    Returns the text of a quoted value without its first and last character (the quotation marks).

    :param x: children of the rule, the first one carries the quoted text in its value attribute
    :return: the value attribute of the first child, stripped of its first and last character
    """
    quoted = x[0].value
    return quoted[1:-1]  # cut off the enclosing quotation marks
def property(self, x):
    """
    Splits a property into its lower-cased key and the list of its values.

    :param x: children of the rule, the key first, followed by the values
    :return: tuple (key in lower case, all remaining children)
    """
    key = x[0].lower()
    values = x[1:]
    return key, values
def title(self, x):
    """
    Tags the title text with the key 'title'.

    :param x: children of the rule, the first one carries the title in its value attribute
    :return: tuple ('title', title text)
    """
    text = x[0].value
    return 'title', text
@ -70,18 +78,32 @@ class EntryTransformer(lark.Transformer):
def description(self, x):
    """
    Tags the description text with the key 'description'.

    :param x: children of the rule, the first one carries the description in its value attribute
    :return: tuple ('description', description text)
    """
    text = x[0].value
    return 'description', text
def property(self, x):
    """
    Splits a property into its case-folded key and its single value.

    :param x: children of the rule, the key first and the value second, both carrying their text in value
    :return: tuple (case-folded key text, value text)
    """
    key = str.casefold(x[0].value)
    value = x[1].value
    return key, value
def note(self, x):
    """
    Transforms the optional note (unstructured text) of an entry.

    The note may span several lines, each arriving as its own child. All of them are concatenated
    so that no line of the note is lost.

    :param x: children of the rule, each carrying a piece of the note text in its value attribute
    :return: tuple ('note', complete note text)
    :raises lark.Discard: if there are no children, i.e. the note is empty
    """
    if not x:
        raise lark.Discard
    # join every piece, returning only x[0].value would silently drop all following lines
    return 'note', ''.join(token.value for token in x)
def building(self, x):
    """
    Collects the (key, value) pairs of the building section into a dictionary.

    :param x: iterable of (key, value) pairs
    :return: tuple ('building', dictionary of the pairs)
    :raises RuntimeError: if a key occurs more than once
    """
    fields = {}
    for name, content in x:
        if name not in fields:
            fields[name] = content
            continue
        # seen this key before
        raise RuntimeError('Key in entry appears twice')
    return 'building', fields
def start(self, x):
    """
    Assembles the whole entry as a dictionary from the (key, value) pairs of its parts.

    :param x: iterable of (key, value) pairs
    :return: dictionary of the pairs
    :raises RuntimeError: if a key occurs more than once
    """
    entry = {}
    for field, content in x:
        if field not in entry:
            entry[field] = content
            continue
        # seen this key before
        raise RuntimeError('Key in entry appears twice')
    return entry
# matches every run of characters that are not ASCII letters, spaces, digits, "-" or "+"
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
# matches every run of one or more consecutive spaces
regex_sanitize_name_space_eater = re.compile(r" +")
@ -535,6 +557,44 @@ def write_inspirations_info(inspirations):
utils.write_text(inspirations_file, content)
def read_entries():
    """
    Parses the content of all entries with the entries grammar and collects the results.

    Every item delivered by entry_iterator() is parsed with a lark parser built from
    'grammar_entries.lark' (located in code_path) and transformed by an EntryTransformer.
    The file of each processed entry is printed. An entry that fails to parse or transform
    has its error printed and is left out of the result.

    :return: list of transformed entries, each with the originating file stored under the key 'file'
    """
    # parser and transformer are built once and reused for all entries
    grammar = utils.read_text(os.path.join(code_path, 'grammar_entries.lark'))
    parser = lark.Lark(grammar, debug=False)
    transformer = EntryTransformer()

    entries = []
    for file, _, content in entry_iterator():
        print(file)
        try:
            entry = transformer.transform(parser.parse(content))
        except Exception as e:
            # best effort: report the problem and go on with the next entry
            print(e)
        else:
            # remember where the entry came from
            entry['file'] = file
            entries.append(entry)
    return entries
def compare_entries_developers(entries, developers):
"""
Cross checks the game entries lists and the developers lists.