grammars for entries parsing now LALR(1), which is much faster

This commit is contained in:
Trilarion
2020-09-02 22:31:01 +02:00
parent 60fd9ed93e
commit b3f9d3af9b
18 changed files with 147 additions and 49 deletions

View File

@ -1,21 +1,26 @@
start: title description property+ (_E note)? _E building
start: title description property+ note? building
title: "#" /(?! ).+(?<! )/ _NL // not starting or ending with a space
title: "#" /(?! ).+(?<! )/ "\n" _E // not starting or ending with a space
description: "_" /(?! ).+(?<![ _])/ "_" _NL // single line not ending with underscore
property: "-" _key ":" _values _NL
_key : /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_values : [_value ("," _value)*]
_value : quoted_value | unquoted_value
quoted_value : /\".+?\"/ // with quotation marks, can contain commas
unquoted_value : /(?![ \"])[^,\n]+(?<![ \"])/ // cannot contain commas, cannot start or end with quotation mark
description: "_" /(?! ).+(?<![ _])/ "_\n" _E // single line not ending with underscore
note.3: /(?![\-#]).*\n/+ // Unstructured text, not starting with - or #
note: /(?![\-#]).*\n/* // Unstructured text, not starting with - or #
building: "## Building" _NL property* note? // the "building" section
building: "## Building\n" (_E property+)? (_E note)? // the "building" section
property: "-" _key ":" _values "\n"? // a property on a single line "- key: value"
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_values: [_value ("," _value)*] // a comma separated list
_value: quoted_value | unquoted_value // quoted or unquoted values
quoted_value: /\".+?\"/ // with quotation marks, can contain commas
unquoted_value: /(?![ \"])[^,\n]+(?<![ ])/ // cannot contain commas or newlines, cannot start with a space or quotation mark, cannot end with a space
_NUMBER: /[0-9]+/
_E: /^$\n/m // empty new line
CR : /\r/
LF : /\n/
_NL : CR? LF
WS : (" "|/\t/)+
_EL.2 : /^$\n/m
%import common.WS
%ignore WS
%ignore WS
%ignore _EL

View File

@ -1,18 +1,23 @@
start: entry*
entry: "##" name "[" _NUMBER "]\n" property+
property: "-" _key ":" _values "\n"
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_values: [_value ("," _value)*]
_value: quoted_value | unquoted_value
quoted_value: /\".+?\"/ // with quotation marks, can contain commas
unquoted_value: /(?![ \"])[^,\n]+(?<![ \"])/ // cannot contain commas, cannot start or end with quotation mark
start: _COMMENT _HEADER entry*
entry: "##" name "[" _NUMBER "]" _NL property+
property: "-" _key ":" _values _NL
_key : /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_values : [_value ("," _value)*]
_value : quoted_value | unquoted_value
quoted_value : /\".+?\"/ // with quotation marks, can contain commas
unquoted_value : /(?![ \"])[^,\n]+(?<![ \"])/ // cannot contain commas, cannot start or end with quotation mark
name: /(?! ).+?(?= \[)/ // developer name: everything until " ["
_NUMBER: /[0-9]+/
%import common.WS
CR : /\r/
LF : /\n/
_NL : CR? LF
WS : (" "|/\t/)+
_EL : /^$\n/m
_COMMENT : /^\[comment\]: #.*$\n/m // [comment]: # xxx
_HEADER : /^# .+$\n/m
%ignore WS
%ignore /^\[comment\]: #.*$\n/m // [comment]: # xxx
%ignore /^# .+$\n/m // the line starting with "# "
%ignore /^$\n/m // empty lines
%ignore _EL

View File

@ -2,6 +2,7 @@
Maintenance of inspirations.md and synchronization with the inspirations in the entries.
"""
import time
from utils import constants as c, utils, osg, osg_ui
@ -33,8 +34,14 @@ if __name__ == "__main__":
# assemble info
t0 = time.process_time()
entries = osg.read_entries()
entries = osg.assemble_infos()
print('took {}s'.format(time.process_time()-t0))
t0 = time.process_time()
# entries = osg.assemble_infos()
osg.write_entries(entries)
print('took {}s'.format(time.process_time()-t0))
# assemble inspirations info from entries
entries_inspirations = {}

View File

@ -37,8 +37,10 @@ essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code langua
# only these fields can be used currently (in this order)
valid_fields = (
'Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
'Code license', 'Code dependencies', 'Assets license', 'Developer', 'Build system', 'Build instructions')
'Home', 'Media', 'Inspirations', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
'Code license', 'Code dependencies', 'Assets license', 'Developer')
valid_building_fields = ('Build system', 'Build instructions')
# these are the only valid platforms currently (and must be given in this order)
valid_platforms = ('Windows', 'Linux', 'macOS', 'Android', 'iOS', 'Web')

View File

@ -86,7 +86,7 @@ class EntryTransformer(lark.Transformer):
"""
if not x:
raise lark.Discard
return 'note', x[0].value
return 'note', ''.join((x.value for x in x))
def building(self, x):
d = {}
@ -401,7 +401,7 @@ def read_and_parse(content_file: str, grammar_file: str, transformer: lark.Trans
"""
content = utils.read_text(content_file)
grammar = utils.read_text(grammar_file)
parser = lark.Lark(grammar, debug=False)
parser = lark.Lark(grammar, debug=False, parser='lalr')
tree = parser.parse(content)
return transformer.transform(tree)
@ -565,7 +565,7 @@ def read_entries():
# setup parser
grammar_file = os.path.join(code_path, 'grammar_entries.lark')
grammar = utils.read_text(grammar_file)
parser = lark.Lark(grammar, debug=False)
parser = lark.Lark(grammar, debug=False, parser='lalr')
# setup transformer
transformer = EntryTransformer()
@ -576,16 +576,20 @@ def read_entries():
# iterate over all entries
for file, _, content in entry_iterator():
print(file)
if not content.endswith('\n'):
content += '\n'
# parse and transform entry content
try:
tree = parser.parse(content)
entry = transformer.transform(tree)
except Exception as e:
print(file)
print(e)
continue
# TODO check entry
# add file information
entry['file'] = file
@ -594,6 +598,74 @@ def read_entries():
return entries
def write_entries(entries):
    """
    Writes entries out by handing each one to write_entry.

    Only the first 20 elements of the given sequence are processed.

    :param entries: sliceable sequence of entries
    :return: None
    """
    # restrict to the leading 20 entries, then write them one by one
    for single_entry in entries[:20]:
        write_entry(single_entry)
def write_entry(entry):
    """
    Writes a single entry to its file.

    The target is entry['file'] joined onto entries_path, the text comes from
    create_entry_content.

    :param entry: entry to write, must contain the key 'file'
    :return: None
    """
    # TODO check entry

    # destination path first, then the generated text, then write it out
    utils.write_text(os.path.join(entries_path, entry['file']), create_entry_content(entry))
def create_entry_content(entry):
    """
    Creates the text content of an entry file.

    Layout of the returned text:
      - title line ("# title") and description line ("_description_"), each followed by an empty line
      - one "- Field: value1, value2" line for every field of valid_fields that is present in the
        entry (looked up by the lower-cased field name), followed by an empty line
      - the entry note if present
      - the "## Building" header followed by an empty line
      - one line for every field of valid_building_fields present in entry['building']
      - the building note if present, separated from the building properties by an empty line

    :param entry: entry with keys 'title' and 'description'; property values must be iterables of
                  strings (they are joined with ', '); 'note' and 'building' are optional
    :return: the content as a single string
    """
    # pieces are collected in a list and joined once at the end
    parts = ['# {}\n\n_{}_\n\n'.format(entry['title'], entry['description'])]

    # now properties in the recommended order
    for field in valid_fields:
        field_name = field.lower()
        if field_name in entry:
            parts.append('- {}: {}\n'.format(field, ', '.join(entry[field_name])))
    parts.append('\n')

    # if there is a note, insert it
    if 'note' in entry:
        parts.append(entry['note'] + '\n')

    # building header (always written)
    parts.append('## Building\n\n')

    # an entry without a building section is treated like one with an empty building section,
    # so only the header is written instead of failing with a KeyError
    building = entry['building'] if 'building' in entry else {}

    # building properties if present
    building_lines = []
    for field in valid_building_fields:
        field_name = field.lower()
        if field_name in building:
            building_lines.append('- {}: {}\n'.format(field, ', '.join(building[field_name])))
    parts.extend(building_lines)

    # if there is a note, insert it (with an empty line after the properties, if any)
    if 'note' in building:
        if building_lines:
            parts.append('\n')
        parts.append(building['note'] + '\n')

    return ''.join(parts)
def compare_entries_developers(entries, developers):
"""