589 lines
19 KiB
Python
589 lines
19 KiB
Python
"""
|
|
Specific functions working on the games.
|
|
"""
|
|
|
|
import re
|
|
import os
|
|
from difflib import SequenceMatcher
|
|
from utils import utils, osg_parse, constants as c
|
|
|
|
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
|
|
regex_sanitize_name_space_eater = re.compile(r" +")
|
|
|
|
|
|
def name_similarity(a, b):
|
|
return SequenceMatcher(None, str.casefold(a), str.casefold(b)).ratio()
|
|
|
|
|
|
def entry_iterator():
|
|
"""
|
|
|
|
"""
|
|
|
|
# get all entries (ignore everything starting with underscore)
|
|
entries = os.listdir(c.entries_path)
|
|
|
|
# iterate over all entries
|
|
for entry in entries:
|
|
entry_path = os.path.join(c.entries_path, entry)
|
|
|
|
# ignore directories ("tocs" for example)
|
|
if os.path.isdir(entry_path):
|
|
continue
|
|
|
|
# read entry
|
|
content = utils.read_text(entry_path)
|
|
|
|
# yield
|
|
yield entry, entry_path, content
|
|
|
|
|
|
def canonical_name(name):
|
|
"""
|
|
Derives a canonical name from an actual name (suitable for file names, anchor names, ...)
|
|
"""
|
|
name = name.casefold()
|
|
name = name.replace('ö', 'o').replace('ä', 'a').replace('ü', 'u')
|
|
name = regex_sanitize_name.sub('', name)
|
|
name = regex_sanitize_name_space_eater.sub('_', name)
|
|
name = name.replace('_-_', '-')
|
|
name = name.replace('--', '-').replace('--', '-')
|
|
|
|
return name
|
|
|
|
|
|
def read_developers():
|
|
"""
|
|
|
|
:return:
|
|
"""
|
|
grammar_file = os.path.join(c.code_path, 'grammar_listing.lark')
|
|
developers = osg_parse.read_and_parse(c.developer_file, grammar_file, osg_parse.ListingTransformer)
|
|
|
|
# now developers is a list of dictionaries for every entry with some properties
|
|
|
|
# check for duplicate names entries
|
|
names = [dev['Name'] for dev in developers]
|
|
duplicate_names = (name for name in names if names.count(name) > 1)
|
|
duplicate_names = set(duplicate_names) # to avoid duplicates in duplicate_names
|
|
if duplicate_names:
|
|
print('Warning: duplicate developer names: {}'.format(', '.join(duplicate_names)))
|
|
|
|
# check for essential, valid fields
|
|
for dev in developers:
|
|
# check that essential fields are existing
|
|
for field in c.essential_developer_fields:
|
|
if field not in dev:
|
|
raise RuntimeError('Essential field "{}" missing in developer {}'.format(field, dev['Name']))
|
|
# check that all fields are valid fields
|
|
for field in dev.keys():
|
|
if field not in c.valid_developer_fields:
|
|
raise RuntimeError('Invalid field "{}" in developer {}.'.format(field, dev['Name']))
|
|
# url fields
|
|
for field in c.url_developer_fields:
|
|
if field in dev:
|
|
content = dev[field]
|
|
if any(not (x.startswith('http://') or x.startswith('https://')) for x in content):
|
|
raise RuntimeError('Invalid URL in field "{}" in developer {}.'.format(field, dev['Name']))
|
|
|
|
# convert to dictionary
|
|
developers = {x['Name']: x for x in developers}
|
|
|
|
return developers
|
|
|
|
|
|
def write_developers(developers):
|
|
"""
|
|
|
|
:return:
|
|
"""
|
|
# convert dictionary to list
|
|
developers = list(developers.values())
|
|
|
|
# comment
|
|
content = '{}\n'.format(c.generic_comment_string)
|
|
|
|
# number of developer
|
|
content += '# Developer [{}]\n\n'.format(len(developers))
|
|
|
|
# sort by name
|
|
developers.sort(key=lambda x: str.casefold(x['Name']))
|
|
|
|
# iterate over them
|
|
for dev in developers:
|
|
keys = list(dev.keys())
|
|
# developer name
|
|
content += '## {} [{}]\n\n'.format(dev['Name'], len(dev['Games']))
|
|
keys.remove('Name')
|
|
|
|
# all the remaining in alphabetical order, but 'games' first
|
|
keys.remove('Games')
|
|
keys.sort()
|
|
keys = ['Games'] + keys
|
|
for field in keys:
|
|
value = dev[field]
|
|
# lists get special treatment
|
|
if isinstance(value, list):
|
|
value.sort(key=str.casefold)
|
|
value = [x if not ',' in x else '"{}"'.format(x) for x in value] # surround those with a comma with quotation marks
|
|
value = ', '.join(value)
|
|
content += '- {}: {}\n'.format(field, value)
|
|
content += '\n'
|
|
|
|
# write
|
|
utils.write_text(c.developer_file, content)
|
|
|
|
|
|
def read_inspirations():
|
|
"""
|
|
Reads the info list about the games originals/inspirations from inspirations.md using the Lark parser grammar
|
|
in grammar_listing.lark
|
|
:return:
|
|
"""
|
|
# read inspirations
|
|
|
|
# read and parse inspirations
|
|
grammar_file = os.path.join(c.code_path, 'grammar_listing.lark')
|
|
inspirations = osg_parse.read_and_parse(c.inspirations_file, grammar_file, osg_parse.ListingTransformer)
|
|
|
|
# now inspirations is a list of dictionaries for every entry with some properties
|
|
|
|
# check for duplicate names entries
|
|
names = [inspiration['Name'] for inspiration in inspirations]
|
|
duplicate_names = (name for name in names if names.count(name) > 1)
|
|
duplicate_names = set(duplicate_names) # to avoid duplicates in duplicate_names
|
|
if duplicate_names:
|
|
raise RuntimeError('Duplicate inspiration names: {}'.format(', '.join(duplicate_names)))
|
|
|
|
# check for essential, valid fields
|
|
for inspiration in inspirations:
|
|
# check that essential fields are existing
|
|
for field in c.essential_inspiration_fields:
|
|
if field not in inspiration:
|
|
raise RuntimeError('Essential field "{}" missing in inspiration {}'.format(field, inspiration['Name']))
|
|
# check that all fields are valid fields
|
|
for field in inspiration.keys():
|
|
if field not in c.valid_inspiration_fields:
|
|
raise RuntimeError('Invalid field "{}" in inspiration {}.'.format(field, inspiration['Name']))
|
|
# url fields
|
|
for field in c.url_inspiration_fields:
|
|
if field in inspiration:
|
|
content = inspiration[field]
|
|
if any(not (x.startswith('http://') or x.startswith('https://')) for x in content):
|
|
raise RuntimeError('Invalid URL in field "{}" in inspiration {}.'.format(field, inspiration['Name']))
|
|
|
|
# convert to dictionary
|
|
inspirations = {x['Name']: x for x in inspirations}
|
|
|
|
return inspirations
|
|
|
|
|
|
def write_inspirations(inspirations):
|
|
"""
|
|
Given an internal dictionary of inspirations, write it into the inspirations file
|
|
:param inspirations:
|
|
:return:
|
|
"""
|
|
# convert dictionary to list
|
|
inspirations = list(inspirations.values())
|
|
|
|
# comment
|
|
content = '{}\n'.format(c.generic_comment_string)
|
|
|
|
# updated number of inspirations
|
|
content += '# Inspirations [{}]\n\n'.format(len(inspirations))
|
|
|
|
# sort by name
|
|
inspirations.sort(key=lambda x: str.casefold(x['Name']))
|
|
|
|
# iterate over them
|
|
for inspiration in inspirations:
|
|
keys = list(inspiration.keys())
|
|
# inspiration name
|
|
content += '## {} [{}]\n\n'.format(inspiration['Name'], len(inspiration['Inspired entries']))
|
|
keys.remove('Name')
|
|
|
|
# all the remaining in alphabetical order, but "inspired entries" first
|
|
keys.remove('Inspired entries')
|
|
keys.sort()
|
|
keys = ['Inspired entries'] + keys
|
|
for field in keys:
|
|
value = inspiration[field]
|
|
# lists get special treatment
|
|
if isinstance(value, list):
|
|
value.sort(key=str.casefold) # sorted alphabetically
|
|
value = [x if not ',' in x else '"{}"'.format(x) for x in value] # surround those with a comma with quotation marks
|
|
value = ', '.join(value)
|
|
content += '- {}: {}\n'.format(field, value)
|
|
content += '\n'
|
|
|
|
# write
|
|
utils.write_text(c.inspirations_file, content)
|
|
|
|
|
|
def read_entries():
|
|
"""
|
|
Parses all entries and assembles interesting infos about them.
|
|
"""
|
|
|
|
# setup parser and transformer
|
|
grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
|
|
grammar = utils.read_text(grammar_file)
|
|
parse = osg_parse.create(grammar, osg_parse.EntryTransformer)
|
|
|
|
# a database of all important infos about the entries
|
|
entries = []
|
|
|
|
# iterate over all entries
|
|
exception_happened = False
|
|
for file, _, content in entry_iterator():
|
|
|
|
if not content.endswith('\n'):
|
|
content += '\n'
|
|
|
|
# parse and transform entry content
|
|
try:
|
|
entry = parse(content)
|
|
entry = [('File', file),] + entry # add file information to the beginning
|
|
entry = check_and_process_entry(entry)
|
|
except Exception as e:
|
|
print('{} - {}'.format(file, e))
|
|
exception_happened = True
|
|
# raise RuntimeError(e)
|
|
continue
|
|
|
|
# add to list
|
|
entries.append(entry)
|
|
if exception_happened:
|
|
raise RuntimeError('errors while reading entries')
|
|
|
|
return entries
|
|
|
|
|
|
def read_entry(file):
|
|
"""
|
|
Reads a single entry
|
|
:param file: the entry file (without path)
|
|
:return: the entry
|
|
"""
|
|
|
|
# setup parser and transformer
|
|
grammar_file = os.path.join(c.code_path, 'grammar_entries.lark')
|
|
grammar = utils.read_text(grammar_file)
|
|
parse = osg_parse.create(grammar, osg_parse.EntryTransformer)
|
|
|
|
# read entry file
|
|
content = utils.read_text(os.path.join(c.entries_path, file))
|
|
if not content.endswith('\n'):
|
|
content += '\n'
|
|
|
|
# parse and transform entry content
|
|
try:
|
|
entry = parse(content)
|
|
entry = [('File', file),] + entry # add file information to the beginning
|
|
entry = check_and_process_entry(entry)
|
|
except Exception as e:
|
|
print('{} - {}'.format(file, e))
|
|
raise RuntimeError(e)
|
|
|
|
return entry
|
|
|
|
|
|
def check_and_process_entry(entry):
|
|
message = ''
|
|
|
|
# check that all fields are valid fields and are existing in that order
|
|
index = 0
|
|
for e in entry:
|
|
field = e[0]
|
|
while index < len(c.valid_fields) and field != c.valid_fields[index]:
|
|
index += 1
|
|
if index == len(c.valid_fields): # must be valid fields and must be in the right order
|
|
message += 'Field "{}" either misspelled or in wrong order\n'.format(field)
|
|
|
|
# order is fine we can convert to dictionary
|
|
d = {}
|
|
for field, value in entry:
|
|
if field in d:
|
|
message += 'Field "{}" appears twice\n'.format(field)
|
|
d[field] = value
|
|
entry = d
|
|
|
|
# check for essential fields
|
|
for field in c.essential_fields:
|
|
if field not in entry:
|
|
message += 'Essential property "{}" missing\n'.format(field)
|
|
|
|
# now the same treatment for building
|
|
building = entry['Building']
|
|
d = {}
|
|
for field, value in building:
|
|
if field in d:
|
|
message += 'Field "{}" appears twice\n'.format(field)
|
|
d[field] = value
|
|
building = d
|
|
|
|
# check valid fields in building TODO should also check order
|
|
for field in building.keys():
|
|
if field not in c.valid_building_fields:
|
|
message += 'Building field "{}" invalid\n'.format(field)
|
|
entry['Building'] = building
|
|
|
|
# check canonical file name
|
|
file = entry['File']
|
|
canonical_file_name = canonical_name(entry['Title']) + '.md'
|
|
# we also allow -X with X =2..9 as possible extension (because of duplicate canonical file names)
|
|
if canonical_file_name != file and canonical_file_name != file[:-5] + '.md':
|
|
message += 'file name should be {}\n'.format(canonical_file_name)
|
|
|
|
# state must contain either beta or mature but not both
|
|
state = entry['State']
|
|
for t in state:
|
|
if t != 'beta' and t != 'mature' and not t.startswith('inactive since '):
|
|
message += 'Unknown state "{}"'.format(t)
|
|
if 'beta' in state == 'mature' in state:
|
|
message += 'State must be one of <"beta", "mature">'
|
|
|
|
# check urls
|
|
for field in c.url_fields:
|
|
values = entry.get(field, [])
|
|
for value in values:
|
|
if value.value.startswith('<') and value.value.endswith('>'):
|
|
value.value = value.value[1:-1]
|
|
if not any(value.startswith(x) for x in c.extended_valid_url_prefixes):
|
|
message += 'URL "{}" in field "{}" does not start with a valid prefix'.format(value, field)
|
|
|
|
# github/gitlab repositories should end on .git and should start with https
|
|
for repo in entry['Code repository']:
|
|
if any(repo.startswith(x) for x in ('@', '?')):
|
|
continue
|
|
repo = repo.value.split(' ')[0].strip()
|
|
if any((x in repo for x in ('github', 'gitlab', 'git.tuxfamily', 'git.savannah'))):
|
|
if not repo.startswith('https://'):
|
|
message += 'Repo "{}" should start with https://'.format(repo)
|
|
if not repo.endswith('.git'):
|
|
message += 'Repo "{}" should end on .git.'.format(repo)
|
|
|
|
# check that all platform tags are valid tags and are existing in that order
|
|
if 'Platform' in entry:
|
|
index = 0
|
|
for platform in entry['Platform']:
|
|
while index < len(c.valid_platforms) and platform != c.valid_platforms[index]:
|
|
index += 1
|
|
if index == len(c.valid_platforms): # must be valid platforms and must be in that order
|
|
message += 'Platform tag "{}" either misspelled or in wrong order'.format(platform)
|
|
|
|
# there must be at least one keyword
|
|
if not entry['Keyword']:
|
|
message += 'Need at least one keyword'
|
|
|
|
# check for existence of at least one recommended keywords
|
|
keywords = entry['Keyword']
|
|
if not any(keyword in keywords for keyword in c.recommended_keywords):
|
|
message += 'Entry contains no recommended keywords'
|
|
|
|
# languages should be known
|
|
languages = entry['Code language']
|
|
for language in languages:
|
|
if language not in c.known_languages:
|
|
message += 'Language "{}" is not a known code language. Misspelled or new?'.format(language)
|
|
|
|
# licenses should be known
|
|
licenses = entry['Code license']
|
|
for license in licenses:
|
|
if license not in c.known_licenses:
|
|
message += 'License "{}" is not a known license. Misspelled or new?'.format(license)
|
|
|
|
if message:
|
|
raise RuntimeError(message)
|
|
|
|
return entry
|
|
|
|
|
|
def is_inactive(entry):
|
|
state = entry['State']
|
|
phrase = 'inactive since '
|
|
return any(x.startswith(phrase) for x in state)
|
|
|
|
|
|
def extract_inactive_year(entry):
|
|
state = entry['State']
|
|
phrase = 'inactive since '
|
|
inactive_year = [x.value[len(phrase):] for x in state if x.startswith(phrase)]
|
|
assert len(inactive_year) <= 1
|
|
if inactive_year:
|
|
return int(inactive_year[0])
|
|
else:
|
|
return None
|
|
|
|
|
|
def write_entries(entries):
|
|
"""
|
|
|
|
:return:
|
|
"""
|
|
|
|
# iterate over all entries
|
|
for entry in entries:
|
|
write_entry(entry)
|
|
|
|
|
|
def write_entry(entry):
|
|
"""
|
|
|
|
:param entry:
|
|
:return:
|
|
"""
|
|
# TODO check entry
|
|
|
|
# get path
|
|
entry_path = os.path.join(c.entries_path, entry['File'])
|
|
|
|
# create output content
|
|
content = create_entry_content(entry)
|
|
|
|
# write entry
|
|
utils.write_text(entry_path, content)
|
|
|
|
|
|
def create_entry_content(entry):
|
|
"""
|
|
|
|
:param entry:
|
|
:return:
|
|
"""
|
|
|
|
# title
|
|
content = '# {}\n\n'.format(entry['Title'])
|
|
|
|
# we automatically sort some fields
|
|
sort_fun = lambda x: str.casefold(x.value)
|
|
for field in ('Media', 'Inspiration', 'Code Language'):
|
|
if field in entry:
|
|
values = entry[field]
|
|
entry[field] = sorted(values, key=sort_fun)
|
|
# we also sort keywords, but first the recommend ones and then other ones
|
|
keywords = entry['Keyword']
|
|
a = [x for x in keywords if x in c.recommended_keywords]
|
|
b = [x for x in keywords if x not in c.recommended_keywords]
|
|
entry['Keyword'] = sorted(a, key=sort_fun) + sorted(b, key=sort_fun)
|
|
|
|
# now all properties in the recommended order
|
|
for field in c.valid_properties:
|
|
if field in entry:
|
|
e = entry[field]
|
|
e = ['"{}"'.format(x) if any(y in x.value for y in (',', ' (')) else x for x in e]
|
|
e = [str(x) for x in e]
|
|
content += '- {}: {}\n'.format(field, ', '.join(e))
|
|
content += '\n'
|
|
|
|
# if there is a note, insert it
|
|
if 'Note' in entry:
|
|
content += entry['Note']
|
|
|
|
# building header
|
|
content += '## Building\n'
|
|
|
|
# building properties if present
|
|
has_properties = False
|
|
for field in c.valid_building_properties:
|
|
if field in entry['Building']:
|
|
if not has_properties:
|
|
has_properties = True
|
|
content += '\n'
|
|
e = entry['Building'][field]
|
|
e = ['"{}"'.format(x) if ',' in x else x for x in e]
|
|
e = [str(x) for x in e]
|
|
content += '- {}: {}\n'.format(field, ', '.join(e))
|
|
|
|
# if there is a note, insert it
|
|
if 'Note' in entry['Building']:
|
|
content += '\n'
|
|
content += entry['Building']['Note']
|
|
|
|
return content
|
|
|
|
|
|
def is_url(str):
|
|
"""
|
|
Could be too generous. See https://stackoverflow.com/questions/7160737/how-to-validate-a-url-in-python-malformed-or-not for other possibilities.
|
|
:param str:
|
|
:return:
|
|
"""
|
|
if any(str.startswith(x) for x in c.valid_url_prefixes) and not ' ' in str:
|
|
return True
|
|
return False
|
|
|
|
|
|
def all_urls(entries):
|
|
"""
|
|
Gets all urls of all entries in a dictionary (key=url value=list of entries (file name) with this url
|
|
:param entries:
|
|
:return:
|
|
"""
|
|
urls = {}
|
|
# iterate over entries
|
|
for entry in entries:
|
|
file = entry['File']
|
|
for field in c.url_fields: # TODO there are other fields, maybe just regex on the whole content
|
|
for value in entry.get(field, []):
|
|
if value.comment:
|
|
value = value.value + ' ' + value.comment
|
|
else:
|
|
value = value.value
|
|
for subvalue in value.split(' '):
|
|
subvalue = subvalue.strip()
|
|
if is_url(subvalue):
|
|
urls[subvalue] = urls.get(subvalue, []) + [file]
|
|
return urls
|
|
|
|
|
|
def git_repo(repo):
|
|
"""
|
|
Tests if a repo URL is a git repo, then returns the repo url.
|
|
"""
|
|
|
|
# everything that starts with 'git://'
|
|
if repo.startswith('git://'):
|
|
return repo
|
|
|
|
# generic (https://*.git) or (http://*.git) ending on git
|
|
if (repo.startswith('https://') or repo.startswith('http://')) and repo.endswith('.git'):
|
|
return repo
|
|
|
|
# for all others we just check if they start with the typical urls of git services
|
|
services = ['https://git.tuxfamily.org/', 'http://git.pond.sub.org/', 'https://gitorious.org/',
|
|
'https://git.code.sf.net/p/']
|
|
if any(repo.startswith(service) for service in services):
|
|
return repo
|
|
|
|
# the rest is not recognized as a git url
|
|
return None
|
|
|
|
|
|
def svn_repo(repo):
|
|
"""
|
|
Tests if a repo URL is a svn repo, then returns the repo url.
|
|
"""
|
|
|
|
# we can just go for known providers of svn
|
|
services = ('svn://', 'https://svn.code.sf.net/p/', 'http://svn.savannah.gnu.org/svn/', 'https://svn.icculus.org/', 'http://svn.icculus.org/', 'http://svn.uktrainsim.com/svn/', 'https://rpg.hamsterrepublic.com/source/wip')
|
|
if any(repo.startswith(service) for service in services):
|
|
return repo
|
|
|
|
# not svn
|
|
return None
|
|
|
|
|
|
def hg_repo(repo):
|
|
"""
|
|
Tests if a repo URL is a hg repo, then returns the repo url.
|
|
"""
|
|
if repo.startswith('https://bitbucket.org/') and not repo.endswith('.git'):
|
|
return repo
|
|
|
|
if repo.startswith('http://hg.'):
|
|
return repo
|
|
|
|
# not hg
|
|
return None |