rename fields to improve understanding, adapting maintenance.py (intermediate)
This commit is contained in:
@ -2,8 +2,8 @@
|
||||
Runs a series of maintenance operations on the collection of entry files, updating the table of content files for
|
||||
each category as well as creating a statistics file.
|
||||
|
||||
Counts the number of records each subfolder and updates the overview. Sorts the entries in the contents files of
|
||||
each sub folder alphabetically.
|
||||
Counts the number of records each sub-folder and updates the overview.
|
||||
Sorts the entries in the contents files of each sub folder alphabetically.
|
||||
|
||||
This script runs with Python 3, it could also with Python 2 with some minor tweaks probably.
|
||||
"""
|
||||
@ -14,19 +14,19 @@ import urllib.request
|
||||
import http.client
|
||||
import datetime
|
||||
|
||||
def get_category_paths():
|
||||
"""
|
||||
Returns all sub folders of the games path.
|
||||
"""
|
||||
return [os.path.join(games_path, x) for x in os.listdir(games_path) if os.path.isdir(os.path.join(games_path, x))]
|
||||
TOC = '_toc.md'
|
||||
|
||||
def get_entry_paths(category_path):
|
||||
"""
|
||||
Returns all files of a category path, except for '_toc.md'.
|
||||
"""
|
||||
return [os.path.join(category_path, x) for x in os.listdir(category_path) if x != '_toc.md' and os.path.isfile(os.path.join(category_path, x))]
|
||||
|
||||
def read_first_line_from_file(file):
|
||||
def read_text(file):
|
||||
"""
|
||||
Reads a whole text file (UTF-8 encoded).
|
||||
"""
|
||||
with open(file, mode='r', encoding='utf-8') as f:
|
||||
text = f.read()
|
||||
return text
|
||||
|
||||
|
||||
def read_first_line(file):
|
||||
"""
|
||||
Convenience function because we only need the first line of a category overview really.
|
||||
"""
|
||||
@ -34,16 +34,42 @@ def read_first_line_from_file(file):
|
||||
line = f.readline()
|
||||
return line
|
||||
|
||||
def read_interesting_info_from_file(file):
|
||||
|
||||
def write_text(file, text):
|
||||
"""
|
||||
Parses a file for some interesting fields and concatenates the content. To be displayed after the game name in the
|
||||
category overview.
|
||||
Writes a whole text file (UTF-8 encoded).
|
||||
"""
|
||||
with open(file, mode='w', encoding='utf-8') as f:
|
||||
f.write(text)
|
||||
|
||||
|
||||
def get_category_paths():
|
||||
"""
|
||||
Returns all sub folders of the games path.
|
||||
"""
|
||||
return [os.path.join(games_path, x) for x in os.listdir(games_path) if os.path.isdir(os.path.join(games_path, x))]
|
||||
|
||||
|
||||
def get_entry_paths(category_path):
|
||||
"""
|
||||
Returns all files of a category path, except for '_toc.md'.
|
||||
"""
|
||||
return [os.path.join(category_path, x) for x in os.listdir(category_path) if x != TOC and os.path.isfile(os.path.join(category_path, x))]
|
||||
|
||||
|
||||
def extract_overview_for_toc(file):
|
||||
"""
|
||||
Parses a file for some interesting fields and concatenates the content.
|
||||
|
||||
To be displayed after the game name in the category TOCs.
|
||||
"""
|
||||
with open(file, mode='r', encoding='utf-8') as f:
|
||||
text = f.read()
|
||||
|
||||
output = [None, None, None]
|
||||
|
||||
# TODO unify this with the statistics
|
||||
|
||||
# language
|
||||
regex = re.compile(r"- Language\(s\): (.*)")
|
||||
matches = regex.findall(text)
|
||||
@ -71,19 +97,25 @@ def read_interesting_info_from_file(file):
|
||||
|
||||
def update_readme():
|
||||
"""
|
||||
Recounts entries in sub categories and writes them to the readme. Needs to be performed regularly.
|
||||
Recounts entries in sub categories and writes them to the readme.
|
||||
Also updates the _toc files in the categories directories.
|
||||
|
||||
Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending
|
||||
on "A collection.."
|
||||
|
||||
Needs to be performed regularly.
|
||||
"""
|
||||
print('update readme file')
|
||||
|
||||
# read readme
|
||||
with open(readme_path, mode='r', encoding='utf-8') as f:
|
||||
readme_text = f.read()
|
||||
readme_text = read_text(readme_file)
|
||||
|
||||
# compile regex for identifying the building blocks
|
||||
regex = re.compile(r"(# Open Source Games\n\n)(.*)(\nA collection.*)", re.DOTALL)
|
||||
|
||||
# apply regex
|
||||
matches = regex.findall(readme_text)
|
||||
assert len(matches) == 1
|
||||
matches = matches[0]
|
||||
start = matches[0]
|
||||
end = matches[2]
|
||||
@ -91,35 +123,37 @@ def update_readme():
|
||||
# get sub folders
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# get number of files (minus 1) in each sub folder
|
||||
n = [len(os.listdir(path)) - 1 for path in category_paths]
|
||||
|
||||
# assemble paths
|
||||
paths = [os.path.join(path, '_toc.md') for path in category_paths]
|
||||
toc_paths = [os.path.join(path, TOC) for path in category_paths]
|
||||
|
||||
# get titles (discarding first two ("# ") and last ("\n") characters)
|
||||
titles = [read_first_line_from_file(path)[2:-1] for path in paths]
|
||||
category_titles = [read_first_line(path)[2:-1] for path in toc_paths]
|
||||
|
||||
# get number of files (minus 1 for the already existing TOC file) in each sub folder
|
||||
n_entries = [len(os.listdir(path)) - 1 for path in category_paths]
|
||||
|
||||
# combine titles, category names, numbers in one list
|
||||
info = zip(titles, [os.path.basename(path) for path in category_paths], n)
|
||||
info = zip(category_titles, [os.path.basename(path) for path in category_paths], n_entries)
|
||||
|
||||
# sort according to sub category title (should be unique)
|
||||
info = sorted(info, key=lambda x:x[0])
|
||||
|
||||
# assemble output
|
||||
update = ['- **[{}](games/{}/_toc.md)** ({})\n'.format(*entry) for entry in info]
|
||||
update = "{} entries\n".format(sum(n)) + "".join(update)
|
||||
update = ['- **[{}](games/{}/{})** ({})\n'.format(entry[0], entry[1], TOC, entry[2]) for entry in info]
|
||||
update = "{} entries\n".format(sum(n_entries)) + "".join(update)
|
||||
|
||||
# insert new text in the middle
|
||||
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "\n[comment]: # (end of autogenerated content)" + end
|
||||
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "[comment]: # (end of autogenerated content)" + end
|
||||
|
||||
# write to readme
|
||||
with open(readme_path, mode='w', encoding='utf-8') as f:
|
||||
f.write(text)
|
||||
write_text(readme_file, text)
|
||||
|
||||
|
||||
def update_category_tocs():
|
||||
"""
|
||||
Lists all entries in all sub folders and generates the list in the toc file. Needs to be performed regularly.
|
||||
Lists all entries in all sub folders and generates the list in the toc file.
|
||||
|
||||
Needs to be performed regularly.
|
||||
"""
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
@ -129,19 +163,19 @@ def update_category_tocs():
|
||||
print('generate toc for {}'.format(os.path.basename(category_path)))
|
||||
|
||||
# read toc header line
|
||||
toc_file = os.path.join(category_path, '_toc.md')
|
||||
toc_header = read_first_line_from_file(toc_file)
|
||||
toc_file = os.path.join(category_path, TOC)
|
||||
toc_header = read_first_line(toc_file) # stays as is
|
||||
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
# get titles (discarding first two ("# ") and last ("\n") characters)
|
||||
titles = [read_first_line_from_file(path)[2:-1] for path in entry_paths]
|
||||
titles = [read_first_line(path)[2:-1] for path in entry_paths]
|
||||
|
||||
# get more interesting info
|
||||
more = [read_interesting_info_from_file(path) for path in entry_paths]
|
||||
more = [extract_overview_for_toc(path) for path in entry_paths]
|
||||
|
||||
# combine name and file name
|
||||
# combine name, file name and more info
|
||||
info = zip(titles, [os.path.basename(path) for path in entry_paths], more)
|
||||
|
||||
# sort according to entry title (should be unique)
|
||||
@ -151,13 +185,14 @@ def update_category_tocs():
|
||||
update = ['- **[{}]({})** ({})\n'.format(*entry) for entry in info]
|
||||
update = "".join(update)
|
||||
|
||||
# combine toc header
|
||||
text = toc_header + '\n' + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "\n[comment]: # (end of autogenerated content)"
|
||||
# combine with toc header
|
||||
text = toc_header + '\n' + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "[comment]: # (end of autogenerated content)"
|
||||
|
||||
# write to toc file
|
||||
with open(toc_file, mode='w', encoding='utf-8') as f:
|
||||
f.write(text)
|
||||
|
||||
|
||||
def check_validity_external_links():
|
||||
"""
|
||||
Checks all external links it can find for validity. Prints those with non OK HTTP responses. Does only need to be run
|
||||
@ -212,71 +247,18 @@ def check_validity_external_links():
|
||||
|
||||
print("{} links checked".format(number_checked_links))
|
||||
|
||||
def fix_notation():
|
||||
"""
|
||||
Changes notation, quite special. Only run when needed.
|
||||
"""
|
||||
regex = re.compile(r"- License details:(.*)")
|
||||
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
for entry_path in entry_paths:
|
||||
# read it line by line
|
||||
with open(entry_path, 'r', 'utf-8') as f:
|
||||
content = f.readlines()
|
||||
|
||||
# apply regex on every line
|
||||
matched_lines = [regex.findall(line) for line in content]
|
||||
|
||||
# loop over all the lines
|
||||
for line, match in enumerate(matched_lines):
|
||||
if match:
|
||||
match = match[0]
|
||||
|
||||
# patch content
|
||||
content[line] = "- Code license details:{}\n".format(match)
|
||||
|
||||
# write it line by line
|
||||
with open(entry_path, "w", 'utf-8') as f:
|
||||
f.writelines(content)
|
||||
|
||||
def regular_replacements():
|
||||
"""
|
||||
Replacing some stuff by shortcuts. Can be run regularly
|
||||
"""
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
for entry_path in entry_paths:
|
||||
# read it line by line
|
||||
with open(entry_path, 'r', 'utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# now the replacements
|
||||
content = content.replace('?source=navbar', '') # sourceforge specific
|
||||
content = content.replace('single player', 'SP')
|
||||
content = content.replace('multi player', 'MP')
|
||||
|
||||
# write it line by line
|
||||
with open(entry_path, "w", 'utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
def check_template_leftovers():
|
||||
"""
|
||||
Checks for template leftovers.
|
||||
|
||||
Should be run only occasionally.
|
||||
"""
|
||||
check_strings = ['# {NAME}', '_{One line description}_', '- Home: {URL}', '- Media: {URL}', '- Download: {URL}', '- State: beta, mature, inactive since', '- Keywords: SP, MP, RTS, TBS (if none, remove the line)', '- Code: primary repository (type if not git), other repositories (type if not git)', '- Language(s): {XX}', '- License: {XX} (if special, include link)', '{XXX}']
|
||||
|
||||
# load template and get all lines
|
||||
text = read_text(os.path.join(games_path, 'template.md'))
|
||||
text = text.split('\n')
|
||||
check_strings = [x for x in text if x and not x.startswith('##')]
|
||||
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
@ -288,13 +270,13 @@ def check_template_leftovers():
|
||||
|
||||
for entry_path in entry_paths:
|
||||
# read it line by line
|
||||
with open(entry_path, 'r', 'utf-8') as f:
|
||||
content = f.read()
|
||||
content = read_text(entry_path)
|
||||
|
||||
for check_string in check_strings:
|
||||
if content.find(check_string) >= 0:
|
||||
print('{}: found {}'.format(os.path.basename(entry_path), check_string))
|
||||
|
||||
|
||||
def parse_entry(content):
|
||||
"""
|
||||
Returns a dictionary of the features of the content
|
||||
@ -302,48 +284,67 @@ def parse_entry(content):
|
||||
|
||||
info = {}
|
||||
|
||||
# state
|
||||
regex = re.compile(r"- State: (.*)")
|
||||
# read title
|
||||
regex = re.compile(r"^# (.*)")
|
||||
matches = regex.findall(content)
|
||||
if matches:
|
||||
# first remove everything in parenthesis
|
||||
states = re.sub(r'\([^)]*\)', '', matches[0])
|
||||
states = states.split(',')
|
||||
states = [x.strip() for x in states]
|
||||
if 'beta' in states:
|
||||
info['state'] = 'beta'
|
||||
elif 'mature' in states:
|
||||
info['state'] = 'mature'
|
||||
else:
|
||||
print('Neither beta nor mature in state tag: {}'.format(content))
|
||||
inactive = next((int(x[14:]) for x in states if x.startswith('inactive since')), None) # only the year
|
||||
if inactive:
|
||||
info['inactive'] = inactive
|
||||
assert len(matches) == 1
|
||||
info['title'] = matches[0]
|
||||
|
||||
# language
|
||||
regex = re.compile(r"- Language\(s\): (.*)")
|
||||
matches = regex.findall(content)
|
||||
if matches:
|
||||
# first remove everything in parenthesis
|
||||
languages = re.sub(r'\([^)]*\)', '', matches[0])
|
||||
languages = languages.split(',')
|
||||
languages = [x.strip() for x in languages]
|
||||
info['language'] = languages
|
||||
# first read all field names
|
||||
regex = re.compile(r"- (.*): ")
|
||||
fields = regex.findall(content)
|
||||
|
||||
# license
|
||||
regex = re.compile(r"- Code license: (.*)")
|
||||
matches = regex.findall(content)
|
||||
if matches:
|
||||
# first remove everything in parenthesis
|
||||
license = re.sub(r'\([^)]*\)', '', matches[0])
|
||||
info['license'] = license
|
||||
# iterate over found field
|
||||
for field in fields:
|
||||
regex = re.compile(r"- {}: (.*)".format(field))
|
||||
matches = regex.findall(content)
|
||||
assert len(matches) == 1 # every field should only be present once
|
||||
v = matches[0]
|
||||
|
||||
# first store as is
|
||||
info[field.lower()+'-raw'] = v
|
||||
|
||||
# remove parenthesis
|
||||
v = re.sub(r'\([^)]*\)', '', v)
|
||||
|
||||
# split on ','
|
||||
v = v.split(',')
|
||||
|
||||
# finally strip
|
||||
v = [x.strip() for x in v]
|
||||
|
||||
# store in info
|
||||
info[field.lower()] = v
|
||||
|
||||
# checks
|
||||
|
||||
# essential fields
|
||||
essential_fields = ['home', 'state']
|
||||
for field in essential_fields:
|
||||
if field not in info:
|
||||
print('Essential field "{}" missing in entry {}'.format(field, info['title']))
|
||||
return {}
|
||||
|
||||
# state must contain either beta or mature but not both
|
||||
v = info['state']
|
||||
if 'beta' in v != 'mature' in v:
|
||||
printf('State must be one of <beta, mature> in entry {}'.format(info['title']))
|
||||
return {}
|
||||
|
||||
# extract inactive
|
||||
phrase = 'inactive since '
|
||||
inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
|
||||
if inactive_year:
|
||||
info['inactive'] = inactive_year
|
||||
|
||||
return info
|
||||
|
||||
|
||||
def generate_statistics():
|
||||
"""
|
||||
Generates the statistics page.
|
||||
|
||||
Should be done everytime the entries change.
|
||||
"""
|
||||
statistics_path = os.path.join(games_path, 'statistics.md')
|
||||
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
|
||||
@ -351,31 +352,37 @@ def generate_statistics():
|
||||
# get category paths
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# for each category
|
||||
# a database of all important infos about the entries
|
||||
infos = []
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
# get paths of all entries in this category
|
||||
entry_paths = get_entry_paths(category_path)
|
||||
|
||||
for entry_path in entry_paths:
|
||||
# read it line by line
|
||||
with open(entry_path, mode='r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
# read entry
|
||||
content = read_text(entry_path)
|
||||
|
||||
# parse entry
|
||||
info = parse_entry(content)
|
||||
|
||||
# add file information
|
||||
info['file'] = os.path.basename(entry_path)[:-3] # [:-3] to cut off the .md
|
||||
|
||||
# add to list
|
||||
infos.append(info)
|
||||
|
||||
# total number
|
||||
number_entries = len(infos)
|
||||
rel = lambda x: x / number_entries * 100 # converion to percent
|
||||
rel = lambda x: x / number_entries * 100 # conversion to percent
|
||||
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# State (beta, mature, inactive)
|
||||
statistics += '## State\n\n'
|
||||
|
||||
number_state_beta = sum(1 for x in infos if 'state' in x and x['state'] == 'beta')
|
||||
number_state_mature = sum(1 for x in infos if 'state' in x and x['state'] == 'mature')
|
||||
number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
|
||||
number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
|
||||
number_inactive = sum(1 for x in infos if 'inactive' in x)
|
||||
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
|
||||
|
||||
@ -439,18 +446,17 @@ def generate_statistics():
|
||||
f.write(statistics)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# paths
|
||||
games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
|
||||
readme_path = os.path.join(games_path, os.pardir, 'README.md')
|
||||
readme_file = os.path.realpath(os.path.join(games_path, os.pardir, 'README.md'))
|
||||
|
||||
# recount and write to readme
|
||||
update_readme()
|
||||
# update_readme()
|
||||
|
||||
# generate list in toc files
|
||||
update_category_tocs()
|
||||
# update_category_tocs()
|
||||
|
||||
# generate report
|
||||
generate_statistics()
|
||||
@ -459,10 +465,4 @@ if __name__ == "__main__":
|
||||
# check_template_leftovers()
|
||||
|
||||
# check external links (only rarely)
|
||||
#check_validity_external_links()
|
||||
|
||||
# special, only run when needed
|
||||
# fix_notation()
|
||||
|
||||
# regular replacements
|
||||
#regular_replacements()
|
||||
# check_validity_external_links()
|
||||
|
Reference in New Issue
Block a user