New entries and reorganization of Python scripts (added git archive)
tools/git archive/archives.csv (new file, 14 lines)
@@ -0,0 +1,14 @@
https://github.com/guillaume-gouchon/island
https://github.com/FreezingMoon/AncientBeast
https://github.com/godrin/antargis
https://github.com/bote-team/bote
https://github.com/Trilarion/civil
https://github.com/SWY1985/CivOne
https://github.com/colobot/colobot
https://github.com/tautvilas/epoh
https://github.com/hinogi/eternalwinterwars
https://github.com/infidel-/cult
https://github.com/Vakarias/farcolony
https://github.com/freeciv/freeciv
https://github.com/freeciv/freeciv-web
https://github.com/freeorion/freeorion
tools/git archive/update.py (new file, 62 lines)
@@ -0,0 +1,62 @@
"""
Clones and/or pulls all the git repositories listed in archives.csv.

Requires: git executable in the path

Warning: This may take a long time on the first run and may need a lot of storage space!
"""

import os
import csv
import subprocess


def derive_folder_name(url):
    github = 'https://github.com/'
    if url.startswith(github):
        parts = url[len(github):].split('/')
        return 'github.' + parts[0] + '.' + parts[1] + '.git'
    # all current entries are GitHub URLs; fail loudly on anything else
    raise RuntimeError('Cannot derive folder name from url: {}'.format(url))


def clone(url, folder):
    result = subprocess.run(["git", "clone", url, folder])
    if result.returncode:
        print(result)


def pull():
    result = subprocess.run(["git", "pull", "--all"])
    if result.returncode:
        print(result)


if __name__ == '__main__':

    # get the folder this script is in
    root_folder = os.path.realpath(os.path.dirname(__file__))

    # read archives.csv (resolved relative to this script, not the current working directory)
    archives = []
    with open(os.path.join(root_folder, 'archives.csv'), newline='') as f:
        reader = csv.reader(f)
        for row in reader:
            archives.append(row)

    # loop over archives
    for archive in archives:
        url = archive[0]
        folder = os.path.join(root_folder, derive_folder_name(url))

        # if not yet existing, do the initial clone
        if not os.path.isdir(folder):
            os.chdir(root_folder)
            clone(url, folder)

        # pull all branches
        os.chdir(folder)
        pull()
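For illustration (not part of the commit): the naming scheme maps each GitHub URL to a flat local directory name, e.g.

    derive_folder_name('https://github.com/freeciv/freeciv')  # -> 'github.freeciv.freeciv.git'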
tools/maintenance.py (new file, 470 lines)
@@ -0,0 +1,470 @@
"""
Counts the number of records in each subfolder and updates the overview. Sorts the entries in the contents file of
each subfolder alphabetically.

This script runs with Python 3. It could probably also run with Python 2 after some minor tweaks, but that's not important.

TODO get the number of games with a GitHub or Bitbucket repository and list those that have neither
TODO which C/C++ projects do not use CMake?
TODO for games with GitHub repositories, get activity, number of open issues and number of merge requests, and display them in a health monitor file
TODO search for ?? and replace with either nothing or the missing information
"""

import os
import re
import urllib.request
import urllib.error
import http.client
import datetime


def get_category_paths():
    """
    Returns all subfolders of the games path.
    """
    return [os.path.join(games_path, x) for x in os.listdir(games_path) if os.path.isdir(os.path.join(games_path, x))]


def get_entry_paths(category_path):
    """
    Returns all files of a category path, except for '_toc.md'.
    """
    return [os.path.join(category_path, x) for x in os.listdir(category_path) if x != '_toc.md' and os.path.isfile(os.path.join(category_path, x))]


def read_first_line_from_file(file):
    """
    Convenience function; we really only need the first line of a category overview.
    """
    with open(file, 'r') as f:
        line = f.readline()
    return line


def read_interesting_info_from_file(file):
    """
    Parses a file for some interesting fields and concatenates the content. To be displayed after the game name in the
    category overview.
    """
    with open(file, 'r') as f:
        text = f.read()

    output = [None, None, None]

    # language
    regex = re.compile(r"- Language\(s\): (.*)")
    matches = regex.findall(text)
    if matches:
        output[0] = matches[0]

    # license
    regex = re.compile(r"- License: (.*)")
    matches = regex.findall(text)
    if matches:
        output[1] = matches[0]

    # state
    regex = re.compile(r"- State: (.*)")
    matches = regex.findall(text)
    if matches:
        output[2] = matches[0]

    output = [x for x in output if x]  # eliminate empty entries

    output = ", ".join(output)

    return output
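
# Illustrative example (not part of the original commit): for an entry containing the lines
# '- Language(s): C++', '- License: GPL-3.0' and '- State: mature', the function above
# returns the string 'C++, GPL-3.0, mature'.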


def update_readme():
    """
    Recounts the entries in the subcategories and writes the counts to the readme. Needs to be performed regularly.
    """
    print('update readme file')

    # read readme
    with open(readme_path) as f:
        readme_text = f.read()

    # compile regex for identifying the building blocks
    regex = re.compile(r"(# Open Source Games\n\n)(.*)(\nA collection.*)", re.DOTALL)

    # apply regex
    matches = regex.findall(readme_text)
    matches = matches[0]
    start = matches[0]
    end = matches[2]

    # get subfolders
    category_paths = get_category_paths()

    # get number of files (minus 1 for the toc file) in each subfolder
    n = [len(os.listdir(path)) - 1 for path in category_paths]

    # assemble paths
    paths = [os.path.join(path, '_toc.md') for path in category_paths]

    # get titles (discarding the first two characters ("# ") and the last ("\n"))
    titles = [read_first_line_from_file(path)[2:-1] for path in paths]

    # combine titles, category names and numbers in one list
    info = zip(titles, [os.path.basename(path) for path in category_paths], n)

    # sort according to subcategory title (should be unique)
    info = sorted(info, key=lambda x: x[0])

    # assemble output
    update = ['- **[{}](games/{}/_toc.md)** ({})\n'.format(*entry) for entry in info]
    update = "{} entries\n".format(sum(n)) + "".join(update)

    # insert the new text in the middle
    text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "\n[comment]: # (end of autogenerated content)" + end

    # write to readme
    with open(readme_path, 'w') as f:
        f.write(text)


def update_category_tocs():
    """
    Lists all entries in all subfolders and generates the list in the toc file. Needs to be performed regularly.
    """
    # get category paths
    category_paths = get_category_paths()

    # for each category
    for category_path in category_paths:
        print('generate toc for {}'.format(os.path.basename(category_path)))

        # read toc header line
        toc_file = os.path.join(category_path, '_toc.md')
        toc_header = read_first_line_from_file(toc_file)

        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)

        # get titles (discarding the first two characters ("# ") and the last ("\n"))
        titles = [read_first_line_from_file(path)[2:-1] for path in entry_paths]

        # get more interesting info
        more = [read_interesting_info_from_file(path) for path in entry_paths]

        # combine title, file name and info
        info = zip(titles, [os.path.basename(path) for path in entry_paths], more)

        # sort according to entry title (should be unique)
        info = sorted(info, key=lambda x: x[0])

        # assemble output
        update = ['- **[{}]({})** ({})\n'.format(*entry) for entry in info]
        update = "".join(update)

        # combine with toc header
        text = toc_header + '\n' + "[comment]: # (start of autogenerated content, do not edit)\n" + update + "\n[comment]: # (end of autogenerated content)"

        # write to toc file
        with open(toc_file, 'w') as f:
            f.write(text)
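
# Illustrative example (hypothetical entry, not part of the original commit): a generated
# toc line looks like '- **[Freeciv](freeciv.md)** (C, GPL-2.0, mature)'.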


def check_validity_external_links():
    """
    Checks all external links it can find for validity. Prints those with non-OK HTTP responses. Only needs to be run
    from time to time.
    """
    # regex for finding urls (can be in <>, in () after ], or preceded by whitespace)
    regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n]+)")

    # counter
    number_checked_links = 0

    # get category paths
    category_paths = get_category_paths()

    # for each category
    for category_path in category_paths:
        print('check links for {}'.format(os.path.basename(category_path)))

        # get entry paths
        entry_paths = get_entry_paths(category_path)

        # for each entry
        for entry_path in entry_paths:
            # read entry
            with open(entry_path, 'r') as f:
                content = f.read()

            # apply regex
            matches = regex.findall(content)

            # for each match
            for match in matches:

                # for each alternative of the regex
                for url in match:

                    # if there was something
                    if url:
                        try:
                            # without a special header, frequent 403 responses occur
                            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'})
                            urllib.request.urlopen(req)
                        except urllib.error.HTTPError as e:
                            print("{}: {} - {}".format(os.path.basename(entry_path), url, e.code))
                        except http.client.RemoteDisconnected:
                            print("{}: {} - disconnected without response".format(os.path.basename(entry_path), url))

                        number_checked_links += 1

                        if number_checked_links % 50 == 0:
                            print("{} links checked".format(number_checked_links))

    print("{} links checked".format(number_checked_links))
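
# Illustrative examples (not part of the original commit): the regex above captures
# 'https://example.org' from ' <https://example.org>', from '[text](https://example.org)'
# and from a bare ' https://example.org' preceded by whitespace.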


def fix_notation():
    """
    Changes the notation of a field; quite special. Only run when needed.
    """
    regex = re.compile(r"- License details:(.*)")

    # get category paths
    category_paths = get_category_paths()

    # for each category
    for category_path in category_paths:
        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)

        for entry_path in entry_paths:
            # read the file line by line
            with open(entry_path) as f:
                content = f.readlines()

            # apply regex to every line
            matched_lines = [regex.findall(line) for line in content]

            # loop over all the lines
            for line, match in enumerate(matched_lines):
                if match:
                    match = match[0]

                    # patch the content
                    content[line] = "- Code license details:{}\n".format(match)

            # write the file line by line
            with open(entry_path, "w") as f:
                f.writelines(content)


def regular_replacements():
    """
    Replaces some content with shortcuts. Can be run regularly.
    """
    # get category paths
    category_paths = get_category_paths()

    # for each category
    for category_path in category_paths:
        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)

        for entry_path in entry_paths:
            # read the whole file
            with open(entry_path) as f:
                content = f.read()

            # now the replacements
            content = content.replace('?source=navbar', '')  # sourceforge specific
            content = content.replace('single player', 'SP')
            content = content.replace('multi player', 'MP')

            # write the whole file
            with open(entry_path, "w") as f:
                f.write(content)


def check_template_leftovers():
    """
    Checks for leftovers from the entry template and prints them.
    """
    check_strings = ['# {NAME}', '_{One line description}_', '- Home: {URL}', '- Media: {URL}', '- Download: {URL}', '- State: beta, mature, inactive since', '- Keywords: SP, MP, RTS, TBS (if none, remove the line)', '- Code: primary repository (type if not git), other repositories (type if not git)', '- Language(s): {XX}', '- License: {XX} (if special, include link)', '{XXX}']

    # get category paths
    category_paths = get_category_paths()

    # for each category
    for category_path in category_paths:
        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)

        for entry_path in entry_paths:
            # read the whole file
            with open(entry_path) as f:
                content = f.read()

            for check_string in check_strings:
                if content.find(check_string) >= 0:
                    print('{}: found {}'.format(os.path.basename(entry_path), check_string))


def parse_entry(content):
    """
    Returns a dictionary of the features of the content.
    """

    info = {}

    # state
    regex = re.compile(r"- State: (.*)")
    matches = regex.findall(content)
    if matches:
        # first remove everything in parentheses
        states = re.sub(r'\([^)]*\)', '', matches[0])
        states = states.split(',')
        states = [x.strip() for x in states]
        if 'beta' in states:
            info['state'] = 'beta'
        elif 'mature' in states:
            info['state'] = 'mature'
        else:
            print('Neither beta nor mature in state tag: {}'.format(content))
        inactive = next((int(x[14:]) for x in states if x.startswith('inactive since')), None)  # only the year
        if inactive:
            info['inactive'] = inactive

    # language
    regex = re.compile(r"- Language\(s\): (.*)")
    matches = regex.findall(content)
    if matches:
        # first remove everything in parentheses
        languages = re.sub(r'\([^)]*\)', '', matches[0])
        languages = languages.split(',')
        languages = [x.strip() for x in languages]
        info['language'] = languages

    # license
    regex = re.compile(r"- Code license: (.*)")
    matches = regex.findall(content)
    if matches:
        # first remove everything in parentheses
        license = re.sub(r'\([^)]*\)', '', matches[0])
        info['license'] = license

    return info
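
# Illustrative example (not part of the original commit): for content containing
# '- State: beta (playable), inactive since 2014' and '- Language(s): C++',
# parse_entry returns {'state': 'beta', 'inactive': 2014, 'language': ['C++']}.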


def generate_statistics():
    """
    Parses all entries and writes a statistics overview (states, languages, licenses) to statistics.md.
    """
    statistics_path = os.path.join(games_path, 'statistics.md')
    statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'

    # get category paths
    category_paths = get_category_paths()

    # parse every entry in every category
    infos = []
    for category_path in category_paths:
        # get paths of all entries in this category
        entry_paths = get_entry_paths(category_path)

        for entry_path in entry_paths:
            # read the whole file
            with open(entry_path) as f:
                content = f.read()

            info = parse_entry(content)
            info['file'] = os.path.basename(entry_path)[:-3]  # [:-3] to cut off the .md
            infos.append(info)

    # total number
    number_entries = len(infos)
    rel = lambda x: x / number_entries * 100  # conversion to percent
    statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # state (beta, mature, inactive)
    statistics += '## State\n\n'

    number_state_beta = sum(1 for x in infos if 'state' in x and x['state'] == 'beta')
    number_state_mature = sum(1 for x in infos if 'state' in x and x['state'] == 'mature')
    number_inactive = sum(1 for x in infos if 'inactive' in x)
    statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))

    if number_inactive > 0:
        entries_inactive = [(x['file'], x['inactive']) for x in infos if 'inactive' in x]
        entries_inactive.sort(key=lambda x: x[0])  # first sort by name
        entries_inactive.sort(key=lambda x: -x[1])  # then sort by inactive year (most recent first)
        entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
        statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n'

    entries_no_state = [x['file'] for x in infos if 'state' not in x]
    if entries_no_state:
        entries_no_state.sort()
        statistics += '##### Without state tag ({})\n\n'.format(len(entries_no_state)) + ', '.join(entries_no_state) + '\n\n'

    # languages
    statistics += '## Languages\n\n'
    number_no_language = sum(1 for x in infos if 'language' not in x)
    if number_no_language > 0:
        statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
        entries_no_language = [x['file'] for x in infos if 'language' not in x]
        entries_no_language.sort()
        statistics += ', '.join(entries_no_language) + '\n\n'

    # get all languages together
    languages = []
    for info in infos:
        if 'language' in info:
            languages.extend(info['language'])

    unique_languages = set(languages)
    unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages]
    unique_languages.sort(key=lambda x: x[0])  # first sort by name
    unique_languages.sort(key=lambda x: -x[1])  # then sort by occurrence (highest first)
    unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages]
    statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n'

    # code licenses
    statistics += '## Code licenses\n\n'
    number_no_license = sum(1 for x in infos if 'license' not in x)
    if number_no_license > 0:
        statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
        entries_no_license = [x['file'] for x in infos if 'license' not in x]
        entries_no_license.sort()
        statistics += ', '.join(entries_no_license) + '\n\n'

    # get all licenses together
    licenses = []
    for info in infos:
        if 'license' in info:
            licenses.append(info['license'])

    unique_licenses = set(licenses)
    unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses]
    unique_licenses.sort(key=lambda x: x[0])  # first sort by name
    unique_licenses.sort(key=lambda x: -x[1])  # then sort by occurrence (highest first)
    unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses]
    statistics += '##### License frequency\n\n' + ''.join(unique_licenses) + '\n'

    with open(statistics_path, 'w') as f:
        f.write(statistics)


if __name__ == "__main__":

    # paths
    games_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'games'))
    readme_path = os.path.join(games_path, os.pardir, 'README.md')

    # recount entries and write to the readme
    update_readme()

    # generate the lists in the toc files
    update_category_tocs()

    # generate the statistics report
    generate_statistics()

    # check for unfilled template lines (run when needed)
    # check_template_leftovers()

    # check external links (only rarely)
    # check_validity_external_links()

    # special notation fix, only run when needed
    # fix_notation()

    # regular replacements (run when needed)
    # regular_replacements()
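
For reference (not part of the commit): since both scripts resolve their paths via __file__, they can be invoked from anywhere, typically as `python3 tools/maintenance.py` and `python3 "tools/git archive/update.py"` (the quotes are needed because of the space in the folder name).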