some fixes of entries, datatable html and git archive python code

This commit is contained in:
Trilarion
2018-06-11 09:44:45 +02:00
parent 8220365691
commit 81228e509b
19 changed files with 149 additions and 120 deletions

View File

@ -1,13 +0,0 @@
https://github.com/FreezingMoon/AncientBeast
https://github.com/godrin/antargis
https://github.com/bote-team/bote
https://github.com/Trilarion/civil
https://github.com/SWY1985/CivOne
https://github.com/colobot/colobot
https://github.com/tautvilas/epoh
https://github.com/hinogi/eternalwinterwars
https://github.com/infidel-/cult
https://github.com/Vakarias/farcolony
https://github.com/freeciv/freeciv
https://github.com/freeciv/freeciv-web
https://github.com/freeorion/freeorion
1 https://github.com/FreezingMoon/AncientBeast
2 https://github.com/godrin/antargis
3 https://github.com/bote-team/bote
4 https://github.com/Trilarion/civil
5 https://github.com/SWY1985/CivOne
6 https://github.com/colobot/colobot
7 https://github.com/tautvilas/epoh
8 https://github.com/hinogi/eternalwinterwars
9 https://github.com/infidel-/cult
10 https://github.com/Vakarias/farcolony
11 https://github.com/freeciv/freeciv
12 https://github.com/freeciv/freeciv-web
13 https://github.com/freeorion/freeorion

View File

@ -0,0 +1,5 @@
Clones and/or pulls many Git repositories listed in the open source games entries, in order to keep a local archive of them.
Currently requires at least 35 GB of disk space!
Run update.py to update the archive. The Git URLs are stored in archives.json.

File diff suppressed because one or more lines are too long

View File

@ -7,15 +7,25 @@
"""
import os
import csv
import json
import subprocess
def read_text(file):
    """
    Returns the complete content of a text file, decoded as UTF-8.
    """
    with open(file, mode='r', encoding='utf-8') as handle:
        return handle.read()
def derive_folder_name(url):
    """
    Derives the name of the local archive folder from a git URL.

    Only GitHub URLs are supported. The 'https://github.com/' prefix is dropped and the remaining
    path parts are joined with dots behind a 'github' prefix, e.g.
    'https://github.com/user/repo.git' becomes 'github.user.repo.git'.

    Raises a ValueError for every other URL.
    """
    github = 'https://github.com/'
    if not url.startswith(github):
        # fail with a clear message instead of returning a name that was never assigned
        raise ValueError('Cannot derive folder name, unsupported git URL: {}'.format(url))
    # keep the URL string untouched, the path parts get their own name
    parts = url[len(github):].split('/')
    parts.insert(0, 'github')
    return '.'.join(parts)
@ -36,22 +46,18 @@ if __name__ == '__main__':
# get this folder
root_folder = os.path.realpath(os.path.dirname(__file__))
# read archives.csv
archives = []
with open('archives.csv', newline='') as f:
reader = csv.reader(f)
for row in reader:
archives.append(row)
# read archives.json
text = read_text(os.path.join(root_folder, 'archives.json'))
archives = json.loads(text)
# loop over archives
for archive in archives:
url = archive[0]
folder = os.path.join(root_folder, derive_folder_name(url))
folder = os.path.join(root_folder, derive_folder_name(archive))
# if not existing do the initial checkout
if not os.path.isdir(folder):
os.chdir(root_folder)
clone(url, folder)
clone(archive, folder)
# pull all
os.chdir(folder)

View File

@ -64,10 +64,7 @@ def extract_overview_for_toc(file):
To be displayed after the game name in the category TOCs.
"""
with open(file, mode='r', encoding='utf-8') as f:
text = f.read()
info = parse_entry(text)
info = infos[file]
output = []
@ -301,9 +298,12 @@ def parse_entry(content):
# split on ','
v = v.split(',')
# finally strip
# strip
v = [x.strip() for x in v]
# remove all being false (empty)
v = [x for x in v if x]
# store in info
info[field.lower()] = v
@ -319,9 +319,15 @@ def parse_entry(content):
# state must contain either beta or mature but not both
v = info['state']
if 'beta' in v != 'mature' in v:
printf('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
print('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
return info # so that the rest can run through
# github repositories should not end on .git
repos = info['code repository']
for repo in repos:
if repo.startswith('https://github.com/') and repo.endswith('.git'):
print('Github repo {} in entry "{}" should not end on .git.'.format(repo, info['title']))
# extract inactive
phrase = 'inactive since '
inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
@ -340,7 +346,7 @@ def assemble_infos():
category_paths = get_category_paths()
# a database of all important infos about the entries
infos = []
infos = {}
# for each category
for category_path in category_paths:
@ -364,7 +370,7 @@ def assemble_infos():
info['file'] = os.path.basename(entry_path)[:-3] # [:-3] to cut off the .md
# add to list
infos.append(info)
infos[entry_path] = info
return infos
@ -376,25 +382,28 @@ def generate_statistics():
Should be done every time the entries change.
"""
# for this function replace infos with infos.values
infois = infos.values()
# start the page
statistics_path = os.path.join(games_path, 'statistics.md')
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
number_entries = len(infos)
number_entries = len(infois)
rel = lambda x: x / number_entries * 100 # conversion to percent
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# State (beta, mature, inactive)
statistics += '## State\n\n'
number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
number_inactive = sum(1 for x in infos if 'inactive' in x)
number_state_beta = sum(1 for x in infois if 'beta' in x['state'])
number_state_mature = sum(1 for x in infois if 'mature' in x['state'])
number_inactive = sum(1 for x in infois if 'inactive' in x)
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
if number_inactive > 0:
entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
entries_inactive = [(x['title'], x['inactive']) for x in infois if 'inactive' in x]
entries_inactive.sort(key=lambda x: x[0]) # first sort by name
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
@ -405,16 +414,16 @@ def generate_statistics():
field = 'code language'
# those without language tag
number_no_language = sum(1 for x in infos if field not in x)
number_no_language = sum(1 for x in infois if field not in x)
if number_no_language > 0:
statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
entries_no_language = [x['title'] for x in infos if field not in x]
entries_no_language = [x['title'] for x in infois if field not in x]
entries_no_language.sort()
statistics += ', '.join(entries_no_language) + '\n\n'
# get all languages together
languages = []
for info in infos:
for info in infois:
if field in info:
languages.extend(info[field])
@ -430,16 +439,16 @@ def generate_statistics():
field = 'code license'
# those without license
number_no_license = sum(1 for x in infos if field not in x)
number_no_license = sum(1 for x in infois if field not in x)
if number_no_license > 0:
statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
entries_no_license = [x['title'] for x in infos if field not in x]
entries_no_license = [x['title'] for x in infois if field not in x]
entries_no_license.sort()
statistics += ', '.join(entries_no_license) + '\n\n'
# get all licenses together
licenses = []
for info in infos:
for info in infois:
if field in info:
licenses.extend(info[field])
@ -456,7 +465,7 @@ def generate_statistics():
# get all keywords together
keywords = []
for info in infos:
for info in infois:
if field in info:
keywords.extend(info[field])
@ -482,10 +491,11 @@ def export_json():
db['headings'] = ['Name', 'Download']
entries = []
for info in infos:
for info in infos.values():
entry = [info['title']]
if 'download' in info:
entry.append(info['download'][0])
field = 'download'
if field in info and info[field]:
entry.append(info[field][0])
else:
entry.append('')
entries.append(entry)
@ -497,6 +507,41 @@ def export_json():
write_text(json_path, text)
def git_repo(repo):
    """
    Converts the web URL of a GitHub repository into its git clone URL (ending on '.git').

    Returns None if the URL is not of the form 'https://github.com/<owner>/<name>', i.e. for other
    hosts, for incomplete URLs and for URLs pointing deeper into a repository.
    """
    if not repo.startswith('https://github.com/'):
        return None
    # a trailing slash should neither count as a path part nor end up in the result
    repo = repo.rstrip('/')
    # 'https://github.com/<owner>/<name>' splits into exactly five parts
    parts = repo.split('/')
    if len(parts) != 5 or not parts[3] or not parts[4]:
        return None
    # do not append the suffix a second time
    if repo.endswith('.git'):
        return repo
    return repo + '.git'
def update_primary_code_repositories():
    """
    Collects the primary (first listed) code repository of every entry, keeps those for which
    git_repo() yields a clone URL and writes them, sorted alphabetically, as a JSON list to
    tools/git_archive/archives.json.
    """
    field = 'code repository'
    collected = []

    for info in infos.values():
        # skip entries without the field
        if field not in info:
            continue
        candidates = info[field]
        # skip entries where the field is present but empty
        if not candidates:
            continue
        # only the first repository counts as the primary one
        clone_url = git_repo(candidates[0])
        if clone_url:
            collected.append(clone_url)

    # alphabetical order
    collected = sorted(collected)

    # store the list where the git archive tool reads it
    target = os.path.join(games_path, os.path.pardir, 'tools', 'git_archive', 'archives.json')
    write_text(target, json.dumps(collected))
if __name__ == "__main__":
# paths
@ -523,3 +568,6 @@ if __name__ == "__main__":
# check external links (only rarely)
# check_validity_external_links()
# collect list of primary code repositories
update_primary_code_repositories()