some fixes of entries, datatable html and git archive python code
This commit is contained in:
@ -1,13 +0,0 @@
|
||||
https://github.com/FreezingMoon/AncientBeast
|
||||
https://github.com/godrin/antargis
|
||||
https://github.com/bote-team/bote
|
||||
https://github.com/Trilarion/civil
|
||||
https://github.com/SWY1985/CivOne
|
||||
https://github.com/colobot/colobot
|
||||
https://github.com/tautvilas/epoh
|
||||
https://github.com/hinogi/eternalwinterwars
|
||||
https://github.com/infidel-/cult
|
||||
https://github.com/Vakarias/farcolony
|
||||
https://github.com/freeciv/freeciv
|
||||
https://github.com/freeciv/freeciv-web
|
||||
https://github.com/freeorion/freeorion
|
|
5
tools/git_archive/README.txt
Normal file
5
tools/git_archive/README.txt
Normal file
@ -0,0 +1,5 @@
|
||||
Clones and/or pulls many git repositories from the open source games entries, so that one has an archive of them.
|
||||
|
||||
Currently requires at least 35 GB of free disk space!
|
||||
|
||||
Run update.py to update the archive. Git URLs are stored in archives.json.
|
1
tools/git_archive/archives.json
Normal file
1
tools/git_archive/archives.json
Normal file
File diff suppressed because one or more lines are too long
@ -7,15 +7,25 @@
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
|
||||
def read_text(file):
    """
    Returns the complete content of a text file, decoded as UTF-8.
    """
    # the context manager closes the file again, even if reading fails
    with open(file, mode='r', encoding='utf-8') as handle:
        return handle.read()
|
||||
|
||||
|
||||
def derive_folder_name(url):
    """
    Derives the name of the local archive folder from the URL of a git repository.

    The GitHub prefix is replaced by 'github' and the remaining path segments are joined with dots, e.g.
    'https://github.com/user/repo.git' becomes 'github.user.repo.git'.

    Raises a ValueError for URLs that are not hosted on GitHub, because no naming scheme is defined for them.
    """
    github = 'https://github.com/'
    if not url.startswith(github):
        # previously this fell through to 'return folder' with folder never assigned (UnboundLocalError)
        raise ValueError('Cannot derive a folder name for non-GitHub URL "{}"'.format(url))

    # drop empty segments (e.g. from a trailing slash), otherwise the folder name would end with a dot
    parts = [part for part in url[len(github):].split('/') if part]
    parts.insert(0, 'github')
    return '.'.join(parts)
|
||||
|
||||
|
||||
@ -36,22 +46,18 @@ if __name__ == '__main__':
|
||||
# get this folder
|
||||
root_folder = os.path.realpath(os.path.dirname(__file__))
|
||||
|
||||
# read archives.csv
|
||||
archives = []
|
||||
with open('archives.csv', newline='') as f:
|
||||
reader = csv.reader(f)
|
||||
for row in reader:
|
||||
archives.append(row)
|
||||
# read archives.json
|
||||
text = read_text(os.path.join(root_folder, 'archives.json'))
|
||||
archives = json.loads(text)
|
||||
|
||||
# loop over archives
|
||||
for archive in archives:
|
||||
url = archive[0]
|
||||
folder = os.path.join(root_folder, derive_folder_name(url))
|
||||
folder = os.path.join(root_folder, derive_folder_name(archive))
|
||||
|
||||
# if not existing do the initial checkout
|
||||
if not os.path.isdir(folder):
|
||||
os.chdir(root_folder)
|
||||
clone(url, folder)
|
||||
clone(archive, folder)
|
||||
|
||||
# pull all
|
||||
os.chdir(folder)
|
@ -64,10 +64,7 @@ def extract_overview_for_toc(file):
|
||||
|
||||
To be displayed after the game name in the category TOCs.
|
||||
"""
|
||||
with open(file, mode='r', encoding='utf-8') as f:
|
||||
text = f.read()
|
||||
|
||||
info = parse_entry(text)
|
||||
info = infos[file]
|
||||
|
||||
output = []
|
||||
|
||||
@ -301,9 +298,12 @@ def parse_entry(content):
|
||||
# split on ','
|
||||
v = v.split(',')
|
||||
|
||||
# finally strip
|
||||
# strip
|
||||
v = [x.strip() for x in v]
|
||||
|
||||
# remove all being false (empty)
|
||||
v = [x for x in v if x]
|
||||
|
||||
# store in info
|
||||
info[field.lower()] = v
|
||||
|
||||
@ -319,9 +319,15 @@ def parse_entry(content):
|
||||
# state must contain either beta or mature but not both
|
||||
v = info['state']
|
||||
if 'beta' in v != 'mature' in v:
|
||||
printf('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
|
||||
print('State must be one of <"beta", "mature"> in entry "{}"'.format(info['title']))
|
||||
return info # so that the rest can run through
|
||||
|
||||
# github repositories should not end on .git
|
||||
repos = info['code repository']
|
||||
for repo in repos:
|
||||
if repo.startswith('https://github.com/') and repo.endswith('.git'):
|
||||
print('Github repo {} in entry "{}" should not end on .git.'.format(repo, info['title']))
|
||||
|
||||
# extract inactive
|
||||
phrase = 'inactive since '
|
||||
inactive_year = [x[len(phrase):] for x in info['state'] if x.startswith(phrase)]
|
||||
@ -340,7 +346,7 @@ def assemble_infos():
|
||||
category_paths = get_category_paths()
|
||||
|
||||
# a database of all important infos about the entries
|
||||
infos = []
|
||||
infos = {}
|
||||
|
||||
# for each category
|
||||
for category_path in category_paths:
|
||||
@ -364,7 +370,7 @@ def assemble_infos():
|
||||
info['file'] = os.path.basename(entry_path)[:-3] # [:-3] to cut off the .md
|
||||
|
||||
# add to list
|
||||
infos.append(info)
|
||||
infos[entry_path] = info
|
||||
|
||||
return infos
|
||||
|
||||
@ -376,25 +382,28 @@ def generate_statistics():
|
||||
Should be done every time the entries change.
|
||||
"""
|
||||
|
||||
# for this function replace infos with infos.values
|
||||
infois = infos.values()
|
||||
|
||||
# start the page
|
||||
statistics_path = os.path.join(games_path, 'statistics.md')
|
||||
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
|
||||
|
||||
# total number
|
||||
number_entries = len(infos)
|
||||
number_entries = len(infois)
|
||||
rel = lambda x: x / number_entries * 100 # conversion to percent
|
||||
statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
|
||||
|
||||
# State (beta, mature, inactive)
|
||||
statistics += '## State\n\n'
|
||||
|
||||
number_state_beta = sum(1 for x in infos if 'beta' in x['state'])
|
||||
number_state_mature = sum(1 for x in infos if 'mature' in x['state'])
|
||||
number_inactive = sum(1 for x in infos if 'inactive' in x)
|
||||
number_state_beta = sum(1 for x in infois if 'beta' in x['state'])
|
||||
number_state_mature = sum(1 for x in infois if 'mature' in x['state'])
|
||||
number_inactive = sum(1 for x in infois if 'inactive' in x)
|
||||
statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format(number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive))
|
||||
|
||||
if number_inactive > 0:
|
||||
entries_inactive = [(x['title'], x['inactive']) for x in infos if 'inactive' in x]
|
||||
entries_inactive = [(x['title'], x['inactive']) for x in infois if 'inactive' in x]
|
||||
entries_inactive.sort(key=lambda x: x[0]) # first sort by name
|
||||
entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first)
|
||||
entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive]
|
||||
@ -405,16 +414,16 @@ def generate_statistics():
|
||||
field = 'code language'
|
||||
|
||||
# those without language tag
|
||||
number_no_language = sum(1 for x in infos if field not in x)
|
||||
number_no_language = sum(1 for x in infois if field not in x)
|
||||
if number_no_language > 0:
|
||||
statistics += 'Without language tag: {} ({:.1f}%)\n\n'.format(number_no_language, rel(number_no_language))
|
||||
entries_no_language = [x['title'] for x in infos if field not in x]
|
||||
entries_no_language = [x['title'] for x in infois if field not in x]
|
||||
entries_no_language.sort()
|
||||
statistics += ', '.join(entries_no_language) + '\n\n'
|
||||
|
||||
# get all languages together
|
||||
languages = []
|
||||
for info in infos:
|
||||
for info in infois:
|
||||
if field in info:
|
||||
languages.extend(info[field])
|
||||
|
||||
@ -430,16 +439,16 @@ def generate_statistics():
|
||||
field = 'code license'
|
||||
|
||||
# those without license
|
||||
number_no_license = sum(1 for x in infos if field not in x)
|
||||
number_no_license = sum(1 for x in infois if field not in x)
|
||||
if number_no_license > 0:
|
||||
statistics += 'Without license tag: {} ({:.1f}%)\n\n'.format(number_no_license, rel(number_no_license))
|
||||
entries_no_license = [x['title'] for x in infos if field not in x]
|
||||
entries_no_license = [x['title'] for x in infois if field not in x]
|
||||
entries_no_license.sort()
|
||||
statistics += ', '.join(entries_no_license) + '\n\n'
|
||||
|
||||
# get all licenses together
|
||||
licenses = []
|
||||
for info in infos:
|
||||
for info in infois:
|
||||
if field in info:
|
||||
licenses.extend(info[field])
|
||||
|
||||
@ -456,7 +465,7 @@ def generate_statistics():
|
||||
|
||||
# get all keywords together
|
||||
keywords = []
|
||||
for info in infos:
|
||||
for info in infois:
|
||||
if field in info:
|
||||
keywords.extend(info[field])
|
||||
|
||||
@ -482,10 +491,11 @@ def export_json():
|
||||
db['headings'] = ['Name', 'Download']
|
||||
|
||||
entries = []
|
||||
for info in infos:
|
||||
for info in infos.values():
|
||||
entry = [info['title']]
|
||||
if 'download' in info:
|
||||
entry.append(info['download'][0])
|
||||
field = 'download'
|
||||
if field in info and info[field]:
|
||||
entry.append(info[field][0])
|
||||
else:
|
||||
entry.append('')
|
||||
entries.append(entry)
|
||||
@ -497,6 +507,41 @@ def export_json():
|
||||
write_text(json_path, text)
|
||||
|
||||
|
||||
def git_repo(repo):
    """
    Converts the web URL of a GitHub repository into its git clone URL.

    Only URLs of the form 'https://github.com/<owner>/<name>' are accepted (a trailing slash is tolerated).
    The returned URL ends with '.git' exactly once. For everything else (other hosts, missing owner or name,
    deeper paths like '.../tree/master') None is returned.
    """
    prefix = 'https://github.com/'
    if not repo.startswith(prefix):
        return None

    # strip trailing slashes first, otherwise 'https://github.com/owner/' would count as owner plus empty name
    parts = repo[len(prefix):].rstrip('/').split('/')
    if len(parts) != 2 or not all(parts):
        return None
    owner, name = parts

    # do not append the suffix a second time if the entry already contains it
    if not name.endswith('.git'):
        name += '.git'
    return prefix + owner + '/' + name
|
||||
|
||||
|
||||
def update_primary_code_repositories():
    """
    Collects the primary code repository of every entry and stores the list as JSON for the git archive tool.

    For each entry in the module-level infos, the first URL of the field 'code repository' is passed through
    git_repo(). URLs for which git_repo() returns None are dropped. The remaining git URLs are de-duplicated,
    sorted alphabetically and written to tools/git_archive/archives.json (relative to the parent of games_path).
    """
    field = 'code repository'

    # a set, so that entries sharing the same primary repository result in only one archive
    primary_repos = set()

    # for every entry
    for info in infos.values():
        # the field may be missing or empty
        repos = info.get(field)
        if not repos:
            continue

        # only the first repository counts as the primary one
        repo = git_repo(repos[0])
        if repo:
            primary_repos.add(repo)

    # sort them alphabetically
    primary_repos = sorted(primary_repos)

    # write them to tools/git_archive, one URL per line keeps the file readable and diff-friendly
    json_path = os.path.join(games_path, os.path.pardir, 'tools', 'git_archive', 'archives.json')
    text = json.dumps(primary_repos, indent=1)
    write_text(json_path, text)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# paths
|
||||
@ -523,3 +568,6 @@ if __name__ == "__main__":
|
||||
|
||||
# check external links (only rarely)
|
||||
# check_validity_external_links()
|
||||
|
||||
# collect list of primary code repositories
|
||||
update_primary_code_repositories()
|
||||
|
Reference in New Issue
Block a user