A few additions from the backlog and further work on the scripts

Trilarion
2019-09-16 12:37:52 +02:00
parent 49424eb857
commit 4fab32b287
32 changed files with 359 additions and 243 deletions

View File

@ -282,7 +282,9 @@
"https://github.com/aburch/simutrans.git",
"https://github.com/acmepjz/meandmyshadow.git",
"https://github.com/adamenkov/jet-story.git",
"https://github.com/afritz1/OpenTESArena.git",
"https://github.com/ahmetkasif/KittenMaxit.git",
"https://github.com/ajweeks/FlexEngine.git",
"https://github.com/ajxs/jsFO.git",
"https://github.com/akarnokd/open-ig.git",
"https://github.com/albertz/openlierox.git",
@ -290,6 +292,7 @@
"https://github.com/alexdantas/www.git",
"https://github.com/alexknvl/fonline.git",
"https://github.com/alphaonex86/CatchChallenger.git",
"https://github.com/amroibrahim/DIYDoom.git",
"https://github.com/anael-seghezzi/Maratis-4.git",
"https://github.com/andrettin/wyrmsun.git",
"https://github.com/andrewfenn/Hardwar.git",

View File

@ -87,11 +87,8 @@ https://fedoraproject.org/wiki/SIGs/Games#List_of_games_we_will_NOT_package
https://flathub.org/home (use it for Linux packaging) / https://flathub.org/apps/category/Game
https://freegamer.blogspot.com/2015/11/top-3-open-source-pinball-games.html
https://futurepinball.com/
https://github.com/afritz1/OpenTESArena
https://github.com/ajweeks/FlexEngine
https://github.com/amerkoleci/alimer
https://github.com/amerkoleci/vortice
https://github.com/amroibrahim/DIYDoom
https://github.com/Anthonymcqueen21/Pygame---Alien-Invasion
https://github.com/ao-libre/ao-cliente
https://github.com/apsillers/lords-of-the-fey
@ -123,6 +120,7 @@ https://github.com/cubei/FlappyCow
https://github.com/cyberegoorg/cetech
https://github.com/DaemonEngine/Daemon
https://github.com/Dariasteam/TowerJumper
https://github.com/DeflatedPickle/FAOSDance
https://github.com/demonixis/C3DE
https://github.com/digitall/scummvm-deskadv
https://github.com/DigitalPulseSoftware/NazaraEngine
@ -141,6 +139,7 @@ https://github.com/freeboardgame/FreeBoardGame.org
https://github.com/FreeCol/freecol
https://github.com/gamearians
https://github.com/GentenStudios/quartz-engine
https://github.com/grantjenks/free-python-games (check all)
https://github.com/GunshipPenguin/open_flood
https://github.com/hedgewars/hw
https://github.com/hparcells/cards-against-humanity
@ -172,6 +171,7 @@ https://github.com/pelya/commandergenius
https://github.com/pld-linux
https://github.com/ptitSeb/gl4es
https://github.com/raysan5/rfxgen
https://github.com/Realm667/WolfenDoom
https://github.com/romlok/godot-gdhexgrid
https://github.com/RonenNess/GeonBit.UI
https://github.com/RPG-Paper-Maker/RPG-Paper-Maker
@ -220,7 +220,6 @@ https://pyweek.org/4/entries/ (Ascent of Justice)
https://revolutionarygamesstudio.com/ Thrive
https://salsa.debian.org/games-team/etw
https://scratch.mit.edu/ (https://en.scratch-wiki.info/wiki/Scratch_Source_Code)
https://secretchronicles.org/en/
https://sourceforge.net/projects/actiongame/
https://sourceforge.net/projects/deng/
https://sourceforge.net/projects/ettu/

View File

@ -7,48 +7,23 @@ Unique left column names in the game info boxes:
['Code license', 'Code licenses', 'Developer', 'Developers', 'Engine', 'Engines', 'Genre', 'Genres', 'Libraries', 'Library', 'Media license', 'Media licenses', 'P. language', 'P. languages', 'Platforms']
"""
import os
import requests
import json
from bs4 import BeautifulSoup, NavigableString
from utils.utils import *
from bs4 import BeautifulSoup
from utils import constants, utils, osg
def key_selection_gameinfobox(a, b):
def download_lgw_content():
"""
Checks which of the two elements of a is present in b; at most one may be (none is allowed).
:return: the found key and its index, or (None, None)
"""
if len(a) != 2:
raise RuntimeError()
c = [x in b for x in a]
if all(c):
raise RuntimeError
if not any(c):
return None, None
d = [(k, i) for (i, k) in enumerate(a) if c[i]]
return d[0]
def extract_field_content(key, idx, info):
"""
Extracts the comma-separated content of a game info field as a cleaned list of strings.
"""
content = info[key].get_text()
content = content.split(',')
content = [x.strip() for x in content]
content = [x if not (x.endswith('[1]') or x.endswith('[2]')) else x[:-3] for x in content] # remove trailing footnote markers [1] and [2]
content = [x.strip() for x in content]
if not content:
raise RuntimeError
if (len(content) > 1 and idx == 0) or (len(content) == 1 and idx == 1):
print(' warning: {} Sg./Pl. mismatch'.format(key))
return content
if __name__ == "__main__":
# parameters
base_url = 'https://libregamewiki.org'
ignored_gameinfos = ['Contribute', 'Origin', 'Release date', 'Latest release']
destination_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
utils.recreate_directory(destination_path)
# read and process the base url (get all games and categories)
url = base_url + '/Category:Games'
@ -69,89 +44,70 @@ if __name__ == "__main__":
break
url = base_url + next_page['href']
# remove all those that start with user
games = [game for game in games if not any(game[1].startswith(x) for x in ('User:', 'Template:', 'Bullet'))]
print('current number of games in LGW {}'.format(len(games)))
# parse games
counter = 0
unique_gameinfo_fields = set()
entries = []
for game in games:
print(game[1])
url = base_url + game[0]
destination_file = os.path.join(destination_path, osg.canonical_game_name(game[0][1:]) + '.html')
text = requests.get(url).text
utils.write_text(destination_file, text)
def parse_lgw_content():
# paths
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
entries_file = os.path.join(import_path, '_lgw.json')
# iterate over all imported files
files = os.listdir(import_path)
entries = []
for file in files:
if file == '_lgw.json':
continue
text = utils.read_text(os.path.join(import_path, file))
# parse the html
soup = BeautifulSoup(text, 'html.parser')
title = soup.h1.string
title = soup.h1.get_text()
print(title)
entry = {'name': title}
# get all external links
links = [(x['href'], x.get_text()) for x in soup.find_all('a', href=True)]
links = [x for x in links if x[0].startswith('http') and not x[0].startswith('https://libregamewiki.org/')]
entry['external links'] = links
# get meta description
description = soup.find('meta', attrs={"name":"description"})
entry['description'] = description['content']
# parse gameinfobox
info = soup.find('div', class_='gameinfobox')
if not info:
infos = soup.find('div', class_='gameinfobox')
if not infos:
print(' no gameinfobox')
else:
info = info.find_all('tr')
info = [(x.th.string, x.td) for x in info if x.th and x.th.string]
info = [x for x in info if x[0] not in ignored_gameinfos]
info = dict(info)
unique_gameinfo_fields.update(info.keys())
# consume fields of gameinfobox
# genre
key, idx = key_selection_gameinfobox(('Genre', 'Genres'), info.keys())
if key:
genres = extract_field_content(key, idx, info)
entry['genre'] = genres
del info[key]
# platforms
key = 'Platforms'
if key in info:
platforms = extract_field_content(key, 1, info)
# platforms = [x if x != 'Mac' else 'macOS' for x in platforms] # replace Mac with macOS
entry['platform'] = platforms
del info[key]
# developer
key, idx = key_selection_gameinfobox(('Developer', 'Developers'), info.keys())
if key:
entry['developer'] = extract_field_content(key, idx, info)
del info[key]
# code license
key, idx = key_selection_gameinfobox(('Code license', 'Code licenses'), info.keys())
if key:
entry['code license'] = extract_field_content(key, idx, info)
del info[key]
# media license
key, idx = key_selection_gameinfobox(('Media license', 'Media licenses'), info.keys())
if key:
entry['assets license'] = extract_field_content(key, idx, info)
del info[key]
# engine
key, idx = key_selection_gameinfobox(('Engine', 'Engines'), info.keys())
if key:
entry['engine'] = extract_field_content(key, idx, info)
del info[key]
# library
key, idx = key_selection_gameinfobox(('Library', 'Libraries'), info.keys())
if key:
entry['library'] = extract_field_content(key, idx, info)
del info[key]
# programming language
key, idx = key_selection_gameinfobox(('P. language', 'P. languages'), info.keys())
if key:
languages = extract_field_content(key, idx, info)
languages = [x for x in languages if x != 'HTML5'] # ignore HTML5
entry['code language'] = languages
del info[key]
# unconsumed
if info:
print('unconsumed gameinfo keys {}'.format(info.keys()))
raise RuntimeError()
infos = infos.find_all('tr')
for x in infos:
if x.th and x.td:
# row with header
key = x.th.get_text()
content = x.td.get_text()
content = content.split(',')
content = [x.strip() for x in content]
entry[key] = content
if not x.th and x.td:
# row without header: contribute section
items = x.find_all('li')
items = [(item.a.string, item.a['href']) for item in items if item.a]
for key, content in items:
entry[key] = content
# parse "for available as package in"
tables = soup.find_all('table', class_='wikitable')
@ -187,18 +143,56 @@ if __name__ == "__main__":
entry['categories'] = categories
entries.append(entry)
# print(entry)
counter += 1
if counter > 20:
# break
pass
unique_gameinfo_fields = sorted(list(unique_gameinfo_fields))
print('unique gameinfo fields: {}'.format(unique_gameinfo_fields))
# save entries
json_path = os.path.join(os.path.dirname(__file__), 'lgw_import.json')
text = json.dumps(entries, indent=1)
write_text(json_path, text)
utils.write_text(entries_file, text)
def clean_lgw_content():
# paths
import_path = os.path.join(constants.root_path, 'tools', 'lgw-import')
entries_file = os.path.join(import_path, '_lgw.json')
cleaned_entries_file = os.path.join(import_path, '_lgw.cleaned.json')
# load entries
text = utils.read_text(entries_file)
entries = json.loads(text)
# rename keys
key_replacements = (
    ('developer', ('Developer', 'Developers')),
    ('code license', ('Code license', 'Code licenses')),
    ('engine', ('Engine', 'Engines')),
    ('genre', ('Genre', 'Genres')),
)
for index, entry in enumerate(entries):
for new_key, old_keys in key_replacements:
for key in old_keys:
if key in entry:
entry[new_key] = entry[key]
del entry[key]
break
entries[index] = entry
# check for unique field names
unique_fields = set()
for entry in entries:
unique_fields.update(entry.keys())
print('unique lgw fields: {}'.format(sorted(list(unique_fields))))
# which fields are mandatory
for entry in entries:
remove_fields = [field for field in unique_fields if field not in entry]
unique_fields -= set(remove_fields)
print('mandatory lgw fields: {}'.format(sorted(list(unique_fields))))
if __name__ == "__main__":
# stage one
# download_lgw_content()
# stage two
# parse_lgw_content()
# stage three
clean_lgw_content()
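For context, the new row-based parsing in parse_lgw_content expects game info boxes shaped roughly like the snippet below; this is a minimal, self-contained sketch (the HTML is invented for illustration, not actual LGW markup):

from bs4 import BeautifulSoup

html = ('<div class="gameinfobox"><table>'
        '<tr><th>Genre</th><td>Strategy, Simulation</td></tr>'
        '<tr><th>Code license</th><td>GPL-2.0</td></tr>'
        '</table></div>')

entry = {}
for row in BeautifulSoup(html, 'html.parser').find_all('tr'):
    if row.th and row.td:
        # header rows become list-valued fields, as in parse_lgw_content
        entry[row.th.get_text()] = [x.strip() for x in row.td.get_text().split(',')]

print(entry)  # {'Genre': ['Strategy', 'Simulation'], 'Code license': ['GPL-2.0']}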

View File

@ -243,7 +243,7 @@ if __name__ == "__main__":
# determine file name
print('create new entry for {}'.format(lgw_name))
file_name = derive_canonical_file_name(lgw_name)
file_name = canonical_game_name(lgw_name) + '.md'
target_file = os.path.join(games_path, file_name)
if os.path.isfile(target_file):
print('warning: file {} already exists, saving under a slightly different name'.format(file_name))

View File

@ -13,7 +13,9 @@ import http.client
import datetime
import json
import textwrap
from utils.osg import *
import os
import re
from utils import constants as c, utils, osg
def update_readme_and_tocs(infos):
@ -29,12 +31,12 @@ def update_readme_and_tocs(infos):
print('update readme and toc files')
# delete content of toc path
for file in os.listdir(tocs_path):
os.remove(os.path.join(tocs_path, file))
for file in os.listdir(c.tocs_path):
os.remove(os.path.join(c.tocs_path, file))
# read readme
readme_file = os.path.join(root_path, 'README.md')
readme_text = read_text(readme_file)
readme_file = os.path.join(c.root_path, 'README.md')
readme_text = utils.read_text(readme_file)
# compile regex for identifying the building blocks in the readme
regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL)
@ -55,7 +57,7 @@ def update_readme_and_tocs(infos):
# create by category
categories_text = []
for keyword in recommended_keywords:
for keyword in osg.recommended_keywords:
infos_filtered = [x for x in infos if keyword in x['keywords']]
title = keyword.capitalize()
name = keyword.replace(' ', '-')
@ -67,7 +69,7 @@ def update_readme_and_tocs(infos):
# create by platform
platforms_text = []
for platform in valid_platforms:
for platform in osg.valid_platforms:
infos_filtered = [x for x in infos if platform in x.get('platform', [])]
title = platform
name = platform.lower()
@ -80,7 +82,7 @@ def update_readme_and_tocs(infos):
text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end
# write to readme
write_text(readme_file, text)
utils.write_text(readme_file, text)
def create_toc(title, file, entries):
@ -88,7 +90,7 @@ def create_toc(title, file, entries):
"""
# file path
toc_file = os.path.join(tocs_path, file)
toc_file = os.path.join(c.tocs_path, file)
# header line
text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title)
@ -105,7 +107,7 @@ def create_toc(title, file, entries):
text += '\n'.join(rows)
# write to toc file
write_text(toc_file, text)
utils.write_text(toc_file, text)
def check_validity_external_links():
@ -127,7 +129,7 @@ def check_validity_external_links():
ignored_urls = ('https://git.tukaani.org/xz.git',)  # trailing comma makes this a 1-tuple, not a string
# iterate over all entries
for _, entry_path, content in entry_iterator(games_path):
for _, entry_path, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
@ -169,12 +171,12 @@ def check_template_leftovers():
print('check for template leftovers')
# load template and get all lines
text = read_text(os.path.join(root_path, 'template.md'))
text = utils.read_text(os.path.join(c.root_path, 'template.md'))
text = text.split('\n')
check_strings = [x for x in text if x and not x.startswith('##')]
# iterate over all entries
for _, entry_path, content in entry_iterator(games_path):
for _, entry_path, content in osg.entry_iterator():
for check_string in check_strings:
if content.find(check_string) >= 0:
@ -196,7 +198,7 @@ def fix_entries():
regex = re.compile(r"(.*)- Keywords:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -211,7 +213,7 @@ def fix_entries():
elements = list(set(elements))
# get category out
for keyword in recommended_keywords:
for keyword in osg.recommended_keywords:
if keyword in elements:
elements.remove(keyword)
category = keyword
@ -243,13 +245,13 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
# code dependencies
regex = re.compile(r"(.*)- Code dependencies:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -279,13 +281,13 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
# build systems
regex = re.compile(r"(.*)- Build system:([^\n]*)(.*)", re.DOTALL)
# iterate over all entries
for entry, entry_path, content in entry_iterator(games_path):
for entry, entry_path, content in osg.entry_iterator():
# match with regex
matches = regex.findall(content)
@ -311,7 +313,7 @@ def fix_entries():
if new_content != content:
# write again
write_text(entry_path, new_content)
utils.write_text(entry_path, new_content)
def update_statistics(infos):
@ -324,7 +326,7 @@ def update_statistics(infos):
print('update statistics')
# start the page
statistics_file = os.path.join(root_path, 'statistics.md')
statistics_file = os.path.join(c.root_path, 'statistics.md')
statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n'
# total number
@ -524,7 +526,7 @@ def update_statistics(infos):
statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n'
# write to statistics file
write_text(statistics_file, statistics)
utils.write_text(statistics_file, statistics)
def export_json(infos):
@ -585,9 +587,9 @@ def export_json(infos):
db['data'] = entries
# output
json_path = os.path.join(games_path, os.path.pardir, 'docs', 'data.json')
json_path = os.path.join(c.games_path, os.path.pardir, 'docs', 'data.json')
text = json.dumps(db, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def git_repo(repo):
@ -710,9 +712,9 @@ def export_primary_code_repositories_json():
primary_repos[k] = sorted(set(v))
# write them to tools/git
json_path = os.path.join(root_path, 'tools', 'archives.json')
json_path = os.path.join(c.root_path, 'tools', 'archives.json')
text = json.dumps(primary_repos, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def export_git_code_repositories_json():
@ -739,40 +741,31 @@ def export_git_code_repositories_json():
urls.sort()
# write them to tools/git
json_path = os.path.join(root_path, 'tools', 'git_repositories.json')
json_path = os.path.join(c.root_path, 'tools', 'git_repositories.json')
text = json.dumps(urls, indent=1)
write_text(json_path, text)
utils.write_text(json_path, text)
def sort_text_file(file, name):
"""
Reads a text file, splits it into lines, removes duplicates, sorts them case-insensitively, and writes the result back.
"""
text = read_text(file)
text = utils.read_text(file)
text = text.split('\n')
text = sorted(list(set(text)), key=str.casefold)
print('{} contains {} items'.format(name, len(text)))
text = '\n'.join(text)
write_text(file, text)
def strip_url(url):
for prefix in ('http://', 'https://'):
if url.startswith(prefix):
url = url[len(prefix):]
for suffix in ('/', '.git'):
if url.endswith(suffix):
url = url[:-len(suffix)]
return url
utils.write_text(file, text)
def clean_backlog(stripped_game_urls):
# read backlog and split
file = os.path.join(root_path, 'tools', 'backlog.txt')
text = read_text(file)
file = os.path.join(c.root_path, 'tools', 'backlog.txt')
text = utils.read_text(file)
text = text.split('\n')
# remove those that are in stripped_game_urls
text = [x for x in text if strip_url(x) not in stripped_game_urls]
text = [x for x in text if utils.strip_url(x) not in stripped_game_urls]
# remove duplicates and sort
text = sorted(list(set(text)), key=str.casefold)
@ -780,18 +773,14 @@ def clean_backlog(stripped_game_urls):
# join and save again
text = '\n'.join(text)
write_text(file, text)
utils.write_text(file, text)
if __name__ == "__main__":
# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')
# backlog
game_urls = extract_links(games_path)
stripped_game_urls = [strip_url(x) for x in game_urls]
game_urls = osg.extract_links()
stripped_game_urls = [utils.strip_url(x) for x in game_urls]
clean_backlog(stripped_game_urls)
@ -802,7 +791,7 @@ if __name__ == "__main__":
fix_entries()
# assemble info
infos = assemble_infos(games_path)
infos = osg.assemble_infos()
# recount and write to readme and to tocs
update_readme_and_tocs(infos)
@ -823,5 +812,5 @@ if __name__ == "__main__":
# check_validity_external_links()
# sort backlog and rejected
# sort_text_file(os.path.join(root_path, 'tools', 'backlog.txt'), 'backlog')
sort_text_file(os.path.join(root_path, 'tools', 'rejected.txt'), 'rejected games list')
# sort_text_file(os.path.join(c.root_path, 'tools', 'backlog.txt'), 'backlog')
sort_text_file(os.path.join(c.root_path, 'tools', 'rejected.txt'), 'rejected games list')

View File

@ -229,9 +229,9 @@ if __name__ == "__main__":
urls = osgc_entry['url']
if isinstance(urls, str):
urls = [urls]
urls = [strip_url(url) for url in urls]
our_urls = our_entry['home']
our_urls = [x.replace('http://', '').replace('https://', '') for x in our_urls]
urls = [x.replace('http://', '').replace('https://', '') for x in urls]
our_urls = [strip_url(url) for url in our_urls]
for url in urls:
if url not in our_urls:
p += ' home url {} missing\n'.format(url)
@ -309,7 +309,7 @@ if __name__ == "__main__":
# determine file name
print('create new entry for {}'.format(osgc_name))
file_name = derive_canonical_file_name(osgc_name)
file_name = canonical_game_name(osgc_name) + '.md'
target_file = os.path.join(games_path, file_name)
if os.path.isfile(target_file):
print('warning: file {} already exists, saving under a slightly different name'.format(file_name))

tools/utils/constants.py Normal file
View File

@ -0,0 +1,12 @@
"""
Paths, properties.
"""
import os
# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')
local_properties_file = os.path.join(root_path, 'local.properties')
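The other scripts resolve all paths through this module; a short usage sketch:

from utils import constants as c

print(c.root_path)   # repository root, two levels above tools/utils/
print(c.games_path)  # <root>/games
print(c.tocs_path)   # <root>/games/tocs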

View File

@ -3,8 +3,9 @@ Specific functions working on the games.
"""
import re
import os
from difflib import SequenceMatcher
from utils.utils import *
from utils import utils, constants as c
essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
@ -19,38 +20,37 @@ def game_name_similarity(a, b):
return SequenceMatcher(None, str.casefold(a), str.casefold(b)).ratio()
def entry_iterator(games_path):
def entry_iterator():
"""
"""
# get all entries (ignore everything starting with underscore)
entries = os.listdir(games_path)
entries = os.listdir(c.games_path)
# iterate over all entries
for entry in entries:
entry_path = os.path.join(games_path, entry)
entry_path = os.path.join(c.games_path, entry)
# ignore directories ("tocs" for example)
if os.path.isdir(entry_path):
continue
# read entry
content = read_text(entry_path)
content = utils.read_text(entry_path)
# yield
yield entry, entry_path, content
def derive_canonical_file_name(name):
def canonical_game_name(name):
"""
Derives a canonical file name from a game name
Derives a canonical game name from an actual game name (suitable for file names, ...)
"""
name = regex_sanitize_name.sub('', name)
name = regex_sanitize_name_space_eater.sub('_', name)
name = name.replace('_-_', '-')
name = name.casefold()
name = name + '.md'
return name
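The two sanitizer regexes are defined elsewhere in osg.py and are not part of this diff; assuming they drop disallowed characters and collapse runs of spaces, the derivation behaves roughly like this sketch:

import re

# assumed patterns - the real ones live elsewhere in osg.py
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-]+")
regex_sanitize_name_space_eater = re.compile(r" +")

def canonical_game_name(name):
    name = regex_sanitize_name.sub('', name)                # drop disallowed characters
    name = regex_sanitize_name_space_eater.sub('_', name)   # spaces -> underscores
    name = name.replace('_-_', '-')
    return name.casefold()

print(canonical_game_name('Me and My Shadow'))  # me_and_my_shadow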
@ -193,7 +193,7 @@ def parse_entry(content):
return info
def assemble_infos(games_path):
def assemble_infos():
"""
Parses all entries and assembles interesting infos about them.
"""
@ -204,7 +204,7 @@ def assemble_infos(games_path):
infos = []
# iterate over all entries
for entry, _, content in entry_iterator(games_path):
for entry, _, content in entry_iterator():
# parse entry
info = parse_entry(content)
@ -213,12 +213,12 @@ def assemble_infos(games_path):
info['file'] = entry
# check canonical file name
canonical_file_name = derive_canonical_file_name(info['name'])
canonical_file_name = canonical_game_name(info['name']) + '.md'
# we also allow -X with X =2..9 as possible extension (because of duplicate canonical file names)
if canonical_file_name != entry and canonical_file_name != entry[:-5] + '.md':
print('file {} should be {}'.format(entry, canonical_file_name))
source_file = os.path.join(games_path, entry)
target_file = os.path.join(games_path, canonical_file_name)
source_file = os.path.join(c.games_path, entry)
target_file = os.path.join(c.games_path, canonical_file_name)
if not os.path.isfile(target_file):
pass
# os.rename(source_file, target_file)
@ -228,7 +228,8 @@ def assemble_infos(games_path):
return infos
def extract_links(games_path):
def extract_links():
"""
Parses all entries and extracts http(s) links from them
"""
@ -238,7 +239,7 @@ def extract_links(games_path):
# iterate over all entries
urls = set()
for _, _, content in entry_iterator(games_path):
for _, _, content in entry_iterator():
# apply regex
matches = regex.findall(content)

View File

@ -0,0 +1,5 @@
"""
Everything specific to the Github API (via PyGithub).
"""
from github import Github
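So far the module is only a stub; a minimal sketch of how it might be fleshed out with PyGithub (repository name and token handling are assumptions, not part of this commit):

from github import Github

def repo_state(full_name, token=None):
    """Fetches a few archive-relevant properties of a Github repository."""
    g = Github(token) if token else Github()  # anonymous access is heavily rate-limited
    repo = g.get_repo(full_name)  # e.g. 'Trilarion/opensourcegames'
    return {'archived': repo.archived,
            'stars': repo.stargazers_count,
            'default branch': repo.default_branch}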

View File

@ -9,7 +9,6 @@ import tarfile
import time
import urllib.request
import zipfile
import errno
import stat
@ -266,4 +265,34 @@ def unzip(zip_file, destination_directory):
# done creating files, now update dir dt
for name in dirs:
date_time = dirs[name]
os.utime(name, (date_time, date_time))
def strip_url(url):
    for prefix in ('http://', 'https://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    for prefix in ('www.',):  # must be a 1-tuple; ('www') would iterate over single characters
        if url.startswith(prefix):
            url = url[len(prefix):]
    for suffix in ('/', '.git', '/en', '/index.html'):
        if url.endswith(suffix):
            url = url[:-len(suffix)]
    return url
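With the fixed prefix tuple above, for example:

print(strip_url('https://www.example.com/index.html'))  # example.com
print(strip_url('https://github.com/Trilarion/opensourcegames.git'))  # github.com/Trilarion/opensourcegames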
def load_properties(filepath, sep='=', comment_char='#'):
    """
    Reads the file as a properties file (as in Java).
    """
    properties = {}
    with open(filepath, "rt") as file:
        for line in file:
            line = line.strip()
            if not line or line.startswith(comment_char):
                continue  # skip blank lines and comments
            key, value = line.split(sep, 1)  # split only at the first separator
            properties[key.strip()] = value.strip()
    return properties
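Together with local_properties_file from constants.py this keeps machine-local settings out of the repository; a hypothetical local.properties and its use (the key name is invented):

# local.properties contains e.g.:  github-token = abc123
from utils import constants as c, utils

properties = utils.load_properties(c.local_properties_file)
token = properties.get('github-token')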