A few additions from the backlog and work on the scripts

Trilarion
2019-09-16 12:37:52 +02:00
parent 49424eb857
commit 4fab32b287
32 changed files with 359 additions and 243 deletions

tools/utils/constants.py Normal file

@@ -0,0 +1,12 @@
"""
Paths, properties.
"""
import os
# paths
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
games_path = os.path.join(root_path, 'games')
tocs_path = os.path.join(games_path, 'tocs')
local_properties_file = os.path.join(root_path, 'local.properties')
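
For orientation, a minimal sketch of how the new constants module might be consumed by the scripts; everything below beyond the names defined in this hunk is an assumption:

    import os
    from utils import constants as c, utils

    # entry files live directly in games/, helper files start with an underscore
    entries = [e for e in os.listdir(c.games_path) if not e.startswith('_')]

    # local.properties can hold machine-specific settings (see load_properties below)
    if os.path.isfile(c.local_properties_file):
        properties = utils.load_properties(c.local_properties_file)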


@@ -3,8 +3,9 @@ Specific functions working on the games.
"""
import re
import os
from difflib import SequenceMatcher
from utils.utils import *
from utils import utils, constants as c
essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
@@ -19,38 +20,37 @@ def game_name_similarity(a, b):
return SequenceMatcher(None, str.casefold(a), str.casefold(b)).ratio()
def entry_iterator(games_path):
def entry_iterator():
"""
"""
# get all entries (ignore everything starting with underscore)
entries = os.listdir(games_path)
entries = os.listdir(c.games_path)
# iterate over all entries
for entry in entries:
entry_path = os.path.join(games_path, entry)
entry_path = os.path.join(c.games_path, entry)
# ignore directories ("tocs" for example)
if os.path.isdir(entry_path):
continue
# read entry
content = read_text(entry_path)
content = utils.read_text(entry_path)
# yield
yield entry, entry_path, content
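
With the games path now coming from constants, callers no longer pass it in. A hedged usage sketch; the importing module name osg is an assumption:

    from utils import osg  # actual module name is an assumption

    for entry, entry_path, content in osg.entry_iterator():
        print('{}: {} characters'.format(entry, len(content)))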
def derive_canonical_file_name(name):
def canonical_game_name(name):
"""
Derives a canonical file name from a game name
Derives a canonical game name from an actual game name (suitable for file names, ...)
"""
name = regex_sanitize_name.sub('', name)
name = regex_sanitize_name_space_eater.sub('_', name)
name = name.replace('_-_', '-')
name = name.casefold()
name = name + '.md'
return name
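
The sanitizing regexes sit above this hunk and are not shown; assuming they drop punctuation and turn runs of whitespace into underscores, the renamed helper would behave roughly like this (outputs are assumptions, not tested):

    canonical_game_name('Battle for Wesnoth')  # -> 'battle_for_wesnoth' (assumed)
    canonical_game_name('Foo - Bar')           # -> 'foo-bar' via the '_-_' replacement (assumed)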
@@ -193,7 +193,7 @@ def parse_entry(content):
return info
def assemble_infos(games_path):
def assemble_infos():
"""
Parses all entries and assembles interesting infos about them.
"""
@@ -204,7 +204,7 @@ def assemble_infos(games_path):
infos = []
# iterate over all entries
for entry, _, content in entry_iterator(games_path):
for entry, _, content in entry_iterator():
# parse entry
info = parse_entry(content)
@@ -213,12 +213,12 @@ def assemble_infos(games_path):
info['file'] = entry
# check canonical file name
canonical_file_name = derive_canonical_file_name(info['name'])
canonical_file_name = canonical_game_name(info['name']) + '.md'
# we also allow -X with X = 2..9 as a possible suffix (because of duplicate canonical file names)
if canonical_file_name != entry and canonical_file_name != entry[:-5] + '.md':
print('file {} should be {}'.format(entry, canonical_file_name))
source_file = os.path.join(games_path, entry)
target_file = os.path.join(games_path, canonical_file_name)
source_file = os.path.join(c.games_path, entry)
target_file = os.path.join(c.games_path, canonical_file_name)
if not os.path.isfile(target_file):
pass
# os.rename(source_file, target_file)
@@ -228,7 +228,8 @@ def assemble_infos(games_path):
return infos
def extract_links(games_path):
def extract_links():
"""
Parses all entries and extracts http(s) links from them
"""
@@ -238,7 +239,7 @@ def extract_links(games_path):
# iterate over all entries
urls = set()
for _, _, content in entry_iterator(games_path):
for _, _, content in entry_iterator():
# apply regex
matches = regex.findall(content)
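
The compiled regex sits above this hunk and is not shown; a stand-in sketch of the whole extraction loop, with an assumed pattern:

    import re

    regex = re.compile(r'https?://[^\s)\]]+')  # assumed pattern, the real one is outside the excerpt
    urls = set()
    for _, _, content in entry_iterator():
        urls.update(regex.findall(content))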


@@ -0,0 +1,5 @@
"""
Everything specific to the Github API (via PyGithub).
"""
from github import Github
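
So far the module only pulls in the PyGithub entry point. A typical use of that API, as a hedged sketch (the repository name is just an example):

    from github import Github

    g = Github()  # anonymous access; pass a token for higher rate limits
    repo = g.get_repo('PyGithub/PyGithub')
    print(repo.stargazers_count)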


@@ -9,7 +9,6 @@ import tarfile
import time
import urllib.request
import zipfile
import errno
import stat
@@ -266,4 +265,34 @@ def unzip(zip_file, destination_directory):
# done creating files, now update dir dt
for name in dirs:
date_time = dirs[name]
os.utime(name, (date_time, date_time))
def strip_url(url):
    """
    Strips the scheme, a leading www. and some trailing parts from a URL.
    """
    for prefix in ('http://', 'https://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    for prefix in ('www.',):  # one-element tuple; a bare ('www') would iterate over single characters
        if url.startswith(prefix):
            url = url[len(prefix):]
    for suffix in ('/', '.git', '/en', '/index.html'):
        if url.endswith(suffix):
            url = url[:-len(suffix)]
    return url
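
Expected behavior of strip_url, for illustration (hypothetical inputs):

    strip_url('https://www.example.org/')       # -> 'example.org'
    strip_url('http://github.com/foo/bar.git')  # -> 'github.com/foo/bar'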
def load_properties(filepath, sep='=', comment_char='#'):
    """
    Reads the file as a Java-style properties file (key=value lines, # comments).
    """
    properties = {}
    with open(filepath, 'rt') as file:
        for line in file:
            line = line.strip()
            # skip blank lines and comments
            if not line or line.startswith(comment_char):
                continue
            key, value = line.split(sep, maxsplit=1)  # maxsplit keeps any '=' inside values intact
            properties[key.strip()] = value.strip()
    return properties
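
Given a local.properties in Java's key=value style, say

    # machine-specific settings, kept out of version control
    github-token = abc123

load_properties(local_properties_file) would return {'github-token': 'abc123'} (the key name is an assumption).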