rename of folder tools to code

This commit is contained in:
Trilarion
2020-01-10 16:46:46 +01:00
parent 30ce6187eb
commit 10625a854a
40 changed files with 30 additions and 25 deletions

0
code/utils/__init__.py Normal file
View File

37
code/utils/archive.py Normal file
View File

@ -0,0 +1,37 @@
"""
"""
def derive_folder_name(url, replaces):
    """
    Derives a flat folder name from a repository url.

    If the url starts with one of the keys of `replaces`, that prefix is swapped for the corresponding value,
    otherwise a generic protocol prefix (http, https, git, svn) is cut off. In both cases every '/' of the result
    is turned into a '.'.

    :param url: repository url
    :param replaces: mapping of url prefix to the text that replaces it
    :return: the derived folder name
    :raises Exception: if the url starts with neither a known prefix nor a generic protocol
    """
    # known services first, the first matching prefix wins
    for prefix, alias in replaces.items():
        if url.startswith(prefix):
            return (alias + url[len(prefix):]).replace('/', '.')
    # otherwise only drop the protocol
    for protocol in ('http://', 'https://', 'git://', 'svn://'):
        if url.startswith(protocol):
            return url[len(protocol):].replace('/', '.')
    raise Exception('malformed url: {}'.format(url))
def git_folder_name(url):
    """
    Derives the folder name for a git repository url, abbreviating the prefixes of well known hosting services
    (see derive_folder_name for the rules).

    :param url: url of the git repository
    :return: the derived folder name
    """
    return derive_folder_name(url, {
        'https://github.com': 'github',
        'https://git.code.sf.net/p': 'sourceforge',
        'https://git.tuxfamily.org': 'tuxfamily',
        'https://git.savannah.gnu.org/git': 'savannah.gnu',
        'https://gitlab.com': 'gitlab',
        'https://gitorious.org': 'gitorious',
        'https://anongit.': '',
        'https://bitbucket.org': 'bitbucket',
        'https://gitlab.gnome.org': 'gnome',
    })

12
code/utils/constants.py Normal file
View File

@ -0,0 +1,12 @@
"""
Paths, properties.
"""
import os
# paths
# root folder: two directory levels above the folder containing this file (resolved with realpath)
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
# folder 'entries' directly inside the root folder
entries_path = os.path.join(root_path, 'entries')
# sub-folder 'tocs' of the entries folder
tocs_path = os.path.join(entries_path, 'tocs')
# file 'local.properties' directly inside the root folder
# NOTE(review): presumably read with utils.load_properties - confirm against the callers
local_properties_file = os.path.join(root_path, 'local.properties')

297
code/utils/osg.py Normal file
View File

@ -0,0 +1,297 @@
"""
Specific functions working on the games.
"""
import re
import os
from difflib import SequenceMatcher
from utils import utils, constants as c
# fields that parse_entry requires to be present in every entry
essential_fields = ('Home', 'State', 'Keywords', 'Code repository', 'Code language', 'Code license')
# all fields parse_entry accepts; the fields of an entry must appear in exactly this order
valid_fields = ('Home', 'Media', 'State', 'Play', 'Download', 'Platform', 'Keywords', 'Code repository', 'Code language',
'Code license', 'Code dependencies', 'Assets license', 'Developer', 'Build system', 'Build instructions')
# all platform tags parse_entry accepts; the tags of an entry must appear in exactly this order
valid_platforms = ('Windows', 'Linux', 'macOS', 'Android', 'iOS', 'Web')
# parse_entry requires at least one of these in the keywords of an entry
recommended_keywords = ('action', 'arcade', 'adventure', 'visual novel', 'sports', 'platform', 'puzzle', 'role playing', 'simulation', 'strategy', 'cards', 'board', 'music', 'educational', 'tool', 'game engine', 'framework', 'library', 'remake')
# parse_entry prints a warning for every code language not listed here
known_languages = ('AGS Script', 'ActionScript', 'Ada', 'AngelScript', 'Assembly', 'Basic', 'Blender Script', 'BlitzMax', 'C', 'C#', 'C++', 'Clojure', 'CoffeeScript', 'ColdFusion', 'D', 'DM', 'Dart', 'Dia', 'Elm', 'Emacs Lisp', 'F#', 'GDScript', 'Game Maker Script', 'Go', 'Groovy', 'Haskell', 'Haxe', 'Io', 'Java', 'JavaScript', 'Kotlin', 'Lisp', 'Lua', 'MegaGlest Script', 'MoonScript', 'None', 'OCaml', 'Objective-C', 'PHP', 'Pascal', 'Perl', 'Python', 'QuakeC', 'R', "Ren'py", 'Ruby', 'Rust', 'Scala', 'Scheme', 'Script', 'Shell', 'Swift', 'TorqueScript', 'TypeScript', 'Vala', 'Visual Basic', 'XUL', 'ZenScript', 'ooc')
# parse_entry prints a warning for every code license not listed here
known_licenses = ('2-clause BSD', '3-clause BSD', 'AFL-3.0', 'AGPL-3.0', 'Apache-2.0', 'Artistic License-1.0', 'Artistic License-2.0', 'Boost-1.0', 'CC-BY-NC-3.0', 'CC-BY-NC-SA-2.0', 'CC-BY-NC-SA-3.0', 'CC-BY-SA-3.0', 'CC-BY-NC-SA-4.0', 'CC-BY-SA-4.0', 'CC0', 'Custom', 'EPL-2.0', 'GPL-2.0', 'GPL-3.0', 'IJG', 'ISC', 'Java Research License', 'LGPL-2.0', 'LGPL-2.1', 'LGPL-3.0', 'MAME', 'MIT', 'MPL-1.1', 'MPL-2.0', 'MS-PL', 'MS-RL', 'NetHack General Public License', 'None', 'Proprietary', 'Public domain', 'SWIG license', 'Unlicense', 'WTFPL', 'wxWindows license', 'zlib')
# NOTE(review): not referenced by any function visible in this file - presumably used by other scripts, confirm
known_multiplayer_modes = ('competitive', 'co-op', 'hotseat', 'LAN', 'local', 'massive', 'matchmaking', 'online', 'split-screen')
# TODO put the abbreviations directly in the name line (parenthesis maybe), that is more natural
# maps a full dependency name to its abbreviation; note that the values are either a single string or a tuple of
# strings
code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': 'SFML', 'Boost (C++ Libraries)': 'Boost', 'SGE Game Engine': 'SGE'}
# maps a dependency name to an url
# NOTE(review): judging by the name these are dependencies that have no entry file of their own - confirm
code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/', 'GLUT': 'https://www.opengl.org/resources/libraries/', 'WebGL': 'https://www.khronos.org/webgl/', 'Unity': 'https://unity.com/solutions/game',
'.NET': 'https://dotnet.microsoft.com/', 'Vulkan': 'https://www.khronos.org/vulkan/', 'KDE Frameworks': 'https://kde.org/products/frameworks/'}
# matches every run of characters that are not ASCII letters, digits, spaces, '-' or '+'
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
# matches every run of one or more spaces
regex_sanitize_name_space_eater = re.compile(r" +")
def name_similarity(a, b):
    """
    Returns the similarity ratio (as computed by difflib.SequenceMatcher) of two names, ignoring case.
    """
    folded_a = str.casefold(a)
    folded_b = str.casefold(b)
    matcher = SequenceMatcher(None, folded_a, folded_b)
    return matcher.ratio()
def split_infos(infos):
    """
    Split into games, tools, frameworks, libraries

    An info counts as game if its keywords contain none of 'tool', 'framework', 'library'. An info having several
    of these keywords ends up in several of the returned lists.

    :param infos: list of info dictionaries, each having a 'keywords' entry
    :return: tuple (games, tools, frameworks, libraries) of lists
    """
    special_keywords = ('tool', 'framework', 'library')

    def having(keyword):
        # all infos carrying the keyword, in their original order
        return [info for info in infos if keyword in info['keywords']]

    games = [info for info in infos if all(keyword not in info['keywords'] for keyword in special_keywords)]
    tools, frameworks, libraries = (having(keyword) for keyword in special_keywords)
    return games, tools, frameworks, libraries
def entry_iterator():
    """
    Generator over all files located directly in the entries folder.

    Yields a tuple (file name, full path, text content) for each of them. Directories are skipped.
    """
    for file_name in os.listdir(c.entries_path):
        full_path = os.path.join(c.entries_path, file_name)
        # only files are entries, directories ("tocs" for example) are not
        if not os.path.isdir(full_path):
            yield file_name, full_path, utils.read_text(full_path)
def canonical_entry_name(name):
    """
    Derives a canonical game name from an actual game name (suitable for file names, ...)

    The name is case folded, the umlauts ö, ä, ü are replaced by o, a, u, all characters matched by
    regex_sanitize_name are removed, runs of spaces become a single underscore and finally underscores around a
    hyphen as well as doubled hyphens are collapsed.
    """
    canonical = name.casefold()
    for umlaut, plain in (('ö', 'o'), ('ä', 'a'), ('ü', 'u')):
        canonical = canonical.replace(umlaut, plain)
    canonical = regex_sanitize_name.sub('', canonical)
    canonical = regex_sanitize_name_space_eater.sub('_', canonical)
    # '--' is collapsed twice on purpose, a single pass leaves '--' behind for longer runs of hyphens
    for clutter in ('_-_', '--', '--'):
        canonical = canonical.replace(clutter, '-')
    return canonical
def parse_entry(content):
    """
    Returns a dictionary of the features of the content.

    Besides 'name' and 'description', every field line "- Field: value" of the content is stored twice (both with
    lower case field name): the untouched value under '<field>-raw' and, unless it is empty, the list of its
    ', ' separated parts (comments in parentheses and enclosing <> removed) under '<field>'. The X of a state tag
    'inactive since X' is additionally stored under 'inactive'.

    Raises errors when a major error in the structure is expected, prints a warning for minor errors.

    :param content: complete text of an entry
    :return: dictionary with the parsed information
    :raises RuntimeError: if name, description or an essential field is missing or empty, a field is unknown, out
        of order or duplicated, the state is not exactly one of beta/mature, an url or platform is invalid or no
        recommended keyword is present
    :raises AssertionError: if the state contains more than one 'inactive since' tag
    """
    info = {}

    # read name
    regex = re.compile(r"^# (.*)")  # start of content, starting with "# " and then everything until the end of line
    matches = regex.findall(content)
    if len(matches) != 1 or not matches[0]:  # name must be there
        raise RuntimeError('Name not found in entry "{}" : {}'.format(content, matches))
    info['name'] = matches[0]

    # read description
    regex = re.compile(r"^.*\n\n_(.*)_\n")  # third line from top, everything between underscores
    matches = regex.findall(content)
    if len(matches) != 1 or not matches[0]:  # description must be there
        raise RuntimeError('Description not found in entry "{}"'.format(content))
    info['description'] = matches[0]

    # first read all field names
    regex = re.compile(r"^- (.*?): ", re.MULTILINE)  # start of each line having "- ", then everything until a colon, then ": "
    fields = regex.findall(content)

    # check that essential fields are there
    for field in essential_fields:
        if field not in fields:  # essential fields must be there
            raise RuntimeError('Essential field "{}" missing in entry "{}"'.format(field, info['name']))

    # check that all fields are valid fields and are existing in that order
    # (index only ever advances, so a field located before an already seen one cannot be found anymore)
    index = 0
    for field in fields:
        while index < len(valid_fields) and field != valid_fields[index]:
            index += 1
        if index == len(valid_fields):  # must be valid fields and must be in the right order
            raise RuntimeError('Field "{}" in entry "{}" either misspelled or in wrong order'.format(field, info['name']))

    # iterate over found fields
    for field in fields:
        # anchored at the line start (like the search for the field names above) and with the name escaped, so
        # that only real field lines are matched
        regex = re.compile(r"^- {}: (.*)".format(re.escape(field)), re.MULTILINE)
        matches = regex.findall(content)
        if len(matches) != 1:  # every field must be present only once
            raise RuntimeError('Field "{}" in entry "{}" exist multiple times.'.format(field, info['name']))
        v = matches[0]
        # first store as is
        info[field.lower()+'-raw'] = v
        # remove parenthesis with content
        v = re.sub(r'\([^)]*\)', '', v)
        # split on ', ' and strip
        v = [x.strip() for x in v.split(', ')]
        # remove all being false (empty) that were for example just comments
        v = [x for x in v if x]
        # if entry is of structure <..> remove <> (compared by value, not by identity)
        v = [x[1:-1] if x.startswith('<') and x.endswith('>') else x for x in v]
        # empty fields will not be stored
        if v:
            info[field.lower()] = v

    # check again that essential fields made it through
    for field in ('home', 'state', 'keywords', 'code language', 'code license'):
        if field not in info:  # essential fields must still be inside
            raise RuntimeError('Essential field "{}" empty in entry "{}"'.format(field, info['name']))

    # now checks on the content of fields

    # name and description should not have spaces at the begin or end
    for field in ('name', 'description'):
        v = info[field]
        if len(v) != len(v.strip()):  # warning about that
            print('Warning: No leading or trailing spaces in field {} in entry "{}"'.format(field, info['name']))

    # state (essential field) must contain either beta or mature but not both, but at least one
    v = info['state']
    for t in v:
        if t != 'beta' and t != 'mature' and not t.startswith('inactive since '):
            raise RuntimeError('Unknown state tage "{}" in entry "{}"'.format(t, info['name']))
    # the parentheses matter: without them Python chains the comparisons and the check only fires if both tags
    # are present, never if both are missing
    if ('beta' in v) == ('mature' in v):
        raise RuntimeError('State must be one of <"beta", "mature"> in entry "{}"'.format(info['name']))

    # extract inactive year
    phrase = 'inactive since '
    inactive_year = [x[len(phrase):] for x in v if x.startswith(phrase)]
    assert len(inactive_year) <= 1
    if inactive_year:
        info['inactive'] = inactive_year[0]

    # urls in home, download, play and code repositories must start with http or https (or git) and should not contain spaces
    url_prefixes = ('http://', 'https://', 'git://', 'svn://', 'ftp://', 'bzr://')
    for field in ('home', 'download', 'play', 'code repository'):
        for url in info.get(field, []):
            if not url.startswith(url_prefixes):
                raise RuntimeError('URL "{}" in entry "{}" does not start with http/https/git/svn/ftp/bzr'.format(url, info['name']))
            if ' ' in url:
                raise RuntimeError('URL "{}" in entry "{}" contains a space'.format(url, info['name']))

    # github/gitlab repositories should end on .git and should start with https
    for repo in info.get('code repository', []):
        if any(x in repo for x in ('github', 'gitlab', 'git.tuxfamily', 'git.savannah')):
            if not repo.startswith('https://'):
                print('Warning: Repo {} in entry "{}" should start with https://'.format(repo, info['name']))
            if not repo.endswith('.git'):
                print('Warning: Repo {} in entry "{}" should end on .git.'.format(repo, info['name']))

    # check that all platform tags are valid tags and are existing in that order
    if 'platform' in info:
        index = 0
        for platform in info['platform']:
            while index < len(valid_platforms) and platform != valid_platforms[index]:
                index += 1
            if index == len(valid_platforms):  # must be valid platforms and must be in that order
                raise RuntimeError('Platform tag "{}" in entry "{}" either misspelled or in wrong order'.format(platform, info['name']))

    # there must be at least one keyword
    if 'keywords' not in info:
        raise RuntimeError('Need at least one keyword in entry "{}"'.format(info['name']))

    # check for existence of at least one recommended keywords
    if not any(keyword in info['keywords'] for keyword in recommended_keywords):
        raise RuntimeError('Entry "{}" contains no recommended keyword'.format(info['name']))

    # languages should be known
    for language in info['code language']:
        if language not in known_languages:
            print('Warning: Language {} in entry "{}" is not a known language. Misspelled or new?'.format(language, info['name']))

    # licenses should be known
    for code_license in info['code license']:
        if code_license not in known_licenses:
            print('Warning: License {} in entry "{}" is not a known license. Misspelled or new?'.format(code_license, info['name']))

    return info
def assemble_infos():
    """
    Parses all entries and assembles interesting infos about them.

    Every entry delivered by entry_iterator is parsed with parse_entry and the file name is added under the key
    'file'. A warning is printed if the file name differs from the canonical file name derived from the name of
    the entry; the file itself is never renamed here.

    :return: list of info dictionaries, one per entry
    """
    print('assemble game infos')

    # a database of all important infos about the entries
    infos = []

    # iterate over all entries
    for entry, _, content in entry_iterator():
        # parse entry and add file information
        info = parse_entry(content)
        info['file'] = entry

        # check canonical file name
        canonical_file_name = canonical_entry_name(info['name']) + '.md'
        # we also allow -X with X =2..9 as possible extension (because of duplicate canonical file names)
        if canonical_file_name != entry and canonical_file_name != entry[:-5] + '.md':
            # only reported, the automatic renaming was disabled
            print('Warning: file {} should be {}'.format(entry, canonical_file_name))

        # add to list
        infos.append(info)

    return infos
def extract_links():
    """
    Parses all entries and extracts http(s) links from them

    :return: list of the distinct urls found, sorted case-insensitively
    """
    # an url is either enclosed in <> (after a whitespace), in ]() or stands between whitespaces/commas
    url_pattern = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n,]")

    found = set()
    for _, _, content in entry_iterator():
        for groups in url_pattern.findall(content):
            # each match fills only the group of its alternative, the other groups are empty strings
            found.update(url for url in groups if url)

    return sorted(found, key=str.casefold)

20
code/utils/osg_github.py Normal file
View File

@ -0,0 +1,20 @@
"""
Everything specific to the Github API (via PyGithub).
"""
from github import Github
def retrieve_repo_info(repos):
    """
    For a list of Github repos, retrieves repo information

    :param repos: iterable of repository identifiers as accepted by Github.get_repo
    :return: list with one dictionary of selected repository properties per repo (same order as repos)
    """
    client = Github()
    infos = []
    for name in repos:
        remote = client.get_repo(name)
        infos.append({
            'archived': remote.archived,
            'description': remote.description,
            'language': remote.language,
            'last modified': remote.last_modified,
            'open issues count': remote.open_issues_count,
            'stars count': remote.stargazers_count,
            'topics': remote.topics,
            'repo': name,
        })
    return infos

0
code/utils/osg_parse.py Normal file
View File

312
code/utils/utils.py Normal file
View File

@ -0,0 +1,312 @@
"""
Utilities for the tools. Only depending on standard Python or third party modules.
"""
import os
import shutil
import subprocess
import tarfile
import time
import urllib.request
import zipfile
import stat
def read_text(file):
    """
    Returns the complete content of a text file, decoded as UTF-8. Bytes that cannot be decoded are dropped.
    """
    with open(file, mode='r', encoding='utf-8', errors='ignore') as handle:
        return handle.read()
def read_first_line(file):
    """
    Returns only the first line (including its line break, if any) of a UTF-8 encoded text file. Convenient
    because only the first line of a category overview is really needed.
    """
    with open(file, mode='r', encoding='utf-8') as handle:
        return handle.readline()
def write_text(file, text):
    """
    Stores the text as UTF-8 encoded text file. An already existing file is overwritten.
    """
    with open(file, mode='w', encoding='utf-8') as handle:
        handle.write(text)
def determine_archive_version_generic(name, leading_terms, trailing_terms):
    """
    Given an archive file name, tries to get version information. Generic version that can cut off leading and trailing
    terms and converts to lower case. Give the most special terms first in the list. As many cut offs as possible are
    performed.

    The terms are compared with the already lower cased name, so they must be given in lower case. Empty terms
    are ignored.

    :param name: archive file name
    :param leading_terms: terms to be cut off at the start, tried one after another in the given order
    :param trailing_terms: terms to be cut off at the end, tried one after another in the given order
    :return: lower cased name without the matching terms
    """
    # to lower case
    name = name.lower()

    # cut leading terms
    for term in leading_terms:
        if term and name.startswith(term):
            name = name[len(term):]

    # cut trailing terms
    for term in trailing_terms:
        # empty terms must be skipped, they "match" always and name[:-0] would wipe the whole name
        if term and name.endswith(term):
            name = name[:-len(term)]

    return name
def unzip_keep_last_modified(archive, destination):
    """
    Unzips content of a zip file archive into the destination directory keeping the last modified file property as
    it was in the zip archive.
    Assumes that destination is an existing directory path.

    :param archive: path of the zip file
    :param destination: path of the existing directory to extract into
    """
    with zipfile.ZipFile(archive, 'r') as zip_archive:
        # zip_archive.extractall(destination)  # does not keep the last modified property
        for zip_entry in zip_archive.infolist():
            # extract() sanitizes the member name (drops leading slashes, '..' parts, ...), so only the path it
            # returns is guaranteed to be the location of the extracted item
            extracted_path = zip_archive.extract(zip_entry, destination)
            # date_time has only six elements, fill up to a full time tuple (-1: daylight saving unknown)
            modified = time.mktime(zip_entry.date_time + (0, 0, -1))
            os.utime(extracted_path, (modified, modified))
def detect_archive_type(name):
    """
    Guesses the type of an archive from the ending of its file name.

    :param name: archive file name
    :return: 'tar', 'zip' or None if the ending is unknown
    """
    if name.endswith(('.tbz2', '.tar.gz')):
        return 'tar'
    if name.endswith(('.zip', '.jar')):
        return 'zip'
    # unknown
    return None
def folder_size(path):
    """
    Returns the summed up size (in bytes) of all files inside a folder and all of its sub-folders.
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(path)
        for name in names
    )
def extract_archive(source, destination, type):
    """
    Extracts a zip, tar, ... to a destination path.
    Type may result from detect_archive_type().

    :param source: path of the archive file
    :param destination: path of the directory to extract into
    :param type: 'tar' or 'zip', for every other value nothing is done
    """
    if type == 'tar':
        # context manager, so that the archive file is closed again (also if the extraction fails)
        with tarfile.open(source, 'r') as tar:
            tar.extractall(destination)
    elif type == 'zip':
        unzip_keep_last_modified(source, destination)
def strip_wrapped_folders(folder):
    """
    Descends into a folder as long as the current folder contains exactly one item and this item is a folder.
    Assumes folder is a directory.

    :param folder: path of the folder to start with
    :return: path of the innermost folder reached that way (the given folder itself, if it is not wrapped)
    """
    content = list(os.scandir(folder))
    while len(content) == 1 and content[0].is_dir():
        folder = content[0].path
        content = list(os.scandir(folder))
    return folder
def determine_latest_last_modified_date(folder):
    """
    Returns the latest "last modified" time stamp of all files inside a folder and all of its sub-folders.

    :param folder: path of the folder to search recursively
    :return: the latest time stamp (as given by os.path.getmtime) or 0 if there is no newer file
    """
    modified_times = [
        os.path.getmtime(os.path.join(root, name))
        for root, _, names in os.walk(folder)
        for name in names
    ]
    # the leading 0 is the result for folders without files
    return max([0] + modified_times)
def subprocess_run(cmd, display=True):
    """
    Runs a cmd via subprocess and displays the std output in case of success or the std error output in case of failure
    where it also stops execution.

    :param cmd: command as accepted by subprocess.run
    :param display: if True, the output of the command is printed
    :return: the std output of the command, decoded as cp1252
    :raises RuntimeError: if the command finishes with a return code other than zero
    """
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # cp1252 leaves some byte values undefined, replace them instead of failing with an UnicodeDecodeError
    stdout = result.stdout.decode('cp1252', errors='replace')
    if result.returncode:
        stderr = result.stderr.decode('cp1252', errors='replace')
        if display:
            print("error {} in call {}".format(result.returncode, cmd))
            print(stdout)
            print(stderr)
        # carry the reason in the error too, otherwise it is lost whenever display is off
        raise RuntimeError('error {} in call {}: {}'.format(result.returncode, cmd, stderr.strip()))
    if display:
        print(' output: {}'.format(stdout))
    return stdout
# TODO need move_tree
def copy_tree(source, destination):
    """
    Copies the full content of one directory into another avoiding the use of distutils.dir_util.copy_tree because that
    can give unwanted errors on Windows (probably related to symlinks).

    The destination directory is created if it does not exist yet. Files already existing in the destination are
    overwritten. Files are copied with shutil.copyfile.

    :param source: path of the directory whose content is copied
    :param destination: path of the directory receiving the content
    """
    # this gave an FileNotFoundError: [Errno 2] No such file or directory: '' on Windows
    # distutils.dir_util.copy_tree(archive_path, git_path)

    # without this, a source consisting only of files could not be copied to a not yet existing destination
    os.makedirs(destination, exist_ok=True)

    for dirpath, dirnames, filenames in os.walk(source):
        # first create all the directories on destination
        for dirname in dirnames:
            relative = os.path.relpath(os.path.join(dirpath, dirname), source)
            os.makedirs(os.path.join(destination, relative), exist_ok=True)
        # second copy all the files
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            shutil.copyfile(filepath, os.path.join(destination, os.path.relpath(filepath, source)))
def download_url(url, destination):
    """
    Downloads the content behind an url (via urllib.request) into the file destination.
    To be nice to the servers, one second is waited before each download.

    :param url: url to download from
    :param destination: path of the file that is written
    """
    time.sleep(1)  # we are nice
    with urllib.request.urlopen(url) as response, open(destination, 'wb') as target:
        shutil.copyfileobj(response, target)
def handleRemoveReadonly(func, path, exc):
    """
    Error handler for shutil.rmtree: makes the path writable and then calls the failed function with the path
    again. Necessary on Windows. See https://stackoverflow.com/questions/1889597/deleting-directory-in-python

    :param func: function that failed, called again as func(path)
    :param path: path the function failed on
    :param exc: information about the failure (not used)
    """
    writable = stat.S_IWRITE
    os.chmod(path, writable)
    func(path)
def git_clear_path(git_path):
    """
    Deletes all files and directories located directly in a path, only the item named '.git' is kept.

    :param git_path: path of the directory to be cleared
    """
    for name in os.listdir(git_path):
        # '.git' stays
        if name != '.git':
            full_path = os.path.join(git_path, name)
            if os.path.isdir(full_path):
                shutil.rmtree(full_path, onerror=handleRemoveReadonly)
            else:
                os.remove(full_path)
def recreate_directory(path):
    """
    Creates a directory, an already existing directory at that path is deleted (with all its content) before.

    The creation is tried up to ten times with a pause of 0.1 seconds after each PermissionError.

    :param path: path of the directory
    :raises RuntimeError: if all ten attempts to create the directory ended with a PermissionError
    """
    if os.path.isdir(path):
        shutil.rmtree(path, onerror=handleRemoveReadonly)
    for _ in range(10):
        try:
            os.mkdir(path)
            return
        except PermissionError:
            time.sleep(0.1)
    raise RuntimeError()
def unzip(zip_file, destination_directory):
    """
    Unzips and keeps the original modified date (of files as well as of directories).

    :param zip_file: path of the zip file
    :param destination_directory: path of the directory to extract into
    :return: None
    """
    directory_times = {}
    with zipfile.ZipFile(zip_file, 'r') as zip_archive:
        for info in zip_archive.infolist():
            # extract() sanitizes the member name (drops leading slashes, '..' parts, ...), so only the path it
            # returns is guaranteed to be the location of the extracted item
            extracted_path = zip_archive.extract(info, destination_directory)
            # still need to adjust the dt o/w item will have the current dt
            modified = time.mktime(info.date_time + (0, 0, -1))
            if os.path.isdir(extracted_path):
                # changes to dir dt will have no effect right now since files are
                # being created inside of it; hold the dt and apply it later
                directory_times[extracted_path] = modified
            else:
                os.utime(extracted_path, (modified, modified))

    # done creating files, now update dir dt
    for directory, modified in directory_times.items():
        os.utime(directory, (modified, modified))
def strip_url(url):
    """
    Cuts common prefixes (protocol, 'www.') and common suffixes ('/', '.git', '/en', '/index.html') off an url.

    All prefixes and afterwards all suffixes are tried once, one after another in a fixed order, so several of
    them can be cut off the same url.
    """
    leading = ('http://', 'https://', 'svn://', 'www.')
    trailing = ('/', '.git', '/en', '/index.html')
    for term in leading:
        if url.startswith(term):
            url = url[len(term):]
    for term in trailing:
        if url.endswith(term):
            url = url[:len(url) - len(term)]
    return url
def load_properties(filepath, sep='=', comment_char='#'):
    """
    Read the file as a properties file (in Java).

    Every line that is neither empty nor a comment must have the form "key<sep>value". Only the first separator
    of a line splits, so values may contain the separator themselves. Keys and values are stripped of surrounding
    whitespace.

    :param filepath: path of the properties file
    :param sep: separator between key and value
    :param comment_char: lines starting with this text are comments
    :return: dictionary of the properties
    :raises ValueError: if a line does not contain the separator
    """
    properties = {}
    with open(filepath, "rt") as file:
        for line in file:
            line = line.strip()
            # neither empty lines nor comments define a property
            if not line or line.startswith(comment_char):
                continue
            key, separator, value = line.partition(sep)
            if not separator:
                raise ValueError('malformed line "{}" in properties file {}'.format(line, filepath))
            properties[key.strip()] = value.strip()
    return properties
def unique_elements_and_occurrences(elements):
    """
    Counts how often each element occurs and returns the distinct elements as strings of the form
    "element(count)", most frequent first (elements with equal count stay in order of first occurrence).

    Elements that cannot be counted (the counting raises an exception) are skipped, the exception is printed.

    :param elements: iterable of elements
    :return: list of strings
    """
    counts = {}
    for element in elements:
        try:
            if element in counts:
                counts[element] += 1
            else:
                counts[element] = 1
        except Exception as e:
            print(e)
    ranked = sorted(counts.items(), key=lambda pair: -pair[1])
    return ['{}({})'.format(element, count) for element, count in ranked]