From 20a44a66940e9ebee1a4905ffe4a81d492b83764 Mon Sep 17 00:00:00 2001 From: Trilarion Date: Thu, 28 Jun 2018 14:50:35 +0200 Subject: [PATCH] streamlined python code --- .gitignore | 1 + docs/data.json | 2 +- games/rpg/linleys_dungeon_crawl.md | 2 +- games/statistics.md | 2 +- games/strategy/alien_assault_traders.md | 4 +- tools/{archive => }/README.txt | 0 tools/aatraders.json | 18 +++ tools/aatraders_source_release_to_git.py | 156 ++++++++++++++++++ tools/{archive => }/archives.json | 1 + tools/maintenance.py | 27 +--- tools/phaos_source_release_to_git.py | 92 +---------- tools/{archive => }/update.py | 60 +++---- tools/utils/__init__.py | 0 tools/utils/utils.py | 191 +++++++++++++++++++++++ 14 files changed, 398 insertions(+), 158 deletions(-) rename tools/{archive => }/README.txt (100%) create mode 100644 tools/aatraders.json create mode 100644 tools/aatraders_source_release_to_git.py rename tools/{archive => }/archives.json (99%) rename tools/{archive => }/update.py (71%) create mode 100644 tools/utils/__init__.py create mode 100644 tools/utils/utils.py diff --git a/.gitignore b/.gitignore index 7a435476..4f0de2bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /.idea /tools/archive/** +__pycache__ diff --git a/docs/data.json b/docs/data.json index 49256f0c..2df4245c 100644 --- a/docs/data.json +++ b/docs/data.json @@ -84,7 +84,7 @@ [ "Alien Assault Traders (home, entry)", "Alien Assault Traders is an online, web-based, turn-based..", - "Link", + "Link", "Strategy", "beta / inactive since 2009", "online", diff --git a/games/rpg/linleys_dungeon_crawl.md b/games/rpg/linleys_dungeon_crawl.md index a5fd23f4..6cab0830 100644 --- a/games/rpg/linleys_dungeon_crawl.md +++ b/games/rpg/linleys_dungeon_crawl.md @@ -4,7 +4,7 @@ _Roguelike molded in the tradition of the early greats of the genre: Rogue, Hack - Home: http://www.dungeoncrawl.org/ - Media: https://en.wikipedia.org/wiki/Linley%27s_Dungeon_Crawl, http://nethack.wikia.com/wiki/Linley%27s_Dungeon_Crawl 
-- Download: http://www.dungeoncrawl.org/?d, ftp://ftp.dungeoncrawl.org/ +- Download: http://www.dungeoncrawl.org/?d (ftp://ftp.dungeoncrawl.org/) - State: mature, inactive since 2006 - Keywords: roguelike - Code repository: https://gitlab.com/Trilarion/dungeoncrawl.git (snapshot of source releases) diff --git a/games/statistics.md b/games/statistics.md index 080e5635..edf5d3d0 100644 --- a/games/statistics.md +++ b/games/statistics.md @@ -1,7 +1,7 @@ [comment]: # (autogenerated content, do not edit) # Statistics -analyzed 403 entries on 2018-06-28 10:38:05 +analyzed 403 entries on 2018-06-28 14:38:52 ## State diff --git a/games/strategy/alien_assault_traders.md b/games/strategy/alien_assault_traders.md index 11880e95..e6a592fe 100644 --- a/games/strategy/alien_assault_traders.md +++ b/games/strategy/alien_assault_traders.md @@ -3,10 +3,10 @@ _Alien Assault Traders is an online, web-based, turn-based strategy space trading game forked off the source of both Black Nova Traders and Nova Game Systems software and inspired by the popular BBS game of TradeWars._ - Home: http://www.aatraders.com/, https://sourceforge.net/projects/aatrade/ -- Download: https://sourceforge.net/projects/aatrade/files/AATraders%200.4x/ +- Download: https://sourceforge.net/projects/aatrade/files/ - State: beta, inactive since 2009 - Keywords: online -- Code repository: https://github.com/tarnus/aatraders +- Code repository: https://github.com/tarnus/aatraders, https://gitlab.com/Trilarion/aatraders.git (+) - Code language: PHP - Code license: GPL-2.0 diff --git a/tools/archive/README.txt b/tools/README.txt similarity index 100% rename from tools/archive/README.txt rename to tools/README.txt diff --git a/tools/aatraders.json b/tools/aatraders.json new file mode 100644 index 00000000..5a7ea4d7 --- /dev/null +++ b/tools/aatraders.json @@ -0,0 +1,18 @@ +["https://sourceforge.net/projects/aatrade/files/AATraders%200.4x/aatrade_0.40_re-release.zip/download", 
+"https://sourceforge.net/projects/aatrade/files/AATraders%200.4x/aatrade_0.40.0_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.31.0/aatrade_0.31.0_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.3/aatrade_0.30.3_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.2/aatrade_0.30.2_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30.1/aatrade_0.30.1_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.3x/Alien%20Assault%20Traders%200.30/aatrade_0.30_release.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AAtraders%200.21a%20Final%20Patch/AATRade_0.21a_final.zip/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AATraders%200.21%20Release/aatrade-0.21.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.2x%20Final/AATrade%200.20%20Release/aatrade-0.20.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%200.14.1%20w_Profiles/aatrade-0.14.1.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%200.14%20w_Profiles/aatrade-0.14.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%20v0.13/aatrade-0.13.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Recommended%20Release%20v0.12/aatrade-0.12.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Release%20v0.11/aatrade-0.11.tar.gz/download", +"https://sourceforge.net/projects/aatrade/files/AATraders%200.1x%20Final/Initial%20Release%20v0.10/aatrade-0.10.tar.gz/download", 
+"https://sourceforge.net/projects/aatrade/files/OldFiles/aatrade0.21final.zip/download", +"https://sourceforge.net/projects/aatrade/files/OldFiles/aatrade0.21patched.zip/download"] \ No newline at end of file diff --git a/tools/aatraders_source_release_to_git.py b/tools/aatraders_source_release_to_git.py new file mode 100644 index 00000000..c1694528 --- /dev/null +++ b/tools/aatraders_source_release_to_git.py @@ -0,0 +1,156 @@ +""" + Downloads source releases from Sourceforge and puts them into a git repository +""" + +import json +import datetime +from utils.utils import * + +def special_aatrade_package_extraction(source): + """ + Unpacks "aatrade_packages". + """ + files = os.listdir(source) + if any([x.startswith('aatrade_package') for x in files]): + # we got the special case + print('aatrade package extraction of {}'.format(source)) + + # first delete all, that do not begin with the package name + for file in files: + if not file.startswith('aatrade_package'): + os.remove(os.path.join(source, file)) + + # second extract all those which are left, removing them too + files = os.listdir(source) + for file in files: + try: + extract_archive(os.path.join(source, file), source, 'tar') + except: + extract_archive(os.path.join(source, file), source, 'zip') + os.remove(os.path.join(source, file)) + + +if __name__ == '__main__': + + # base path is the directory containing this file + base_path = os.path.abspath(os.path.dirname(__file__)) + print('base path={}'.format(base_path)) + + # recreate archive path + archive_path = os.path.join(base_path, 'downloads') + if not os.path.exists(archive_path): + os.mkdir(archive_path) + + # load source releases urls + with open(os.path.join(base_path, 'aatraders.json'), 'r') as f: + urls = json.load(f) + print('will process {} urls'.format(len(urls))) + if len(urls) != len(set(urls)): + raise RuntimeError("urls list contains duplicates") + + # determine file archives from urls + archives = [x.split('/')[-2] for x in urls] + if 
len(archives) != len(set(archives)): + raise RuntimeError("files with duplicate archives, cannot deal with that") + + # determine version from file name + versions = [determine_archive_version_generic(x, leading_terms=['aatrade_', 'aatrade-', 'aatrade'], trailing_terms=['.zip', '.tar.gz', '_release']) for x in archives] + for version in versions: + print(version) + + # extend archives to full paths + archives = [os.path.join(archive_path, x) for x in archives] + + # download them + print('download source releases') + for url, destination in zip(urls, archives): + # only if not yet existing + if os.path.exists(destination): + continue + # download + print(' download {}'.format(os.path.basename(destination))) + download_url(url, destination) + + # extract them + print('extract downloaded archives') + extracted_archives = [x + '-extracted' for x in archives] + for archive, extracted_archive in zip(archives, extracted_archives): + print(' extract {}'.format(os.path.basename(archive))) + # only if not yet existing + if os.path.exists(extracted_archive): + continue + os.mkdir(extracted_archive) + # extract + extract_archive(archive, extracted_archive, detect_archive_type(archive)) + + # go up in unzipped archives until the very first non-empty folder + extracted_archives = [strip_wrapped_folders(x) for x in extracted_archives] + + # special 'aatrade_packageX' treatment + for extracted_archive in extracted_archives: + special_aatrade_package_extraction(extracted_archive) + + # calculate size of folder + sizes = [folder_size(x) for x in extracted_archives] + + # determine date + dates = [determine_latest_last_modified_date(x) for x in extracted_archives] + dates_strings = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d') for x in dates] + # if len(dates_strings) != len(set(dates_strings)): + # raise RuntimeError("Some on the same day, cannot cope with that") + + # gather all important stuff in one list and sort by dates and throw those out where size is not in range 
+ db = list(zip(urls, extracted_archives, versions, dates, dates_strings, sizes)) + db.sort(key=lambda x:x[3]) + + size_range = [5e6, float("inf")] # set to None if not desired + if size_range: + db = [x for x in db if size_range[0] <= x[5] <= size_range[1]] + + print('proposed order') + for url, _, version, _, date, size in db: + print(' date={} version={} size={}'.format(date, version, size)) + + # git init + git_path = os.path.join(base_path, 'aatrade') + if os.path.exists(git_path): + shutil.rmtree(git_path) + os.mkdir(git_path) + os.chdir(git_path) + subprocess_run(['git', 'init']) + subprocess_run(['git', 'config', 'user.name', 'Trilarion']) + subprocess_run(['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com']) + + # now process revision by revision + print('process revisions') + git_author = 'akapanamajack, tarnus ' + for url, archive_path, version, _, date, _ in db: + print(' process version={}'.format(version)) + + # clear git path without deleting .git + print(' clear git') + for item in os.listdir(git_path): + # ignore '.git' + if item == '.git': + continue + item = os.path.join(git_path, item) + if os.path.isdir(item): + shutil.rmtree(item) + else: + os.remove(item) + + # copy unpacked source files to git path + print('copy to git') + copy_tree(archive_path, git_path) + + # update the git index (add unstaged, remove deleted, ...) 
+ print('git add') + os.chdir(git_path) + subprocess_run(['git', 'add', '--all']) + + # perform the commit + print('git commit') + os.chdir(git_path) + message = 'version {} ({}) on {}'.format(version, url, date) + print(' message "{}"'.format(message)) + subprocess_run(['git', 'commit', '--message={}'.format(message), '--author={}'.format(git_author), '--date={}'.format(date)]) \ No newline at end of file diff --git a/tools/archive/archives.json b/tools/archives.json similarity index 99% rename from tools/archive/archives.json rename to tools/archives.json index 37acbb4b..9efb225a 100644 --- a/tools/archive/archives.json +++ b/tools/archives.json @@ -298,6 +298,7 @@ "https://github.com/xesf/twin-e.git", "https://github.com/xoreos/xoreos.git", "https://github.com/zaki/irrlicht.git", + "https://gitlab.com/Trilarion/aatraders.git", "https://gitlab.com/Trilarion/antichess.git", "https://gitlab.com/Trilarion/aplanetsrevenge.git", "https://gitlab.com/Trilarion/attal.git", diff --git a/tools/maintenance.py b/tools/maintenance.py index 4c021caa..d8ed7679 100644 --- a/tools/maintenance.py +++ b/tools/maintenance.py @@ -15,36 +15,11 @@ import http.client import datetime import json import textwrap +from utils.utils import * TOC = '_toc.md' -def read_text(file): - """ - Reads a whole text file (UTF-8 encoded). - """ - with open(file, mode='r', encoding='utf-8') as f: - text = f.read() - return text - - -def read_first_line(file): - """ - Convenience function because we only need the first line of a category overview really. - """ - with open(file, mode='r', encoding='utf-8') as f: - line = f.readline() - return line - - -def write_text(file, text): - """ - Writes a whole text file (UTF-8 encoded). - """ - with open(file, mode='w', encoding='utf-8') as f: - f.write(text) - - def get_category_paths(): """ Returns all sub folders of the games path. 
diff --git a/tools/phaos_source_release_to_git.py b/tools/phaos_source_release_to_git.py index 4ab86557..e663463d 100644 --- a/tools/phaos_source_release_to_git.py +++ b/tools/phaos_source_release_to_git.py @@ -1,90 +1,10 @@ """ - Downloads source releases from Sourceforge and puts them into a git repository +Downloads source releases from Sourceforge and puts them into a git repository """ -import os -import shutil -import urllib.request import json -import time -import zipfile -import subprocess import datetime -import distutils.dir_util -import sys - - -def determine_version(name): - # to lower case - name = name.lower() - # cut leading terms - terms = ['phaos-', 'phaos', 'pv'] - for t in terms: - if name.startswith(t): - name = name[len(t):] - # cut trailing '.zip' - t = '.zip' - if name.endswith(t): - name = name[:-len(t)] - return name - - -def determine_last_modified_date(folder): - latest_last_modified = 0 - for dirpath, dirnames, filenames in os.walk(folder): - for filename in filenames: - filepath = os.path.join(dirpath, filename) - lastmodified = os.path.getmtime(filepath) - if lastmodified > latest_last_modified: - latest_last_modified = lastmodified - return latest_last_modified - - -def unzip_keep_last_modified(archive, destination): - """ - Assuming that destination is a directory and already existing. 
- """ - with zipfile.ZipFile(archive, 'r') as zip: - # zip.extractall(destination) - for zip_entry in zip.infolist(): - name, date_time = zip_entry.filename, zip_entry.date_time - date_time = time.mktime(date_time + (0, 0, -1)) - zip.extract(zip_entry, destination) - os.utime(os.path.join(destination, name), (date_time, date_time)) - -def strip_wrapping(folder): - names = os.listdir(folder) - while len(names) == 1: - folder = os.path.join(folder, names[0]) - names = os.listdir(folder) - return folder - -def copy_tree(source, destination): - # this gave an FileNotFoundError: [Errno 2] No such file or directory: '' on Windows - # distutils.dir_util.copy_tree(archive_path, git_path) - for dirpath, dirnames, filenames in os.walk(source): - # first create all the directory on destination - directories_to_be_created = [os.path.join(destination, os.path.relpath(os.path.join(dirpath, x), source)) for x in dirnames] - for directory in directories_to_be_created: - os.makedirs(directory, exist_ok=True) - # second copy all the files - filepaths_source = [os.path.join(dirpath, x) for x in filenames] - filepaths_destination = [os.path.join(destination, os.path.relpath(x, source)) for x in filepaths_source] - for src, dst in zip(filepaths_source, filepaths_destination): - shutil.copyfile(src, dst) - - -def subprocess_run(cmd): - """ - - """ - result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if result.returncode: - print("error {} in call {}".format(result.returncode, cmd)) - print(result.stderr.decode('ascii')) - sys.exit(-1) - else: - print(' output: {}'.format(result.stdout.decode('ascii'))) +from utils.utils import * if __name__ == '__main__': @@ -113,7 +33,7 @@ if __name__ == '__main__': raise RuntimeError("files with duplicate archives, cannot deal with that") # determine version from file name - versions = [determine_version(x) for x in archives] + versions = [determine_archive_version_generic(x, leading_terms=['phaos-', 'phaos', 'pv'], 
trailing_terms=['zip']) for x in archives] # for version in versions: # print(version) @@ -146,10 +66,10 @@ if __name__ == '__main__': unzip_keep_last_modified(archive, unzipped_archive) # go up in unzipped archives until the very first non-empty folder - unzipped_archives = [strip_wrapping(x) for x in unzipped_archives] + unzipped_archives = [strip_wrapped_folders(x) for x in unzipped_archives] # determine date - dates = [determine_last_modified_date(x) for x in unzipped_archives] + dates = [determine_latest_last_modified_date(x) for x in unzipped_archives] dates_strings = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d') for x in dates] # if len(dates_strings) != len(set(dates_strings)): # raise RuntimeError("Some on the same day, cannot cope with that") @@ -168,6 +88,8 @@ if __name__ == '__main__': os.mkdir(git_path) os.chdir(git_path) subprocess_run(['git', 'init']) + subprocess_run(['git', 'config', 'user.name', 'Trilarion']) + subprocess_run(['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com']) # now process revision by revision print('process revisions') diff --git a/tools/archive/update.py b/tools/update.py similarity index 71% rename from tools/archive/update.py rename to tools/update.py index 61cf7fac..562e60e2 100644 --- a/tools/archive/update.py +++ b/tools/update.py @@ -1,30 +1,20 @@ """ - Clones and/or pulls all the gits listed in archives.json +Clones and/or pulls all the gits listed in archives.json - Requires: git executable in the path +Requires: git executable in the path - Warning: This may take a long time on the first run and may need a lot of storage space! +Warning: This may take a long time on the first run and may need a lot of storage space! - TODO are really all existing branches cloned and pulled? (see https://stackoverflow.com/questions/67699/how-to-clone-all-remote-branches-in-git) - TODO Sourceforge git clone may not work all the time (restart the script helps..) 
+TODO are really all existing branches cloned and pulled? (see https://stackoverflow.com/questions/67699/how-to-clone-all-remote-branches-in-git) +TODO Sourceforge git clone may not work all the time (restart the script helps..) - Note: May need to set http.postBuffer (https://stackoverflow.com/questions/17683295/git-bash-error-rpc-failed-result-18-htp-code-200b-1kib-s) +Note: May need to set http.postBuffer (https://stackoverflow.com/questions/17683295/git-bash-error-rpc-failed-result-18-htp-code-200b-1kib-s) """ -import os import json -import subprocess -import time +from utils.utils import * -def read_text(file): - """ - Reads a whole text file (UTF-8 encoded). - """ - with open(file, mode='r', encoding='utf-8') as f: - text = f.read() - return text - def derive_folder_name(url, replaces): sanitize = lambda x: x.replace('/', '.') for service in replaces: @@ -52,16 +42,12 @@ def git_folder_name(url): def git_clone(url, folder): - result = subprocess.run(["git", "clone", "--mirror", url, folder]) - if result.returncode: - print(result) + subprocess_run(["git", "clone", "--mirror", url, folder]) def git_update(folder): os.chdir(folder) - result = subprocess.run(["git", "fetch", "--all"]) - if result.returncode: - print(result) + subprocess_run(["git", "fetch", "--all"]) def svn_folder_name(url): @@ -72,15 +58,12 @@ def svn_folder_name(url): def svn_clone(url, folder): - result = subprocess.run(["svn", "checkout", url, folder]) - if result.returncode: - print(result) + subprocess_run(["svn", "checkout", url, folder]) + def svn_update(folder): os.chdir(folder) - result = subprocess.run(["svn", "update"]) - if result.returncode: - print(result) + subprocess_run(["svn", "update"]) def hg_folder_name(url): @@ -93,16 +76,12 @@ def hg_folder_name(url): def hg_clone(url, folder): - result = subprocess.run(["hg", "clone", url, folder]) - if result.returncode: - print(result) + subprocess_run(["hg", "clone", url, folder]) def hg_update(folder): os.chdir(folder) - result = 
subprocess.run(['hg', 'pull', '-u']) - if result.returncode: - print(result) + subprocess_run(['hg', 'pull', '-u']) def bzr_folder_name(url): @@ -113,21 +92,17 @@ def bzr_folder_name(url): def bzr_clone(url, folder): - result = subprocess.run(['bzr', 'branch', url, folder]) - if result.returncode: - print(result) + subprocess_run(['bzr', 'branch', url, folder]) def bzr_update(folder): os.chdir(folder) - result = subprocess.run(['bzr', 'pull']) - if result.returncode: - print(result) + subprocess_run(['bzr', 'pull']) def run(type, urls): print('update {} {} archives'.format(len(urls), type)) - base_folder = os.path.join(root_folder, type) + base_folder = os.path.join(archive_folder, type) if not os.path.exists(base_folder): os.mkdir(base_folder) @@ -192,6 +167,7 @@ if __name__ == '__main__': # get this folder root_folder = os.path.realpath(os.path.dirname(__file__)) + archive_folder = os.path.join(root_folder, 'archive') # read archives.json text = read_text(os.path.join(root_folder, 'archives.json')) diff --git a/tools/utils/__init__.py b/tools/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/utils/utils.py b/tools/utils/utils.py new file mode 100644 index 00000000..8a2ac214 --- /dev/null +++ b/tools/utils/utils.py @@ -0,0 +1,191 @@ +""" +Utilities for the tools. +""" + +import sys +import os +import time +import zipfile +import tarfile +import subprocess +import shutil +import urllib.request + + +def read_text(file): + """ + Reads a whole text file (UTF-8 encoded). + """ + with open(file, mode='r', encoding='utf-8') as f: + text = f.read() + return text + + +def read_first_line(file): + """ + Convenience function because we only need the first line of a category overview really. + """ + with open(file, mode='r', encoding='utf-8') as f: + line = f.readline() + return line + + +def write_text(file, text): + """ + Writes a whole text file (UTF-8 encoded). 
+ """ + with open(file, mode='w', encoding='utf-8') as f: + f.write(text) + + +def determine_archive_version_generic(name, leading_terms, trailing_terms): + """ + Given an archive file name, tries to get version information. Generic version that can cut off leading and trailing + terms and converts to lower case. Give the most special terms first in the list. As many cut offs as possible are + performed. + """ + # to lower case + name = name.lower() + + # cut leading terms + for t in leading_terms: + if name.startswith(t): + name = name[len(t):] + + # cut trailing terms + for t in trailing_terms: + if name.endswith(t): + name = name[:-len(t)] + return name + + +def unzip_keep_last_modified(archive, destination): + """ + Unzips content of a zip file archive into the destination directory keeping the last modified file property as + it was in the zip archive. + + Assumes that destination is an existing directory path. + """ + with zipfile.ZipFile(archive, 'r') as zip: + # zip.extractall(destination) # does not keep the last modified property + for zip_entry in zip.infolist(): + name, date_time = zip_entry.filename, zip_entry.date_time + date_time = time.mktime(date_time + (0, 0, -1)) + zip.extract(zip_entry, destination) + os.utime(os.path.join(destination, name), (date_time, date_time)) + + +def detect_archive_type(name): + """ + Tries to guess which type an archive is. + """ + # test for tar + tar_endings = ['.tbz2', '.tar.gz'] + for ending in tar_endings: + if name.endswith(ending): + return 'tar' + + # test for zip + zip_endings = ['.zip', '.jar'] + for ending in zip_endings: + if name.endswith(ending): + return 'zip' + + # unknown + return None + + +def folder_size(path): + size = 0 + for dirpath, dirnames, filenames in os.walk(path): + for file in filenames: + size += os.path.getsize(os.path.join(dirpath, file)) + return size + + +def extract_archive(source, destination, type): + """ + Extracts a zip, tar, ... to a destination path. 
+ + Type may result from detect_archive_type(). + """ + if type == 'tar': + tar = tarfile.open(source, 'r') + tar.extractall(destination) + elif type == 'zip': + unzip_keep_last_modified(source, destination) + + +def strip_wrapped_folders(folder): + """ + If a folder only contains a single sub-folder and nothing else, descends this way as much as possible. + + Assumes folder is a directory. + """ + while True: + entries = list(os.scandir(folder)) + if len(entries) == 1 and entries[0].is_dir(): + folder = entries[0].path + else: + break + return folder + + +def determine_latest_last_modified_date(folder): + """ + Given a folder, recursively searches all files in this folder and all sub-folders and memorizes the latest + "last modified" date of all these files. + """ + latest_last_modified = 0 + for dirpath, dirnames, filenames in os.walk(folder): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + lastmodified = os.path.getmtime(filepath) + if lastmodified > latest_last_modified: + latest_last_modified = lastmodified + return latest_last_modified + + +def subprocess_run(cmd): + """ + Runs a cmd via subprocess and displays the std output in case of success or the std error output in case of failure + where it also stops execution. + """ + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if result.returncode: + print("error {} in call {}".format(result.returncode, cmd)) + print(result.stderr.decode('ascii')) + sys.exit(-1) + else: + print(' output: {}'.format(result.stdout.decode('ascii'))) + + +def copy_tree(source, destination): + """ + Copies the full content of one directory into another avoiding the use of distutils.dir_util.copy_tree because that + can give unwanted errors on Windows (probably related to symlinks). 
+ """ + # this gave a FileNotFoundError: [Errno 2] No such file or directory: '' on Windows + # distutils.dir_util.copy_tree(archive_path, git_path) + for dirpath, dirnames, filenames in os.walk(source): + # first create all the directories on the destination + directories_to_be_created = [os.path.join(destination, os.path.relpath(os.path.join(dirpath, x), source)) for x in dirnames] + for directory in directories_to_be_created: + os.makedirs(directory, exist_ok=True) + # second copy all the files + filepaths_source = [os.path.join(dirpath, x) for x in filenames] + filepaths_destination = [os.path.join(destination, os.path.relpath(x, source)) for x in filepaths_source] + for src, dst in zip(filepaths_source, filepaths_destination): + shutil.copyfile(src, dst) + + +def download_url(url, destination): + """ + Uses urllib.request to download from a URL to a destination. Destination will be a file. + + Waits one second beforehand, trying to be nice. + """ + time.sleep(1) # we are nice + with urllib.request.urlopen(url) as response: + with open(destination, 'wb') as f: + shutil.copyfileobj(response, f)