61 lines
2.4 KiB
Python
61 lines
2.4 KiB
Python
"""
|
|
Detects the submodules in the Git repositories via "git show HEAD:.gitmodules" and adds them to the list of
|
|
repositories to be checked out. Works on bare repositories.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import os
|
|
import urllib
|
|
from utils import constants as c, utils as u, archive as a
|
|
|
|
# matches the "url = <value>" entries of a .gitmodules file
regex_submodules = re.compile(r"url = (\S*)", re.MULTILINE)

# (old prefix, new prefix) pairs that rewrite GitHub URLs to their https form
GITHUB_PREFIX_REWRITES = (
    ('git://github.com', 'https://github.com'),
    ('git@github.com:', 'https://github.com/'),
    ('git+ssh://git@github.com', 'https://github.com'),
)

# URLs still starting with one of these after normalization cannot be fetched as-is and are dropped
UNRESOLVED_PREFIXES = ('.', 'git@')


def extract_submodule_urls(gitmodules_text, repo_url):
    """
    Returns the submodule URLs listed in the text of a .gitmodules file.

    URLs starting with '..' are resolved against repo_url. Git evaluates relative submodule URLs like relative
    directories, i.e. the superproject URL itself counts as a directory, so the base gets a trailing slash
    before joining (a plain RFC 3986 join would drop the last path segment and end up one level too high).

    :param gitmodules_text: content of a .gitmodules file
    :param repo_url: URL of the repository the .gitmodules file belongs to
    :return: list of URLs in order of appearance (duplicates are kept)
    """
    # imported here because a plain "import urllib" does not make the urllib.parse submodule available
    from urllib.parse import urljoin

    base = repo_url.rstrip('/') + '/'
    urls = regex_submodules.findall(gitmodules_text)
    return [urljoin(base, url) if url.startswith('..') else url for url in urls]


def normalize_submodule_urls(urls, known_repos):
    """
    Normalizes submodule URLs and removes everything that should not be stored.

    GitHub URLs are rewritten to https and get a ".git" suffix, duplicates and URLs already contained in
    known_repos are removed, and URLs that are still relative or in scp-like "git@" form are dropped.

    :param urls: iterable of submodule URLs
    :param known_repos: iterable of repository URLs that are already archived
    :return: sorted list of new, normalized URLs
    """
    urls = list(urls)

    # transform git://github.com to https://github.com
    for old_prefix, new_prefix in GITHUB_PREFIX_REWRITES:
        urls = [new_prefix + url[len(old_prefix):] if url.startswith(old_prefix) else url for url in urls]

    # let all github repos end on ".git"
    urls = [url + '.git' if 'github.com' in url and not url.endswith('.git') else url for url in urls]

    # eliminate those which are duplicates and those which are in archives already
    candidates = set(urls) - set(known_repos)

    # TODO single dots are not yet resolved correctly, for example in https://github.com/henkboom/pax-britannica.git
    return sorted(url for url in candidates if not url.startswith(UNRESOLVED_PREFIXES))


if __name__ == '__main__':

    # get this folder
    root_folder = os.path.realpath(os.path.dirname(__file__))
    archive_folder = c.get_config('archive-folder')
    base_folder = os.path.join(archive_folder, 'git')

    # read archives.json
    text = u.read_text(os.path.join(root_folder, 'archives.json'))
    archives = json.loads(text)

    # loop over all git archives
    submodules = []
    for repo in archives['git']:
        print('process {}'.format(repo))
        git_folder = a.git_folder_name(repo)
        folder = os.path.join(base_folder, git_folder)
        if not os.path.isdir(folder):
            print('Warning: folder {} does not exist'.format(git_folder))
            continue
        # git is run from inside the repository folder; root_folder is absolute, so later paths stay valid
        os.chdir(folder)
        try:
            # NOTE(review): the meaning of the second argument is defined in utils - confirm there
            content = u.subprocess_run(['git', 'show', 'HEAD:.gitmodules'], False)
        except Exception:
            # presumably the repository has no .gitmodules file (or git failed), nothing to collect
            continue
        submodules.extend(extract_submodule_urls(content, repo))

    # normalize, remove duplicates, already archived and unresolved ones
    submodules = normalize_submodule_urls(submodules, archives['git'])

    # store them
    print('found {} submodules'.format(len(submodules)))
    u.write_text(os.path.join(root_folder, 'archives.git-submodules.json'), json.dumps(submodules, indent=1))
|