added entries
This commit is contained in:
192
tools/lechemindeladam_svn_to_git.py
Normal file
192
tools/lechemindeladam_svn_to_git.py
Normal file
@ -0,0 +1,192 @@
|
||||
"""
|
||||
The SVN repository is too big to be imported automatically into Git (and GitHub) because it contains many large binary files.
|
||||
Needs a manual solution.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
def special_treatment(destination, revision):
    """
    Apply manual, revision-specific fixes to a checked-out revision.

    Only revision 5 is handled: the 'Holyspirit' directory directly below
    *destination* is removed recursively. All other revisions are left as
    they are.

    :param destination: path of the directory holding the checked-out revision
    :param revision: number of the checked-out SVN revision
    """
    if revision != 5:
        return
    holyspirit_dir = os.path.join(destination, 'Holyspirit')
    shutil.rmtree(holyspirit_dir)
|
||||
|
||||
|
||||
def delete_global_excludes(folder):
    """
    Delete all files below *folder* whose name is contained in the
    module-level ``global_exclude`` list.

    Directories whose path relative to *folder* starts with '.svn' are not
    touched.

    :param folder: root directory of the tree that is cleaned
    """
    for current_dir, _, names in os.walk(folder):
        if os.path.relpath(current_dir, folder).startswith('.svn'):
            continue
        for name in names:
            if name not in global_exclude:
                continue
            os.remove(os.path.join(current_dir, name))
|
||||
|
||||
|
||||
def delete_empty_directories(folder):
    """
    Delete every empty directory at or below *folder*.

    The tree is walked bottom-up, so a directory that only contained empty
    directories is deleted as well once its children are gone. *folder*
    itself is deleted if it ends up empty, but nothing above *folder* is
    ever removed. Directories whose path relative to *folder* starts with
    '.svn' are not touched.

    :param folder: root directory of the tree that is cleaned
    :raises OSError: if an empty directory cannot be removed
    """
    for dirpath, _, _ in os.walk(folder, topdown=False):
        rel_path = os.path.relpath(dirpath, folder)
        if rel_path.startswith('.svn'):
            continue
        # The listing reported by os.walk was taken before the children were
        # deleted, so ask the file system for the current content instead.
        if not os.listdir(dirpath):
            # os.rmdir (unlike os.removedirs) never prunes parent directories,
            # which keeps the deletion confined to the tree below *folder*.
            os.rmdir(dirpath)
|
||||
|
||||
|
||||
def list_large_unwanted_files(folder):
    """
    Collect the files below *folder* that are either of an unwanted type or
    too large.

    A file is reported if its name ends with one of the entries of the
    module-level ``unwanted_file_extensions`` list or, failing that, if its
    size exceeds the module-level ``large_file_limit``. Directories whose path
    relative to *folder* starts with '.svn' are skipped.

    :param folder: root directory of the tree that is inspected
    :return: list of the reported file paths, relative to *folder*; every file
        appears at most once
    """
    found = []
    for current_dir, _, names in os.walk(folder):
        relative_dir = os.path.relpath(current_dir, folder)
        if relative_dir.startswith('.svn'):
            continue
        for name in names:
            has_unwanted_extension = any(name.endswith(ending) for ending in unwanted_file_extensions)
            # the file size is only looked up if the extension did not match
            if has_unwanted_extension or os.path.getsize(os.path.join(current_dir, name)) > large_file_limit:
                found.append(os.path.join(relative_dir, name))
    return found
|
||||
|
||||
|
||||
def checkout(revision_start, revision_end):
    """
    Check out a range of SVN revisions, each into its own directory, and
    clean up every checkout.

    For every revision from *revision_start* to *revision_end* (both
    included) the directory 'rNNNN' below ``svn_checkout_path`` is created
    from scratch (an existing one is deleted first) and filled by
    'svn checkout'. Afterwards the globally excluded files are deleted, the
    unwanted or large files are listed (and, if there are any, written to
    'rNNNN_unwanted_files.json' next to the checkout), empty directories are
    deleted and the revision-specific treatment is applied.

    :param revision_start: first revision that is checked out
    :param revision_end: last revision that is checked out, must not be
        smaller than *revision_start*
    """
    assert revision_end >= revision_start

    for revision in range(revision_start, revision_end + 1):
        print('checking out revision {}'.format(revision))

        # start with a fresh directory for this revision
        working_copy = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if os.path.exists(working_copy):
            shutil.rmtree(working_copy)

        # run the checkout and report how long it took
        checkout_started = time.time()
        subprocess_run(['svn', 'checkout', '-r{}'.format(revision), svn_url, working_copy])
        print('checkout took {}s'.format(time.time() - checkout_started))

        # remove the files on the global exclude list
        delete_global_excludes(working_copy)

        # store the list of unwanted files, but only if there are any
        unwanted = list_large_unwanted_files(working_copy)
        if unwanted:
            report = json.dumps(unwanted, indent=1)
            report_path = os.path.join(svn_checkout_path, 'r{:04d}_unwanted_files.json'.format(revision))
            write_text(report_path, report)

        # remove the directories that are empty
        delete_empty_directories(working_copy)

        # manual fixes for individual revisions
        special_treatment(working_copy, revision)
|
||||
|
||||
|
||||
def initialize_git():
    """
    Create the directory ``git_path``, change into it and set up an empty git
    repository with the committer name and e-mail configured.
    """
    os.mkdir(git_path)
    os.chdir(git_path)

    # run from within the new directory, in exactly this order
    git_commands = (
        ['git', 'init'],
        ['git', 'config', 'user.name', 'Trilarion'],
        ['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com'],
    )
    for git_command in git_commands:
        subprocess_run(git_command)
|
||||
|
||||
|
||||
def combine_log_messages(msg):
    """
    Combine the lines of a log message into a single string.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping (including lines consisting of whitespace only) are
    dropped, and the remaining lines are joined with '\\r\\n'.

    :param msg: iterable of the lines (strings) of a log message
    :return: the combined message, an empty string if no line remains
    """
    # strip before filtering, otherwise whitespace-only lines would survive as
    # empty strings and leave blank lines in the combined message
    stripped_lines = (line.strip() for line in msg)
    return "\r\n".join(line for line in stripped_lines if line)
|
||||
|
||||
|
||||
def read_logs():
    """
    Read the complete log of the SVN repository and take it apart into its
    entries.

    The output of 'svn log' is processed with plain string splitting.
    Probably regular expressions would have worked too.

    :return: tuple (logs, unique_authors). logs is a list of
        (revision, author, date, message) tuples sorted by ascending revision,
        unique_authors is the sorted list of the distinct author names.
    """
    print('read all log messages')
    os.chdir(svn_checkout_path)
    began = time.time()
    raw_log = subprocess_run(['svn', 'log', svn_url], display=False)
    print('read log took {}s'.format(time.time() - began))

    # the entries are separated by a line of dashes; the last two pieces of
    # the split are not used
    separator = '\r\n------------------------------------------------------------------------\r\n'
    entries = raw_log.split(separator)[:-2]
    print('{} log entries'.format(len(entries)))

    # every entry becomes a list of lines
    entries = [entry.split('\r\n') for entry in entries]

    # the first entry still starts with an additional line of dashes
    del entries[0][0]

    # the first line of an entry holds revision, author and date separated by '|'
    headers = [entry[0].split('|') for entry in entries]

    # the revision is written as 'r<number>'
    revisions = [int(header[0][1:]) for header in headers]

    authors = [header[1].strip() for header in headers]
    unique_authors = sorted(set(authors))

    dates = [header[2].strip() for header in headers]
    messages = [combine_log_messages(entry[2:]) for entry in entries]

    logs = sorted(zip(revisions, authors, dates, messages), key=lambda item: item[0])
    return logs, unique_authors
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # settings used by the clean-up functions of this script
    large_file_limit = 1e6  # in bytes
    unwanted_file_extensions = ['.exe', '.dll']
    global_exclude = ['Thumbs.db']

    # location of the SVN repository
    svn_url = "https://svn.code.sf.net/p/lechemindeladam/code/"

    # all output is stored next to this file
    base_path = os.path.abspath(os.path.dirname(__file__))
    print('base path={}'.format(base_path))

    # directory that receives the checked-out revisions, created on first use
    svn_checkout_path = os.path.join(base_path, 'svn_checkout')
    if not os.path.exists(svn_checkout_path):
        os.mkdir(svn_checkout_path)

    # directory of the git repository (its creation is currently disabled)
    git_path = os.path.join(base_path, 'lechemindeladam')
    # if not os.path.exists(git_path):
    #     initialize_git()

    # reading the logs and storing them as JSON (currently disabled)
    # logs, authors = read_logs()
    # text = json.dumps(logs, indent=1)
    # write_text(os.path.join(svn_checkout_path, 'logs.json'), text)
    # text = json.dumps(authors, indent=1)
    # write_text(os.path.join(svn_checkout_path, 'authors.json'), text)

    # check out the first 50 revisions
    checkout(1, 50)
|
Reference in New Issue
Block a user