opensourcegames/tools/lechemindeladam_svn_to_git.py

395 lines
12 KiB
Python

"""
The SVN repository is too big to be imported into Git (and GitHub) automatically because it contains many large binary files.
A manual conversion is needed.
TODO use git lfs migrate later on the elements
TODO check for sufficient disc space before checkout
"""
import json
import psutil
from utils.utils import *
def _remove_dir_if_present(path):
    """Delete the directory tree at *path*; do nothing if it is not a directory."""
    if os.path.isdir(path):
        shutil.rmtree(path)


def _remove_file_if_present(path):
    """Delete the file at *path*; do nothing if it is not a regular file."""
    if os.path.isfile(path):
        os.remove(path)


def _copy_files_by_suffix(source, destination, suffixes):
    """Copy each direct entry of *source* whose name ends with one of *suffixes* (a tuple) into *destination*."""
    for name in os.listdir(source):
        if name.endswith(suffixes):
            shutil.copy(os.path.join(source, name), destination)


def special_treatment(destination, revision):
    """
    Apply the revision-specific cleanup rules to one exported revision.

    Every rule is guarded by a revision range and, except where noted, by an
    existence check, so rules that do not match the folder content are skipped.

    :param destination: root folder of the exported revision, modified in place
    :param revision: SVN revision number that selects the applicable rules
    """
    # r5-r330: pull Data and *.txt out of Holyspirit/Holyspirit, then drop Holyspirit
    if 5 <= revision <= 330:
        source = os.path.join(destination, 'Holyspirit', 'Holyspirit')
        if os.path.isdir(source):
            if revision >= 8:
                # no existence check on purpose kept as before: a missing Data folder raises
                shutil.copytree(os.path.join(source, 'Data'), os.path.join(destination, 'Data'))
            _copy_files_by_suffix(source, destination, ('.txt',))
            # remove the outer Holyspirit folder, not only the inner one
            shutil.rmtree(os.path.join(destination, 'Holyspirit'))

    # r337-r1200: pull Data, *.txt and *.conf out of Holyspirit, then drop it
    if 337 <= revision <= 1200:
        source = os.path.join(destination, 'Holyspirit')
        if os.path.isdir(source):
            data = os.path.join(source, 'Data')
            if os.path.isdir(data):
                shutil.copytree(data, os.path.join(destination, 'Data'))
            _copy_files_by_suffix(source, destination, ('.txt', '.conf'))
            shutil.rmtree(source)

    # remove Holyspirit3 folder
    if 464 <= revision <= 1200:
        _remove_dir_if_present(os.path.join(destination, 'Holyspirit3'))

    # remove Holyspirit2 folder
    if 659 <= revision <= 1200:
        _remove_dir_if_present(os.path.join(destination, 'Holyspirit2'))

    # remove Launcher/release
    if 413 <= revision <= 1200:
        _remove_dir_if_present(os.path.join(destination, 'Launcher', 'release'))

    # delete all *.dll, *.exe in base folder (not recursive)
    if 3 <= revision <= 9:
        for name in os.listdir(destination):
            if name.endswith(('.exe', '.dll')):
                os.remove(os.path.join(destination, name))

    # delete "cross" folder
    if 42 <= revision <= 43:
        _remove_dir_if_present(os.path.join(destination, 'Cross'))

    # delete personal photos
    if 374 <= revision <= 1200:
        _remove_dir_if_present(os.path.join(destination, 'Photos'))

    # move empire of steam out into its own per-revision folder below empire_path
    if 1173 <= revision <= 1200:
        folder = os.path.join(destination, 'EmpireOfSteam')
        if os.path.isdir(folder):
            shutil.move(folder, os.path.join(empire_path, 'r{:04d}'.format(revision)))

    # holy editor cleanup
    if 1078 <= revision <= 1200:
        source = os.path.join(destination, 'HolyEditor')
        for name in ('bin', 'release'):
            _remove_dir_if_present(os.path.join(source, name))
        for name in ('moc.exe',):
            _remove_file_if_present(os.path.join(source, name))

    # source folder cleanup
    if 939 <= revision <= 1200:
        source = os.path.join(destination, 'Source')
        for name in ('HS',):
            _remove_dir_if_present(os.path.join(source, name))
        for name in ('HS.zip',):
            _remove_file_if_present(os.path.join(source, name))
def delete_global_excludes(folder, excludes=None):
    """
    Delete every file below *folder* whose name is on the exclude list.

    :param folder: root of the tree to clean, walked recursively
    :param excludes: container of exact file names to delete; when None the
        module-level ``global_exclude`` is used (the previous behavior)
    """
    names = global_exclude if excludes is None else excludes
    for dirpath, _dirnames, filenames in os.walk(folder):
        for name in filenames:
            # exact name match, no patterns
            if name in names:
                os.remove(os.path.join(dirpath, name))
def delete_empty_directories(folder):
    """
    Remove all empty directories below *folder*, bottom-up.

    A directory that only contained empty directories is removed as well, and
    so is *folder* itself if nothing is left in it. Directories above *folder*
    are never touched (``os.removedirs`` would keep pruning empty parents
    beyond the given root).

    :param folder: root of the tree to clean
    """
    for dirpath, _dirnames, _filenames in os.walk(folder, topdown=False):
        # list again instead of trusting the names reported by os.walk: children
        # may have been removed since the directory was scanned
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
def list_large_unwanted_files(folder, extensions=None, size_limit=None):
    """
    List files below *folder* that have an unwanted extension or are large.

    :param folder: root of the tree to inspect, walked recursively
    :param extensions: file name endings that mark a file as unwanted; when
        None the module-level ``unwanted_file_extensions`` is used
    :param size_limit: size in bytes above which a file is listed regardless of
        its name; when None the module-level ``large_file_limit`` is used
    :return: list of strings ``"<path relative to folder> <size in bytes>"``,
        one per listed file (each file appears at most once)
    """
    suffixes = tuple(unwanted_file_extensions if extensions is None else extensions)
    limit = large_file_limit if size_limit is None else size_limit

    output = []
    for dirpath, _dirnames, filenames in os.walk(folder):
        rel_path = os.path.relpath(dirpath, folder)
        for name in filenames:
            # query the size once, it is needed for the check and for the report
            size = os.path.getsize(os.path.join(dirpath, name))
            if name.endswith(suffixes) or size > limit:
                output.append(os.path.join(rel_path, name) + ' ' + str(size))
    return output
def checkout(revision_start, revision_end):
    """
    Export every revision of the inclusive range from SVN into its own folder.

    Each revision goes to ``<svn_checkout_path>/rNNNN``; a folder left over
    from an earlier run is deleted first. The process exits with status -1 as
    soon as less than 3e10 bytes (30 GB) are free at ``svn_checkout_path``.

    :param revision_start: first revision to export
    :param revision_end: last revision to export, must not be smaller than revision_start
    """
    assert revision_end >= revision_start

    min_free_bytes = 3e10  # 30 GB

    for revision in range(revision_start, revision_end + 1):

        # stop before the disc fills up
        free_bytes = psutil.disk_usage(svn_checkout_path).free
        if free_bytes < min_free_bytes:
            print('not enough free disc space, will exit')
            sys.exit(-1)

        print('checking out revision {}'.format(revision))

        # start from a clean target folder
        folder_name = 'r{:04d}'.format(revision)
        destination = os.path.join(svn_checkout_path, folder_name)
        if os.path.exists(destination):
            shutil.rmtree(destination)

        # "svn export" is used, not "svn checkout"
        command = ['svn', 'export', '-r{}'.format(revision), svn_url, destination]
        started = time.time()
        subprocess_run(command)
        elapsed = time.time() - started
        print('checkout took {:.1f}s'.format(elapsed))
def fix_revision(revision_start, revision_end):
    """
    Clean up the exported folders of every revision in the inclusive range.

    Per revision: apply the special treatment, delete globally excluded files,
    record the remaining unwanted/large files, delete empty directories and
    record the resulting folder size. The two records are written afterwards as
    ``unwanted_files.json`` and ``folder_sizes.json`` into ``svn_checkout_path``
    (fixed file names, so every call overwrites the previous reports).

    :param revision_start: first revision to fix
    :param revision_end: last revision to fix, must not be smaller than revision_start
    :raises RuntimeError: if the folder of a revision does not exist
    """
    assert revision_end >= revision_start

    unwanted_files = {}
    sizes = {}

    for revision in range(revision_start, revision_end + 1):
        print('fixing revision {}'.format(revision))

        # destination directory
        destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if not os.path.exists(destination):
            raise RuntimeError('cannot fix revision {}, directory does not exist'.format(revision))

        # special treatment
        special_treatment(destination, revision)

        # delete files from global exclude list
        delete_global_excludes(destination)

        # list unwanted files (before empty directories are pruned)
        unwanted_files[revision] = list_large_unwanted_files(destination)

        # delete empty directories
        delete_empty_directories(destination)

        # size of resulting folder
        sizes[revision] = folder_size(destination)

    # the file names contain no placeholder, so they are used literally
    text = json.dumps(unwanted_files, indent=1)
    write_text(os.path.join(svn_checkout_path, 'unwanted_files.json'), text)
    text = json.dumps(sizes, indent=1)
    write_text(os.path.join(svn_checkout_path, 'folder_sizes.json'), text)
def initialize_git():
    """
    Create the folder ``git_path``, make it the working directory and set up
    an empty Git repository with the committer identity in it.
    """
    os.mkdir(git_path)
    os.chdir(git_path)

    # init first, then the repository-local identity
    setup_commands = (
        ['git', 'init'],
        ['git', 'config', 'user.name', 'Trilarion'],
        ['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com'],
    )
    for command in setup_commands:
        subprocess_run(command)
def combine_log_messages(msg):
    """
    Join the lines of one log message into a single string.

    Lines are stripped first and dropped if nothing remains, so whitespace-only
    lines do not end up as blank lines in the result.

    :param msg: iterable of message lines
    :return: the non-empty, stripped lines joined with "\\r\\n"
    """
    stripped = (line.strip() for line in msg)
    return "\r\n".join(line for line in stripped if line)
def read_logs():
    """
    Fetch the complete SVN log and split it into one record per revision.

    The output of "svn log" is parsed by plain string splitting and is expected
    to use "\\r\\n" line endings. Regular expressions would probably have
    worked too.

    :return: tuple (records, unique_authors); records is a list of tuples
        (revision, author, date, message) sorted by revision, unique_authors is
        the sorted list of distinct author names
    """
    print('read all log messages')
    os.chdir(svn_checkout_path)
    started = time.time()
    raw = subprocess_run(['svn', 'log', svn_url], display=False)
    print('read log took {:.1f}s'.format(time.time() - started))

    # the entries are separated by a line of dashes
    chunks = raw.split('\r\n------------------------------------------------------------------------\r\n')
    # NOTE(review): this drops the final two chunks ("not the last one") - confirm no real entry is lost
    chunks = chunks[:-2]
    print('{} log entries'.format(len(chunks)))

    entries = [chunk.split('\r\n') for chunk in chunks]
    # the first entry still starts with an additional line of dashes
    entries[0] = entries[0][1:]

    records = []
    for lines in entries:
        # header line: r<revision> | <author> | <date> | ...
        header = lines[0].split('|')
        records.append((
            int(header[0][1:]),
            header[1].strip(),
            header[2].strip(),
            # the message starts on the third line of an entry
            combine_log_messages(lines[2:]),
        ))
    records.sort(key=lambda record: record[0])

    unique_authors = sorted({record[1] for record in records})
    return records, unique_authors
def gitify(revision_start, revision_end):
    """
    Turn every exported revision of the inclusive range into one Git commit.

    For each revision the content of ``git_path`` is replaced by the content of
    the revision folder and committed with the author, date and message taken
    from ``logs`` and ``authors``. Revisions that change nothing are skipped.

    :param revision_start: first revision to commit
    :param revision_end: last revision to commit, must not be smaller than revision_start
    :raises RuntimeError: if the folder of a revision does not exist
    """
    assert revision_end >= revision_start

    for revision in range(revision_start, revision_end + 1):
        print('adding revision {} to git'.format(revision))

        # the exported revision must be there
        svn_folder = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if not os.path.exists(svn_folder):
            raise RuntimeError('cannot add revision {}, directory does not exist'.format(revision))

        # empty the git working folder, retrying every second for as long as
        # a PermissionError is raised
        print('git clear path')
        cleared = False
        while not cleared:
            try:
                git_clear_path(git_path)
            except PermissionError as error:
                print(error)
                time.sleep(1)
            else:
                cleared = True

        # bring in the files of this revision
        print('copy to git')
        copy_tree(svn_folder, git_path)
        os.chdir(git_path)

        # stage everything (new, changed and deleted files)
        print('git add')
        subprocess_run(['git', 'add', '--all'])

        # an empty status means there is nothing to commit
        if not subprocess_run(['git', 'status', '--porcelain']):
            print(' nothing to commit for revision {}, will skip'.format(revision))
            continue

        print('git commit')
        entry = logs[revision]  # revision, author, date, message
        message = entry[3] + '\r\nsvn-revision: {}'.format(revision)
        print(' message "{}"'.format(message))
        # authors maps the svn author to (git name, git email)
        git_author = '{} <{}>'.format(*authors[entry[1]])
        cmd = [
            'git', 'commit', '--allow-empty-message',
            '--message={}'.format(message),
            '--author={}'.format(git_author),
            '--date={}'.format(entry[2]),
        ]
        print(' cmd: {}'.format(' '.join(cmd)))
        subprocess_run(cmd)
if __name__ == "__main__":

    # settings, read as module-level globals by the functions of this file
    global_exclude = ['Thumbs.db']
    unwanted_file_extensions = ['.exe', '.dll']
    large_file_limit = 1e6  # in bytes

    # base path is the folder "conversion" next to this file
    script_folder = os.path.abspath(os.path.dirname(__file__))
    base_path = os.path.join(script_folder, 'conversion')
    print('base path={}'.format(base_path))

    # derived paths: svn exports and the empire of steam side project
    svn_checkout_path = os.path.join(base_path, 'svn')
    empire_path = os.path.join(base_path, 'empire')
    for required_folder in (svn_checkout_path, empire_path):
        if not os.path.exists(required_folder):
            os.mkdir(required_folder)

    # the git repository is created on the first run only
    git_path = os.path.join(base_path, 'lechemindeladam')
    if not os.path.exists(git_path):
        initialize_git()

    # svn url
    svn_url = "https://svn.code.sf.net/p/lechemindeladam/code/"

    # read logs (done once, the results are stored as json files in the base path)
    # logs, authors = read_logs()
    # text = json.dumps(logs, indent=1)
    # write_text(os.path.join(base_path, 'logs.json'), text)
    # text = json.dumps(authors, indent=1)
    # write_text(os.path.join(base_path, 'authors.json'), text)

    # load the stored logs as dictionary revision -> (revision, author, date, message)
    stored_logs = json.loads(read_text(os.path.join(base_path, 'logs.json')))
    logs = {entry[0]: entry for entry in stored_logs}

    # should be a dictionary: svn-author: [git-author, git-email]
    authors = json.loads(read_text(os.path.join(base_path, 'authors.json')))

    # the steps (performed in batches, already finished ones are commented out)
    # checkout(1, 50)
    # fix_revision(1, 50)
    # gitify(4, 50)

    # checkout(51, 100)
    # checkout(101, 200)
    # fix_revision(51, 200)
    # gitify(51, 200)

    # checkout(201, 400)
    # fix_revision(201, 400)
    # gitify(201, 400)

    # checkout(401, 800)
    # fix_revision(401, 800)
    # gitify(401, 800)

    # checkout(801, 1200)
    # fix_revision(801, 1200)
    # gitify(801, 1200)

    checkout(1201, 1500)