503 lines
16 KiB
Python
503 lines
16 KiB
Python
"""
|
|
The SVN repository is too big to be imported into Git (and GitHub) automatically because it contains lots of large binary data components.
A manual solution is needed.

TODO use git lfs migrate later on the elements
TODO instead of svn export for every revision, checkout and then update to revision (reduced bandwidth)
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import psutil
|
|
|
|
from utils.utils import *
|
|
|
|
|
|
def remove_folders(base_folder, names):
    """
    Delete the named sub-folders of base_folder including everything inside them.

    names is either a single folder name (str) or an iterable of folder names. Entries that do not
    point to an existing directory are skipped.
    """
    candidates = (names,) if isinstance(names, str) else names
    for candidate in candidates:
        path = os.path.join(base_folder, candidate)
        if not os.path.isdir(path):
            continue
        shutil.rmtree(path)
|
|
|
|
|
|
def remove_files(base_folder, names):
    """
    Delete the named files located directly in base_folder.

    names is either a single file name (str) or an iterable of file names. Entries that do not
    point to an existing regular file are skipped.
    """
    candidates = (names,) if isinstance(names, str) else names
    for candidate in candidates:
        path = os.path.join(base_folder, candidate)
        if not os.path.isfile(path):
            continue
        os.remove(path)
|
|
|
|
|
|
def special_treatment(destination, revision):
    """
    Apply the revision-dependent clean-up rules to the exported revision folder.

    destination is the folder holding the export of the given revision, revision is its number.
    Every rule is guarded by an inclusive revision range and the rules run in the order in which
    they are written. The folder is modified in place and nothing is returned.
    """

    def between(first, last):
        # inclusive range test against the revision that is being processed
        return first <= revision <= last

    meta_suffixes = ('.txt', '.conf', '.ini')

    # merge the content of trunk into the base folder
    if between(2270, 2420):
        trunk = os.path.join(destination, 'trunk')
        if os.path.isdir(trunk):
            copy_tree(trunk, destination)
            shutil.rmtree(trunk)

    # keep the important files of Holyspirit/Holyspirit, then drop the whole Holyspirit folder
    if between(5, 330):
        nested = os.path.join(destination, 'Holyspirit', 'Holyspirit')
        if os.path.isdir(nested):
            if revision >= 8:
                shutil.copytree(os.path.join(nested, 'Data'), os.path.join(destination, 'Data'))
            for name in os.listdir(nested):
                if name.endswith('.txt'):
                    shutil.copy(os.path.join(nested, name), destination)
            # remove it
            shutil.rmtree(os.path.join(destination, 'Holyspirit'))

    # keep the important files of Holyspirit (Data and the meta files), then drop the folder
    if between(337, 2268):
        holyspirit = os.path.join(destination, 'Holyspirit')
        if os.path.isdir(holyspirit):
            data_folder = os.path.join(holyspirit, 'Data')
            if os.path.isdir(data_folder):
                shutil.move(data_folder, destination)
            meta_folder = os.path.join(destination, 'Meta')
            if not os.path.isdir(meta_folder):
                os.mkdir(meta_folder)
            for name in os.listdir(holyspirit):
                if name.endswith(meta_suffixes):
                    shutil.move(os.path.join(holyspirit, name), meta_folder)
            # remove it
            shutil.rmtree(holyspirit)

    # keep the data folder and the meta files of HolyspiritJE, then drop the folder
    if between(2012, 2269):
        holyspirit_je = os.path.join(destination, 'HolyspiritJE')
        if os.path.isdir(holyspirit_je):
            data_folder = os.path.join(holyspirit_je, 'Data')
            if os.path.isdir(data_folder):
                shutil.move(data_folder, os.path.join(destination, 'DataJE'))
            meta_folder = os.path.join(destination, 'MetaJE')
            if not os.path.isdir(meta_folder):
                os.mkdir(meta_folder)
            for name in os.listdir(holyspirit_je):
                if name.endswith(meta_suffixes):
                    shutil.move(os.path.join(holyspirit_je, name), meta_folder)
            # remove it
            shutil.rmtree(holyspirit_je)

    # remove Holyspirit3 folder
    if between(464, 2268):
        remove_folders(destination, 'Holyspirit3')

    # remove Holyspirit2 folder
    if between(659, 2268):
        remove_folders(destination, 'Holyspirit2')

    # remove build output folders of the Launcher
    if between(413, 2420):
        launcher = os.path.join(destination, 'Launcher')
        remove_folders(launcher, ('bin', 'debug', 'release', 'obj'))

    # delete all *.dll, *.exe in base folder
    if between(3, 9):
        for name in os.listdir(destination):
            if name.endswith(('.exe', '.dll')):
                os.remove(os.path.join(destination, name))

    # delete "cross" folder
    if between(42, 43):
        remove_folders(destination, 'Cross')

    # delete personal photos
    if between(374, 2267):
        remove_folders(destination, 'Photos')
    if between(2268, 2420):
        media = os.path.join(destination, 'Media')
        remove_folders(media, 'Photos')

    # move empire of steam out (into its own per-revision folder below empire_path)
    if between(1173, 2420):
        empire_source = os.path.join(destination, 'EmpireOfSteam')
        if os.path.isdir(empire_source):
            empire_target = os.path.join(empire_path, 'r{:04d}'.format(revision))
            shutil.move(empire_source, empire_target)

    # holy editor cleanup
    if between(1078, 2420):
        editor = os.path.join(destination, 'HolyEditor')
        remove_folders(editor, ('bin', 'release', 'debug', 'obj'))
        remove_files(editor, 'moc.exe')

    # source folder cleanup
    if between(939, 2420):
        sources = os.path.join(destination, 'Source')
        remove_folders(sources, 'HS')
        remove_files(sources, 'HS.zip')

    # sourceM folder cleanup
    if between(2110, 2270):
        sources_m = os.path.join(destination, 'SourceM')
        remove_folders(sources_m, 'HS')

    # sourceNewApi cleanup
    if between(2261, 2269):
        sources_new_api = os.path.join(destination, 'SourceNewApi')
        remove_folders(sources_new_api, 'HS')

    # Autres folder cleanup
    if between(1272, 2267):
        autres = os.path.join(destination, 'Autres')
        remove_folders(autres, ('conf', 'db', 'hooks', 'locks'))
        remove_files(autres, ('format', 'maj.php'))

    # Media/Other folder cleanup
    if between(2268, 2420):
        media_other = os.path.join(destination, 'Media', 'Other')
        remove_files(media_other, ('format', 'maj.php'))

    # remove Holyspirit_Demo
    if between(1668, 2268):
        remove_folders(destination, 'Holyspirit_Demo')

    # remove Debug.rar
    if between(1950, 2420):
        remove_files(destination, 'Debug.rar')

    # remove 3dparty folder
    if between(2273, 2420):
        remove_folders(destination, '3dparty')

    # branches cleanup
    if between(2270, 2420):
        remove_folders(destination, 'branches')
|
|
|
|
|
|
def delete_global_excludes(folder):
    """
    Delete every file below folder (recursively) whose name is listed in global_exclude.

    global_exclude is a module-level collection of plain file names (no paths); the match is an
    exact comparison with the file name. Nothing is returned.
    """
    for dirpath, _dirnames, filenames in os.walk(folder):
        for file in filenames:
            if file in global_exclude:
                os.remove(os.path.join(dirpath, file))
|
|
|
|
|
|
def delete_empty_directories(folder):
    """
    Remove all directories below folder that contain no files, directly or indirectly.

    folder itself is removed as well if it ends up empty, but directories above folder are never
    touched. (os.removedirs would continue to prune empty ancestors beyond folder, which could
    delete the directory that holds the exported revisions.)
    """
    # bottom-up order guarantees that children are handled before their parent
    for dirpath, _dirnames, _filenames in os.walk(folder, topdown=False):
        # the lists delivered by os.walk were collected before the children got removed,
        # therefore the current content is read again
        if not os.listdir(dirpath):
            os.rmdir(dirpath)
|
|
|
|
|
|
def list_large_unwanted_files(folder):
    """
    Collect the files below folder that are unwanted or large.

    A file is reported if its name ends with one of the entries of unwanted_file_extensions or if
    its size in bytes exceeds large_file_limit (both are module-level settings). Returns a list of
    strings, each consisting of the path relative to folder, a space and the file size in bytes.
    """
    report = []
    for current_dir, _subdirs, names in os.walk(folder):
        relative_dir = os.path.relpath(current_dir, folder)
        for name in names:
            absolute = os.path.join(current_dir, name)
            unwanted = any(name.endswith(extension) for extension in unwanted_file_extensions)
            # the size is only compared with the limit if the extension did not match already
            if unwanted or os.path.getsize(absolute) > large_file_limit:
                report.append(os.path.join(relative_dir, name) + ' ' + str(os.path.getsize(absolute)))
    return report
|
|
|
|
|
|
def checkout(revision_start, revision_end=None):
    """
    Export a range of SVN revisions, each into its own folder r<revision> below svn_checkout_path.

    revision_start and revision_end are inclusive; without revision_end only revision_start is
    exported. An already existing folder of a revision is deleted before the export. The script
    exits if less than 3e10 bytes are free on the disc holding svn_checkout_path. A failed export
    is repeated until it succeeds.
    """
    if not revision_end:
        revision_end = revision_start

    assert revision_end >= revision_start

    for revision in range(revision_start, revision_end + 1):
        # check free disc space
        if psutil.disk_usage(svn_checkout_path).free < 3e10:  # 3e10 bytes = 30 GB
            print('not enough free disc space, will exit')
            sys.exit(-1)

        print('checking out revision {}'.format(revision))

        # create destination directory
        destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if os.path.exists(destination):
            shutil.rmtree(destination)

        # checkout
        start_time = time.time()
        # sometimes checkout fails for reasons like "svn: E000024: Can't open file
        # '/svn/p/lechemindeladam/code/db/revs/1865': Too many open files", we try again and again
        # in these cases
        while True:
            try:
                subprocess_run(['svn', 'export', '-r{}'.format(revision), svn_url, destination])
                break
            except Exception:
                # only regular errors are retried, KeyboardInterrupt and SystemExit must still be
                # able to stop this otherwise endless loop
                # NOTE(review): assumes subprocess_run signals failure with an Exception subclass
                print('problem with export, will try again')
                if os.path.isdir(destination):
                    shutil.rmtree(destination)

        print('checkout took {:.1f}s'.format(time.time() - start_time))
|
|
|
|
|
|
def fix_revision(revision_start, revision_end=None):
    """
    Clean up the exported folders of a range of revisions.

    revision_start and revision_end are inclusive; without revision_end only revision_start is
    processed. For every revision the special treatment is applied, globally excluded files are
    deleted, large/unwanted files are listed and empty directories are removed. Afterwards the
    lists of unwanted files and the resulting folder sizes (both keyed by revision) are written to
    unwanted_files.json and folder_sizes.json in svn_checkout_path, replacing existing files.

    Raises RuntimeError if the folder of a revision does not exist.
    """
    if not revision_end:
        revision_end = revision_start
    assert revision_end >= revision_start

    unwanted_files = {}
    sizes = {}

    for revision in range(revision_start, revision_end + 1):
        print('fixing revision {}'.format(revision))

        # destination directory
        destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if not os.path.exists(destination):
            raise RuntimeError('cannot fix revision {}, directory does not exist'.format(revision))

        # special treatment
        special_treatment(destination, revision)

        # delete files from global exclude list
        delete_global_excludes(destination)

        # list unwanted files
        unwanted_files[revision] = list_large_unwanted_files(destination)

        # delete empty directories
        delete_empty_directories(destination)

        # size of resulting folder
        sizes[revision] = folder_size(destination)

    # the file names are constant, they do not depend on the revision
    text = json.dumps(unwanted_files, indent=1)
    write_text(os.path.join(svn_checkout_path, 'unwanted_files.json'), text)
    text = json.dumps(sizes, indent=1)
    write_text(os.path.join(svn_checkout_path, 'folder_sizes.json'), text)
|
|
|
|
|
|
def initialize_git():
    """
    Create the folder git_path, make it the working directory and set up a git repository in it.

    Besides "git init" the user name and email of the repository are configured.
    """
    os.mkdir(git_path)
    os.chdir(git_path)
    # git init followed by the identity used for the repository
    git_arguments = (
        ['init'],
        ['config', 'user.name', 'Trilarion'],
        ['config', 'user.email', 'Trilarion@users.noreply.gitlab.com'],
    )
    for arguments in git_arguments:
        subprocess_run(['git'] + arguments)
|
|
|
|
|
|
def combine_log_messages(msg):
    """
    Join the lines of a log message into a single string.

    msg is an iterable of lines. Every line is stripped of surrounding whitespace, lines that are
    empty afterwards (including lines consisting of whitespace only) are thrown out and the
    remaining lines are joined with "\\r\\n". Returns the combined string, which is empty if no
    line remains.
    """
    # strip first, filter afterwards, otherwise whitespace-only lines survive as empty lines
    stripped = (line.strip() for line in msg if line)
    return "\r\n".join(line for line in stripped if line)
|
|
|
|
|
|
def read_logs():
    """
    Read the complete SVN log of svn_url and split it into its entries.

    Returns a tuple (logs, unique_authors): logs is a list of (revision, author, date, message)
    tuples sorted by revision, unique_authors is the sorted list of all author names.

    Probably regular expressions would have worked too.
    """
    print('read all log messages')
    os.chdir(svn_checkout_path)
    started = time.time()
    raw_log = subprocess_run(['svn', 'log', svn_url], display=False)
    print('read log took {:.1f}s'.format(time.time() - started))

    # cut the output at the separator lines, the last two parts are not used
    separator = '\r\n------------------------------------------------------------------------\r\n'
    entries = raw_log.split(separator)[:-2]
    print('{} log entries'.format(len(entries)))

    # every entry becomes a list of lines
    entries = [entry.split('\r\n') for entry in entries]

    # the first one still contains an additional "---" element
    entries[0] = entries[0][1:]

    # the first line of an entry holds the fields separated by "|"
    headers = [lines[0].split('|') for lines in entries]

    # first field is the revision with a leading character that is skipped
    revisions = [int(fields[0][1:]) for fields in headers]

    authors_per_entry = [fields[1].strip() for fields in headers]
    unique_authors = sorted(set(authors_per_entry))

    dates = [fields[2].strip() for fields in headers]
    messages = [combine_log_messages(lines[2:]) for lines in entries]

    logs = sorted(zip(revisions, authors_per_entry, dates, messages), key=lambda entry: entry[0])
    return logs, unique_authors
|
|
|
|
|
|
def gitify(revision_start, revision_end):
    """
    Commit the cleaned-up folders of a range of revisions (inclusive) to the git repository.

    For every revision the git working tree is cleared, the content of the revision folder is
    copied into it and everything is staged. If git reports no changes the revision is skipped,
    otherwise a commit is created with message, author and date taken from logs and authors.

    Raises RuntimeError if the folder of a revision does not exist.
    """
    assert revision_end >= revision_start

    for revision in range(revision_start, revision_end + 1):
        print('adding revision {} to git'.format(revision))

        # folder holding the export of this revision
        revision_folder = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision))
        if not os.path.exists(revision_folder):
            raise RuntimeError('cannot add revision {}, directory does not exist'.format(revision))

        # clear git path, repeated after a short pause as long as a PermissionError occurs
        print('git clear path')
        while True:
            try:
                git_clear_path(git_path)
            except PermissionError as error:
                print(error)
                time.sleep(1)
            else:
                break

        # copy source files to git path
        print('copy to git')
        copy_tree(revision_folder, git_path)

        os.chdir(git_path)

        # update the git index (add unstaged, remove deleted, ...)
        print('git add')
        subprocess_run(['git', 'add', '--all'])

        # check if there is something to commit
        changes = subprocess_run(['git', 'status', '--porcelain'])
        if not changes:
            print(' nothing to commit for revision {}, will skip'.format(revision))
            continue

        # perform the commit
        print('git commit')
        entry = logs[revision]  # revision, author, date, message
        commit_message = entry[3] + '\r\nsvn-revision: {}'.format(revision)
        print(' message "{}"'.format(commit_message))
        git_author = '{} <{}>'.format(*authors[entry[1]])
        command = [
            'git',
            'commit',
            '--allow-empty-message',
            '--message={}'.format(commit_message),
            '--author={}'.format(git_author),
            '--date={}'.format(entry[2]),
        ]
        print(' cmd: {}'.format(' '.join(command)))
        subprocess_run(command)
|
|
|
|
|
|
if __name__ == "__main__":

    # settings read by the functions above
    global_exclude = ['Thumbs.db']
    unwanted_file_extensions = ['.exe', '.dll']
    large_file_limit = 1e6  # in bytes

    # base path is the folder "conversion" inside the directory containing this file
    base_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'conversion')
    print('base path={}'.format(base_path))

    # derived paths
    svn_checkout_path = os.path.join(base_path, 'svn')
    empire_path = os.path.join(base_path, 'empire')  # empire of steam side project
    git_path = os.path.join(base_path, 'lechemindeladam')

    # create whatever is missing
    if not os.path.exists(svn_checkout_path):
        os.mkdir(svn_checkout_path)
    if not os.path.exists(empire_path):
        os.mkdir(empire_path)
    if not os.path.exists(git_path):
        initialize_git()

    # svn url
    svn_url = "https://svn.code.sf.net/p/lechemindeladam/code/"

    # read logs (only needed once, the result is stored in logs.json and authors.json)
    # logs, authors = read_logs()
    # text = json.dumps(logs, indent=1)
    # write_text(os.path.join(base_path, 'logs.json'), text)
    # text = json.dumps(authors, indent=1)
    # write_text(os.path.join(base_path, 'authors.json'), text)

    # load the stored logs and index them by revision
    text = read_text(os.path.join(base_path, 'logs.json'))
    logs = {entry[0]: entry for entry in json.loads(text)}  # dictionary

    text = read_text(os.path.join(base_path, 'authors.json'))
    authors = json.loads(text)  # should be a dictionary: svn-author: [git-author, git-email]

    # the steps

    # checkout(1, 50)
    # fix_revision(1, 50)
    # gitify(4, 50)

    # checkout(51, 100)
    # checkout(101, 200)

    # fix_revision(51, 200)

    # gitify(51, 200)

    # checkout(201, 400)
    # fix_revision(201, 400)
    # gitify(201, 400)

    # checkout(401, 800)
    # fix_revision(401, 800)
    # gitify(401, 800)

    # checkout(801, 1200)
    # fix_revision(801, 1200)
    # gitify(801, 1200)

    # checkout(1201, 1470)
    # fix_revision(1201, 1470)
    # gitify(1201, 1470)

    # checkout(1471, 1700)
    # fix_revision(1471, 1700)
    # gitify(1471, 1700)

    # checkout(1701, 1900)
    # fix_revision(1701, 1900)
    # gitify(1701, 1900)

    # checkout(1901, 2140)
    # fix_revision(1901, 2140)
    # gitify(1901, 2140)

    # checkout(2141, 2388)
    # fix_revision(2141, 2388)
    # gitify(2141, 2388)

    # checkout(2389, 2420)
    # fix_revision(2389, 2420)
    # gitify(2389, 2420)

    # run the following commands in the git bash
    # git config credential.useHttpPath true
    # git lfs install
    # git lfs migrate import --include-ref=master --include="Zombie_paysan.rs.hs,Witch_monster.rs.hs,WanderingStones.rs.hs,TwoWeapons.rs.hs,TwoHands.rs.hs,TwoHand.rs.hs,Reaper.rs.hs,Peasant_crossbow.rs.hs,Peasant_club.rs.hs,OneHand.rs.hs,Offspring_champion.rs.hs,Mimic.rs.hs,LordSkeleton.rs.hs,Goule.rs.hs,ErrantRoche.rs.hs,DemonicPriest0.rs.hs,DemonicPriest.rs.hs,Brute.rs.hs,20575__dobroide__20060706.night.forest02.wav,31464__offtheline__Morning_Sounds.wav,47989__Luftrum__forestsurroundings.wav,ambiance.wav,Catacombs0.wav,Pluie.wav,Taverne fusion.png,Abbey.ogg,AgrarianLands0.ogg,AgrarianLands1.ogg,Boss0.ogg,Catacombs0.ogg,Catacombs1.ogg,DarkForest.ogg,Forest_ambient0.ogg,Johannes.ogg,OWC.ogg"

    # then add remote and push (done)
|