""" The svn is too big to be automatically imported to git (and Github) because there are lots of large binary data components. Needs a manual solution. TODO use git lfs migrate later on the elements TODO instead of svn export for every revision, checkout and then update to revision (reduced bandwith) """ import json import sys import psutil from utils.utils import * def remove_folders(base_folder, names): if isinstance(names, str): names = (names,) for name in names: folder = os.path.join(base_folder, name) if os.path.isdir(folder): shutil.rmtree(folder) def remove_files(base_folder, names): if isinstance(names, str): names = (names,) for name in names: file = os.path.join(base_folder, name) if os.path.isfile(file): os.remove(file) def special_treatment(destination, revision): """ """ # copy content of trunk to base if 2270 <= revision <= 2420: source = os.path.join(destination, 'trunk') if os.path.isdir(source): copy_tree(source, destination) shutil.rmtree(source) # copy all important files from Holyspirit/Holyspirit and delete it if 5 <= revision <= 330: source = os.path.join(destination, 'Holyspirit', 'Holyspirit') if os.path.isdir(source): if revision >= 8: shutil.copytree(os.path.join(source, 'Data'), os.path.join(destination, 'Data')) files = [x for x in os.listdir(source) if x.endswith('.txt')] for file in files: shutil.copy(os.path.join(source, file), destination) # remove it shutil.rmtree(os.path.join(destination, 'Holyspirit')) # copy all important files from Holyspirit and delete it if 337 <= revision <= 2268: source = os.path.join(destination, 'Holyspirit') if os.path.isdir(source): data = os.path.join(source, 'Data') if os.path.isdir(data): # shutil.copytree(data, os.path.join(destination, 'Data')) shutil.move(data, destination) target = os.path.join(destination, 'Meta') if not os.path.isdir(target): os.mkdir(target) files = [x for x in os.listdir(source) if x.endswith('.txt') or x.endswith('.conf') or x.endswith('.ini')] for file in files: shutil.move(os.path.join(source, file), target) # remove it shutil.rmtree(source) # copy data folder vom HolySpiritJE and delete it if 2012 <= revision <= 2269: source = os.path.join(destination, 'HolyspiritJE') if os.path.isdir(source): data = os.path.join(source, 'Data') if os.path.isdir(data): shutil.move(data, os.path.join(destination, 'DataJE')) target = os.path.join(destination, 'MetaJE') if not os.path.isdir(target): os.mkdir(target) files = [x for x in os.listdir(source) if x.endswith('.txt') or x.endswith('.conf') or x.endswith('.ini')] for file in files: shutil.move(os.path.join(source, file), target) # remove it shutil.rmtree(source) # remove Holyspirit3 folder if 464 <= revision <= 2268: remove_folders(destination, 'Holyspirit3') # remove Holyspirit2 folder if 659 <= revision <= 2268: remove_folders(destination, 'Holyspirit2') # remove Launcher/release if 413 <= revision <= 2420: source = os.path.join(destination, 'Launcher') remove_folders(source, ('bin', 'debug', 'release', 'obj')) # delete all *.dll, *.exe in base folder if 3 <= revision <= 9: files = os.listdir(destination) for file in files: if file.endswith('.exe') or file.endswith('.dll'): os.remove(os.path.join(destination, file)) # delete "cross" folder if 42 <= revision <= 43: remove_folders(destination, 'Cross') # delete personal photos if 374 <= revision <= 2267: remove_folders(destination, 'Photos') if 2268 <= revision <= 2420: source = os.path.join(destination, 'Media') remove_folders(source, 'Photos') # move empire of steam out if 1173 <= revision <= 2420: folder = os.path.join(destination, 'EmpireOfSteam') if os.path.isdir(folder): # move to empire path empire = os.path.join(empire_path, 'r{:04d}'.format(revision)) shutil.move(folder, empire) # holy editor cleanup if 1078 <= revision <= 2420: source = os.path.join(destination, 'HolyEditor') remove_folders(source, ('bin', 'release', 'debug', 'obj')) remove_files(source, 'moc.exe') # source folder cleanup if 939 <= revision <= 2420: source = os.path.join(destination, 'Source') remove_folders(source, 'HS') remove_files(source, 'HS.zip') # sourceM folder cleanup if 2110 <= revision <= 2270: source = os.path.join(destination, 'SourceM') remove_folders(source, 'HS') # sourceNewApi cleanup if 2261 <= revision <= 2269: source = os.path.join(destination, 'SourceNewApi') remove_folders(source, 'HS') # Autres folder cleanup if 1272 <= revision <= 2267: source = os.path.join(destination, 'Autres') remove_folders(source, ('conf', 'db', 'hooks', 'locks')) remove_files(source, ('format', 'maj.php')) # Media/Other folder cleanup if 2268 <= revision <= 2420: source = os.path.join(destination, 'Media', 'Other') remove_files(source, ('format', 'maj.php')) # remove Holyspirit-Demo if 1668 <= revision <= 2268: remove_folders(destination, 'Holyspirit_Demo') # remove Debug.rar if 1950 <= revision <= 2420: remove_files(destination, 'Debug.rar') # remove 3dparty folder if 2273 <= revision <= 2420: remove_folders(destination, '3dparty') # branches cleanup if 2270 <= revision <= 2420: remove_folders(destination, 'branches') def delete_global_excludes(folder): """ """ for dirpath, dirnames, filenames in os.walk(folder): rel_path = os.path.relpath(dirpath, folder) for file in filenames: if file in global_exclude: os.remove(os.path.join(dirpath, file)) def delete_empty_directories(folder): """ """ for dirpath, dirnames, filenames in os.walk(folder, topdown=False): rel_path = os.path.relpath(dirpath, folder) if not filenames and not dirnames: os.removedirs(dirpath) def list_large_unwanted_files(folder): """ """ output = [] for dirpath, dirnames, filenames in os.walk(folder): rel_path = os.path.relpath(dirpath, folder) for file in filenames: file_path = os.path.join(dirpath, file) already_listed = False for extension in unwanted_file_extensions: if file.endswith(extension): output.append(os.path.join(rel_path, file) + ' ' + str(os.path.getsize(file_path))) already_listed = True break if not already_listed and os.path.getsize(file_path) > large_file_limit: output.append(os.path.join(rel_path, file) + ' ' + str(os.path.getsize(file_path))) return output def checkout(revision_start, revision_end=None): """ """ if not revision_end: revision_end = revision_start assert revision_end >= revision_start for revision in range(revision_start, revision_end + 1): # check free disc space if psutil.disk_usage(svn_checkout_path).free < 3e10: # 1e10 = 10 GiB print('not enough free disc space, will exit') sys.exit(-1) print('checking out revision {}'.format(revision)) # create destination directory destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision)) if os.path.exists(destination): shutil.rmtree(destination) # checkout start_time = time.time() # sometimes checkout fails for reasons like "svn: E000024: Can't open file '/svn/p/lechemindeladam/code/db/revs/1865': Too many open files", we try again and again in these cases while True: try: subprocess_run(['svn', 'export', '-r{}'.format(revision), svn_url, destination]) break except: print('problem with export, will try again') if os.path.isdir(destination): shutil.rmtree(destination) print('checkout took {:.1f}s'.format(time.time() - start_time)) def fix_revision(revision_start, revision_end=None): """ """ if not revision_end: revision_end = revision_start assert revision_end >= revision_start unwanted_files = {} sizes = {} for revision in range(revision_start, revision_end + 1): print('fixing revision {}'.format(revision)) # destination directory destination = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision)) if not os.path.exists(destination): raise RuntimeError('cannot fix revision {}, directory does not exist'.format(revision)) # special treatment special_treatment(destination, revision) # delete files from global exclude list delete_global_excludes(destination) # list unwanted files unwanted_files[revision] = list_large_unwanted_files(destination) # delete empty directories delete_empty_directories(destination) # size of resulting folder sizes[revision] = folder_size(destination) text = json.dumps(unwanted_files, indent=1) write_text(os.path.join(svn_checkout_path, 'unwanted_files.json'.format(revision)), text) text = json.dumps(sizes, indent=1) write_text(os.path.join(svn_checkout_path, 'folder_sizes.json'.format(revision)), text) def initialize_git(): """ """ # git init os.mkdir(git_path) os.chdir(git_path) subprocess_run(['git', 'init']) subprocess_run(['git', 'config', 'user.name', 'Trilarion']) subprocess_run(['git', 'config', 'user.email', 'Trilarion@users.noreply.gitlab.com']) def combine_log_messages(msg): """ """ # throw out all empty ones msg = [x.strip() for x in msg if x] # combine again msg = "\r\n".join(msg) return msg def read_logs(): """ Probably regular expressions would have worked too. """ # read log print('read all log messages') os.chdir(svn_checkout_path) start_time = time.time() log = subprocess_run(['svn', 'log', svn_url], display=False) print('read log took {:.1f}s'.format(time.time() - start_time)) # process log log = log.split('\r\n------------------------------------------------------------------------\r\n') # not the last one log = log[:-2] print('{} log entries'.format(len(log))) # process log entries log = [x.split('\r\n') for x in log] # the first one still contains an additional "---" elements log[0] = log[0][1:] # split the first line info = [x[0].split('|') for x in log] # get the revision revision = [int(x[0][1:]) for x in info] author = [x[1].strip() for x in info] unique_authors = list(set(author)) unique_authors.sort() date = [x[2].strip() for x in info] msg = [combine_log_messages(x[2:]) for x in log] logs = list(zip(revision, author, date, msg)) logs.sort(key=lambda x: x[0]) return logs, unique_authors def gitify(revision_start, revision_end): """ """ assert revision_end >= revision_start for revision in range(revision_start, revision_end + 1): print('adding revision {} to git'.format(revision)) # svn folder svn_folder = os.path.join(svn_checkout_path, 'r{:04d}'.format(revision)) if not os.path.exists(svn_folder): raise RuntimeError('cannot add revision {}, directory does not exist'.format(revision)) # clear git path print('git clear path') while True: try: git_clear_path(git_path) break except PermissionError as e: print(e) # wait a bit time.sleep(1) # copy source files to git path print('copy to git') copy_tree(svn_folder, git_path) os.chdir(git_path) # update the git index (add unstaged, remove deleted, ...) print('git add') subprocess_run(['git', 'add', '--all']) # check if there is something to commit status = subprocess_run(['git', 'status', '--porcelain']) if not status: print(' nothing to commit for revision {}, will skip'.format(revision)) continue # perform the commit print('git commit') log = logs[revision] # revision, author, date, message message = log[3] + '\r\nsvn-revision: {}'.format(revision) print(' message "{}"'.format(message)) author = authors[log[1]] author = '{} <{}>'.format(*author) cmd = ['git', 'commit', '--allow-empty-message', '--message={}'.format(message), '--author={}'.format(author), '--date={}'.format(log[2])] print(' cmd: {}'.format(' '.join(cmd))) subprocess_run(cmd) if __name__ == "__main__": global_exclude = ['Thumbs.db'] unwanted_file_extensions = ['.exe', '.dll'] large_file_limit = 1e6 # in bytes # base path is the directory containing this file base_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'conversion') print('base path={}'.format(base_path)) # derived paths svn_checkout_path = os.path.join(base_path, 'svn') if not os.path.exists(svn_checkout_path): os.mkdir(svn_checkout_path) empire_path = os.path.join(base_path, 'empire') # empire of steam side project if not os.path.exists(empire_path): os.mkdir(empire_path) git_path = os.path.join(base_path, 'lechemindeladam') if not os.path.exists(git_path): initialize_git() # svn url svn_url = "https://svn.code.sf.net/p/lechemindeladam/code/" # read logs # logs, authors = read_logs() # text = json.dumps(logs, indent=1) # write_text(os.path.join(base_path, 'logs.json'), text) # text = json.dumps(authors, indent=1) # write_text(os.path.join(base_path, 'authors.json'), text) text = read_text(os.path.join(base_path, 'logs.json')) logs = json.loads(text) logs = {x[0]: x for x in logs} # dictionary text = read_text(os.path.join(base_path, 'authors.json')) authors = json.loads(text) # should be a dictionary: svn-author: [git-author, git-email] # the steps # checkout(1, 50) # fix_revision(1, 50) # gitify(4, 50) # checkout(51, 100) # checkout(101, 200) # fix_revision(51, 200) # gitify(51, 200) # checkout(201, 400) # fix_revision(201, 400) # gitify(201, 400) # checkout(401, 800) # fix_revision(401, 800) # gitify(401, 800) # checkout(801, 1200) # fix_revision(801, 1200) # gitify(801, 1200) # checkout(1201, 1470) # fix_revision(1201, 1470) # gitify(1201, 1470) # checkout(1471, 1700) # fix_revision(1471, 1700) # gitify(1471, 1700) # checkout(1701, 1900) # fix_revision(1701, 1900) # gitify(1701, 1900) # checkout(1901, 2140) # fix_revision(1901, 2140) # gitify(1901, 2140) # checkout(2141, 2388) # fix_revision(2141, 2388) # gitify(2141, 2388) # checkout(2389, 2420) # fix_revision(2389, 2420) # gitify(2389, 2420) # run the following commands in the git bash # git config credential.useHttpPath true # git lfs install # git lfs migrate import --include-ref=master --include="Zombie_paysan.rs.hs,Witch_monster.rs.hs,WanderingStones.rs.hs,TwoWeapons.rs.hs,TwoHands.rs.hs,TwoHand.rs.hs,Reaper.rs.hs,Peasant_crossbow.rs.hs,Peasant_club.rs.hs,OneHand.rs.hs,Offspring_champion.rs.hs,Mimic.rs.hs,LordSkeleton.rs.hs,Goule.rs.hs,ErrantRoche.rs.hs,DemonicPriest0.rs.hs,DemonicPriest.rs.hs,Brute.rs.hs,20575__dobroide__20060706.night.forest02.wav,31464__offtheline__Morning_Sounds.wav,47989__Luftrum__forestsurroundings.wav,ambiance.wav,Catacombs0.wav,Pluie.wav,Taverne fusion.png,Abbey.ogg,AgrarianLands0.ogg,AgrarianLands1.ogg,Boss0.ogg,Catacombs0.ogg,Catacombs1.ogg,DarkForest.ogg,Forest_ambient0.ogg,Johannes.ogg,OWC.ogg" # then add remote and push (done)