""" Runs a series of maintenance operations on the collection of entry files, updating the table of content files for each category as well as creating a statistics file. Counts the number of records each sub-folder and updates the overview. Sorts the entries in the contents files of each sub folder alphabetically. """ # TODO check for within an entry for similar dev names # TODO wikipedia (media search) for popular ones at least # TODO google search (for homepages or media entries) for popular ones at least import os import re import datetime import json import textwrap from utils import osg, osg_ui, osg_parse, utils, constants as c import requests def check_validity_backlog(): import requests # read backlog and split file = os.path.join(c.root_path, 'code', 'backlog.txt') text = utils.read_text(file) urls = text.split('\n') urls = [x.split(' ')[0] for x in urls] headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'} for url in urls: try: r = requests.get(url, headers=headers, timeout=5) except Exception as e: print('{} gave error: {}'.format(url, e)) else: if r.status_code != requests.codes.ok: print('{} returned status code: {}'.format(url, r.status_code)) if r.is_redirect or r.history: print('{} redirected to {}, {}'.format(url, r.url, r.history)) def create_toc(title, file, entries): """ """ # file path toc_file = os.path.join(c.tocs_path, file) # header line text = '[comment]: # (autogenerated content, do not edit)\n# {}\n\n'.format(title) # assemble rows rows = [] for entry in entries: info = entry['Code language'] + entry['Code license'] + entry['State'] info = [x.value for x in info] rows.append('- **[{}]({})** ({})'.format(entry['Title'], '../' + entry['File'], ', '.join(info))) # sort rows (by title) rows.sort(key=str.casefold) # add to text text += '\n'.join(rows) # write to toc file utils.write_text(toc_file, text) def sort_text_file(file, name): """ Reads a text file, splits in lines, removes duplicates, sort, writes back. """ text = utils.read_text(file) text = text.split('\n') text = sorted(list(set(text)), key=str.casefold) print('{} contains {} items'.format(name, len(text))) text = '\n'.join(text) utils.write_text(file, text) class EntriesMaintainer: def __init__(self): self.entries = None def read_entries(self): self.entries = osg.read_entries() print('{} entries read'.format(len(self.entries))) def write_entries(self): if not self.entries: print('entries not yet loaded') return osg.write_entries(self.entries) print('entries written') def check_template_leftovers(self): """ Checks for template leftovers. Should be run only occasionally. """ # load template and get all lines text = utils.read_text(os.path.join(c.root_path, 'template.md')) text = text.split('\n') check_strings = [x for x in text if x and not x.startswith('##')] # iterate over all entries for _, entry_path, content in osg.entry_iterator(): for check_string in check_strings: if content.find(check_string) >= 0: print('{}: found {}'.format(os.path.basename(entry_path), check_string)) print('checked for template leftovers') def check_inconsistencies(self): """ :return: """ if not self.entries: print('entries not yet loaded') return # get all keywords and print similar keywords keywords = [] for entry in self.entries: keywords.extend(entry['Keyword']) if b'first\xe2\x80\x90person'.decode() in entry['Keyword']: print(entry['File']) keywords = [x.value for x in keywords] # reduce those starting with "multiplayer" keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords] # check unique keywords unique_keywords = list(set(keywords)) unique_keywords_counts = [keywords.count(l) for l in unique_keywords] for index, name in enumerate(unique_keywords): for other_index in range(index+1, len(unique_keywords)): other_name = unique_keywords[other_index] if osg.name_similarity(name, other_name) > 0.8: print(' Keywords {} ({}) - {} ({}) are similar'.format(name, unique_keywords_counts[index], other_name, unique_keywords_counts[other_index])) # get all names of frameworks and library also using osg.code_dependencies_aliases valid_dependencies = list(c.general_code_dependencies_without_entry.keys()) for entry in self.entries: if any((x in ('framework', 'library', 'game engine') for x in entry['Keyword'])): name = entry['Title'] if name in c.code_dependencies_aliases: valid_dependencies.extend(c.code_dependencies_aliases[name]) else: valid_dependencies.append(name) # get all referenced code dependencies referenced_dependencies = {} for entry in self.entries: deps = entry.get('Code dependency', []) for dependency in deps: dependency = dependency.value if dependency in referenced_dependencies: referenced_dependencies[dependency] += 1 else: referenced_dependencies[dependency] = 1 # delete those that are valid dependencies referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies] # sort by number referenced_dependencies.sort(key=lambda x: x[1], reverse=True) # print out print('Code dependencies not included as entry') for dep in referenced_dependencies: print('{} ({})'.format(*dep)) # if there is the "Play" field, it should have "Web" as Platform for entry in self.entries: name = entry['File'] if 'Play' in entry: if not 'Platform' in entry: print('Entry "{}" has "Play" field but not "Platform" field, add it with "Web"'.format(name)) elif not 'Web' in entry['Platform']: print('Entry "{}" has "Play" field but not "Web" in "Platform" field'.format(name)) # javascript/typescript/php as language but not web as platform? ignored = ('0_ad.md', 'aussenposten.md', 'between.md', 'caesaria.md', 'cavepacker.md', 'citybound.md', 'gorillas.md', 'ika.md', 'inexor.md', 'maniadrive.md', 'oolite.md', 'freevikings.md', 'rolisteam.md', 'rpgboss.md', 'ruby-warrior.md', 'snelps.md', 'tenes_empanadas_graciela.md', 'thrive.md') for entry in self.entries: name = entry['File'] if name in ignored: continue if any(language in entry['Code language'] for language in ('JavaScript', 'TypeScript', 'PHP', 'CoffeeScript')) and ('Platform' not in entry or 'Web' not in entry['Platform']): print('Entry "{}" has language JavaScript/PHP but not Web as platform.'.format(name)) # space in name but not space as keyword ignored = ('burgerspace.md', 'crystal_space_3d_sdk.md', 'our_personal_space.md', 'space_harrier_clone.md') for entry in self.entries: name = entry['File'] if name in ignored: continue title = entry['Title'] if 'space' in title.lower() and not 'space' in entry['Keyword']: print('Entry "{}" has space in name but not as keyword.'.format(name)) # starts with j + capital letter but not java as language for entry in self.entries: name = entry['File'] title = entry['Title'] if title[0] == 'j' and title[1] == title[1].upper() and not 'Java' in entry['Code language']: print('Entry "{}" title starts with j? but Java is not a code language.'.format(name)) # search for duplicate keywords for entry in self.entries: keywords = entry['Keyword'] duplicates = [keyword for keyword in keywords if keywords.count(keyword) > 1] if duplicates: print('"{}" has duplicate keywords: {}'.format(entry['File'], duplicates)) # if there is a @see-download there should be download fields... def clean_rejected(self): """ Only sorts the rejected games list file. """ # sort rejected games list file sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list') def clean_backlog(self): """ :return: """ if not self.entries: print('entries not yet loaded') return # get urls from entries included_urls = osg.all_urls(self.entries) included_urls = list(included_urls.keys()) # only need the URLs here # get urls from rejected file text = utils.read_text(c.rejected_file) regex = re.compile(r"\((http.*?)\)", re.MULTILINE) matches = regex.findall(text) rejected_urls = [] for match in matches: urls = match.split(',') urls = [x.strip() for x in urls] rejected_urls.extend(urls) included_urls.extend(rejected_urls) # those that only have a web archive version, also get the original version more_urls = [] for url in included_urls: if url.startswith('https://web.archive.org/web'): # print(url) # sometimes the http is missing in archive links (would need proper parsing) url = url[url.index('http', 5):] more_urls.append(url) included_urls.extend(more_urls) # now we strip the urls stripped_urls = [utils.strip_url(x) for x in included_urls] stripped_urls = set(stripped_urls) # removes duplicates for performance # read backlog and get urls from there text = utils.read_text(c.backlog_file) text = text.split('\n') # remove those that are in stripped_game_urls text = [x for x in text if utils.strip_url(x) not in stripped_urls] # remove duplicates and sort text = sorted(list(set(text)), key=str.casefold) print('backlog contains {} items'.format(len(text))) # join and save again text = '\n'.join(text) utils.write_text(c.backlog_file, text) print('backlog cleaned') def check_external_links(self): """ Checks all external links it can find for validity. Prints those with non OK HTTP responses. Does only need to be run from time to time. """ # regex for finding urls (can be in <> or in ]() or after a whitespace regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]") # ignore the following patterns (they give false positives here) ignored_urls = ( 'https://git.tukaani.org/xz.git', 'https://git.code.sf.net/', 'http://hg.hedgewars.org/hedgewars/', 'https://git.xiph.org/vorbis.git', 'http://svn.uktrainsim.com/svn/openrails', 'https://www.srb2.org/', 'http://wiki.srb2.org/') # some do redirect, but we nedertheless want the original URL in the database redirect_okay = ('https://octaforge.org/', 'https://svn.openttd.org/', 'https://godotengine.org/download') # extract all links from entries import urllib3 urllib3.disable_warnings() # otherwise we cannot verify those with SSL errors without getting warnings urls = {} for entry, _, content in osg.entry_iterator(): # apply regex matches = regex.findall(content) # for each match for match in matches: for url in match: if url and not any((url.startswith(x) for x in ignored_urls)): # ignore bzr.sourceforge, no web address found if 'bzr.sourceforge.net/bzrroot/' in url: continue # add "/" at the end if any((url.startswith(x) for x in ( 'https://anongit.freedesktop.org/git', 'https://git.savannah.gnu.org/git/', 'https://git.savannah.nongnu.org/git/', 'https://git.artsoft.org/'))): url += '/' if url.startswith('https://bitbucket.org/') and url.endswith('.git'): url = url[:-4] + '/commits/' if url.startswith('https://svn.code.sf.net/p/'): url = 'http' + url[5:] + '/' if url.startswith('http://cvs.savannah.nongnu.org:/sources/'): url = 'http://cvs.savannah.nongnu.org/viewvc/' + url[40:] + '/' if url.startswith('http://cvs.savannah.gnu.org:/sources/'): url = 'http://cvs.savannah.gnu.org/viewvc/' + url[37:] + '/' # generally ".git" at the end is not working well, except sometimes if url.endswith('.git') and not any((url.startswith(x) for x in ( 'https://repo.or.cz', 'https://git.tuxfamily.org/fanwor/fanwor'))): url = url[:-4] if url in urls: urls[url].add(entry) else: urls[url] = {entry} print('found {} unique links'.format(len(urls))) print("start checking external links (can take a while)") # now iterate over all urls for url, names in urls.items(): names = list(names) # was a set if len(names) == 1: names = names[0] try: verify = True # some have an expired certificate but otherwise still work if any((url.startswith(x) for x in ( 'https://perso.b2b2c.ca/~sarrazip/dev/', 'https://dreerally.com/', 'https://henlin.net/', 'https://www.megamek.org/', 'https://pixeldoctrine.com/', 'https://gitorious.org/', 'https://www.opmon-game.ga/'))): verify = False r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify) if r.status_code == 405: # head method not supported, try get r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True, verify=verify) # check for bad status if r.status_code != requests.codes.ok: print('{}: {} - {}'.format(names, url, r.status_code)) # check for redirect if r.history and url not in redirect_okay: # only / added or http->https sometimes redirected_url = r.url if redirected_url == url + '/': output = '{}: {} -> {} - redirect "/" at end ' elif redirected_url == 'https' + url[4:]: output = '{}: {} -> {} - redirect "https" at start' else: output = '{}: {} -> {} - redirect ' print(output.format(names, url, redirected_url)) except Exception as e: error_name = type(e).__name__ if error_name == 'SSLError' and any((url.startswith(x) for x in ( 'https://gitorious.org/', 'https://www.freedroid.org/download/'))): continue # even though verify is False, these errors still get through print('{}: {} - exception {}'.format(names, url, error_name)) print('external links checked') def update_readme_tocs(self): """ Recounts entries in sub categories and writes them to the readme. Also updates the _toc files in the categories directories. Note: The Readme must have a specific structure at the beginning, starting with "# Open Source Games" and ending on "A collection.." Needs to be performed regularly. """ # completely delete content of toc path for file in os.listdir(c.tocs_path): os.remove(os.path.join(c.tocs_path, file)) # read readme readme_file = os.path.join(c.root_path, 'README.md') readme_text = utils.read_text(readme_file) # compile regex for identifying the building blocks in the readme regex = re.compile(r"(.*?)(\[comment\]: # \(start.*?end of autogenerated content\))(.*)", re.DOTALL) # apply regex matches = regex.findall(readme_text) if len(matches) != 1: raise RuntimeError('readme file has invalid structure') matches = matches[0] start = matches[0] end = matches[2] tocs_text = '' # split into games, tools, frameworks, libraries games = [x for x in self.entries if not any([y in x['Keyword'] for y in ('tool', 'framework', 'library')])] tools = [x for x in self.entries if 'tool' in x['Keyword']] frameworks = [x for x in self.entries if 'framework' in x['Keyword']] libraries = [x for x in self.entries if 'library' in x['Keyword']] # create games, tools, frameworks, libraries tocs title = 'Games' file = '_games.md' tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(games)) create_toc(title, file, games) title = 'Tools' file = '_tools.md' tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(tools)) create_toc(title, file, tools) title = 'Frameworks' file = '_frameworks.md' tocs_text += '**[{}](entries/tocs/{}#{})** ({}) - '.format(title, file, title, len(frameworks)) create_toc(title, file, frameworks) title = 'Libraries' file = '_libraries.md' tocs_text += '**[{}](entries/tocs/{}#{})** ({})\n'.format(title, file, title, len(libraries)) create_toc(title, file, libraries) # create by category categories_text = [] for keyword in c.recommended_keywords: filtered = [x for x in self.entries if keyword in x['Keyword']] title = keyword.capitalize() name = keyword.replace(' ', '-') file = '_{}.md'.format(name) categories_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered))) create_toc(title, file, filtered) categories_text.sort() tocs_text += '\nBy category: {}\n'.format(', '.join(categories_text)) # create by platform platforms_text = [] for platform in c.valid_platforms: filtered = [x for x in self.entries if platform in x.get('Platform', [])] title = platform name = platform.lower() file = '_{}.md'.format(name) platforms_text.append('**[{}](entries/tocs/{}#{})** ({})'.format(title, file, name, len(filtered))) create_toc(title, file, filtered) tocs_text += '\nBy platform: {}\n'.format(', '.join(platforms_text)) # insert new text in the middle (the \n before the second comment is necessary, otherwise Markdown displays it as part of the bullet list) text = start + "[comment]: # (start of autogenerated content, do not edit)\n" + tocs_text + "\n[comment]: # (end of autogenerated content)" + end # write to readme utils.write_text(readme_file, text) print('Readme and TOCs updated') def update_statistics(self): """ Generates the statistics page. Should be done every time the entries change. """ if not self.entries: print('entries not yet loaded') return # start the page statistics = '[comment]: # (autogenerated content, do not edit)\n# Statistics\n\n' # total number number_entries = len(self.entries) rel = lambda x: x / number_entries * 100 # conversion to percent statistics += 'analyzed {} entries on {}\n\n'.format(number_entries, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')) # State (beta, mature, inactive) statistics += '## State\n\n' number_state_beta = sum(1 for x in self.entries if 'beta' in x['State']) number_state_mature = sum(1 for x in self.entries if 'mature' in x['State']) number_inactive = sum(1 for x in self.entries if osg.is_inactive(x)) statistics += '- mature: {} ({:.1f}%)\n- beta: {} ({:.1f}%)\n- inactive: {} ({:.1f}%)\n\n'.format( number_state_mature, rel(number_state_mature), number_state_beta, rel(number_state_beta), number_inactive, rel(number_inactive)) if number_inactive > 0: entries_inactive = [(x['Title'], osg.extract_inactive_year(x)) for x in self.entries if osg.is_inactive(x)] entries_inactive.sort(key=lambda x: str.casefold(x[0])) # first sort by name entries_inactive.sort(key=lambda x: x[1], reverse=True) # then sort by inactive year (more recently first) entries_inactive = ['{} ({})'.format(*x) for x in entries_inactive] statistics += '##### Inactive State\n\n' + ', '.join(entries_inactive) + '\n\n' # Language statistics += '## Code Languages\n\n' field = 'Code language' # get all languages together languages = [] for entry in self.entries: languages.extend(entry[field]) languages = [x.value for x in languages] unique_languages = set(languages) unique_languages = [(l, languages.count(l) / len(languages)) for l in unique_languages] unique_languages.sort(key=lambda x: str.casefold(x[0])) # first sort by name # print languages to console print('\nLanguages\n') print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_languages)) unique_languages.sort(key=lambda x: x[1], reverse=True) # then sort by occurrence (highest occurrence first) unique_languages = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_languages] statistics += '##### Language frequency\n\n' + ''.join(unique_languages) + '\n' # Licenses statistics += '## Code licenses\n\n' field = 'Code license' # get all licenses together licenses = [] for entry in self.entries: licenses.extend(entry[field]) licenses = [x.value for x in licenses] unique_licenses = set(licenses) unique_licenses = [(l, licenses.count(l) / len(licenses)) for l in unique_licenses] unique_licenses.sort(key=lambda x: str.casefold(x[0])) # first sort by name # print licenses to console print('\nLicenses\n') print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_licenses)) unique_licenses.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first) unique_licenses = ['- {} ({:.1f}%)\n'.format(x[0], x[1] * 100) for x in unique_licenses] statistics += '##### Licenses frequency\n\n' + ''.join(unique_licenses) + '\n' # Keywords statistics += '## Keywords\n\n' field = 'Keyword' # get all keywords together keywords = [] for entry in self.entries: keywords.extend(entry[field]) keywords = [x.value for x in keywords] # reduce those starting with "multiplayer" keywords = [x if not x.startswith('multiplayer') else 'multiplayer' for x in keywords] unique_keywords = set(keywords) unique_keywords = [(l, keywords.count(l) / len(keywords)) for l in unique_keywords] unique_keywords.sort(key=lambda x: str.casefold(x[0])) # first sort by name # print keywords to console print('\nKeywords\n') print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords)) unique_keywords.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first) unique_keywords = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_keywords] statistics += '##### Keywords frequency\n\n' + '\n'.join(unique_keywords) + '\n\n' # no download or play field statistics += '## Entries without download or play fields\n\n' entries = [] for entry in self.entries: if 'Download' not in entry and 'Play' not in entry: entries.append(entry['Title']) entries.sort(key=str.casefold) statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n' # code hosted not on github, gitlab, bitbucket, launchpad, sourceforge popular_code_repositories = ('github.com', 'gitlab.com', 'bitbucket.org', 'code.sf.net', 'code.launchpad.net') statistics += '## Entries with a code repository not on a popular site\n\n' entries = [] field = 'Code repository' for entry in self.entries: popular = False for repo in entry.get(field, []): for popular_repo in popular_code_repositories: if popular_repo in repo.value: popular = True break # if there were repositories, but none popular, add them to the list if not popular: entries.append(entry['Title']) # print(info[field]) entries.sort(key=str.casefold) statistics += '{}: '.format(len(entries)) + ', '.join(entries) + '\n\n' # Code dependencies statistics += '## Code dependencies\n\n' field = 'Code dependency' # get all code dependencies together code_dependencies = [] entries_with_code_dependency = 0 for entry in self.entries: if field in entry: code_dependencies.extend(entry[field]) entries_with_code_dependency += 1 code_dependencies = [x.value for x in code_dependencies] statistics += 'With code dependency field {} ({:.1f}%)\n\n'.format(entries_with_code_dependency, rel(entries_with_code_dependency)) unique_code_dependencies = set(code_dependencies) unique_code_dependencies = [(l, code_dependencies.count(l) / len(code_dependencies)) for l in unique_code_dependencies] unique_code_dependencies.sort(key=lambda x: str.casefold(x[0])) # first sort by name # print code dependencies to console print('\nCode dependencies\n') print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies)) unique_code_dependencies.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first) unique_code_dependencies = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_code_dependencies] statistics += '##### Code dependencies frequency\n\n' + '\n'.join(unique_code_dependencies) + '\n\n' # Build systems: statistics += '## Build systems\n\n' field = 'Build system' # get all build systems together build_systems = [] for entry in self.entries: if field in entry['Building']: build_systems.extend(entry['Building'][field]) build_systems = [x.value for x in build_systems] statistics += 'Build systems information available for {:.1f}% of all projects.\n\n'.format( rel(len(build_systems))) unique_build_systems = set(build_systems) unique_build_systems = [(l, build_systems.count(l) / len(build_systems)) for l in unique_build_systems] unique_build_systems.sort(key=lambda x: str.casefold(x[0])) # first sort by name # print build systems to console print('\nBuild systems\n') print('\n'.join('{} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems)) unique_build_systems.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first) unique_build_systems = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_build_systems] statistics += '##### Build systems frequency ({})\n\n'.format(len(build_systems)) + '\n'.join( unique_build_systems) + '\n\n' # C, C++ projects without build system information c_cpp_project_without_build_system = [] for entry in self.entries: if field not in entry and ('C' in entry['Code language'] or 'C++' in entry['Code language']): c_cpp_project_without_build_system.append(entry['Title']) c_cpp_project_without_build_system.sort(key=str.casefold) statistics += '##### C and C++ projects without build system information ({})\n\n'.format( len(c_cpp_project_without_build_system)) + ', '.join(c_cpp_project_without_build_system) + '\n\n' # C, C++ projects with build system information but without CMake as build system c_cpp_project_not_cmake = [] for entry in entries: if field in entry and 'CMake' in entry[field] and ( 'C' in entry['Code language'] or 'C++' in entry['Code language']): c_cpp_project_not_cmake.append(entry['Title']) c_cpp_project_not_cmake.sort(key=str.casefold) statistics += '##### C and C++ projects with a build system different from CMake ({})\n\n'.format( len(c_cpp_project_not_cmake)) + ', '.join(c_cpp_project_not_cmake) + '\n\n' # Platform statistics += '## Platform\n\n' field = 'Platform' # get all platforms together platforms = [] for entry in self.entries: if field in entry: platforms.extend(entry[field]) platforms = [x.value for x in platforms] statistics += 'Platform information available for {:.1f}% of all projects.\n\n'.format(rel(len(platforms))) unique_platforms = set(platforms) unique_platforms = [(l, platforms.count(l) / len(platforms)) for l in unique_platforms] unique_platforms.sort(key=lambda x: str.casefold(x[0])) # first sort by name unique_platforms.sort(key=lambda x: -x[1]) # then sort by occurrence (highest occurrence first) unique_platforms = ['- {} ({:.1f}%)'.format(x[0], x[1] * 100) for x in unique_platforms] statistics += '##### Platforms frequency\n\n' + '\n'.join(unique_platforms) + '\n\n' # write to statistics file utils.write_text(c.statistics_file, statistics) print('statistics updated') def update_html(self): """ Parses all entries, collects interesting info and stores it in a json file suitable for displaying with a dynamic table in a browser. """ if not self.entries: print('entries not yet loaded') return # make database out of it db = {'headings': ['Game', 'Description', 'Download', 'State', 'Keyword', 'Source']} entries = [] for info in self.entries: # game & description entry = ['{} (home, entry)'.format(info['Title'], info['Home'][0], r'https://github.com/Trilarion/opensourcegames/blob/master/entries/' + info['File']), textwrap.shorten(info.get('Note', ''), width=60, placeholder='..')] # download field = 'Download' if field in info and info[field]: entry.append('Link'.format(info[field][0])) else: entry.append('') # state (field state is essential) entry.append('{} / {}'.format(info['State'][0], 'inactive since {}'.format(osg.extract_inactive_year(info)) if osg.is_inactive(info) else 'active')) # keywords keywords = info['Keyword'] keywords = [x.value for x in keywords] entry.append(', '.join(keywords)) # source text = [] field = 'Code repository' if field in info and info[field]: text.append('Source'.format(info[field][0].value)) languages = info['Code language'] languages = [x.value for x in languages] text.append(', '.join(languages)) licenses = info['Code license'] licenses = [x.value for x in licenses] text.append(', '.join(licenses)) entry.append(' - '.join(text)) # append to entries entries.append(entry) # sort entries by game name entries.sort(key=lambda x: str.casefold(x[0])) db['data'] = entries # output text = json.dumps(db, indent=1) utils.write_text(c.json_db_file, text) print('HTML updated') def update_repos(self): """ export to json for local repository update of primary repos """ if not self.entries: print('entries not yet loaded') return primary_repos = {'git': [], 'svn': [], 'hg': []} unconsumed_entries = [] # for every entry filter those that are known git repositories (add additional repositories) for entry in self.entries: repos = entry.get('Code repository', []) repos = [x.value for x in repos] # keep the first and all others containing @add if not repos: continue repos = [repos[0]] + [x for x in repos[1:] if "@add" in x] for repo in repos: consumed = False repo = repo.split(' ')[0].strip() url = osg.git_repo(repo) if url: primary_repos['git'].append(url) consumed = True continue url = osg.svn_repo(repo) if url: primary_repos['svn'].append(url) consumed = True continue url = osg.hg_repo(repo) if url: primary_repos['hg'].append(url) consumed = True continue if not consumed: unconsumed_entries.append([entry['Title'], repo]) print('Entry "{}" unconsumed repo: {}'.format(entry['File'], repo)) # sort them alphabetically (and remove duplicates) for k, v in primary_repos.items(): primary_repos[k] = sorted(set(v)) # statistics of gits git_repos = primary_repos['git'] print('{} Git repositories'.format(len(git_repos))) for domain in ( 'repo.or.cz', 'anongit.kde.org', 'bitbucket.org', 'git.code.sf.net', 'git.savannah', 'git.tuxfamily', 'github.com', 'gitlab.com', 'gitlab.com/osgames', 'gitlab.gnome.org'): print('{} on {}'.format(sum(1 if domain in x else 0 for x in git_repos), domain)) # write them to code/git json_path = os.path.join(c.root_path, 'code', 'archives.json') text = json.dumps(primary_repos, indent=1) utils.write_text(json_path, text) print('Repositories updated') def collect_git_repos(self): """ for every entry, get all git :return: """ git_repos = [] for entry in self.entries: repos = entry['Code repository'] repos = [x.value for x in repos] for repo in repos: repo = repo.split(' ')[0].strip() url = osg.git_repo(repo) if url: git_repos.append(repo) # sort them alphabetically (and remove duplicates) git_repos = sorted(list(set(git_repos)), key=str.casefold) # write them to code/git json_path = os.path.join(c.root_path, 'code', 'git_repositories.json') text = json.dumps(git_repos, indent=1) utils.write_text(json_path, text) def special_ops(self): """ For special operations that are one-time and may change. :return: """ if not self.entries: print('entries not yet loaded') return # # which fields have lots of comments # for field in c.valid_fields: # values = [value for entry in self.entries for value in entry.get(field, [])] # if isinstance(values[0], osg_parse.ValueWithComment): # comments = [value.comment for value in values if value.comment] # # split by comma # comments = [c.strip() for comment in comments for c in comment.split(',')] # print('field {} has {} comments'.format(field, len(comments))) # for comment in set(comments): # print(' {} - {}'.format(comment, comments.count(comment))) # # remove download urls that are also in home # for entry in self.entries: # homes = entry['Home'] # downloads = entry.get('Download', []) # downloads = [download for download in downloads if download not in homes] # if downloads: # entry['Download'] = downloads # if not downloads and 'Download' in entry: # del entry['Download'] # remove developers from all that have library as keyword for entry in self.entries: if 'library' in entry['Keyword']: devs = entry.get('Developer', []) if devs: print('entry {} is library and has {} developer'.format(entry['File'], len(devs))) del entry['Developer'] # # collect statistics on git repositories # stats = {} # for entry in self.entries: # repos = entry.get('Code repository', []) # comments = [x.comment for x in repos if x.value.startswith('https://github.com/') and x.comment] # for comment in comments: # for part in comment.split(','): # part = part.strip() # if not part.startswith('@'): # continue # part = part.split(' ') # key = part[0][1:] # without the @ # value = part[1] if len(part) > 1 else None # stats[key] = stats.get(key, []) + [value] # # process statistics # stats['archived'] = len(stats['archived']) # created = stats['created'] # stats['created'] = {} # for year in created: # stats['created'][year] = stats['created'].get(year, 0) + 1 # # for key, value in sorted(stats['created'].items(), key=lambda x: x[0]): # print("{} : {}".format(key, value)) # # import numpy as np # np.set_printoptions(suppress=True) # stars = np.array(stats['stars'], dtype=np.float) # forks = np.array(stats['forks'], dtype=np.float) # q = np.arange(0, 1, 0.333) # print(q) # print(np.quantile(stars, q)) # print(np.quantile(forks, q)) # # cvs without any git # for entry in self.entries: # repos = entry['Code repository'] # cvs = [repo for repo in repos if 'cvs' in repo] # git = [repo for repo in repos if 'git' in repo] # if len(cvs) > 0 and len(git) == 0: # print('Entry "{}" with repos: {}'.format(entry['File'], repos)) # # combine content keywords # n = len('content ') # for entry in self.entries: # keywords = entry['Keyword'] # content = [keyword for keyword in keywords if keyword.startswith('content')] # if len(content) > 1: # # remove from keywords # keywords = [keyword for keyword in keywords if keyword not in content] # # remove prefix # content = [str(keyword)[n:].strip() for keyword in content] # # join with + # content = 'content {}'.format(' + '.join(content)) # keywords.append(osg_parse.ValueWithComment(content)) # entry['Keyword'] = keywords # print('fixed "{}"'.format(entry['File'])) print('special ops finished') def complete_run(self): pass if __name__ == "__main__": m = EntriesMaintainer() actions = { 'Read entries': m.read_entries, 'Write entries': m.write_entries, 'Check template leftovers': m.check_template_leftovers, 'Check inconsistencies': m.check_inconsistencies, 'Check rejected entries': m.clean_rejected, 'Check external links (takes quite long)': m.check_external_links, 'Clean backlog': m.clean_backlog, 'Update Readme and TOCs': m.update_readme_tocs, 'Update statistics': m.update_statistics, 'Update HTML': m.update_html, 'Update repository list': m.update_repos, 'Special': m.special_ops, 'Complete run': m.complete_run } # TODO sort developers alphabetically and remove duplicate entries osg_ui.run_simple_button_app('Entries developer', actions)