opensourcegames/code/synchronization/synchronize_awesome_lists.py

88 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Synchronizes with awesome lists from
"""
import re
import requests
from utils import osg, osg_rejected
# TODO Some of the ignored cases could probably be fixed within the awesome lists themselves
#      (or by fixing the small deviations in their structure).
# TODO Not all of the lists are actually awesome.

# Alternative list and its ignored titles (currently disabled):
# AWESOME_LIST = 'https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md'
# IGNORED = ('2006rebotted', 'raw(gl)', 'fheroes2', 'FS2OPEN', 'Barbarian', 'Hexen II: Hammer of Thyrion')

# URL of the raw markdown file of the awesome list that is synchronized
AWESOME_LIST = 'https://raw.githubusercontent.com/leereilly/games/master/README.md'
# titles of entries of the awesome list that are skipped in the comparison
IGNORED = ('Warsow',)

# General structure of a list item: - [title](link) - description
# Group 1 is the title, group 2 the link and group 3 the description.
#  * Title and link are matched lazily, so a markdown link inside the description is not
#    swallowed into the title.
#  * The closing parenthesis of the link must be followed by whitespace or by the end of
#    the line. This keeps links that contain parentheses intact and also accepts items
#    that have no description at all.
#  * Different dash characters are used as separator; hyphen, en dash and em dash are
#    written as explicit escapes to avoid ambiguous Unicode characters in this file.
matcher = re.compile(r'\[(.*?)\]\((.*?)\)(?=\s|$)[\s\-\u2013\u2014]*(.*)')
def parse_awesome_list(markdown, pattern):
    """
    Extracts the list items of an awesome list given as markdown text.

    The text is split at every newline that is followed by '##'. The first two parts (the text
    before the first such heading and the section of the first heading) are skipped. Every
    remaining part is treated as one category whose first line is the category name. Only lines
    starting with '- ' or '* ' are considered list items. List items that the pattern does not
    match are reported on the console and skipped.

    :param markdown: Complete markdown text of the awesome list.
    :param pattern: Compiled regular expression with exactly three groups (title, link, description).
    :return: Tuple (entries, number of categories). Every entry is a dictionary with the keys
             'Title', 'URL', 'Description' and 'Category'.
    """
    sections = markdown.split('\n##')[2:]
    entries = []
    for section in sections:
        lines = section.split('\n')
        category = lines[0].strip()
        for line in lines[1:]:
            if not line.startswith(('- ', '* ')):
                continue
            found = pattern.findall(line)
            if not found:
                # a list item that is not of the form [title](link) must not abort the whole run
                print('skipped list item without link "{}" in category "{}"'.format(line, category))
                continue
            title, url, description = found[0]
            entries.append({'Title': title, 'URL': url, 'Description': description, 'Category': category})
    return entries, len(sections)


def find_unknown_entries(entries, our_entries):
    """
    Returns those entries of the awesome list that have no counterpart among our entries.

    An entry is known if its title equals the title of one of our entries or if its (non-empty)
    URL is contained in one of the 'Home' or 'Code repository' values of one of our entries.

    :param entries: Entries of the awesome list (dictionaries with the keys 'Title' and 'URL').
    :param our_entries: Our entries (dictionaries with the keys 'Title', 'Home' and optionally
                        'Code repository', the latter two holding iterables of strings).
    :return: List of the unknown entries in the order of the awesome list.
    """

    def is_similar(entry, our_entry):
        if entry['Title'] == our_entry['Title']:
            return True
        url = entry['URL']
        if not url:
            # the empty string is contained in every string and would match every link
            return False
        return any(url in x for x in our_entry['Home']) or any(url in x for x in our_entry.get('Code repository', []))

    return [entry for entry in entries if not any(is_similar(entry, our_entry) for our_entry in our_entries)]


if __name__ == "__main__":

    # read rejected
    rejected = osg_rejected.read_rejected_file()

    # read awesome list
    print('read {}'.format(AWESOME_LIST))
    r = requests.get(AWESOME_LIST, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=20, allow_redirects=True)
    if r.status_code != requests.codes.ok:
        raise RuntimeError('Cannot download awesome list.')
    entries, number_categories = parse_awesome_list(r.text, matcher)
    print('contains {} entries'.format(len(entries)))

    # remove those from the ignored list
    entries = [entry for entry in entries if entry['Title'] not in IGNORED]

    # remove those that are in our rejected list (set for constant time lookup)
    rejected_titles = {x['Title'] for x in rejected}
    entries = [entry for entry in entries if entry['Title'] not in rejected_titles]
    print('after filtering for rejected and ignored entries {}'.format(len(entries)))

    # a bit of statistics about this awesome list
    print('contains {} entries in {} categories'.format(len(entries), number_categories))
    number_github = sum(1 for entry in entries if entry['URL'].startswith('https://github.com/'))
    print('{} links to Github, {} links not to Github'.format(number_github, len(entries) - number_github))

    # read our database
    our_entries = osg.read_entries()
    print('{} entries read (osgl)'.format(len(our_entries)))

    # go through this awesome list entries one by one, compare to our list and print the unknown ones (numbered)
    for index, entry in enumerate(find_unknown_entries(entries, our_entries), start=1):
        print('Unknown entry ({}) "{}" {} - {} - {}'.format(index, entry['Title'], entry['URL'], entry['Category'], entry['Description']))