Additions from the backlog, removals of entries whose unclear license status could not be resolved, and a bit of GitLab statistics.

This commit is contained in:
Trilarion
2021-10-01 14:13:08 +02:00
parent e7ea8fc6ab
commit ac85e5fa99
322 changed files with 1081 additions and 713 deletions

View File

@ -6,11 +6,17 @@ import re
import requests
from utils import osg, osg_rejected
AWESOME_LIST = 'https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md'
# Probably could fix them within the awesome lists
IGNORED = ('2006rebotted', 'raw(gl)', 'fheroes2', 'FS2OPEN', 'Barbarian', 'Hexen II: Hammer of Thyrion')
# TODO Probably could fix some of the ignored cases within the awesome lists (or fix the small deviations in structure)
# TODO not all of them are awesome actually
matcher = re.compile(r'\[(.*)?\]\((.*?)\) - (.*)') # general structure: - [title](link) - description
# AWESOME_LIST = 'https://raw.githubusercontent.com/radek-sprta/awesome-game-remakes/master/README.md'
# IGNORED = ('2006rebotted', 'raw(gl)', 'fheroes2', 'FS2OPEN', 'Barbarian', 'Hexen II: Hammer of Thyrion')
AWESOME_LIST = 'https://raw.githubusercontent.com/leereilly/games/master/README.md'
IGNORED = ('Warsow',)
# two different - signs are used sometimes
matcher = re.compile(r'\[(.*)?\]\((.*?)\) [- ]*(.*)') # general structure: - [title](link) - description
if __name__ == "__main__":
@ -28,13 +34,16 @@ if __name__ == "__main__":
for items in text:
items = items.split('\n')
category = items[0].strip()
items = [item for item in items[1:] if item.startswith('-')]
items = [item for item in items[1:] if item.startswith('- ') or item.startswith('* ')]
for item in items:
# print(item)
# print(matcher.findall(item))
matches = matcher.findall(item)[0] # we know it will be exactly one
title = matches[0]
url = matches[1]
description = matches[2]
entries.append({'Title': title, 'URL': url, 'Description': description, 'Category': category})
print('contains {} entries'.format(len(entries)))
# remove those from the ignored list
entries = [entry for entry in entries if not any(entry['Title'] == x for x in IGNORED)]
@ -42,7 +51,7 @@ if __name__ == "__main__":
# remove those that are in our rejected list
rejected_titles = [x['Title'] for x in rejected]
entries = [entry for entry in entries if entry['Title'] not in rejected_titles]
print('after filtering for rejected entries {}'.format(len(entries)))
print('after filtering for rejected and ignored entries {}'.format(len(entries)))
# a bit of statistics about this awesome list
print('contains {} entries in {} categories'.format(len(entries), len(text)))
@ -59,6 +68,7 @@ if __name__ == "__main__":
print('{} entries read (osgl)'.format(len(our_entries)))
# go through this awesome list entries one by one and compare to our list
index = 1
for entry in entries:
title = entry['Title']
url = entry['URL']
@ -70,7 +80,8 @@ if __name__ == "__main__":
if title_equal or url_present:
similar_entries.append(our_entry)
if not similar_entries:
print('Unknown entry "{}" {} - {} - {}'.format(entry['Title'], entry['URL'], entry['Category'], entry['Description']))
print('Unknown entry ({}) "{}" {} - {} - {}'.format(index, entry['Title'], entry['URL'], entry['Category'], entry['Description']))
index += 1