sorting of developer names

This commit is contained in:
Trilarion
2021-01-13 14:15:53 +01:00
parent 023ca7e9f0
commit c6997c8a7a
710 changed files with 803 additions and 725 deletions

View File

@ -15,6 +15,14 @@ http://haxepunk.com/
http://hcsoftware.sourceforge.net/jason-rohrer/ (various games there)
http://hge.relishgames.com/
http://hgm.nubati.net/
https://en.wikipedia.org/wiki/Catacomb_(video_game) (released under GNU)
https://github.com/JCWasmx86/Conquer
https://github.com/SaeruHikari/SakuraEngine
https://github.com/zhangdoa/InnocenceEngine
https://github.com/septag/rizz
https://github.com/EvilPudding/candle
https://github.com/TorqueGameEngines/Torque3D
https://github.com/polymonster/pmtech
http://icculus.org/
http://icculus.org/asciiroth/
http://icculus.org/avp/

View File

@ -25,7 +25,7 @@ name_aliases = {'Andreas Rosdal': 'Andreas Røsdal', 'davefancella': 'Dave Fance
'buginator': 'Buginator', 'CiprianKhlud': 'Ciprian Khlud', 'dericpage': 'Deric Page',
'DI Murat Sari': 'Murat Sari', 'DolceTriade': 'Dolce Triade', 'DreamingPsion': 'Dreaming Psion',
'edwardlii': 'Edward Lii', 'erik-vos': 'Erik Vos', 'joevenzon': 'Joe Venzon', 'noamgat': 'Noam Gat',
'Dr. Martin Brumm': 'Martin Brumm'}
'Dr. Martin Brumm': 'Martin Brumm', 'South Bound Apps (Android)': 'South Bound Apps'}
def collect_github_entries():

View File

@ -39,7 +39,7 @@ class DevelopersMaintainer:
for other_name in developer_names[index + 1:]:
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.85:
print(' {} - {} is similar'.format(name, other_name))
print('duplicates checked (took {:.3f}s)'.format(time.process_time()-start_time))
print('duplicates checked (took {:.1f}s)'.format(time.process_time()-start_time))
def check_for_orphans(self):
if not self.developers:

View File

@ -7,9 +7,8 @@ Sorts the entries in the contents files of each sub folder alphabetically.
"""
# TODO check for within an entry for similar dev names
# TODO special mode (load all and save all)
# TODO sort devs alphabetically upon save (if not done yet)
# TODO statistics on git repositories (created, stars, forks) and meaningful categories
# TODO wikipedia (media search)
# TODO google search (for homepages or media entries) for popular ones at least
import os
import re
@ -864,13 +863,46 @@ class EntriesMaintainer:
print('entries not yet loaded')
return
# cvs without any git
# collect statistics on git repositories
created = {}
stars = []
forks = []
for entry in self.entries:
repos = entry['Code repository']
cvs = [repo for repo in repos if 'cvs' in repo]
git = [repo for repo in repos if 'git' in repo]
if len(cvs) > 0 and len(git) == 0:
print('Entry "{}" with repos: {}'.format(entry['File'], repos))
comments = [x.comment for x in repos if x.value.startswith('https://github.com/') and x.comment]
for comment in comments:
comment = comment.split(',')
comment = [c.strip() for c in comment]
comment = [c for c in comment if c.startswith('@')]
if comment:
try:
comment = [c.split(' ') for c in comment]
comment = [c[1] for c in comment if len(c) > 1]
except Exception:
print(comment)
raise
created[comment[0]] = created.get(comment[0], 0) + 1
stars.append(comment[1])
forks.append(comment[2])
for key, value in sorted(created.items(), key=lambda x: x[0]):
print("{} : {}".format(key, value))
import numpy as np
np.set_printoptions(suppress=True)
stars = np.array(stars, dtype=np.float)
forks = np.array(forks, dtype=np.float)
q = np.arange(0, 1, 0.1)
print(np.quantile(stars, q))
print(np.quantile(forks, q))
# # cvs without any git
# for entry in self.entries:
# repos = entry['Code repository']
# cvs = [repo for repo in repos if 'cvs' in repo]
# git = [repo for repo in repos if 'git' in repo]
# if len(cvs) > 0 and len(git) == 0:
# print('Entry "{}" with repos: {}'.format(entry['File'], repos))
# # combine content keywords
# n = len('content ')

View File

@ -2,11 +2,17 @@
Maintenance of inspirations.md and synchronization with the inspirations in the entries.
"""
from utils import osg, osg_ui
# TODO wikipedia search and match
# TODO mark those that are contained in the database
# TODO search fandom
import time
from utils import osg, osg_ui, osg_wikipedia
valid_duplicates = ('Age of Empires', 'ARMA', 'Catacomb', 'Civilization', 'Company of Heroes', 'Descent', 'Duke Nukem', 'Dungeon Keeper',
'Final Fantasy', 'Heroes of Might and Magic', 'Jazz Jackrabbit', 'Marathon', 'Master of Orion', 'Quake',
'RollerCoaster Tycoon', 'Star Wars Jedi Knight', 'The Settlers', 'Ultima', 'Ship Simulator')
'RollerCoaster Tycoon', 'Star Wars Jedi Knight', 'The Settlers', 'Ultima', 'Ship Simulator', 'Prince of Persia',
'Panzer General', 'LBreakout', 'Jagged Alliance')
class InspirationMaintainer:
@ -30,14 +36,15 @@ class InspirationMaintainer:
if not self.inspirations:
print('inspirations not yet loaded')
return
start_time = time.process_time()
inspiration_names = list(self.inspirations.keys())
for index, name in enumerate(inspiration_names):
for other_name in inspiration_names[index + 1:]:
if any((name.startswith(x) and other_name.startswith(x) for x in valid_duplicates)):
continue
if osg.name_similarity(name, other_name) > 0.8:
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.9:
print(' {} - {} is similar'.format(name, other_name))
print('duplicates checked')
print('duplicates checked took {:.1f}s'.format(time.process_time()-start_time))
def check_for_orphans(self):
if not self.inspirations:
@ -75,7 +82,28 @@ class InspirationMaintainer:
for inspiration in self.inspirations.values():
if 'Media' in inspiration and any(('https://en.wikipedia.org/wiki/' in x for x in inspiration['Media'])):
continue
name = inspiration['Name']
# search in wikipedia
results = osg_wikipedia.search(inspiration['Name'])
# throw out all results whose title contains 'disambiguation' or 'film'
results = [r for r in results if not any(x in r for x in ('disambiguation', 'film'))]
# the simple ones
results = [r for r in results if 'video game' in r]
if len(results) == 1 and 'series' not in name:
pages = osg_wikipedia.pages(results)
page = pages[0]
url = page.url
# add url to Media field
inspiration['Media'] = inspiration.get('Media', []) + [url]
print('{}: {}'.format(name, url))
# check for name similarity
# results = [r for r in results if any(x in r for x in ('video game', 'series')) or osg.name_similarity(str.casefold(inspiration['Name']), str.casefold(r)) > 0.8]
# results = [r for r in results if any(x in r for x in ('video game', 'series'))]
# print('{}: {}'.format(inspiration['Name'], results))
def update_inspired_entries(self):
if not self.inspirations:
@ -113,6 +141,7 @@ if __name__ == "__main__":
'Check for duplicates': m.check_for_duplicates,
'Check for orphans': m.check_for_orphans,
'Check for inspirations not listed': m.check_for_missing_inspirations_in_entries,
'Check for wikipedia links': m.check_for_wikipedia_links,
'Update inspirations from entries': m.update_inspired_entries,
'Read entries': m.read_entries
}

View File

@ -458,7 +458,7 @@ def create_entry_content(entry):
# we automatically sort some fields
sort_fun = lambda x: str.casefold(x.value)
for field in ('Media', 'Inspiration', 'Code Language'):
for field in ('Media', 'Inspiration', 'Code Language', 'Developer', 'Build system'):
if field in entry:
values = entry[field]
entry[field] = sorted(values, key=sort_fun)

View File

@ -4,6 +4,7 @@ Using https://github.com/goldsmith/Wikipedia
"""
import wikipedia
wikipedia.set_lang('en') # just in case that isn't so already
def search(search_term, results=3):
@ -13,4 +14,12 @@ def search(search_term, results=3):
:param results: number of results requested from wikipedia.search (default 3)
:return:
"""
return wikipedia.search(search_term, results=results)
return wikipedia.search(search_term, results=results)
def pages(titles):
    """
    Fetches the Wikipedia page for each of the given titles.

    :param titles: iterable of page titles, each passed to wikipedia.page with auto_suggest disabled
    :return: list with the result of wikipedia.page for every title, in the order of titles
    """
    return [wikipedia.page(title, auto_suggest=False) for title in titles]