sorting of developer names
This commit is contained in:
@ -15,6 +15,14 @@ http://haxepunk.com/
|
||||
http://hcsoftware.sourceforge.net/jason-rohrer/ (various games there)
|
||||
http://hge.relishgames.com/
|
||||
http://hgm.nubati.net/
|
||||
https://en.wikipedia.org/wiki/Catacomb_(video_game) (released under GNU)
|
||||
https://github.com/JCWasmx86/Conquer
|
||||
https://github.com/SaeruHikari/SakuraEngine
|
||||
https://github.com/zhangdoa/InnocenceEngine
|
||||
https://github.com/septag/rizz
|
||||
https://github.com/EvilPudding/candle
|
||||
https://github.com/TorqueGameEngines/Torque3D
|
||||
https://github.com/polymonster/pmtech
|
||||
http://icculus.org/
|
||||
http://icculus.org/asciiroth/
|
||||
http://icculus.org/avp/
|
||||
|
@ -25,7 +25,7 @@ name_aliases = {'Andreas Rosdal': 'Andreas Røsdal', 'davefancella': 'Dave Fance
|
||||
'buginator': 'Buginator', 'CiprianKhlud': 'Ciprian Khlud', 'dericpage': 'Deric Page',
|
||||
'DI Murat Sari': 'Murat Sari', 'DolceTriade': 'Dolce Triade', 'DreamingPsion': 'Dreaming Psion',
|
||||
'edwardlii': 'Edward Lii', 'erik-vos': 'Erik Vos', 'joevenzon': 'Joe Venzon', 'noamgat': 'Noam Gat',
|
||||
'Dr. Martin Brumm': 'Martin Brumm'}
|
||||
'Dr. Martin Brumm': 'Martin Brumm', 'South Bound Apps (Android)': 'South Bound Apps'}
|
||||
|
||||
|
||||
def collect_github_entries():
|
||||
|
@ -39,7 +39,7 @@ class DevelopersMaintainer:
|
||||
for other_name in developer_names[index + 1:]:
|
||||
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.85:
|
||||
print(' {} - {} is similar'.format(name, other_name))
|
||||
print('duplicates checked (took {:.3f}s)'.format(time.process_time()-start_time))
|
||||
print('duplicates checked (took {:.1f}s)'.format(time.process_time()-start_time))
|
||||
|
||||
def check_for_orphans(self):
|
||||
if not self.developers:
|
||||
|
@ -7,9 +7,8 @@ Sorts the entries in the contents files of each sub folder alphabetically.
|
||||
"""
|
||||
|
||||
# TODO check for within an entry for similar dev names
|
||||
# TODO special mode (load all and save all)
|
||||
# TODO sort devs alphabetically upon save (if not done yet)
|
||||
# TODO statistics on git repositories (created, stars, forks) and meaningful categories
|
||||
# TODO wikipedia (media search)
|
||||
# TODO google search (for homepages or media entries) for popular ones at least
|
||||
|
||||
import os
|
||||
import re
|
||||
@ -864,13 +863,46 @@ class EntriesMaintainer:
|
||||
print('entries not yet loaded')
|
||||
return
|
||||
|
||||
# cvs without any git
|
||||
# collect statistics on git repositories
|
||||
created = {}
|
||||
stars = []
|
||||
forks = []
|
||||
for entry in self.entries:
|
||||
repos = entry['Code repository']
|
||||
cvs = [repo for repo in repos if 'cvs' in repo]
|
||||
git = [repo for repo in repos if 'git' in repo]
|
||||
if len(cvs) > 0 and len(git) == 0:
|
||||
print('Entry "{}" with repos: {}'.format(entry['File'], repos))
|
||||
comments = [x.comment for x in repos if x.value.startswith('https://github.com/') and x.comment]
|
||||
for comment in comments:
|
||||
comment = comment.split(',')
|
||||
comment = [c.strip() for c in comment]
|
||||
comment = [c for c in comment if c.startswith('@')]
|
||||
if comment:
|
||||
try:
|
||||
comment = [c.split(' ') for c in comment]
|
||||
comment = [c[1] for c in comment if len(c) > 1]
|
||||
except Exception:
|
||||
print(comment)
|
||||
raise
|
||||
created[comment[0]] = created.get(comment[0], 0) + 1
|
||||
stars.append(comment[1])
|
||||
forks.append(comment[2])
|
||||
|
||||
for key, value in sorted(created.items(), key=lambda x: x[0]):
|
||||
print("{} : {}".format(key, value))
|
||||
|
||||
import numpy as np
|
||||
np.set_printoptions(suppress=True)
|
||||
stars = np.array(stars, dtype=np.float)
|
||||
forks = np.array(forks, dtype=np.float)
|
||||
q = np.arange(0, 1, 0.1)
|
||||
print(np.quantile(stars, q))
|
||||
print(np.quantile(forks, q))
|
||||
|
||||
# # cvs without any git
|
||||
# for entry in self.entries:
|
||||
# repos = entry['Code repository']
|
||||
# cvs = [repo for repo in repos if 'cvs' in repo]
|
||||
# git = [repo for repo in repos if 'git' in repo]
|
||||
# if len(cvs) > 0 and len(git) == 0:
|
||||
# print('Entry "{}" with repos: {}'.format(entry['File'], repos))
|
||||
|
||||
# # combine content keywords
|
||||
# n = len('content ')
|
||||
|
@ -2,11 +2,17 @@
|
||||
Maintenance of inspirations.md and synchronization with the inspirations in the entries.
|
||||
"""
|
||||
|
||||
from utils import osg, osg_ui
|
||||
# TODO wikipedia search and match
|
||||
# TODO mark those that are contained in the database
|
||||
# TODO search fandom
|
||||
|
||||
import time
|
||||
from utils import osg, osg_ui, osg_wikipedia
|
||||
|
||||
valid_duplicates = ('Age of Empires', 'ARMA', 'Catacomb', 'Civilization', 'Company of Heroes', 'Descent', 'Duke Nukem', 'Dungeon Keeper',
|
||||
'Final Fantasy', 'Heroes of Might and Magic', 'Jazz Jackrabbit', 'Marathon', 'Master of Orion', 'Quake',
|
||||
'RollerCoaster Tycoon', 'Star Wars Jedi Knight', 'The Settlers', 'Ultima', 'Ship Simulator')
|
||||
'RollerCoaster Tycoon', 'Star Wars Jedi Knight', 'The Settlers', 'Ultima', 'Ship Simulator', 'Prince of Persia',
|
||||
'Panzer General', 'LBreakout', 'Jagged Alliance')
|
||||
|
||||
|
||||
class InspirationMaintainer:
|
||||
@ -30,14 +36,15 @@ class InspirationMaintainer:
|
||||
if not self.inspirations:
|
||||
print('inspirations not yet loaded')
|
||||
return
|
||||
start_time = time.process_time()
|
||||
inspiration_names = list(self.inspirations.keys())
|
||||
for index, name in enumerate(inspiration_names):
|
||||
for other_name in inspiration_names[index + 1:]:
|
||||
if any((name.startswith(x) and other_name.startswith(x) for x in valid_duplicates)):
|
||||
continue
|
||||
if osg.name_similarity(name, other_name) > 0.8:
|
||||
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.9:
|
||||
print(' {} - {} is similar'.format(name, other_name))
|
||||
print('duplicates checked')
|
||||
print('duplicates checked took {:.1f}s'.format(time.process_time()-start_time))
|
||||
|
||||
def check_for_orphans(self):
|
||||
if not self.inspirations:
|
||||
@ -75,7 +82,28 @@ class InspirationMaintainer:
|
||||
for inspiration in self.inspirations.values():
|
||||
if 'Media' in inspiration and any(('https://en.wikipedia.org/wiki/' in x for x in inspiration['Media'])):
|
||||
continue
|
||||
name = inspiration['Name']
|
||||
# search in wikipedia
|
||||
results = osg_wikipedia.search(inspiration['Name'])
|
||||
# throw out all (disambiguation) pages
|
||||
results = [r for r in results if not any(x in r for x in ('disambiguation', 'film'))]
|
||||
|
||||
# the simple ones
|
||||
results = [r for r in results if 'video game' in r]
|
||||
if len(results) == 1 and 'series' not in name:
|
||||
pages = osg_wikipedia.pages(results)
|
||||
page = pages[0]
|
||||
url = page.url
|
||||
# add url to Media field
|
||||
inspiration['Media'] = inspiration.get('Media', []) + [url]
|
||||
print('{}: {}'.format(name, url))
|
||||
|
||||
|
||||
|
||||
# check for name similarity
|
||||
# results = [r for r in results if any(x in r for x in ('video game', 'series')) or osg.name_similarity(str.casefold(inspiration['Name']), str.casefold(r)) > 0.8]
|
||||
# results = [r for r in results if any(x in r for x in ('video game', 'series'))]
|
||||
# print('{}: {}'.format(inspiration['Name'], results))
|
||||
|
||||
def update_inspired_entries(self):
|
||||
if not self.inspirations:
|
||||
@ -113,6 +141,7 @@ if __name__ == "__main__":
|
||||
'Check for duplicates': m.check_for_duplicates,
|
||||
'Check for orphans': m.check_for_orphans,
|
||||
'Check for inspirations not listed': m.check_for_missing_inspirations_in_entries,
|
||||
'Check for wikipedia links': m.check_for_wikipedia_links,
|
||||
'Update inspirations from entries': m.update_inspired_entries,
|
||||
'Read entries': m.read_entries
|
||||
}
|
||||
|
@ -458,7 +458,7 @@ def create_entry_content(entry):
|
||||
|
||||
# we automatically sort some fields
|
||||
sort_fun = lambda x: str.casefold(x.value)
|
||||
for field in ('Media', 'Inspiration', 'Code Language'):
|
||||
for field in ('Media', 'Inspiration', 'Code Language', 'Developer', 'Build system'):
|
||||
if field in entry:
|
||||
values = entry[field]
|
||||
entry[field] = sorted(values, key=sort_fun)
|
||||
|
@ -4,6 +4,7 @@ Using https://github.com/goldsmith/Wikipedia
|
||||
"""
|
||||
|
||||
import wikipedia
|
||||
wikipedia.set_lang('en') # just in case that isn't so already
|
||||
|
||||
|
||||
def search(search_term, results=3):
|
||||
@ -13,4 +14,12 @@ def search(search_term, results=3):
|
||||
:param max_results:
|
||||
:return:
|
||||
"""
|
||||
return wikipedia.search(search_term, results=results)
|
||||
return wikipedia.search(search_term, results=results)
|
||||
|
||||
|
||||
def pages(titles):
    """
    Retrieves one Wikipedia page object per given title.

    Nothing is caught here: any exception raised by wikipedia.page for a title
    propagates to the caller.

    :param titles: iterable of page titles to look up (for example the output of search)
    :return: list with the results of wikipedia.page, in the same order as titles
    """
    # auto_suggest=False is passed so the title is looked up as given
    # (presumably to avoid the library swapping in a suggested title - confirm against the library docs)
    return [wikipedia.page(title, auto_suggest=False) for title in titles]
|
||||
|
Reference in New Issue
Block a user