141 lines
5.4 KiB
Python
141 lines
5.4 KiB
Python
"""
Once data from libregamewiki is imported, synchronize it with our database, i.e. identify the entries both have in
common, estimate the differences between the entries both have in common, and suggest adding the entries they do not
have in common to each other.

unique imported fields: 'assets license', 'categories', 'code language', 'code license', 'developer', 'engine', 'genre', 'library', 'linux-packages', 'name', 'platform'
"""
|
|
|
|
import json
|
|
from utils.osg import *
|
|
|
|
|
|
def get_unique_field_content(field, entries):
    """
    Collects the distinct values stored under a field across all entries.

    :param field: Key to look up in each entry.
    :param entries: Iterable of dict-like entries; entries that do not contain the key are skipped.
    :return: Sorted list of the distinct values found under the key.
    """
    unique_content = set()
    for entry in entries:
        if field not in entry:
            continue
        value = entry[field]
        if isinstance(value, str):
            # a plain string is a single value; set.update() would split it into characters
            unique_content.add(value)
        else:
            unique_content.update(value)
    return sorted(unique_content)
|
|
|
|
# platform names and their replacements
platform_replacements = {
    'Mac': 'macOS',
}

# entry names and the names they are replaced with before the comparison
name_replacements = {
    'Eat the Whistle': 'Eat The Whistle',
    'Scorched 3D': 'Scorched3D',
    'Silver Tree': 'SilverTree',
    'Blob Wars Episode 1 : Metal Blob Solid': 'Blobwars: Metal Blob Solid',
    'Fall Of Imiryn': 'Fall of Imiryn',
    'Liquid War 6': 'Liquid War',
    'Gusanos': 'GUSANOS',
}

# code language names and their replacements
language_replacements = {
    'lua': 'Lua',
}

# code languages that are removed from the imported entries
ignored_languages = [
    'HTML',
    'XML',
    'WML',
]
|
|
|
|
|
|
def list_compare(a, b, k):
    """
    Reports which items of a are not contained in b.

    :param a: Items to look for (order and duplicates are kept).
    :param b: Container the items are looked up in.
    :param k: Label that prefixes every reported item.
    :return: One line per item of a that is missing in b, or an empty string if nothing is missing.
    """
    absent = [item for item in a if item not in b]
    return ''.join(' {} {} missing\n'.format(k, item) for item in absent)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
similarity_threshold = 0.8
|
|
|
|
# paths
|
|
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
|
|
|
# import lgw import
|
|
json_path = os.path.join(root_path, 'tools', 'lgw_import.json')
|
|
text = read_text(json_path)
|
|
lgw_entries = json.loads(text)
|
|
|
|
# apply name and language replacements and drop ignored languages
|
|
for index, lgw_entry in enumerate(lgw_entries):
|
|
if lgw_entry['name'] in name_replacements:
|
|
lgw_entry['name'] = name_replacements[lgw_entry['name']]
|
|
if 'code language' in lgw_entry:
|
|
languages = lgw_entry['code language']
|
|
languages = ['Python' if x.startswith('Python') else x for x in languages]
|
|
languages = ['PHP' if x.startswith('PHP') else x for x in languages]
|
|
h = []
|
|
for l in languages:
|
|
for g in ('/', 'and'):
|
|
if g in l:
|
|
l = l.split(g)
|
|
l = [x.strip() for x in l]
|
|
if type(l) == str:
|
|
l = [l]
|
|
h.extend(l)
|
|
languages = ['C++' if x.startswith('C++') else x for x in h]
|
|
languages = ['C' if x.startswith('C ') else x for x in languages]
|
|
languages = [language_replacements[x] if x in language_replacements else x for x in languages]
|
|
languages = [x for x in languages if x not in ignored_languages]
|
|
|
|
lgw_entry['code language'] = languages
|
|
lgw_entries[index] = lgw_entry
|
|
|
|
# check for unique field names
|
|
unique_fields = set()
|
|
for lgw_entry in lgw_entries:
|
|
unique_fields.update(lgw_entry.keys())
|
|
unique_fields = sorted(list(unique_fields))
|
|
print('unique lgw fields: {}'.format(unique_fields))
|
|
|
|
# unique contents
|
|
print('{}: {}'.format('platform', get_unique_field_content('platform', lgw_entries)))
|
|
print('{}: {}'.format('code language', get_unique_field_content('code language', lgw_entries)))
|
|
print('{}: {}'.format('categories', get_unique_field_content('categories', lgw_entries)))
|
|
print('{}: {}'.format('genre', get_unique_field_content('genre', lgw_entries)))
|
|
print('{}: {}'.format('library', get_unique_field_content('library', lgw_entries)))
|
|
print('{}: {}'.format('code license', get_unique_field_content('code license', lgw_entries)))
|
|
print('{}: {}'.format('assets license', get_unique_field_content('assets license', lgw_entries)))
|
|
print('{}: {}'.format('engine', get_unique_field_content('engine', lgw_entries)))
|
|
|
|
# read our database
|
|
games_path = os.path.join(root_path, 'games')
|
|
our_entries = assemble_infos(games_path)
|
|
print('{} entries with us'.format(len(our_entries)))
|
|
|
|
# just the names
|
|
lgw_names = set([x['name'] for x in lgw_entries])
|
|
our_names = set([x['name'] for x in our_entries])
|
|
common_names = lgw_names & our_names
|
|
lgw_names -= common_names
|
|
our_names -= common_names
|
|
print('{} in both, {} only in LGW, {} only with us'.format(len(common_names), len(lgw_names), len(our_names)))
|
|
|
|
# find similar names among the rest
|
|
#print('similar names')
|
|
#for lgw_name in lgw_names:
|
|
# for our_name in our_names:
|
|
# if game_name_similarity(lgw_name, our_name) > similarity_threshold:
|
|
# print('{} - {}'.format(lgw_name, our_name))
|
|
|
|
# iterate over their entries
|
|
print('\n')
|
|
for lgw_entry in lgw_entries:
|
|
lgw_name = lgw_entry['name']
|
|
|
|
is_included = False
|
|
for our_entry in our_entries:
|
|
our_name = our_entry['name']
|
|
|
|
# find those entries in LGW that are also in our database and compare them
|
|
if lgw_name == our_name:
|
|
is_included = True
|
|
# a match, check the fields
|
|
name = lgw_name
|
|
|
|
p = ''
|
|
|
|
# platform
|
|
key = 'platform'
|
|
p += list_compare(lgw_entry.get(key, []), our_entry.get(key, []), key)
|
|
|
|
# code language
|
|
key = 'code language'
|
|
p += list_compare(lgw_entry.get(key, []), our_entry.get(key, []), key)
|
|
|
|
if p:
|
|
print('{}\n{}'.format(name, p)) |