synchronization of developers in entries with developers list
This commit is contained in:
243
code/maintenance_developers.py
Normal file
243
code/maintenance_developers.py
Normal file
@ -0,0 +1,243 @@
|
||||
"""
|
||||
Checks the entries and tries to detect additional developer content, by retrieving websites or logging information from
|
||||
stored Git repositories.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import requests
|
||||
from utils import osg, osg_ui
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import constants as c, utils, osg, osg_github
|
||||
|
||||
|
||||
def developer_info_lookup(name):
    """
    Looks up a developer record by its name.

    Walks through the module-level ``developer_info`` collection in order and stops at the first record whose
    'Name' value equals the requested name.

    NOTE(review): ``developer_info`` is not defined in the visible part of this module; it must exist as a global
    before this function is called.

    :param name: Developer name to search for
    :return: The first record with a matching 'Name', or None if no record matches
    """
    matching_records = (record for record in developer_info if record['Name'] == name)
    return next(matching_records, None)
|
||||
|
||||
|
||||
# Maps author names as they are shown on SourceForge (SF) to the names under which we list these authors.
# Names that are not a key of this mapping are used unchanged.
SF_alias_list = {
    'Erik Johansson (aka feneur)': 'Erik Johansson',
    'Itms': 'Nicolas Auvray',
    'Wraitii': 'Lancelot de Ferrière',
    'Simzer': 'Simon Laszlo',
    'armin bajramovic': 'Armin Bajramovic',
}
|
||||
|
||||
def _presence_suffix(in_devs, in_entry):
    """Returns the ' (not in devs)' / ' (not in entry)' markers that are appended to a report line."""
    suffix = ''
    if not in_devs:
        suffix += ' (not in devs)'
    if not in_entry:
        suffix += ' (not in entry)'
    return suffix


def _collect_sourceforge_developers(home, entry_developer, timeout):
    """
    Collects the members of the SourceForge project of an entry that are not yet fully known to us.

    Nothing is retrieved unless exactly one URL in ``home`` starts with https://sourceforge.net/projects/.

    :param home: List of home URLs of the entry
    :param entry_developer: Developer names already listed in the entry
    :param timeout: Timeout in seconds for each HTTP request
    :return: One report line per member that is missing in the developers or in the entry ('' if there is none)
    """
    prefix = 'https://sourceforge.net/projects/'
    project_urls = [x for x in home if x.startswith(prefix)]
    if len(project_urls) != 1:
        return ''
    project_url = project_urls[0]
    print(' sourceforge project site: {}'.format(project_url))

    # only the first path segment is the project name, this also works for URLs without a trailing slash or
    # with additional path segments after the project name
    project_name = project_url[len(prefix):].split('/')[0]
    members_url = 'https://sourceforge.net/p/' + project_name + '/_members/'
    response = requests.get(members_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None if an element is missing, in that case the chained call raises an AttributeError
    rows = soup.find('div', id='content_base').find('table').find_all('tr')
    cells_per_row = [row.find_all('td') for row in rows]
    # only rows with exactly three cells are used, the link in the second cell points to the member profile
    profile_links = [cells[1].a['href'] for cells in cells_per_row if len(cells) == 3]

    lines = []
    for link in profile_links:
        # sometimes the link already contains the full url, sometimes not
        profile_url = link if link.startswith('http') else 'https://sourceforge.net' + link
        response = requests.get(profile_url, timeout=timeout)
        # response.url is the final url, it could be different from the requested one
        if 'auth/?return_to' in response.url:
            # ended up on an authentication url instead of the profile, fall back to the raw link
            author_name = link
            nickname = link
        else:
            soup = BeautifulSoup(response.text, 'html.parser')
            author_name = soup.h1.get_text()
            author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
            nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
            nickname = nickname.replace('\n', '').strip()
        dev = developer_info_lookup(author_name)
        in_devs = bool(dev and 'contact' in dev and nickname + '@SF' in dev['contact'])
        in_entry = author_name in entry_developer
        if in_devs and in_entry:
            continue  # already existing in entry and devs
        lines.append(' {} : {}@SF{}\n'.format(author_name, nickname, _presence_suffix(in_devs, in_entry)))
    return ''.join(lines)


def _collect_github_developers(repos, entry_developer):
    """
    Collects the contributors of the GitHub repositories of an entry that are not yet fully known to us.

    :param repos: List of code repository URLs of the entry
    :param entry_developer: Developer names already listed in the entry
    :return: One report line per contributor that is missing in the developers or in the entry ('' if none)
    """
    lines = []
    for url in [x for x in repos if x.startswith('https://github.com/')]:
        print(' github repo: {}'.format(url))
        github_info = osg_github.retrieve_repo_info(url)
        for contributor in github_info['contributors']:
            name = contributor.name
            dev = developer_info_lookup(name)
            in_devs = bool(dev and 'contact' in dev and contributor.login + '@GH' in dev['contact'])
            in_entry = name in entry_developer
            if in_devs and in_entry:
                continue  # already existing in entry and devs
            line = ' {}: {}@GH'.format(name, contributor.login)
            if contributor.blog:
                line += ' url: {}'.format(contributor.blog)
            lines.append(line + _presence_suffix(in_devs, in_entry) + '\n')
    return ''.join(lines)


def test(max_entries=40, include_github=False, timeout=30):
    """
    Collects developer information for the entries from SourceForge (and optionally GitHub) and stores those
    developers that are not yet listed in the entry or in the developers in 'collected_developer_info.txt'
    inside c.root_path.

    The file is written in any case, also if the collection is aborted by an exception. It then contains the
    entries that were completely processed before.

    NOTE(review): reads the module-level ``entries``, which is not defined in the visible part of this module.

    :param max_entries: Maximal number of entries to process (None for all). The default of 40 is the limit that
        was hard-coded for testing purposes before.
    :param include_github: If True, also the contributors of GitHub code repositories are collected. The default
        keeps the GitHub part switched off, as it was before (by overwriting the list of URLs with an empty list).
    :param timeout: Timeout in seconds for each HTTP request to SourceForge, without it a request that is never
        answered would block forever
    """
    collected = []
    try:
        # loop over infos
        for index, entry in enumerate(entries):
            if max_entries is not None and index >= max_entries:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['Name'])
            print(entry_name)

            entry_developer = entry.get('developer', [])

            # parse home (sourceforge project site)
            content = _collect_sourceforge_developers(entry['home'], entry_developer, timeout)

            # parse source repository (Github)
            if include_github:
                content += _collect_github_developers(entry.get('code repository', []), entry_developer)

            if content:
                collected.append('{}\n\n{}\n'.format(entry_name, content))
    finally:
        # store developer info (exceptions are not handled here, they propagate after the file is written)
        utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), ''.join(collected))
|
||||
|
||||
def compare_entries_developers(entries, developers):
    """
    Cross checks the game entries lists and the developers lists.

    Prints a warning for every developer that is only listed in the entries, for every developer that is only
    listed in the developers and for every developer whose games differ between both lists. Nothing is returned.
    The warnings of each kind are printed sorted by developer name and the games inside a warning about differing
    games are sorted too, so the output is the same for every run.

    :param entries: List of game entries (each with a 'Name' and optionally a list of names under 'developer')
    :param developers: List of developers (each with a 'Name' and a list of entry names under 'Games')
    """

    # from the entries create a dictionary with developer names (developer name -> names of the entries)
    devs1 = {}
    for entry in entries:
        name = entry['Name']
        for dev in entry.get('developer', []):
            devs1.setdefault(dev, []).append(name)
    devs1_names = set(devs1)

    # from the developers create a dictionary with developer names (developer name -> games of the developer)
    devs2 = {dev['Name']: dev['Games'] for dev in developers}
    devs2_names = set(devs2)

    # devs only in entries
    for dev in sorted(devs1_names - devs2_names):
        print('Warning: dev "{}" only in entries ({}), not in developers'.format(dev, ','.join(devs1[dev])))

    # devs only in developers
    for dev in sorted(devs2_names - devs1_names):
        print('Warning: dev "{}" only in developers ({}), not in entries'.format(dev, ','.join(devs2[dev])))

    # for those in both, check that the games lists are equal
    for dev in sorted(devs1_names & devs2_names):
        games1 = set(devs1[dev])
        games2 = set(devs2[dev])
        delta = games1 - games2
        if delta:
            print('Warning: dev "{}" has games in entries ({}) that are not present in developers'.format(
                dev, ', '.join(sorted(delta))))
        delta = games2 - games1
        if delta:
            # join the names like in the warning above instead of printing the representation of the set
            print('Warning: dev "{}" has games in developers ({}) that are not present in entries'.format(
                dev, ', '.join(sorted(delta))))
|
||||
|
||||
|
||||
class DevelopersMaintainer:
    """
    Bundles the maintenance actions for the developers list.

    The developers and the entries are loaded on request (read_developer, read_entries) and are kept in the
    instance. All checks only print their findings and return without doing anything if the required data has
    not been loaded yet.
    """

    def __init__(self):
        # both stay None until the corresponding read method has been called
        self.developers = None
        self.entries = None

    def read_developer(self):
        """
        Reads the developers with osg.read_developers() and prints how many were read.
        """
        self.developers = osg.read_developers()
        print('{} developers read'.format(len(self.developers)))

    def write_developer(self):
        """
        Writes the developers with osg.write_developers(), if they have been loaded before.
        """
        if not self.developers:
            print('developers not yet loaded')
            return
        osg.write_developers(self.developers)
        print('developers written')

    def check_for_duplicates(self):
        """
        Prints every pair of developers whose names have a similarity (osg.name_similarity) above 0.8.
        """
        if not self.developers:
            print('developers not yet loaded')
            return
        developer_names = [x['Name'] for x in self.developers]
        for index, name in enumerate(developer_names):
            # only compare with the names that follow, so every pair is tested once
            for other_name in developer_names[index + 1:]:
                if osg.name_similarity(name, other_name) > 0.8:
                    print(' {} - {} is similar'.format(name, other_name))
        print('duplicates checked')

    def check_for_orphans(self):
        """
        Prints every developer without games, that is with a missing or an empty "Games" field.
        """
        if not self.developers:
            print('developers not yet loaded')
            return
        for dev in self.developers:
            # get() instead of [] because a missing field is exactly what shall be reported here
            if not dev.get('Games'):
                print(' {} has no "Games" field'.format(dev['Name']))
        print('orphanes checked')

    def check_for_missing_developers_in_entries(self):
        """
        Checks for every game of every developer that an entry with that title exists and that the entry lists
        the developer under 'Developer'. All findings are printed. Developers without a "Games" field are skipped.

        :raises AssertionError: If more than one entry has the title of a game of a developer
        """
        if not self.developers:
            print('developer not yet loaded')
            return
        if not self.entries:
            print('entries not yet loaded')
            return

        # index the entries by title once, instead of searching through all entries for every single game
        entries_by_title = {}
        for entry in self.entries:
            entries_by_title.setdefault(entry['Title'], []).append(entry)

        for dev in self.developers:
            dev_name = dev['Name']
            for entry_name in dev.get('Games') or []:
                matches = entries_by_title.get(entry_name, [])
                if len(matches) > 1:
                    # raised explicitly, because an assert statement is removed when running with -O
                    raise AssertionError('{} entries with title "{}"'.format(len(matches), entry_name))
                if not matches:
                    print('Entry "{}" listed as game of developer "{}" but this entry does not exist'.format(entry_name, dev_name))
                else:
                    entry = matches[0]
                    if 'Developer' not in entry or dev_name not in entry['Developer']:
                        print('Entry "{}" listed in developer "{}" but not listed in that entry'.format(entry_name, dev_name))
        print('missed developer checked')

    def read_entries(self):
        """
        Reads the entries with osg.read_entries() and prints how many were read.
        """
        self.entries = osg.read_entries()
        print('{} entries read'.format(len(self.entries)))
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # a single maintainer instance keeps the loaded developers and entries for all actions
    maintainer = DevelopersMaintainer()

    # label -> method of the maintainer that is handed to the button app under that label
    button_actions = {
        'Read developers': maintainer.read_developer,
        'Write developers': maintainer.write_developer,
        'Check for duplicates': maintainer.check_for_duplicates,
        'Check for orphans': maintainer.check_for_orphans,
        'Check for games in developers not listed': maintainer.check_for_missing_developers_in_entries,
        'Read entries': maintainer.read_entries,
    }

    osg_ui.run_simple_button_app('Maintenance developer', button_actions)
|
Reference in New Issue
Block a user