developer import from sourceforge

This commit is contained in:
Trilarion
2021-01-06 16:13:17 +01:00
parent 489adf0f88
commit 32ae77c7da
99 changed files with 2873 additions and 189 deletions

View File

@ -3,119 +3,8 @@ Checks the entries and tries to detect additional developer content, by retrievi
stored Git repositories.
"""
import os
import sys
import requests
from utils import osg, osg_ui
from bs4 import BeautifulSoup
from utils import constants as c, utils, osg, osg_github
# author names in SF that aren't the author names how we have them
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray',
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
def test():
# loop over infos
developers = ''
try:
i = 0
# active = False
for entry in entries:
# if entry['Name'] == 'Aleph One':
# active = True
# if not active:
# continue
# for testing purposes
i += 1
if i > 40:
break
# print
entry_name = '{} - {}'.format(entry['file'], entry['Name'])
print(entry_name)
content = ''
entry_developer = entry.get('developer', [])
# parse home
home = entry['home']
# sourceforge project site
prefix = 'https://sourceforge.net/projects/'
url = [x for x in home if x.startswith(prefix)]
if len(url) == 1:
url = url[0]
print(' sourceforge project site: {}'.format(url))
url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
authors = soup.find('div', id='content_base').find('table').find_all('tr')
authors = [author.find_all('td') for author in authors]
authors = [author[1].a['href'] for author in authors if len(author) == 3]
for author in authors:
# sometimes author already contains the full url, sometimes not
url = 'https://sourceforge.net' + author if not author.startswith('http') else author
response = requests.get(url)
url = response.url # could be different now
if 'auth/?return_to' in url:
# for some reason authorisation is forbidden
author_name = author
nickname = author
else:
soup = BeautifulSoup(response.text, 'html.parser')
author_name = soup.h1.get_text()
author_name = SF_alias_list.get(author_name, author_name) # replace by alias if possible
nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
nickname = nickname.replace('\n', '').strip()
dev = developer_info_lookup(author_name)
in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
in_entry = author_name in entry_developer
if in_devs and in_entry:
continue # already existing in entry and devs
content += ' {} : {}@SF'.format(author_name, nickname)
if not in_devs:
content += ' (not in devs)'
if not in_entry:
content += ' (not in entry)'
content += '\n'
# parse source repository
repos = entry.get('code repository', [])
# Github
urls = [x for x in repos if x.startswith('https://github.com/')]
urls = []
for url in urls:
print(' github repo: {}'.format(url))
github_info = osg_github.retrieve_repo_info(url)
for contributor in github_info['contributors']:
name = contributor.name
dev = developer_info_lookup(name)
in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
in_entry = name in entry_developer
if in_devs and in_entry:
continue # already existing in entry and devs
content += ' {}: {}@GH'.format(name, contributor.login)
if contributor.blog:
content += ' url: {}'.format(contributor.blog)
if not in_devs:
content += ' (not in devs)'
if not in_entry:
content += ' (not in entry)'
content += '\n'
if content:
developers += '{}\n\n{}\n'.format(entry_name, content)
except RuntimeError as e:
raise e
# pass
finally:
# store developer info
utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
from utils import osg_ui
from utils import osg
class DevelopersMaintainer:
@ -202,6 +91,16 @@ class DevelopersMaintainer:
self.entries = osg.read_entries()
print('{} entries read'.format(len(self.entries)))
def special_ops(self):
# need entries loaded
if not self.entries:
print('entries not yet loaded')
return
for entry in self.entries:
for developer in entry.get('Developer', []):
if developer.comment:
print('{:<25} - {:<25} - {}'.format(entry['File'], developer.value, developer.comment))
if __name__ == "__main__":
@ -214,6 +113,7 @@ if __name__ == "__main__":
'Check for orphans': m.check_for_orphans,
'Check for games in developers not listed': m.check_for_missing_developers_in_entries,
'Update developers from entries': m.update_developers_from_entries,
'Special': m.special_ops,
'Read entries': m.read_entries
}