sourceforge import of developer information
This commit is contained in:
@ -2,27 +2,143 @@
|
||||
Uses the Github API to learn more about the Github projects.
|
||||
"""
|
||||
|
||||
# Github
|
||||
urls = [x for x in repos if x.startswith('https://github.com/')]
|
||||
urls = []
|
||||
for url in urls:
|
||||
print(' github repo: {}'.format(url))
|
||||
github_info = osg_github.retrieve_repo_info(url)
|
||||
for contributor in github_info['contributors']:
|
||||
name = contributor.name
|
||||
dev = developer_info_lookup(name)
|
||||
in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
|
||||
in_entry = name in entry_developer
|
||||
if in_devs and in_entry:
|
||||
continue # already existing in entry and devs
|
||||
content += ' {}: {}@GH'.format(name, contributor.login)
|
||||
if contributor.blog:
|
||||
content += ' url: {}'.format(contributor.blog)
|
||||
if not in_devs:
|
||||
content += ' (not in devs)'
|
||||
if not in_entry:
|
||||
content += ' (not in entry)'
|
||||
content += '\n'
|
||||
import os
|
||||
import json
|
||||
from utils import constants as c, utils, osg, osg_parse, osg_github
|
||||
|
||||
# text file (JSON content) with the list of entry files that contain a GitHub repository;
# written by collect_github_entries() and read (and shortened) by github_import()
gh_entries_file = os.path.join(c.code_path, 'github_entries.txt')

# a code repository URL starting with this prefix is treated as a GitHub repository
prefix = 'https://github.com/'
|
||||
|
||||
|
||||
def collect_github_entries():
    """
    Reads all entries of the database and stores the file names of those entries that have
    at least one code repository starting with the GitHub prefix.

    The file names are written as a JSON list to gh_entries_file.
    """

    # read entries
    all_entries = osg.read_entries()
    print('{} entries read'.format(len(all_entries)))

    # keep the file of every entry with at least one GitHub code repository
    github_files = [
        item['File']
        for item in all_entries
        if any(repo.startswith(prefix) for repo in item['Code repository'])
    ]

    # write to file
    print('{} entries with github repos'.format(len(github_files)))
    utils.write_text(gh_entries_file, json.dumps(github_files, indent=1))
|
||||
|
||||
|
||||
def _merge_repo_comment(existing, new_tags):
    """
    Returns the comment of a repository with all old '@' tags replaced by new_tags.

    :param existing: Current comment (comma separated parts) or None/empty if there is none.
    :param new_tags: List of freshly generated '@...' tags.
    :return: The new comment, parts joined by ', '.
    """
    kept = []
    if existing:
        # strip BEFORE testing for '@', after the split all parts but the first start with a space
        parts = (part.strip() for part in existing.split(','))
        # empty parts are dropped so that they do not show up as ', ,' in the joined comment
        kept = [part for part in parts if part and not part.startswith('@')]
    return ', '.join(kept + new_tags)


def _github_repo_tags(info, entry):
    """
    Builds the list of '@' tags (archived, created, stars, forks) from the retrieved repository info.

    :param info: Repository information as returned by osg_github.retrieve_repo_info for a single repo.
    :param entry: The entry the repository belongs to (only used for the archived/inactive warning).
    :return: List of tag strings.
    """
    tags = []
    # is archived
    if info['archived']:
        if not osg.is_inactive(entry):
            print('warning: repo is archived but not inactive state??')
        tags.append('@archived')
    tags.append('@created {}'.format(info['created'].year))
    tags.append('@stars {}'.format(info['stars']))
    tags.append('@forks {}'.format(info['forks']))
    return tags


def _register_contributor(contributor, entry, file, all_developers):
    """
    Adds a single GitHub contributor to the entry's developers and to the developer database.

    Contributors that are not of type 'User' or that have less than 4 contributions are ignored.

    :param contributor: Contributor object (type, contributions, name, login and blog are read).
    :param entry: The entry that is currently processed, modified in place.
    :param file: File name of the entry (only used for output).
    :param all_developers: The developer database (dictionary keyed by name), modified in place.
    """
    if contributor.type != 'User' or contributor.contributions < 4:
        return

    # fall back to the login if no real name is given
    name = contributor.name or contributor.login
    nickname = '{}@GH'.format(contributor.login)

    # look up author in entry developers
    if name not in entry.get('Developer', []):
        print(' dev "{}" added to entry {}'.format(name, file))
        entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(name)]

    # look up author in developers data base
    if name in all_developers:
        # NOTE(review): the entry title is not added to the 'Games' of an existing developer here,
        # presumably this is synchronized elsewhere - confirm
        dev = all_developers[name]
        contacts = dev.get('Contact', [])
        if nickname not in contacts:
            print(' existing dev "{}" added nickname ({}) to developer database'.format(name, nickname))
            # check that name has not already @GH contact
            if any(x.endswith('@GH') for x in contacts):
                print('warning: already GH contact')
            dev['Contact'] = contacts + [nickname]
        if contributor.blog and contributor.blog not in dev.get('Home', []):
            dev['Home'] = dev.get('Home', []) + [contributor.blog]
    else:
        print(' dev "{}" ({}) added to developer database'.format(name, nickname))
        all_developers[name] = {'Name': name, 'Contact': [nickname], 'Games': [entry['Title']]}
        if contributor.blog:
            all_developers[name]['Home'] = [contributor.blog]


def github_import():
    """
    Imports information from GitHub for all entries listed in gh_entries_file.

    For every GitHub repository of an entry the repository comment gets updated '@' tags
    (archived, created, stars, forks), the main language is added to the code languages and
    the contributors are added to the entry's developers and to the developer database.

    If an exception occurs, it is propagated, but before that the list in gh_entries_file is
    shortened to the entries that are not yet completely processed and the developer database
    is written, so that the import can be continued later.

    :return: None
    """
    files = json.loads(utils.read_text(gh_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))

    # number of completely processed (written) entries, also valid if files is empty
    processed = 0

    # exceptions are not handled here (they end the execution), but the progress is saved in any case
    try:
        # loop over each entry
        for file in files:
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            code_repositories = entry['Code repository']
            repos = [x.value for x in code_repositories if x.startswith(prefix)]
            for repo in repos:
                print(' GH repo {}'.format(repo))

                info = osg_github.retrieve_repo_info(repo)

                # update comment of the repository (repo was taken from code_repositories, so it is found)
                repo_item = next(r for r in code_repositories if r.value == repo)
                repo_item.comment = _merge_repo_comment(repo_item.comment, _github_repo_tags(info, entry))

                # language in languages (the language can be missing for a repository)
                # NOTE(review): developers are wrapped in osg_parse.ValueWithComment, the language is
                # appended as it is - confirm that osg.write_entry accepts this
                language = info['language']
                if language and language not in entry['Code language']:
                    entry['Code language'].append(language)

                # contributors
                for contributor in info['contributors']:
                    _register_contributor(contributor, entry, file, all_developers)

            entry['Code repository'] = code_repositories
            osg.write_entry(entry)
            processed += 1
    finally:
        # shorten file list (an entry that failed stays in the list, finished entries are removed)
        utils.write_text(gh_entries_file, json.dumps(files[processed:], indent=1))

        # entries that are already written may reference new developers, so always store them
        osg.write_developers(all_developers)
        print('developers database updated')
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # first step (currently commented out): collect the entries that have a GitHub repository
    # collect_github_entries()

    # second step: import information from GitHub for the collected entries
    github_import()
|
||||
|
@ -33,7 +33,7 @@ class DevelopersMaintainer:
|
||||
developer_names = list(self.developers.keys())
|
||||
for index, name in enumerate(developer_names):
|
||||
for other_name in developer_names[index + 1:]:
|
||||
if osg.name_similarity(name, other_name) > 0.8:
|
||||
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.85:
|
||||
print(' {} - {} is similar'.format(name, other_name))
|
||||
print('duplicates checked')
|
||||
|
||||
|
@ -14,7 +14,9 @@ prefix = 'https://sourceforge.net/projects/'
|
||||
# author names on SF that differ from the names we use for these authors
|
||||
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'baris yuksel': 'Baris Yuksel',
|
||||
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic',
|
||||
'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen', 'Louens Veen': 'Lourens Veen'}
|
||||
'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen', 'Louens Veen': 'Lourens Veen',
|
||||
'linley_henzell': 'Linley Henzell', 'Patrice DUHAMEL': 'Patrice Duhamel', 'Etienne SOBOLE': 'Etienne Sobole',
|
||||
'L. H. [Lubomír]': 'L. H. Lubomír'}
|
||||
|
||||
SF_ignore_list = ('', 'Arianne Integration Bot')
|
||||
|
||||
@ -75,7 +77,8 @@ def sourceforge_import():
|
||||
url_members = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
|
||||
response = requests.get(url_members)
|
||||
if response.status_code != 200:
|
||||
raise RuntimeError('url {} not accessible'.format(url_members))
|
||||
print('error: url {} not accessible, status {}'.format(url_members, response.status_code))
|
||||
raise RuntimeError()
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
authors = soup.find('div', id='content_base').find('table').find_all('tr')
|
||||
authors = [author.find_all('td') for author in authors]
|
||||
@ -84,8 +87,11 @@ def sourceforge_import():
|
||||
# sometimes author already contains the full url, sometimes not
|
||||
url_author = 'https://sourceforge.net' + author if not author.startswith('http') else author
|
||||
response = requests.get(url_author)
|
||||
if response.status_code != 200 and author not in ('/u/favorito/',):
|
||||
print('error: url {} not accessible, status {}'.format(url_author, response.status_code))
|
||||
raise RuntimeError()
|
||||
url_author = response.url # could be different now
|
||||
if 'auth/?return_to' in url_author:
|
||||
if 'auth/?return_to' in url_author or response.status_code != 200:
|
||||
# for some reason authorisation is forbidden or page was not available (happens for example for /u/kantaros)
|
||||
author_name = author[3:-1]
|
||||
nickname = author_name
|
||||
@ -96,6 +102,7 @@ def sourceforge_import():
|
||||
nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
|
||||
nickname = nickname.replace('\n', '').strip()
|
||||
nickname += '@SF' # our indication of the platform to search for
|
||||
author_name = author_name.strip() # names can still have white spaces before or after
|
||||
|
||||
if author_name in SF_ignore_list:
|
||||
continue
|
||||
@ -119,7 +126,7 @@ def sourceforge_import():
|
||||
all_developers_changed = True
|
||||
else:
|
||||
print(' dev "{}" ({}) added to developer database'.format(author_name, nickname))
|
||||
all_developers[author_name] = {'Name': author_name, 'Contact': nickname, 'Games': [entry['Title']]}
|
||||
all_developers[author_name] = {'Name': author_name, 'Contact': [nickname], 'Games': [entry['Title']]}
|
||||
all_developers_changed = True
|
||||
|
||||
if entry_changed:
|
||||
|
@ -40,9 +40,9 @@ def retrieve_repo_info(repos):
|
||||
for repo in repos:
|
||||
repo = normalize_repo_name(repo)
|
||||
r = g.get_repo(repo)
|
||||
e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'description': r.description,
|
||||
'language': r.language, 'last modified': r.last_modified, 'open issues count': r.open_issues_count,
|
||||
'stars count': r.stargazers_count, 'topics': r.topics, 'repo': repo}
|
||||
e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'created': r.created_at, 'description': r.description,
|
||||
'forks': r.forks_count, 'language': r.language, 'last modified': r.last_modified, 'name': r.name,
|
||||
'open issues count': r.open_issues_count, 'owner': r.owner, 'stars': r.stargazers_count, 'topics': r.get_topics(), 'repo': repo}
|
||||
result.append(e)
|
||||
if single_repo:
|
||||
result = result[0]
|
||||
|
Reference in New Issue
Block a user