adding developers
This commit is contained in:
@ -8,10 +8,10 @@ property: "- " _key ": " _value "\n"
|
||||
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
|
||||
_value: /.+(?<! )/ // everything until the end of the line, not ending with a space
|
||||
|
||||
name: /.+?(?= \()/ // developer name: everything until " ("
|
||||
name: /.+?(?= \()/ // developer name: everything until " ("
|
||||
number: /[0-9]+/
|
||||
|
||||
COMMENT: /^\[comment\]: #.*$\n/m // [comment]: # xxx
|
||||
COMMENT: /^\[comment\]: #.*$\n/m // [comment]: # xxx
|
||||
_E: /^$\n/m // empty new line
|
||||
|
||||
%ignore COMMENT
|
129
code/maintenance_collect_developer_infos.py
Normal file
129
code/maintenance_collect_developer_infos.py
Normal file
@ -0,0 +1,129 @@
|
||||
"""
|
||||
Checks the entries and tries to detect additional developer content, by retrieving websites or logging information from
|
||||
stored Git repositories.
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import constants as c, utils, osg, osg_github
|
||||
|
||||
|
||||
def developer_info_lookup(name):
    """
    Searches the module-level developer_info for the first developer whose 'name' equals the given name.

    :param name: developer name to look for
    :return: the matching developer record or None if there is no match
    """
    matches = (candidate for candidate in developer_info if name == candidate['name'])
    return next(matches, None)
|
||||
|
||||
# author names as shown on SourceForge (SF) that differ from the names we use for these authors
|
||||
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
|
||||
|
||||
if __name__ == "__main__":

    # timeout (in seconds) of every web request, so that a stalled server cannot block the run forever
    request_timeout = 30

    # maximal number of entries that are processed (for testing purposes)
    maximal_entries = 40

    # read developer info and write it back right away
    developer_info = osg.read_developer_info()
    osg.write_developer_info(developer_info)

    # assemble info
    entries = osg.assemble_infos()

    # loop over infos, the collected text is stored in the finally clause even if an error aborts the loop
    developers = ''
    try:
        for count, entry in enumerate(entries, start=1):

            # for testing purposes
            if count > maximal_entries:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['name'])
            print(entry_name)
            content = ''

            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']
            # sourceforge project site
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                # only the first path segment is the project name (also works without a trailing slash)
                project = url[len(prefix):].split('/')[0]
                url = 'https://sourceforge.net/p/{}/_members/'.format(project)
                response = requests.get(url, timeout=request_timeout)
                soup = BeautifulSoup(response.text, 'html.parser')
                content_base = soup.find('div', id='content_base')
                table = content_base.find('table') if content_base is not None else None
                if table is None:
                    # the page could not be retrieved or has an unexpected layout
                    print(' no members table found at {}'.format(url))
                    rows = []
                else:
                    rows = table.find_all('tr')
                cells = [row.find_all('td') for row in rows]
                # only rows with three cells are used, the second cell holds the link to the profile
                authors = [cell[1].a['href'] for cell in cells if len(cell) == 3 and cell[1].a is not None]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = author if author.startswith('http') else 'https://sourceforge.net' + author
                    response = requests.get(url, timeout=request_timeout)
                    url = response.url  # could be different now
                    # fallback if the profile is not accessible or cannot be parsed
                    author_name = author
                    nickname = author
                    if 'auth/?return_to' not in url:
                        # otherwise, for some reason, authorisation is forbidden and the fallback is kept
                        soup = BeautifulSoup(response.text, 'html.parser')
                        heading = soup.h1
                        personal_data = soup.find('dl', class_='personal-data')
                        nickname_tag = personal_data.find('dd') if personal_data is not None else None
                        if heading is not None and nickname_tag is not None:
                            author_name = heading.get_text()
                            author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                            nickname = nickname_tag.get_text().replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            # NOTE(review): the following override disables the Github lookup completely, presumably on purpose
            # (e.g. because of API rate limits) - confirm and remove the line to enable the lookup again
            urls = []
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)

    finally:
        # store developer info (errors still propagate, but everything collected so far is kept)
        utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
|
@ -1,2 +1,3 @@
|
||||
pygithub
|
||||
lark-parser
|
||||
lark-parser
|
||||
beautifulsoup4
|
@ -15,7 +15,7 @@ class ListingTransformer(lark.Transformer):
|
||||
raise lark.Discard
|
||||
|
||||
def property(self, x):
    """
    Converts a parsed property into a (key, value) tuple.

    The key is lowercased, so that a lookup of a property does not depend on the capitalization used in the parsed
    file.

    :param x: sequence whose first two items are the key and the value token (both offering a value attribute)
    :return: tuple (lowercased key, value)
    """
    key, value = x[0].value, x[1].value
    return key.lower(), value
|
||||
|
||||
def name(self, x):
    """
    Converts a parsed name into a ('name', value) tuple.

    :param x: sequence whose first item is the name token (offering a value attribute)
    :return: tuple ('name', value of the token)
    """
    token = x[0]
    return 'name', token.value
|
||||
@ -71,6 +71,8 @@ code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/',
|
||||
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
|
||||
regex_sanitize_name_space_eater = re.compile(r" +")
|
||||
|
||||
valid_developer_fields = ('name', 'games', 'contact', 'organization', 'home')
|
||||
|
||||
comment_string = '[comment]: # (partly autogenerated content, edit with care, read the manual before)'
|
||||
|
||||
|
||||
@ -378,7 +380,22 @@ def read_developer_info():
|
||||
developer_file = os.path.join(c.root_path, 'developer.md')
|
||||
grammar_file = os.path.join(c.code_path, 'grammar_listing.lark')
|
||||
transformer = ListingTransformer()
|
||||
return read_and_parse(developer_file, grammar_file, transformer)
|
||||
developers = read_and_parse(developer_file, grammar_file, transformer)
|
||||
# now transform a bit more
|
||||
for index, dev in enumerate(developers):
|
||||
for field in dev.keys():
|
||||
if field not in valid_developer_fields:
|
||||
raise RuntimeError('Unknown developer field "{}" for developer: {}.'.format(field, dev['name']))
|
||||
for field in ('name', 'organization'):
|
||||
if field in dev:
|
||||
dev[field] = dev[field].strip()
|
||||
for field in ('games', 'contact'):
|
||||
if field in dev:
|
||||
content = dev[field]
|
||||
content = content.split(',')
|
||||
content = [x.strip() for x in content]
|
||||
dev[field] = content
|
||||
return developers
|
||||
|
||||
|
||||
def write_developer_info(developers):
    """
    Writes the given developers to the file developer.md in the root path.

    The given list is sorted in place by developer name (ignoring case). Every developer needs a 'name' and a 'games'
    field; all other fields are written afterwards in alphabetical order of the field name.

    :return:
    """
    # comment line followed by the heading with the number of developers
    parts = ['{}\n'.format(comment_string), '# Developer ({})\n\n'.format(len(developers))]

    # sort by name
    developers.sort(key=lambda x: str.casefold(x['name']))

    # one section per developer
    for dev in developers:
        # developer name with number of games, then the games themselves
        parts.append('## {} ({})\n\n'.format(dev['name'], len(dev['games'])))
        parts.append('- Games: {}\n'.format(', '.join(sorted(dev['games'], key=str.casefold))))

        # all the remaining in alphabetical order
        for field in sorted(dev.keys()):
            if field in ('name', 'games'):
                continue
            value = dev[field]
            if not isinstance(value, str):
                # everything that is not a string is written as sorted, comma separated list
                value = ', '.join(sorted(value, key=str.casefold))
            parts.append('- {}: {}\n'.format(field.capitalize(), value))
        parts.append('\n')

    # write
    utils.write_text(os.path.join(c.root_path, 'developer.md'), ''.join(parts))
|
||||
|
||||
|
||||
|
||||
def read_inspirations_info():
|
||||
|
@ -5,16 +5,45 @@ Everything specific to the Github API (via PyGithub).
|
||||
from github import Github
|
||||
|
||||
|
||||
def normalize_repo_name(repo):
    """
    Bring repo to style xxx/yyy

    Surrounding whitespace, a leading "https://github.com/", trailing slashes and a trailing ".git" are removed.
    Everything else is returned unchanged.

    :param repo: repository name or Github url of a repository
    :return: the normalized repository name
    """
    prefix = 'https://github.com/'
    suffix = '.git'
    repo = repo.strip()
    if repo.startswith(prefix):
        repo = repo[len(prefix):]
    # a trailing slash (as in urls copied from a browser) would give an invalid name "xxx/yyy/"
    repo = repo.rstrip('/')
    if repo.endswith(suffix):
        repo = repo[:-len(suffix)]
    return repo
|
||||
|
||||
|
||||
def repo_get_contributors(repo):
    """
    Collects all contributors of a repository in a list.

    The result of get_contributors() is iterated directly instead of being indexed from 0 to totalCount - 1, so no
    separate lookup of the total count is needed.

    :param repo: repository object offering get_contributors()
    :return: list of the contributors
    """
    return list(repo.get_contributors())
|
||||
|
||||
|
||||
def retrieve_repo_info(repos):
    """
    For a list of Github repos, retrieves repo information.

    Repos should have the style xxx/yyy, example: "PyGithub/PyGithub" (each repo is passed through
    normalize_repo_name before it is requested).

    :param repos: a single repo (string) or an iterable of repos
    :return: a dictionary with the repo information for a single repo, otherwise a list of such dictionaries
    """
    single_repo = isinstance(repos, str)
    if single_repo:
        repos = (repos,)
    result = []
    g = Github()
    for repo in repos:
        repo = normalize_repo_name(repo)
        r = g.get_repo(repo)
        e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'description': r.description,
             'language': r.language, 'last modified': r.last_modified, 'open issues count': r.open_issues_count,
             'stars count': r.stargazers_count, 'topics': r.topics, 'repo': repo}
        result.append(e)
    # a single repo given as string gives a single dictionary back
    return result[0] if single_repo else result
|
||||
|
Reference in New Issue
Block a user