adding developers

Trilarion
2020-02-10 12:35:22 +01:00
parent eeb20a670a
commit 48ade4bc0b
13 changed files with 345 additions and 14 deletions

View File

@ -8,10 +8,10 @@ property: "- " _key ": " _value "\n"
_key: /(?! ).+?(?=:)(?<! )/ // key: everything until next ":", not beginning or ending with a space
_value: /.+(?<! )/ // everything until the end of the line, not ending with a space
name: /.+?(?= \()/ // developer name: everything until " ("
number: /[0-9]+/
COMMENT: /^\[comment\]: #.*$\n/m // [comment]: # xxx
_E: /^$\n/m // empty new line
%ignore COMMENT
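For orientation, here is a hedged sketch of the kind of developer.md listing these rules are written against, pieced together from the write_developer_info changes further down in this commit (the developer name, games and contact are made up):

[comment]: # (partly autogenerated content, edit with care, read the manual before)

## Jane Example (2)

- Games: Example Game, Other Game
- Contact: jexample@SF

Roughly, each "- Key: Value" line is a property split into _key and _value, name and number pick the developer name and game count out of the heading, and COMMENT lines are dropped via %ignore.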

View File

@ -0,0 +1,129 @@
"""
Checks the entries and tries to detect additional developer content by retrieving websites or log information from
stored Git repositories.
"""
import os
import requests
from bs4 import BeautifulSoup
from utils import constants as c, utils, osg, osg_github
def developer_info_lookup(name):
    for dev in developer_info:
        if name == dev['name']:
            return dev
    return None
# author names on SourceForge that differ from the names as we store them
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray', 'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
if __name__ == "__main__":
    # read developer info and write it back (normalizes the formatting of developer.md)
    developer_info = osg.read_developer_info()
    osg.write_developer_info(developer_info)

    # assemble info
    entries = osg.assemble_infos()

    # loop over infos
    developers = ''
    try:
        i = 0
        # active = False
        for entry in entries:
            # if entry['name'] == 'Aleph One':
            #     active = True
            # if not active:
            #     continue

            # for testing purposes: only look at the first 40 entries
            i += 1
            if i > 40:
                break

            # print
            entry_name = '{} - {}'.format(entry['file'], entry['name'])
            print(entry_name)

            content = ''
            entry_developer = entry.get('developer', [])

            # parse home
            home = entry['home']

            # sourceforge project site
            prefix = 'https://sourceforge.net/projects/'
            url = [x for x in home if x.startswith(prefix)]
            if len(url) == 1:
                url = url[0]
                print(' sourceforge project site: {}'.format(url))
                url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
                response = requests.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')
                authors = soup.find('div', id='content_base').find('table').find_all('tr')
                authors = [author.find_all('td') for author in authors]
                authors = [author[1].a['href'] for author in authors if len(author) == 3]
                for author in authors:
                    # sometimes author already contains the full url, sometimes not
                    url = 'https://sourceforge.net' + author if not author.startswith('http') else author
                    response = requests.get(url)
                    url = response.url  # could be different now
                    if 'auth/?return_to' in url:
                        # for some reason authorisation is forbidden
                        author_name = author
                        nickname = author
                    else:
                        soup = BeautifulSoup(response.text, 'html.parser')
                        author_name = soup.h1.get_text()
                        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
                        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
                        nickname = nickname.replace('\n', '').strip()
                    dev = developer_info_lookup(author_name)
                    in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
                    in_entry = author_name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {} : {}@SF'.format(author_name, nickname)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            # parse source repository
            repos = entry.get('code repository', [])

            # Github
            urls = [x for x in repos if x.startswith('https://github.com/')]
            urls = []  # note: this overwrites the list above, so the Github lookup below is effectively disabled
            for url in urls:
                print(' github repo: {}'.format(url))
                github_info = osg_github.retrieve_repo_info(url)
                for contributor in github_info['contributors']:
                    name = contributor.name
                    dev = developer_info_lookup(name)
                    in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
                    in_entry = name in entry_developer
                    if in_devs and in_entry:
                        continue  # already existing in entry and devs
                    content += ' {}: {}@GH'.format(name, contributor.login)
                    if contributor.blog:
                        content += ' url: {}'.format(contributor.blog)
                    if not in_devs:
                        content += ' (not in devs)'
                    if not in_entry:
                        content += ' (not in entry)'
                    content += '\n'

            if content:
                developers += '{}\n\n{}\n'.format(entry_name, content)
    except RuntimeError as e:
        raise e
        # pass
    finally:
        # store developer info
        utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
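For reference, the collected_developer_info.txt written here ends up as blocks of the following shape, one per entry with findings (the entry file, author names and nicknames below are made up):

example.md - Example Game

 Jane Doe : jdoe@SF (not in entry)
 John Roe : jroe@SF (not in devs) (not in entry)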

View File

@ -1,2 +1,3 @@
pygithub
lark-parser
beautifulsoup4

View File

@ -15,7 +15,7 @@ class ListingTransformer(lark.Transformer):
        raise lark.Discard

    def property(self, x):
        return (x[0].value, x[1].value)
        return (x[0].value.lower(), x[1].value)

    def name(self, x):
        return ('name', x[0].value)
@ -71,6 +71,8 @@ code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/',
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
regex_sanitize_name_space_eater = re.compile(r" +")
valid_developer_fields = ('name', 'games', 'contact', 'organization', 'home')
comment_string = '[comment]: # (partly autogenerated content, edit with care, read the manual before)'
@ -378,7 +380,22 @@ def read_developer_info():
    developer_file = os.path.join(c.root_path, 'developer.md')
    grammar_file = os.path.join(c.code_path, 'grammar_listing.lark')
    transformer = ListingTransformer()
    return read_and_parse(developer_file, grammar_file, transformer)
    developers = read_and_parse(developer_file, grammar_file, transformer)
    # now transform a bit more
    for index, dev in enumerate(developers):
        for field in dev.keys():
            if field not in valid_developer_fields:
                raise RuntimeError('Unknown developer field "{}" for developer: {}.'.format(field, dev['name']))
        for field in ('name', 'organization'):
            if field in dev:
                dev[field] = dev[field].strip()
        for field in ('games', 'contact'):
            if field in dev:
                content = dev[field]
                content = content.split(',')
                content = [x.strip() for x in content]
                dev[field] = content
    return developers
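To make the returned structure concrete, a single parsed developer entry would look roughly like this after the extra transformation above, with the comma-separated fields turned into lists (all values here are made up):

{'name': 'Jane Example', 'games': ['Example Game', 'Other Game'], 'contact': ['jexample@GH', 'jexample@SF']}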
def write_developer_info(developers):
@ -386,7 +403,38 @@ def write_developer_info(developers):
    :return:
    """
    # comment
    content = '{}\n'.format(comment_string)

    # number of developers
    content += '# Developer ({})\n\n'.format(len(developers))

    # sort by name
    developers.sort(key=lambda x: str.casefold(x['name']))

    # iterate over them
    for dev in developers:
        # developer name
        content += '## {} ({})\n\n'.format(dev['name'], len(dev['games']))

        # games
        content += '- Games: {}\n'.format(', '.join(sorted(dev['games'], key=str.casefold)))

        # all the remaining fields in alphabetical order
        for field in sorted(dev.keys()):
            if field not in ('name', 'games'):
                value = dev[field]
                field = field.capitalize()
                if isinstance(value, str):
                    content += '- {}: {}\n'.format(field, value)
                else:
                    content += '- {}: {}\n'.format(field, ', '.join(sorted(value, key=str.casefold)))
        content += '\n'

    # write
    developer_file = os.path.join(c.root_path, 'developer.md')
    utils.write_text(developer_file, content)
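Putting the format strings above together, a generated developer.md starts roughly like this (the developer data is made up; only 'games' and 'contact' are lists, so 'Home' stays a plain string):

[comment]: # (partly autogenerated content, edit with care, read the manual before)
# Developer (1)

## Jane Example (2)

- Games: Example Game, Other Game
- Contact: jexample@GH, jexample@SF
- Home: https://example.org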
def read_inspirations_info():

View File

@ -5,16 +5,45 @@ Everything specific to the Github API (via PyGithub).
from github import Github
def normalize_repo_name(repo):
    """
    Brings a repo to the style xxx/yyy.
    """
    prefix = 'https://github.com/'
    if repo.startswith(prefix):
        repo = repo[len(prefix):]
    suffix = '.git'
    if repo.endswith(suffix):
        repo = repo[:-len(suffix)]
    return repo


def repo_get_contributors(repo):
    """
    Resolves the paginated contributor list of a repository into a plain list.
    """
    contributors = []
    c = repo.get_contributors()
    for i in range(c.totalCount):
        contributors.append(c[i])
    return contributors
def retrieve_repo_info(repos):
    """
    For a list of Github repos, retrieves repo information
    For a list of Github repos, retrieves repo information.

    Repos must have the style xxx/yyy, for example "PyGithub/PyGithub".
    """
    single_repo = isinstance(repos, str)
    if single_repo:
        repos = (repos,)
    result = []
    g = Github()
    for repo in repos:
        repo = normalize_repo_name(repo)
        r = g.get_repo(repo)
        e = {'archived': r.archived, 'description': r.description, 'language': r.language,
             'last modified': r.last_modified, 'open issues count': r.open_issues_count,
        e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'description': r.description,
             'language': r.language, 'last modified': r.last_modified, 'open issues count': r.open_issues_count,
             'stars count': r.stargazers_count, 'topics': r.topics, 'repo': repo}
        result.append(e)
    if single_repo:
        result = result[0]
    return result
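A minimal usage sketch of the functions above; the repo is just the PyGithub example from the docstring, and an unauthenticated Github() client is subject to strict API rate limits:

# single repo given as a full URL, normalized internally to 'PyGithub/PyGithub'
info = retrieve_repo_info('https://github.com/PyGithub/PyGithub.git')
print(info['stars count'], info['language'], len(info['contributors']))
# contributors are PyGithub NamedUser objects
for contributor in info['contributors'][:3]:
    print(contributor.login, contributor.name)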