developer import from sourceforge
This commit is contained in:
@ -191,6 +191,13 @@ https://gamejolt.com/ (search there)
|
||||
https://games.kde.org/ (all of them)
|
||||
https://games.kde.org/old/kde_arcade.php
|
||||
https://gdevelop-app.com/
|
||||
https://github.com/FaronBracy/RogueSharp
|
||||
https://github.com/jmorton06/Lumos
|
||||
https://github.com/codenamecpp/carnage3d
|
||||
https://github.com/zhangdoa/InnocenceEngine
|
||||
https://github.com/marukrap/RoguelikeDevResources
|
||||
http://www.gjt.org/ (all there)
|
||||
https://github.blog/2014-01-06-github-game-off-ii-winners/
|
||||
https://github.com/00-Evan/shattered-pixel-dungeon
|
||||
https://github.com/00-Evan/shattered-pixel-dungeon-gdx
|
||||
https://github.com/acedogblast/Project-Uranium-Godot
|
||||
|
28
code/github_import.py
Normal file
28
code/github_import.py
Normal file
@ -0,0 +1,28 @@
|
||||
"""
|
||||
Uses the Github API to learn more about the Github projects.
|
||||
"""
|
||||
|
||||
# Github
|
||||
urls = [x for x in repos if x.startswith('https://github.com/')]
|
||||
urls = []
|
||||
for url in urls:
|
||||
print(' github repo: {}'.format(url))
|
||||
github_info = osg_github.retrieve_repo_info(url)
|
||||
for contributor in github_info['contributors']:
|
||||
name = contributor.name
|
||||
dev = developer_info_lookup(name)
|
||||
in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
|
||||
in_entry = name in entry_developer
|
||||
if in_devs and in_entry:
|
||||
continue # already existing in entry and devs
|
||||
content += ' {}: {}@GH'.format(name, contributor.login)
|
||||
if contributor.blog:
|
||||
content += ' url: {}'.format(contributor.blog)
|
||||
if not in_devs:
|
||||
content += ' (not in devs)'
|
||||
if not in_entry:
|
||||
content += ' (not in entry)'
|
||||
content += '\n'
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -3,119 +3,8 @@ Checks the entries and tries to detect additional developer content, by retrievi
|
||||
stored Git repositories.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import requests
|
||||
from utils import osg, osg_ui
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import constants as c, utils, osg, osg_github
|
||||
|
||||
|
||||
# author names in SF that aren't the author names how we have them
|
||||
SF_alias_list = {'Erik Johansson (aka feneur)': 'Erik Johansson', 'Itms': 'Nicolas Auvray',
|
||||
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic'}
|
||||
|
||||
def test():
|
||||
# loop over infos
|
||||
developers = ''
|
||||
try:
|
||||
i = 0
|
||||
# active = False
|
||||
for entry in entries:
|
||||
|
||||
# if entry['Name'] == 'Aleph One':
|
||||
# active = True
|
||||
# if not active:
|
||||
# continue
|
||||
|
||||
# for testing purposes
|
||||
i += 1
|
||||
if i > 40:
|
||||
break
|
||||
|
||||
# print
|
||||
entry_name = '{} - {}'.format(entry['file'], entry['Name'])
|
||||
print(entry_name)
|
||||
content = ''
|
||||
|
||||
entry_developer = entry.get('developer', [])
|
||||
|
||||
# parse home
|
||||
home = entry['home']
|
||||
# sourceforge project site
|
||||
prefix = 'https://sourceforge.net/projects/'
|
||||
url = [x for x in home if x.startswith(prefix)]
|
||||
if len(url) == 1:
|
||||
url = url[0]
|
||||
print(' sourceforge project site: {}'.format(url))
|
||||
url = 'https://sourceforge.net/p/' + url[len(prefix):] + '_members/'
|
||||
response = requests.get(url)
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
authors = soup.find('div', id='content_base').find('table').find_all('tr')
|
||||
authors = [author.find_all('td') for author in authors]
|
||||
authors = [author[1].a['href'] for author in authors if len(author) == 3]
|
||||
for author in authors:
|
||||
# sometimes author already contains the full url, sometimes not
|
||||
url = 'https://sourceforge.net' + author if not author.startswith('http') else author
|
||||
response = requests.get(url)
|
||||
url = response.url # could be different now
|
||||
if 'auth/?return_to' in url:
|
||||
# for some reason authorisation is forbidden
|
||||
author_name = author
|
||||
nickname = author
|
||||
else:
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
author_name = soup.h1.get_text()
|
||||
author_name = SF_alias_list.get(author_name, author_name) # replace by alias if possible
|
||||
nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
|
||||
nickname = nickname.replace('\n', '').strip()
|
||||
dev = developer_info_lookup(author_name)
|
||||
in_devs = dev and 'contact' in dev and nickname + '@SF' in dev['contact']
|
||||
in_entry = author_name in entry_developer
|
||||
if in_devs and in_entry:
|
||||
continue # already existing in entry and devs
|
||||
content += ' {} : {}@SF'.format(author_name, nickname)
|
||||
if not in_devs:
|
||||
content += ' (not in devs)'
|
||||
if not in_entry:
|
||||
content += ' (not in entry)'
|
||||
content += '\n'
|
||||
|
||||
# parse source repository
|
||||
repos = entry.get('code repository', [])
|
||||
|
||||
# Github
|
||||
urls = [x for x in repos if x.startswith('https://github.com/')]
|
||||
urls = []
|
||||
for url in urls:
|
||||
print(' github repo: {}'.format(url))
|
||||
github_info = osg_github.retrieve_repo_info(url)
|
||||
for contributor in github_info['contributors']:
|
||||
name = contributor.name
|
||||
dev = developer_info_lookup(name)
|
||||
in_devs = dev and 'contact' in dev and contributor.login + '@GH' in dev['contact']
|
||||
in_entry = name in entry_developer
|
||||
if in_devs and in_entry:
|
||||
continue # already existing in entry and devs
|
||||
content += ' {}: {}@GH'.format(name, contributor.login)
|
||||
if contributor.blog:
|
||||
content += ' url: {}'.format(contributor.blog)
|
||||
if not in_devs:
|
||||
content += ' (not in devs)'
|
||||
if not in_entry:
|
||||
content += ' (not in entry)'
|
||||
content += '\n'
|
||||
|
||||
if content:
|
||||
developers += '{}\n\n{}\n'.format(entry_name, content)
|
||||
|
||||
except RuntimeError as e:
|
||||
raise e
|
||||
# pass
|
||||
finally:
|
||||
# store developer info
|
||||
utils.write_text(os.path.join(c.root_path, 'collected_developer_info.txt'), developers)
|
||||
|
||||
from utils import osg_ui
|
||||
from utils import osg
|
||||
|
||||
|
||||
class DevelopersMaintainer:
|
||||
@ -202,6 +91,16 @@ class DevelopersMaintainer:
|
||||
self.entries = osg.read_entries()
|
||||
print('{} entries read'.format(len(self.entries)))
|
||||
|
||||
def special_ops(self):
    """
    Prints one line per developer that carries a comment, for all loaded entries.

    Each line shows the entry file, the developer value and the developer comment. Prints a notice and
    returns without doing anything else if no entries are loaded.
    """
    loaded = self.entries
    if not loaded:
        print('entries not yet loaded')
        return

    row = '{:<25} - {:<25} - {}'
    for item in loaded:
        # only developers with a non-empty comment are reported
        commented = (dev for dev in item.get('Developer', []) if dev.comment)
        for dev in commented:
            print(row.format(item['File'], dev.value, dev.comment))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -214,6 +113,7 @@ if __name__ == "__main__":
|
||||
'Check for orphans': m.check_for_orphans,
|
||||
'Check for games in developers not listed': m.check_for_missing_developers_in_entries,
|
||||
'Update developers from entries': m.update_developers_from_entries,
|
||||
'Special': m.special_ops,
|
||||
'Read entries': m.read_entries
|
||||
}
|
||||
|
||||
|
@ -859,21 +859,29 @@ class EntriesMaintainer:
|
||||
print('entries not yet loaded')
|
||||
return
|
||||
|
||||
# combine content keywords
|
||||
n = len('content ')
|
||||
# cvs without any git
|
||||
for entry in self.entries:
|
||||
keywords = entry['Keyword']
|
||||
content = [keyword for keyword in keywords if keyword.startswith('content')]
|
||||
if len(content) > 1:
|
||||
# remove from keywords
|
||||
keywords = [keyword for keyword in keywords if keyword not in content]
|
||||
# remove prefix
|
||||
content = [str(keyword)[n:].strip() for keyword in content]
|
||||
# join with +
|
||||
content = 'content {}'.format(' + '.join(content))
|
||||
keywords.append(osg_parse.ValueWithComment(content))
|
||||
entry['Keyword'] = keywords
|
||||
print('fixed "{}"'.format(entry['File']))
|
||||
repos = entry['Code repository']
|
||||
cvs = [repo for repo in repos if 'cvs' in repo]
|
||||
git = [repo for repo in repos if 'git' in repo]
|
||||
if len(cvs) > 0 and len(git) == 0:
|
||||
print('Entry "{}" with repos: {}'.format(entry['File'], repos))
|
||||
|
||||
# # combine content keywords
|
||||
# n = len('content ')
|
||||
# for entry in self.entries:
|
||||
# keywords = entry['Keyword']
|
||||
# content = [keyword for keyword in keywords if keyword.startswith('content')]
|
||||
# if len(content) > 1:
|
||||
# # remove from keywords
|
||||
# keywords = [keyword for keyword in keywords if keyword not in content]
|
||||
# # remove prefix
|
||||
# content = [str(keyword)[n:].strip() for keyword in content]
|
||||
# # join with +
|
||||
# content = 'content {}'.format(' + '.join(content))
|
||||
# keywords.append(osg_parse.ValueWithComment(content))
|
||||
# entry['Keyword'] = keywords
|
||||
# print('fixed "{}"'.format(entry['File']))
|
||||
|
||||
print('special ops finished')
|
||||
|
||||
|
152
code/sourceforge_import.py
Normal file
152
code/sourceforge_import.py
Normal file
@ -0,0 +1,152 @@
|
||||
"""
|
||||
Scrapes Sourceforge project sites and adds (mostly developer) information to our database.
|
||||
""" # TODO sourceforge sites that are not existing anymore but we have an archive link, also scrape
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import constants as c, utils, osg, osg_parse
|
||||
|
||||
# file holding the JSON list of entry files that still have to be processed
sf_entries_file = os.path.join(c.code_path, 'sourceforge_entries.txt')

# start of the url of every Sourceforge project page
prefix = 'https://sourceforge.net/projects/'

# maps author names as displayed on Sourceforge to the author names used in our database
SF_alias_list = {
    'Erik Johansson (aka feneur)': 'Erik Johansson',
    'Itms': 'Nicolas Auvray',
    'baris yuksel': 'Baris Yuksel',
    'Wraitii': 'Lancelot de Ferrière',
    'Simzer': 'Simon Laszlo',
    'armin bajramovic': 'Armin Bajramovic',
    'bleu tailfly': 'bleutailfly',
    'dlh': 'DLH',
    'Bjorn Hansen': 'Bjørn Hansen',
}

# author names (after alias replacement) that are never imported
SF_ignore_list = (
    '',
    'Arianne Integration Bot',
)
|
||||
|
||||
|
||||
def collect_sourceforge_entries():
    """
    Collects the files of all database entries that have a Sourceforge project page among their home urls
    and stores this list as JSON in sf_entries_file.
    """
    all_entries = osg.read_entries()
    print('{} entries read'.format(len(all_entries)))

    # an entry qualifies as soon as one of its home urls starts with the Sourceforge project prefix
    sf_files = [entry['File'] for entry in all_entries
                if any(home.startswith(prefix) for home in entry['Home'])]

    print('{} entries with sourceforge projects'.format(len(sf_files)))
    utils.write_text(sf_entries_file, json.dumps(sf_files, indent=1))
|
||||
|
||||
|
||||
def _sf_member_links(project_url):
    """
    Returns the profile links found on the members page of a Sourceforge project.

    :param project_url: url of the project page (starts with prefix, ends with a slash)
    :return: list of href values, either absolute urls or paths relative to sourceforge.net
    :raises RuntimeError: if the members page does not answer with status 200
    """
    url_members = 'https://sourceforge.net/p/' + project_url[len(prefix):] + '_members/'
    response = requests.get(url_members)
    if response.status_code != 200:
        raise RuntimeError('url {} not accessible'.format(url_members))
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find('div', id='content_base').find('table').find_all('tr')
    cells = [row.find_all('td') for row in rows]
    # only rows with exactly three cells are used, the profile link is taken from the second cell
    return [cell[1].a['href'] for cell in cells if len(cell) == 3]


def _sf_author(author):
    """
    Resolves a profile link to the author name and the Sourceforge nickname.

    :param author: profile link as found on the members page (absolute url or path like /u/name/)
    :return: tuple (author name with aliases applied, nickname with '@SF' appended)
    """
    # sometimes author already contains the full url, sometimes not
    url_author = author if author.startswith('http') else 'https://sourceforge.net' + author
    response = requests.get(url_author)
    if 'auth/?return_to' in response.url:  # response.url could differ from the requested url
        # we got redirected to the login page (happens for example for /u/kantaros), fall back to the
        # last path segment of the link, which works for relative and absolute links alike
        author_name = author.rstrip('/').rsplit('/', 1)[-1]
        nickname = author_name
    else:
        soup = BeautifulSoup(response.text, 'html.parser')
        author_name = soup.h1.get_text()
        author_name = SF_alias_list.get(author_name, author_name)  # replace by alias if possible
        nickname = soup.find('dl', class_='personal-data').find('dd').get_text()
        nickname = nickname.replace('\n', '').strip()
    return author_name, nickname + '@SF'  # '@SF' is our indication of the platform


def sourceforge_import():
    """
    Imports developer information from the Sourceforge project sites of the entries listed in
    sf_entries_file.

    For every listed entry the members of each Sourceforge project among its home urls are looked up.
    Members missing in the entry are added as developers of the entry, and members missing in the
    developers database (or known there without this Sourceforge nickname) are added/updated there.

    Exceptions are not swallowed. Before an exception propagates (and also after a regular run) the list
    in sf_entries_file is shortened to the entries that have not been processed completely, and all
    changes made so far are written, so a later run resumes where this one stopped.

    :return: None
    """
    files = json.loads(utils.read_text(sf_entries_file))

    all_developers = osg.read_developers()
    print(' {} developers read'.format(len(all_developers)))
    all_developers_changed = False

    processed = 0  # number of leading files that have been processed completely
    entry = None  # entry currently worked on
    entry_changed = False  # True while entry has modifications that are not yet written

    try:
        # loop over each entry
        for file in files:
            print(' process {}'.format(file))

            # read entry
            entry = osg.read_entry(file)
            entry_changed = False
            developers = entry.get('Developer', [])
            urls = [x.value for x in entry['Home'] if x.startswith(prefix)]

            for url in urls:
                print(' sf project {}'.format(url))

                if not url.endswith('/'):
                    print('error: sf project does not end with slash')
                    url += '/'

                for author in _sf_member_links(url):
                    author_name, nickname = _sf_author(author)

                    if author_name in SF_ignore_list:
                        continue

                    # look author up in entry developers
                    if author_name not in developers:
                        print(' dev "{}" added to entry {}'.format(author_name, file))
                        entry['Developer'] = entry.get('Developer', []) + [osg_parse.ValueWithComment(author_name)]
                        entry_changed = True
                        developers = entry.get('Developer', [])

                    # look author and SF nickname up in developers data base
                    if author_name in all_developers:
                        dev = all_developers[author_name]
                        if nickname not in dev.get('Contact', []):
                            print(' existing dev "{}" added nickname ({}) to developer database'.format(author_name, nickname))
                            # check that name has not already @SF contact
                            if any(x.endswith('@SF') for x in dev.get('Contact', [])):
                                print('warning: already SF contact')
                            all_developers[author_name]['Contact'] = dev.get('Contact', []) + [nickname]
                            all_developers_changed = True
                    else:
                        print(' dev "{}" ({}) added to developer database'.format(author_name, nickname))
                        # contact must be a list, existing contacts are extended with "+ [nickname]" above
                        all_developers[author_name] = {'Name': author_name, 'Contact': [nickname], 'Games': [entry['Title']]}
                        all_developers_changed = True

            if entry_changed:
                # save entry
                osg.write_entry(entry)
                entry_changed = False
                print(' entry updated')

            processed += 1
    finally:
        # shorten file list to what is not completely processed (empty after a full run)
        utils.write_text(sf_entries_file, json.dumps(files[processed:], indent=1))

        # save the entry we were interrupted on, but only if it has unwritten modifications
        if entry is not None and entry_changed:
            osg.write_entry(entry)
            print(' entry updated')

        # maybe save all developers
        if all_developers_changed:
            osg.write_developers(all_developers)
            print('developers database updated')
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # step 1 (enable when the work list has to be rebuilt): find all entries with a sourceforge project
    # collect_sourceforge_entries()

    # step 2: process the work list and import the developer information
    sourceforge_import()
|
@ -260,6 +260,35 @@ def read_entries():
|
||||
return entries
|
||||
|
||||
|
||||
def read_entry(file):
    """
    Reads, parses and checks a single entry.

    :param file: the entry file (without path)
    :return: the entry as returned by check_and_process_entry
    :raises RuntimeError: if parsing or checking of the entry fails (the cause is printed before)
    """
    # build the parser from the entries grammar
    grammar_text = utils.read_text(os.path.join(c.code_path, 'grammar_entries.lark'))
    parser = osg_parse.create(grammar_text, osg_parse.EntryTransformer)

    # load the entry text and make sure it ends with a line break before it is parsed
    text = utils.read_text(os.path.join(c.entries_path, file))
    if not text.endswith('\n'):
        text += '\n'

    try:
        # the file information is put in front of the parsed fields
        fields = [('File', file)] + parser(text)
        return check_and_process_entry(fields)
    except Exception as e:
        print('{} - {}'.format(file, e))
        raise RuntimeError(e)
|
||||
|
||||
|
||||
def check_and_process_entry(entry):
|
||||
message = ''
|
||||
|
||||
|
@ -21,11 +21,11 @@ class ListingTransformer(lark.Transformer):
|
||||
|
||||
def property(self, x):
    """
    Transforms a property into a (key, values) pair.

    Key is first part, values are following.

    :param x: sequence of the parsed parts of the property, the first part must offer a .value attribute
    :return: tuple of the value of the first part and a list of all following parts
    """
    # NOTE(review): the diff left the old and the new return statement in the text (the second one was
    # unreachable), this keeps the version introduced by the commit - confirm
    return x[0].value, x[1:]
|
||||
|
||||
def name(self, x):
|
||||
"""
|
||||
|
Reference in New Issue
Block a user