check of external links now with redirects
This commit is contained in:
@ -129,6 +129,7 @@
|
||||
"https://git.tuxfamily.org/rba/rogueboxadventures.git",
|
||||
"https://git.xiph.org/vorbis.git",
|
||||
"https://github.com/0ad/0ad.git",
|
||||
"https://github.com/2006rebotted/2006rebotted.git",
|
||||
"https://github.com/AdamAtomic/flixel.git",
|
||||
"https://github.com/AdrienTD/wkbre.git",
|
||||
"https://github.com/Afr0Games/Project-Dollhouse.git",
|
||||
@ -416,7 +417,7 @@
|
||||
"https://github.com/alexdantas/www.git",
|
||||
"https://github.com/alexknvl/fonline.git",
|
||||
"https://github.com/alphaonex86/CatchChallenger.git",
|
||||
"https://github.com/alxm/a2x.git",
|
||||
"https://github.com/alxm/faur.git",
|
||||
"https://github.com/amerkoleci/alimer.git",
|
||||
"https://github.com/amerkoleci/vortice.git",
|
||||
"https://github.com/amroibrahim/DIYDoom.git",
|
||||
@ -446,6 +447,7 @@
|
||||
"https://github.com/apsillers/Taggem.git",
|
||||
"https://github.com/apsillers/lords-of-the-fey.git",
|
||||
"https://github.com/arcadia-xenos/progress-quest.git",
|
||||
"https://github.com/ardentryst/ardentryst.git",
|
||||
"https://github.com/arescentral/antares.git",
|
||||
"https://github.com/arianne/stendhal.git",
|
||||
"https://github.com/arx/ArxLibertatis.git",
|
||||
@ -465,6 +467,7 @@
|
||||
"https://github.com/blakeohare/pyweek-sentientstorage.git",
|
||||
"https://github.com/blockattack/blockattack-game.git",
|
||||
"https://github.com/bni/orbium.git",
|
||||
"https://github.com/boardgameio/boardgame.io.git",
|
||||
"https://github.com/boostorg/boost.git",
|
||||
"https://github.com/bote-team/bote.git",
|
||||
"https://github.com/bradharding/doomretro.git",
|
||||
@ -526,7 +529,6 @@
|
||||
"https://github.com/delight-im/OpenSoccer.git",
|
||||
"https://github.com/demonixis/C3DE.git",
|
||||
"https://github.com/dgengin/DGEngine.git",
|
||||
"https://github.com/dginovker/2006rebotted.git",
|
||||
"https://github.com/dhewm/dhewm3.git",
|
||||
"https://github.com/diasurgical/devilution.git",
|
||||
"https://github.com/diasurgical/devilutionX.git",
|
||||
@ -619,7 +621,6 @@
|
||||
"https://github.com/haroldo-ok/datastorm.git",
|
||||
"https://github.com/hedgewars/hw.git",
|
||||
"https://github.com/henkboom/pax-britannica.git",
|
||||
"https://github.com/hhirsch/ardentryst.git",
|
||||
"https://github.com/highfestiva/life.git",
|
||||
"https://github.com/hinogi/eternalwinterwars.git",
|
||||
"https://github.com/hypatia-software-org/hypatia-engine.git",
|
||||
@ -748,7 +749,6 @@
|
||||
"https://github.com/nenadalm/Train.git",
|
||||
"https://github.com/nevat/abbayedesmorts-gpl.git",
|
||||
"https://github.com/nhydock/UlDunAd.git",
|
||||
"https://github.com/nicolodavis/boardgame.io.git",
|
||||
"https://github.com/nicupavel/openpanzer.git",
|
||||
"https://github.com/nigels-com/glew.git",
|
||||
"https://github.com/nikki-and-the-robots/nikki.git",
|
||||
|
@ -157,6 +157,7 @@ https://eblong.com/zarf/twilight/index.html
|
||||
https://edu.kde.org/
|
||||
https://empiredirectory.net/
|
||||
https://empiredirectory.net/index.php/downloads/viewdownload/6-server-software/13-empire-server
|
||||
https://emulation.gametechwiki.com/index.php/Main_Page
|
||||
https://en.wikipedia.org/w/index.php?title=GNU_Backgammon&action=edit&redlink=1
|
||||
https://en.wikipedia.org/w/index.php?title=Golden_Age_of_Civilizations&action=edit&redlink=1
|
||||
https://en.wikipedia.org/w/index.php?title=Kdegames&action=edit&redlink=1
|
||||
@ -223,6 +224,7 @@ https://github.com/collections/tools-for-open-source (maybe we can apply some)
|
||||
https://github.com/collections/web-games (only OS)
|
||||
https://github.com/collinhover/kaiopua
|
||||
https://github.com/cookgreen/Yuris-Revenge
|
||||
https://github.com/corewar/corewar.io
|
||||
https://github.com/Cortrah/SpaceOperaDesign, https://github.com/Cortrah/SpaceOperaRuby/blob/master/design/turnstyles.md
|
||||
https://github.com/cping/LGame
|
||||
https://github.com/cymonsgames/CymonsGames (collection)
|
||||
@ -286,6 +288,7 @@ https://github.com/libretro/libretro-chailove
|
||||
https://github.com/libretro/libretro-prboom
|
||||
https://github.com/ligurio/awesome-ttygames
|
||||
https://github.com/luciopanepinto/pacman
|
||||
https://github.com/luciusDXL/TheForceEngine
|
||||
https://github.com/MarcoLizza/tofu-engine
|
||||
https://github.com/MarilynDafa/Bulllord-Engine
|
||||
https://github.com/MatthewTheGlutton/HideousDestructor
|
||||
@ -301,6 +304,7 @@ https://github.com/MustaphaTR/Romanovs-Vengeance
|
||||
https://github.com/MyGUI/mygui
|
||||
https://github.com/MyreMylar/pygame_gui
|
||||
https://github.com/nCine/nCine
|
||||
https://github.com/Noesis/UE4-ShooterGame
|
||||
https://github.com/ogarcia/opensudoku
|
||||
https://github.com/OGRECave/scape
|
||||
https://github.com/OpenHV/OpenHV
|
||||
@ -325,6 +329,7 @@ https://github.com/prime31/Nez-Samples
|
||||
https://github.com/psuong/ig-developer-console
|
||||
https://github.com/qiciengine/qiciengine
|
||||
https://github.com/Quaver/Wobble
|
||||
https://github.com/quinnvoker/qurobullet
|
||||
https://github.com/rakugoteam/Rakugo
|
||||
https://github.com/rds1983/Myra
|
||||
https://github.com/redomar/JavaGame
|
||||
@ -366,6 +371,7 @@ https://github.com/Tinob/Ishiiruka (https://github.com/shiiion/Ishiiruka, https:
|
||||
https://github.com/tizian/Cendric2
|
||||
https://github.com/TomBebb/awe
|
||||
https://github.com/topics/top-down-shooter
|
||||
https://github.com/uberspot/2048-android
|
||||
https://github.com/untakenstupidnick/nbsdgames (Blockout II)
|
||||
https://github.com/untakenstupidnick/nbsdgames (Cross-platform ncurses/pdcurses based games under active development)
|
||||
https://github.com/UnterrainerInformatik/GameDevelopmentLinks
|
||||
|
@ -295,7 +295,9 @@ def clean_lgw_content():
|
||||
|
||||
# statistics before
|
||||
print('field contents before')
|
||||
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
|
||||
fields = sorted(list(unique_fields - {'description', 'external links', 'dev home', 'forum', 'home',
|
||||
'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name',
|
||||
'repo', 'Release date', 'categories'}))
|
||||
for field in fields:
|
||||
content = [entry[field] for entry in entries if field in entry]
|
||||
# flatten
|
||||
@ -345,7 +347,9 @@ def clean_lgw_content():
|
||||
|
||||
# list for every unique field
|
||||
print('\nfield contents after')
|
||||
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
|
||||
fields = sorted(list(unique_fields - {'description', 'external links', 'dev home', 'forum', 'home',
|
||||
'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name',
|
||||
'repo', 'Release date', 'categories'}))
|
||||
for field in fields:
|
||||
content = [entry[field] for entry in entries if field in entry]
|
||||
# flatten
|
||||
|
@ -9,6 +9,7 @@
|
||||
"""
|
||||
|
||||
import urllib.request
|
||||
import requests
|
||||
import http.client
|
||||
import datetime
|
||||
import json
|
||||
@ -137,52 +138,57 @@ def check_validity_external_links():
|
||||
from time to time.
|
||||
"""
|
||||
|
||||
# TODO check if links are occurring in multiple entries, first go through all entries and find all links, then check links for multiple entries, then check links, follow redirects
|
||||
|
||||
print("check external links (can take a while)")
|
||||
# TODO Gitorious works in principle but only without SSL verification (requests can probably do that)
|
||||
|
||||
# regex for finding urls (can be in <>, in ]() or after a whitespace)
|
||||
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
|
||||
# regex = re.compile(r"[\s\n<(](http://.*?)[\s\n>)]")
|
||||
|
||||
# count
|
||||
number_checked_links = 0
|
||||
|
||||
# ignore the following urls (they give false positives here)
|
||||
ignored_urls = ('https://git.tukaani.org/xz.git',)
|
||||
|
||||
# iterate over all entries
|
||||
for _, entry_path, content in osg.entry_iterator():
|
||||
# ignore the following patterns (they give false positives here)
|
||||
ignored_urls = ('https://git.tukaani.org/xz.git', 'https://git.code.sf.net/p/')
|
||||
|
||||
# extract all links from entries
|
||||
urls = {}
|
||||
for entry, _, content in osg.entry_iterator():
|
||||
# apply regex
|
||||
matches = regex.findall(content)
|
||||
|
||||
# for each match
|
||||
for match in matches:
|
||||
|
||||
# for each possible clause
|
||||
for url in match:
|
||||
if url and not any((url.startswith(x) for x in ignored_urls)):
|
||||
# github and gitlab git URLs are shortened to not contain .git
|
||||
if any((url.startswith(x) for x in ('https://github.com/', 'https://gitlab.com/', 'https://salsa.debian.org/', 'https://src.fedoraproject.org/', 'https://gitlab.gnome.org/GNOME/'))) and url.endswith('.git'):
|
||||
url = url[:-4]
|
||||
if url.startswith('https://svn.code.sf.net/p/') and url.endswith('code'):
|
||||
url = url + '/'
|
||||
if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
|
||||
url = url[:-4] + '/commits/'
|
||||
if url.startswith('https://svn.code.sf.net/p/'):
|
||||
url = 'http' + url[5:]
|
||||
if url.startswith('https://git.savannah.gnu.org/git/'):
|
||||
url = url + '/'
|
||||
|
||||
# if there was something (and not a sourceforge git url)
|
||||
if url and not url.startswith('https://git.code.sf.net/p/') and url not in ignored_urls:
|
||||
try:
|
||||
# without a special header, frequent 403 responses occur
|
||||
req = urllib.request.Request(url,
|
||||
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'})
|
||||
urllib.request.urlopen(req)
|
||||
except urllib.error.HTTPError as e:
|
||||
print("{}: {} - {}".format(os.path.basename(entry_path), url, e.code))
|
||||
except urllib.error.URLError as e:
|
||||
print("{}: {} - {}".format(os.path.basename(entry_path), url, e.reason))
|
||||
except http.client.RemoteDisconnected:
|
||||
print("{}: {} - disconnected without response".format(os.path.basename(entry_path), url))
|
||||
if url in urls:
|
||||
urls[url].add(entry)
|
||||
else:
|
||||
urls[url] = {entry}
|
||||
print('found {} unique links'.format(len(urls)))
|
||||
print("start checking external links (can take a while)")
|
||||
|
||||
number_checked_links += 1
|
||||
|
||||
if number_checked_links % 50 == 0:
|
||||
print("{} links checked".format(number_checked_links))
|
||||
|
||||
print("{} links checked".format(number_checked_links))
|
||||
# now iterate over all urls
|
||||
for index, url in enumerate(urls.keys()):
|
||||
try:
|
||||
r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=10, allow_redirects=True)
|
||||
# check for bad status
|
||||
if r.status_code != requests.codes.ok:
|
||||
print('{}: URL {} in entry {} has status {}'.format(index, url, urls[url], r.status_code))
|
||||
# check for redirect
|
||||
if r.history:
|
||||
print('{}: URL {} in entry {} was redirected to {}'.format(index, url, urls[url], r.url))
|
||||
except Exception as e:
|
||||
print('{}: URL {} in entry {} gave error {}'.format(index, url, urls[url], e))
|
||||
# print regular updates
|
||||
if index > 0 and index % 100 == 0:
|
||||
print('{} / {}'.format(index, len(urls)))
|
||||
|
||||
|
||||
def check_template_leftovers():
|
||||
@ -915,31 +921,35 @@ def check_code_dependencies(infos):
|
||||
|
||||
"""
|
||||
|
||||
# get all names
|
||||
names = [x['name'] for x in infos]
|
||||
# get all names of frameworks and libraries, also using osg.code_dependencies_aliases
|
||||
valid_dependencies = list(osg.code_dependencies_without_entry.keys())
|
||||
for info in infos:
|
||||
if any((x in ('framework', 'library', 'game engine') for x in info['keywords'])):
|
||||
name = info['name']
|
||||
if name in osg.code_dependencies_aliases:
|
||||
valid_dependencies.extend(osg.code_dependencies_aliases[name])
|
||||
else:
|
||||
valid_dependencies.append(name)
|
||||
|
||||
# TODO get all names of frameworks and libraries only and use osg.code_dependencies_aliases
|
||||
|
||||
# get all code dependencies
|
||||
dependencies = {}
|
||||
# get all referenced code dependencies
|
||||
referenced_dependencies = {}
|
||||
for info in infos:
|
||||
deps = info.get('code dependencies', [])
|
||||
for dependency in deps:
|
||||
if dependency in dependencies:
|
||||
dependencies[dependency] += 1
|
||||
if dependency in referenced_dependencies:
|
||||
referenced_dependencies[dependency] += 1
|
||||
else:
|
||||
dependencies[dependency] = 1
|
||||
referenced_dependencies[dependency] = 1
|
||||
|
||||
# delete those that are in names
|
||||
dependencies = [(k, v) for k, v in dependencies.items() if
|
||||
k not in names and k not in osg.code_dependencies_without_entry]
|
||||
# delete those that are valid dependencies
|
||||
referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies]
|
||||
|
||||
# sort by number
|
||||
dependencies.sort(key=lambda x: x[1], reverse=True)
|
||||
referenced_dependencies.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
# print out
|
||||
print('Code dependencies not included as entry')
|
||||
for dep in dependencies:
|
||||
for dep in referenced_dependencies:
|
||||
print('{} ({})'.format(*dep))
|
||||
|
||||
|
||||
@ -947,7 +957,7 @@ if __name__ == "__main__":
|
||||
|
||||
# check_validity_backlog()
|
||||
|
||||
# backlog
|
||||
# clean backlog
|
||||
game_urls = osg.extract_links()
|
||||
text = utils.read_text(os.path.join(c.root_path, 'code', 'rejected.txt'))
|
||||
regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
|
||||
@ -984,10 +994,10 @@ if __name__ == "__main__":
|
||||
update_statistics(infos)
|
||||
|
||||
# update inspirations
|
||||
update_inspirations(infos)
|
||||
# update_inspirations(infos)
|
||||
|
||||
# update developers
|
||||
update_developer(infos)
|
||||
# update_developer(infos)
|
||||
|
||||
# update database for html table
|
||||
export_json(infos)
|
||||
@ -999,11 +1009,10 @@ if __name__ == "__main__":
|
||||
check_code_dependencies(infos)
|
||||
|
||||
# collect list of git code repositories (only one per project) for git_statistics script
|
||||
# export_git_code_repositories_json()
|
||||
export_git_code_repositories_json()
|
||||
|
||||
# check external links (only rarely)
|
||||
# check_validity_external_links()
|
||||
|
||||
# sort backlog and rejected
|
||||
# sort_text_file(os.path.join(c.root_path, 'code', 'backlog.txt'), 'backlog')
|
||||
# sort rejected games list file
|
||||
sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')
|
||||
|
@ -87,14 +87,16 @@ known_multiplayer_modes = (
|
||||
'competitive', 'co-op', 'hotseat', 'LAN', 'local', 'massive', 'matchmaking', 'online', 'split-screen')
|
||||
|
||||
# TODO put the abbreviations directly in the name line (parenthesis maybe), that is more natural
|
||||
code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': 'SFML',
|
||||
'Boost (C++ Libraries)': 'Boost', 'SGE Game Engine': 'SGE'}
|
||||
code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': ('SFML',),
|
||||
'Boost (C++ Libraries)': ('Boost',), 'SGE Game Engine': ('SGE',), 'MegaGlest': ('MegaGlest Engine',)}
|
||||
code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/',
|
||||
'GLUT': 'https://www.opengl.org/resources/libraries/',
|
||||
'WebGL': 'https://www.khronos.org/webgl/',
|
||||
'Unity': 'https://unity.com/solutions/game',
|
||||
'.NET': 'https://dotnet.microsoft.com/', 'Vulkan': 'https://www.khronos.org/vulkan/',
|
||||
'KDE Frameworks': 'https://kde.org/products/frameworks/'}
|
||||
'KDE Frameworks': 'https://kde.org/products/frameworks/',
|
||||
'jQuery': 'https://jquery.com/',
|
||||
'node.js': 'https://nodejs.org/en/'}
|
||||
|
||||
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
|
||||
regex_sanitize_name_space_eater = re.compile(r" +")
|
||||
|
Reference in New Issue
Block a user