check of external links now with redirects

This commit is contained in:
Trilarion
2020-08-14 14:04:19 +02:00
parent b61e189f49
commit bd8d411f1d
106 changed files with 271 additions and 248 deletions

View File

@ -129,6 +129,7 @@
"https://git.tuxfamily.org/rba/rogueboxadventures.git",
"https://git.xiph.org/vorbis.git",
"https://github.com/0ad/0ad.git",
"https://github.com/2006rebotted/2006rebotted.git",
"https://github.com/AdamAtomic/flixel.git",
"https://github.com/AdrienTD/wkbre.git",
"https://github.com/Afr0Games/Project-Dollhouse.git",
@ -416,7 +417,7 @@
"https://github.com/alexdantas/www.git",
"https://github.com/alexknvl/fonline.git",
"https://github.com/alphaonex86/CatchChallenger.git",
"https://github.com/alxm/a2x.git",
"https://github.com/alxm/faur.git",
"https://github.com/amerkoleci/alimer.git",
"https://github.com/amerkoleci/vortice.git",
"https://github.com/amroibrahim/DIYDoom.git",
@ -446,6 +447,7 @@
"https://github.com/apsillers/Taggem.git",
"https://github.com/apsillers/lords-of-the-fey.git",
"https://github.com/arcadia-xenos/progress-quest.git",
"https://github.com/ardentryst/ardentryst.git",
"https://github.com/arescentral/antares.git",
"https://github.com/arianne/stendhal.git",
"https://github.com/arx/ArxLibertatis.git",
@ -465,6 +467,7 @@
"https://github.com/blakeohare/pyweek-sentientstorage.git",
"https://github.com/blockattack/blockattack-game.git",
"https://github.com/bni/orbium.git",
"https://github.com/boardgameio/boardgame.io.git",
"https://github.com/boostorg/boost.git",
"https://github.com/bote-team/bote.git",
"https://github.com/bradharding/doomretro.git",
@ -526,7 +529,6 @@
"https://github.com/delight-im/OpenSoccer.git",
"https://github.com/demonixis/C3DE.git",
"https://github.com/dgengin/DGEngine.git",
"https://github.com/dginovker/2006rebotted.git",
"https://github.com/dhewm/dhewm3.git",
"https://github.com/diasurgical/devilution.git",
"https://github.com/diasurgical/devilutionX.git",
@ -619,7 +621,6 @@
"https://github.com/haroldo-ok/datastorm.git",
"https://github.com/hedgewars/hw.git",
"https://github.com/henkboom/pax-britannica.git",
"https://github.com/hhirsch/ardentryst.git",
"https://github.com/highfestiva/life.git",
"https://github.com/hinogi/eternalwinterwars.git",
"https://github.com/hypatia-software-org/hypatia-engine.git",
@ -748,7 +749,6 @@
"https://github.com/nenadalm/Train.git",
"https://github.com/nevat/abbayedesmorts-gpl.git",
"https://github.com/nhydock/UlDunAd.git",
"https://github.com/nicolodavis/boardgame.io.git",
"https://github.com/nicupavel/openpanzer.git",
"https://github.com/nigels-com/glew.git",
"https://github.com/nikki-and-the-robots/nikki.git",

View File

@ -157,6 +157,7 @@ https://eblong.com/zarf/twilight/index.html
https://edu.kde.org/
https://empiredirectory.net/
https://empiredirectory.net/index.php/downloads/viewdownload/6-server-software/13-empire-server
https://emulation.gametechwiki.com/index.php/Main_Page
https://en.wikipedia.org/w/index.php?title=GNU_Backgammon&action=edit&redlink=1
https://en.wikipedia.org/w/index.php?title=Golden_Age_of_Civilizations&action=edit&redlink=1
https://en.wikipedia.org/w/index.php?title=Kdegames&action=edit&redlink=1
@ -223,6 +224,7 @@ https://github.com/collections/tools-for-open-source (maybe we can apply some)
https://github.com/collections/web-games (only OS)
https://github.com/collinhover/kaiopua
https://github.com/cookgreen/Yuris-Revenge
https://github.com/corewar/corewar.io
https://github.com/Cortrah/SpaceOperaDesign, https://github.com/Cortrah/SpaceOperaRuby/blob/master/design/turnstyles.md
https://github.com/cping/LGame
https://github.com/cymonsgames/CymonsGames (collection)
@ -286,6 +288,7 @@ https://github.com/libretro/libretro-chailove
https://github.com/libretro/libretro-prboom
https://github.com/ligurio/awesome-ttygames
https://github.com/luciopanepinto/pacman
https://github.com/luciusDXL/TheForceEngine
https://github.com/MarcoLizza/tofu-engine
https://github.com/MarilynDafa/Bulllord-Engine
https://github.com/MatthewTheGlutton/HideousDestructor
@ -301,6 +304,7 @@ https://github.com/MustaphaTR/Romanovs-Vengeance
https://github.com/MyGUI/mygui
https://github.com/MyreMylar/pygame_gui
https://github.com/nCine/nCine
https://github.com/Noesis/UE4-ShooterGame
https://github.com/ogarcia/opensudoku
https://github.com/OGRECave/scape
https://github.com/OpenHV/OpenHV
@ -325,6 +329,7 @@ https://github.com/prime31/Nez-Samples
https://github.com/psuong/ig-developer-console
https://github.com/qiciengine/qiciengine
https://github.com/Quaver/Wobble
https://github.com/quinnvoker/qurobullet
https://github.com/rakugoteam/Rakugo
https://github.com/rds1983/Myra
https://github.com/redomar/JavaGame
@ -366,6 +371,7 @@ https://github.com/Tinob/Ishiiruka (https://github.com/shiiion/Ishiiruka, https:
https://github.com/tizian/Cendric2
https://github.com/TomBebb/awe
https://github.com/topics/top-down-shooter
https://github.com/uberspot/2048-android
https://github.com/untakenstupidnick/nbsdgames (Blockout II)
https://github.com/untakenstupidnick/nbsdgames (Cross-platform ncurses/pdcurses based games under active development)
https://github.com/UnterrainerInformatik/GameDevelopmentLinks

View File

@ -295,7 +295,9 @@ def clean_lgw_content():
# statistics before
print('field contents before')
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
fields = sorted(list(unique_fields - {'description', 'external links', 'dev home', 'forum', 'home',
'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name',
'repo', 'Release date', 'categories'}))
for field in fields:
content = [entry[field] for entry in entries if field in entry]
# flatten
@ -345,7 +347,9 @@ def clean_lgw_content():
# list for every unique field
print('\nfield contents after')
fields = sorted(list(unique_fields - set(('description', 'external links', 'dev home', 'forum', 'home', 'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name', 'repo', 'Release date', 'categories'))))
fields = sorted(list(unique_fields - {'description', 'external links', 'dev home', 'forum', 'home',
'linux-packages', 'developer', 'chat', 'tracker', 'Latest release', 'name',
'repo', 'Release date', 'categories'}))
for field in fields:
content = [entry[field] for entry in entries if field in entry]
# flatten

View File

@ -9,6 +9,7 @@
"""
import urllib.request
import requests
import http.client
import datetime
import json
@ -137,52 +138,57 @@ def check_validity_external_links():
from time to time.
"""
# TODO check if links are occurring in multiple entries, first go through all entries and find all links, then check links for multiple entries, then check links, follow redirects
print("check external links (can take a while)")
# TODO Gitorious works in principle but only without SSL verification (requests can probably do that)
# regex for finding urls (can be in <> or in ]() or after a whitespace
regex = re.compile(r"[\s\n]<(http.+?)>|\]\((http.+?)\)|[\s\n](http[^\s\n,]+?)[\s\n\)]")
# regex = re.compile(r"[\s\n<(](http://.*?)[\s\n>)]")
# count
number_checked_links = 0
# ignore the following urls (they give false positives here)
ignored_urls = ('https://git.tukaani.org/xz.git',)
# iterate over all entries
for _, entry_path, content in osg.entry_iterator():
# ignore the following patterns (they give false positives here)
ignored_urls = ('https://git.tukaani.org/xz.git', 'https://git.code.sf.net/p/')
# extract all links from entries
urls = {}
for entry, _, content in osg.entry_iterator():
# apply regex
matches = regex.findall(content)
# for each match
for match in matches:
# for each possible clause
for url in match:
if url and not any((url.startswith(x) for x in ignored_urls)):
# github and gitlab git URLs are shortened to not contain .git
if any((url.startswith(x) for x in ('https://github.com/', 'https://gitlab.com/', 'https://salsa.debian.org/', 'https://src.fedoraproject.org/', 'https://gitlab.gnome.org/GNOME/'))) and url.endswith('.git'):
url = url[:-4]
if url.startswith('https://svn.code.sf.net/p/') and url.endswith('code'):
url = url + '/'
if url.startswith('https://bitbucket.org/') and url.endswith('.git'):
url = url[:-4] + '/commits/'
if url.startswith('https://svn.code.sf.net/p/'):
url = 'http' + url[5:]
if url.startswith('https://git.savannah.gnu.org/git/'):
url = url + '/'
# if there was something (and not a sourceforge git url)
if url and not url.startswith('https://git.code.sf.net/p/') and url not in ignored_urls:
try:
# without a special header, frequent 403 responses occur
req = urllib.request.Request(url,
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'})
urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
print("{}: {} - {}".format(os.path.basename(entry_path), url, e.code))
except urllib.error.URLError as e:
print("{}: {} - {}".format(os.path.basename(entry_path), url, e.reason))
except http.client.RemoteDisconnected:
print("{}: {} - disconnected without response".format(os.path.basename(entry_path), url))
if url in urls:
urls[url].add(entry)
else:
urls[url] = {entry}
print('found {} unique links'.format(len(urls)))
print("start checking external links (can take a while)")
number_checked_links += 1
if number_checked_links % 50 == 0:
print("{} links checked".format(number_checked_links))
print("{} links checked".format(number_checked_links))
# now iterate over all urls
for index, url in enumerate(urls.keys()):
try:
r = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}, timeout=10, allow_redirects=True)
# check for bad status
if r.status_code != requests.codes.ok:
print('{}: URL {} in entry {} has status {}'.format(index, url, urls[url], r.status_code))
# check for redirect
if r.history:
print('{}: URL {} in entry {} was redirected to {}'.format(index, url, urls[url], r.url))
except Exception as e:
print('{}: URL {} in entry {} gave error {}'.format(index, url, urls[url], e))
# print regular updates
if index > 0 and index % 100 == 0:
print('{} / {}'.format(index, len(urls)))
def check_template_leftovers():
@ -915,31 +921,35 @@ def check_code_dependencies(infos):
"""
# get all names
names = [x['name'] for x in infos]
# get all names of frameworks and libraries, also using osg.code_dependencies_aliases
valid_dependencies = list(osg.code_dependencies_without_entry.keys())
for info in infos:
if any((x in ('framework', 'library', 'game engine') for x in info['keywords'])):
name = info['name']
if name in osg.code_dependencies_aliases:
valid_dependencies.extend(osg.code_dependencies_aliases[name])
else:
valid_dependencies.append(name)
# TODO get all names of frameworks and libraries only and use osg.code_dependencies_aliases
# get all code dependencies
dependencies = {}
# get all referenced code dependencies
referenced_dependencies = {}
for info in infos:
deps = info.get('code dependencies', [])
for dependency in deps:
if dependency in dependencies:
dependencies[dependency] += 1
if dependency in referenced_dependencies:
referenced_dependencies[dependency] += 1
else:
dependencies[dependency] = 1
referenced_dependencies[dependency] = 1
# delete those that are in names
dependencies = [(k, v) for k, v in dependencies.items() if
k not in names and k not in osg.code_dependencies_without_entry]
# delete those that are valid dependencies
referenced_dependencies = [(k, v) for k, v in referenced_dependencies.items() if k not in valid_dependencies]
# sort by number
dependencies.sort(key=lambda x: x[1], reverse=True)
referenced_dependencies.sort(key=lambda x: x[1], reverse=True)
# print out
print('Code dependencies not included as entry')
for dep in dependencies:
for dep in referenced_dependencies:
print('{} ({})'.format(*dep))
@ -947,7 +957,7 @@ if __name__ == "__main__":
# check_validity_backlog()
# backlog
# clean backlog
game_urls = osg.extract_links()
text = utils.read_text(os.path.join(c.root_path, 'code', 'rejected.txt'))
regex = re.compile(r"\((http.*?)\)", re.MULTILINE)
@ -984,10 +994,10 @@ if __name__ == "__main__":
update_statistics(infos)
# update inspirations
update_inspirations(infos)
# update_inspirations(infos)
# update developers
update_developer(infos)
# update_developer(infos)
# update database for html table
export_json(infos)
@ -999,11 +1009,10 @@ if __name__ == "__main__":
check_code_dependencies(infos)
# collect list of git code repositories (only one per project) for git_statistics script
# export_git_code_repositories_json()
export_git_code_repositories_json()
# check external links (only rarely)
# check_validity_external_links()
# sort backlog and rejected
# sort_text_file(os.path.join(c.root_path, 'code', 'backlog.txt'), 'backlog')
# sort rejected games list file
sort_text_file(os.path.join(c.root_path, 'code', 'rejected.txt'), 'rejected games list')

View File

@ -87,14 +87,16 @@ known_multiplayer_modes = (
'competitive', 'co-op', 'hotseat', 'LAN', 'local', 'massive', 'matchmaking', 'online', 'split-screen')
# TODO put the abbreviations directly in the name line (parenthesis maybe), that is more natural
code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': 'SFML',
'Boost (C++ Libraries)': 'Boost', 'SGE Game Engine': 'SGE'}
code_dependencies_aliases = {'Simple DirectMedia Layer': ('SDL', 'SDL2'), 'Simple and Fast Multimedia Library': ('SFML',),
'Boost (C++ Libraries)': ('Boost',), 'SGE Game Engine': ('SGE',), 'MegaGlest': ('MegaGlest Engine',)}
code_dependencies_without_entry = {'OpenGL': 'https://www.opengl.org/',
'GLUT': 'https://www.opengl.org/resources/libraries/',
'WebGL': 'https://www.khronos.org/webgl/',
'Unity': 'https://unity.com/solutions/game',
'.NET': 'https://dotnet.microsoft.com/', 'Vulkan': 'https://www.khronos.org/vulkan/',
'KDE Frameworks': 'https://kde.org/products/frameworks/'}
'KDE Frameworks': 'https://kde.org/products/frameworks/',
'jQuery': 'https://jquery.com/',
'node.js': 'https://nodejs.org/en/'}
regex_sanitize_name = re.compile(r"[^A-Za-z 0-9-+]+")
regex_sanitize_name_space_eater = re.compile(r" +")