additions from backlog

This commit is contained in:
Trilarion
2019-09-08 12:02:45 +02:00
parent 7c32a60300
commit 1581152bc3
26 changed files with 370 additions and 604 deletions

View File

@ -223,4 +223,31 @@ def assemble_infos(games_path):
# add to list
infos.append(info)
return infos
return infos
def extract_links(games_path):
    """
    Parses all entries and extracts http(s) links from them.

    Three notations are recognized in the entry text:
    - a link in angle brackets: <http...>
    - a markdown link target: ](http...)
    - a bare link delimited by whitespace, a comma or the start/end of the text

    :param games_path: passed unchanged to entry_iterator
    :return: list of unique urls, sorted case-insensitively
    """
    # The delimiters around a url are checked with zero-width assertions instead
    # of being consumed. Consuming them made the second of two urls separated by
    # a single whitespace invisible (its leading whitespace was already eaten by
    # the previous match) and also missed urls at the very start or end of an
    # entry. (?<!\S) means "at the start of the text or preceded by whitespace".
    regex = re.compile(
        r"(?<!\S)<(http.+?)>"      # <http...>
        r"|\]\((http.+?)\)"        # ](http...)
        r"|(?<!\S)(http[^\s,]+)"   # bare url, runs up to whitespace, comma or end
    )

    urls = set()
    # each item yielded by entry_iterator is unpacked as a 3-tuple, only the
    # third element (the text that is searched) is used here
    for _, _, content in entry_iterator(games_path):
        # findall returns one tuple per match with one item per capture group;
        # the groups of the alternatives that did not match are empty strings
        for match in regex.findall(content):
            urls.update(url for url in match if url)

    return sorted(urls, key=str.casefold)