rename of folder tools to code
This commit is contained in:
47
code/is_already_included.py
Normal file
47
code/is_already_included.py
Normal file
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Checks a list of game names (comma separated in text file) if they are already included in the database.
|
||||
Is fuzzy, i.e. accepts a certain similarity of names.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from utils.utils import *
|
||||
|
||||
def similarity(a, b):
|
||||
return SequenceMatcher(None, a, b).ratio()
|
||||
|
||||
if __name__ == "__main__":
|
||||
similarity_threshold = 0.7
|
||||
|
||||
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
||||
|
||||
# read docs/data.json
|
||||
data_file = os.path.join(root_path, 'docs', 'data.json')
|
||||
text = read_text(data_file)
|
||||
data = json.loads(text)
|
||||
|
||||
# extract game names
|
||||
data = data['data']
|
||||
data = (x[0] for x in data)
|
||||
existing_names = list(re.sub(r' \([^)]*\)', '', x) for x in data)
|
||||
|
||||
# read names to test
|
||||
test_file = os.path.join(root_path, 'is_already_included.txt')
|
||||
text = read_text(test_file)
|
||||
test_names = text.split(', ')
|
||||
|
||||
# loop over all test names
|
||||
for test_name in test_names:
|
||||
matches = []
|
||||
# loop over all existing names
|
||||
for existing_name in existing_names:
|
||||
s = similarity(test_name.lower(), existing_name.lower())
|
||||
if s > similarity_threshold:
|
||||
matches.append('{} ({:.2f})'.format(existing_name, s))
|
||||
# were matches found
|
||||
if matches:
|
||||
print('{} maybe included in {}'.format(test_name, ', '.join(matches)))
|
||||
else:
|
||||
print('{} not included'.format(test_name))
|
||||
|
Reference in New Issue
Block a user