update Github information (devs and project stars)
This commit is contained in:
48
code/helpers/is_already_included.py
Normal file
48
code/helpers/is_already_included.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""
|
||||
Checks a list of game names (comma separated in text file) if they are already included in the database.
|
||||
Is fuzzy, i.e. accepts a certain similarity of names.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
def similarity(a, b):
|
||||
return SequenceMatcher(None, a, b).ratio()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
similarity_threshold = 0.7
|
||||
|
||||
root_path = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
|
||||
|
||||
# read docs/data.json
|
||||
data_file = os.path.join(root_path, 'docs', 'data.json')
|
||||
text = read_text(data_file)
|
||||
data = json.loads(text)
|
||||
|
||||
# extract game names
|
||||
data = data['data']
|
||||
data = (x[0] for x in data)
|
||||
existing_names = list(re.sub(r' \([^)]*\)', '', x) for x in data)
|
||||
|
||||
# read names to test
|
||||
test_file = os.path.join(root_path, 'is_already_included.txt')
|
||||
text = read_text(test_file)
|
||||
test_names = text.split(', ')
|
||||
|
||||
# loop over all test names
|
||||
for test_name in test_names:
|
||||
matches = []
|
||||
# loop over all existing names
|
||||
for existing_name in existing_names:
|
||||
s = similarity(test_name.lower(), existing_name.lower())
|
||||
if s > similarity_threshold:
|
||||
matches.append('{} ({:.2f})'.format(existing_name, s))
|
||||
# were matches found
|
||||
if matches:
|
||||
print('{} maybe included in {}'.format(test_name, ', '.join(matches)))
|
||||
else:
|
||||
print('{} not included'.format(test_name))
|
95
code/helpers/list_python_external_imports.py
Normal file
95
code/helpers/list_python_external_imports.py
Normal file
@ -0,0 +1,95 @@
|
||||
"""
|
||||
Where no requirements.txt or setup.py or other information is given for a Python project, get an idea of the external dependencies
|
||||
by parsing the Python files and looking for import statements.
|
||||
"""
|
||||
|
||||
import re
|
||||
from utils.utils import *
|
||||
|
||||
|
||||
def local_module(module_base, file_path, module):
|
||||
"""
|
||||
|
||||
"""
|
||||
module = module.split('.')
|
||||
module[-1] += '.py'
|
||||
pathA = os.path.join(module_base, *module)
|
||||
pathB = os.path.join(file_path, *module)
|
||||
return os.path.exists(pathA) or os.path.exists(pathB)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
system_libraries = {'__builtin__', '.', '..', '*', 'argparse', 'array', 'os', 'copy', 'codecs', 'collections',
|
||||
'ctypes', 'pickle', 'cPickle', 'datetime', 'decimal', 'email', 'functools',
|
||||
'io', 'itertools', 'json', 'httplib', 'glob', 'math', 'cmath', 'heapq', 'md5', 'operator',
|
||||
'random', 're', 'sha', 'shutil', 'smtplib', 'socket', 'string', 'struct', 'subprocess',
|
||||
'sys', 'thread', 'threading', 'time', 'traceback', 'types', 'urllib', 'urllib2', 'urlparse',
|
||||
'unittest', 'yaml', 'yaml3', 'zlib', 'zipfile', '__future__'}
|
||||
regex_import = re.compile(r"^\s*import (.*)", re.MULTILINE)
|
||||
regex_from = re.compile(r"^\s*from (.*) import (.*)", re.MULTILINE)
|
||||
regex_comment = re.compile(r"(#.*)$", re.MULTILINE)
|
||||
regex_as = re.compile(r"(as.*)$", re.MULTILINE)
|
||||
|
||||
# modify these locations
|
||||
root_folder = r''
|
||||
module_base = r''
|
||||
|
||||
# get all *.py files below the root_folder
|
||||
python_files = []
|
||||
setup_files = []
|
||||
for dirpath, dirnames, filenames in os.walk(root_folder):
|
||||
for file in ('setup.py', 'requirements.txt'):
|
||||
if file in filenames:
|
||||
setup_files.append(os.path.join(dirpath, file))
|
||||
filenames = [x for x in filenames if x.endswith('.py') or x.endswith('.pyw') or x.endswith('.cry')]
|
||||
if filenames:
|
||||
filenames = [os.path.join(dirpath, x) for x in filenames]
|
||||
python_files.extend(filenames)
|
||||
print('found {} Python files'.format(len(python_files)))
|
||||
if setup_files:
|
||||
print('found setup files: {}'.format(', '.join(setup_files)))
|
||||
|
||||
# iterate over all these files
|
||||
imports = []
|
||||
for file in python_files:
|
||||
|
||||
# get file path
|
||||
file_path = os.path.split(file)[0]
|
||||
|
||||
# read file content
|
||||
content = read_text(file)
|
||||
|
||||
# remove comments
|
||||
content = regex_comment.sub('', content)
|
||||
|
||||
# remove as clauses
|
||||
content = regex_as.sub('', content)
|
||||
|
||||
# search for "import .." statements
|
||||
matches = regex_import.findall(content)
|
||||
|
||||
for match in matches:
|
||||
modules = match.split(',') # split if more
|
||||
for module in modules:
|
||||
module = module.strip()
|
||||
if not local_module(module_base, file_path, module):
|
||||
imports.append(module)
|
||||
|
||||
# search for "from .. import .." statements
|
||||
matches = regex_from.findall(content)
|
||||
|
||||
for match in matches:
|
||||
module = match[0] # only the from part
|
||||
module = module.strip()
|
||||
if not local_module(module_base, file_path, module):
|
||||
imports.append(module)
|
||||
|
||||
# throw out duplicates
|
||||
imports = list(set(imports) - system_libraries)
|
||||
|
||||
# sort
|
||||
imports.sort()
|
||||
|
||||
# display
|
||||
print('\n'.join(imports))
|
Reference in New Issue
Block a user