Created
October 23, 2021 07:02
-
-
Save jwilk/c7b23a11473776088f9a3ef12573f476 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# | |
# Copyright (c) 2008 Sandro Tosi <[email protected]> | |
# Copyright (c) 2009-2010 Luca Falavigna <[email protected]> | |
# License: Public Domain | |
# | |
# | |
# This tool is written to help Debian Python Modules/Apps Teams | |
# packagers identify what debian packages are needed to execute the | |
# tool being packaged. | |
# | |
# It achieves this by identifying the import statements and then by
# searching the installed packages for the one that contains the Python
# file of the module.
# | |
# NOTE: the biggest weakness is that it can identify only modules for
# installed packages; to help you still identify the needed modules, | |
# it prints the list of not verified modules. | |
# | |
# TODO: | |
# * add support for __import__ function (even if it's done only on | |
# some rare cases), it's just another check while parsing AST | |
# * it's not PEP8 compliant (pardon me...)
# * better formatting of output: | |
# - instead of print <module>: (<deb pkg>, # of occurrence) it | |
# might be better something like <deb pkg>: ((<mod1>,<mod2>,...) | |
# sum of # of occurrence) | |
# * possible package suggestions for unidentified modules, like check | |
# if exists a debpkg name "python-<module>" | |
# * fix all the other tons of bugs I made here and there :) | |
# http://docs.python.org/library/compiler.html | |
# deprecated in 2.6, removed in 3.0, but we are still on 2.5... | |
import compiler | |
# used to type-check the objects in the AST
from compiler.ast import Import, From | |
import sys | |
import glob | |
import os | |
import dircache | |
import stat | |
import subprocess | |
import re | |
# Matches the first line of a script run by /usr/bin/python, optionally
# through "env" and optionally with an X.Y version suffix, e.g.
# "#!/usr/bin/env python" or "#!/usr/bin/python2.5".  Written as a raw
# string so that "\d" and "\." reach the regex engine untouched instead of
# relying on Python passing unknown string escapes through.
python_shebang = re.compile(r'#!/usr/bin/(env |)?python(\d\.\d|)?$')
class ImportNotFound(Exception):
    """The module could not be imported or located on this machine."""
class ImportParseError(Exception):
    """Any other failure while inspecting a module (e.g. no __file__)."""
def convert_import_to_debian_pkg(imp):
    """Tries to identify the Debian package from the module name

    The module is imported, the path of its source file is derived from
    its ``__file__`` attribute, and ``dpkg -S`` is asked which package
    owns that path.

    imp -- dotted module name, as written in the import statement.

    Returns the package name printed by ``dpkg -S`` (the text before the
    first ':' of each output line, lines concatenated).

    Raises ImportNotFound when the module cannot be imported or when dpkg
    reports an error for its file; raises ImportParseError for any other
    failure (no ``__file__`` attribute, file missing on disk, dpkg not
    runnable, ...).
    """
    try:
        # NOTE(review): importing executes the module's top-level code, so
        # this tool should only be pointed at sources you trust.
        top = __import__(imp)
        # __import__('a.b') returns the top-level package 'a'; the module
        # actually named is registered in sys.modules under its full name.
        mod = sys.modules.get(imp, top)
        path = mod.__file__
        # point at the source file when a compiled one was loaded; only the
        # extension is rewritten, never another part of the path
        root, ext = os.path.splitext(path)
        if ext in ('.pyc', '.pyo'):
            path = root + '.py'
        # lstat doesn't follow symlinks, so the mode tells us whether the
        # file itself is one
        mode = os.lstat(path)[stat.ST_MODE]
        if stat.S_ISLNK(mode):
            # readlink() may return a target relative to the link's own
            # directory; join() anchors it there and keeps absolute targets
            target = os.readlink(path)
            path = os.path.normpath(
                os.path.join(os.path.dirname(path), target))
        # argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to dpkg verbatim
        proc = subprocess.Popen(['dpkg', '-S', path],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = proc.communicate()
        # a failing exit status or anything on stderr means dpkg could not
        # attribute the file to a package
        if proc.returncode != 0 or err:
            raise ImportNotFound()
        # "pkg: /path" -> "pkg" (what the former awk -F':' '{print $1}' did)
        return ''.join([line.split(':', 1)[0] for line in out.splitlines()])
    except ImportNotFound:
        # must not be downgraded to ImportParseError by the generic handler
        raise
    except ImportError as e:
        # module not found
        print("E: ImportError while checking %s; exception: %s" % (imp, str(e)))
        raise ImportNotFound()
    except Exception as e:
        # __file__ attribute doesn't exist or any other error
        print("E: error while checking %s; exception: %s" % (imp, str(e)))
        raise ImportParseError()
def find_py_files_in_dir(path):
    """Yield the files directly inside path whose extension is "py".

    The extension is matched in any letter case (.py, .PY, .Py, .pY);
    subdirectories are not visited.
    """
    any_case_py = os.path.join(path, "*.[Pp][Yy]")
    for py_file in glob.glob(any_case_py):
        yield py_file
def find_py_shebang_in_dir(path):
    """Find all files with Python shebang in a given directory

    Yields the path of every regular file directly inside path whose
    first line matches python_shebang.  Files that cannot be opened or
    read are skipped silently.
    """
    # every entry is a candidate, since scripts need no particular
    # extension (glob's "*" does not match names starting with a dot)
    for match in glob.glob(os.path.join(path, "*")):
        if not os.path.isfile(match):
            continue
        try:
            f = open(match)
            try:
                # only the first line can hold the shebang
                shebang = f.readline()
            finally:
                # release the handle even when the read fails
                f.close()
        except IOError:
            continue
        if python_shebang.match(shebang):
            yield match
def find_py_files_in_dir_recursive(path):
    """Find all py files in a given directory, then go recursing subdirs

    Yields first the files of path itself (those with a "py" extension,
    then those with a Python shebang), then the files of each
    subdirectory in sorted name order.  A file that qualifies both by
    extension and by shebang is yielded only once, so it is not parsed
    and counted twice by the caller.

    NOTE: os.path.isdir() follows symbolic links, so a link pointing back
    to a parent directory makes this recursion endless.
    """
    # check first in the dir passed as parameter
    seen = set()
    for finder in (find_py_files_in_dir, find_py_shebang_in_dir):
        for match in finder(path):
            if match in seen:
                continue
            seen.add(match)
            yield match
    # listdir() returns bare names in arbitrary order: sort them for a
    # stable traversal and join them back onto path
    for item in sorted(os.listdir(path)):
        subpath = os.path.join(path, item)
        # if it's a dir, then go recursive on it
        if os.path.isdir(subpath):
            # yield every item found in the recursive call!
            for subfile in find_py_files_in_dir_recursive(subpath):
                yield subfile
def parse_file_import(data):
    """Count every module imported in the syntax tree rooted at data.

    Walks the children of data depth-first.  For each Import node, every
    imported name gets its counter in the module-level import_dict
    increased by one; for each From node the same is done for the module
    being imported from.  Any other child is searched recursively.

    Objects without a getChildren method are ignored, which is what ends
    the recursion.
    """
    # Stop explicitly on anything that is not a tree node (per the
    # compiler.ast documentation getChildren() can also return plain
    # objects), rather than hiding every possible error behind a bare
    # "except: pass".
    get_children = getattr(data, 'getChildren', None)
    if get_children is None:
        return
    # scan nodes...
    for child in get_children():
        # ... until you reach an Import object...
        if isinstance(child, Import):
            # ... then add it to import dict
            for name, alias in child.names:
                add_value_to_dict(import_dict, name, 1)
        # the same for From objects
        elif isinstance(child, From):
            add_value_to_dict(import_dict, child.modname, 1)
        else:
            # if object is not From or Import, check its children
            parse_file_import(child)
def add_value_to_dict(dict, key, value):
    """Accumulate value under key: start the entry if absent, else add to it."""
    if key not in dict:
        # first sighting of this key: value becomes the initial entry
        dict[key] = value
        return
    dict[key] += value
# main
# Shared accumulators: parse_file_import() counts into import_dict, and
# main() derives the other two from it.
import_dict = {}    # module name -> number of import statements seen
mod_pkgs = {}       # module name -> (debian package, count)
mod_not_found = {}  # module name -> (reason, count)


def main(argv=None):
    """Scan the directory named in argv[1] and print the module/package maps.

    argv -- argument list, defaults to sys.argv.

    Returns 0 after printing the results, or 2 when no directory was
    given on the command line.
    """
    import pprint

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # without this check a missing argument ends in an IndexError
        sys.stderr.write("Usage: %s <directory>\n"
                         % os.path.basename(sys.argv[0]))
        return 2

    # main file parse loop
    for path in find_py_files_in_dir_recursive(argv[1]):
        try:
            # parses the syntax tree
            parse_file_import(compiler.parseFile(path))
        except Exception as e:
            print("Error parsing " + path + "; exception: " + str(e))

    # loop to identify the deb pkg containing each module, or add to
    # discards list
    for module, count in import_dict.items():
        try:
            pkg = convert_import_to_debian_pkg(module)
            mod_pkgs[module] = (pkg, count)
        except ImportNotFound:
            mod_not_found[module] = ('module not found on this machine', count)
        except ImportParseError:
            mod_not_found[module] = ('error parsing module', count)

    # temporary output printing
    pprint.pprint(mod_pkgs)
    pprint.pprint(mod_not_found)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment