Return a list of imported Python modules used in a script?
I am writing a program that categorizes a list of Python files by which modules they import. As such I need to scan the collection of .py files and return a list of which modules they import. As an example, if one of the files I scan has the following lines:
import os
import sys, gtk
I would like it to return:
["os", "sys", "gtk"]
I played with modulefinder and wrote:
from modulefinder import ModuleFinder
finder = ModuleFinder()
finder.run_script('testscript.py')
print 'Loaded modules:'
for name, mod in finder.modules.iteritems():
    print '%s ' % name,
but this returns more than just the modules used in the script. As an example in a script which merely has:
import os
print os.getenv('USERNAME')
The modules returned by the ModuleFinder script are:
tokenize heapq __future__ copy_reg sre_compile _collections cStringIO _sre functools random cPickle __builtin__ subprocess cmd gc __main__ operator array select _heapq _threading_local abc _bisect posixpath _random os2emxpath tempfile errno pprint binascii token sre_constants re _abcoll collections ntpath threading opcode _struct _warnings math shlex fcntl genericpath stat string warnings UserDict inspect repr struct sys pwd imp getopt readline copy bdb types strop _functools keyword thread StringIO bisect pickle signal traceback difflib marshal linecache itertools dummy_thread posix doctest unittest time sre_parse os pdb dis
...whereas I just want it to return 'os', as that was the module used in the script.
Can anyone help me achieve this?
UPDATE: I just want to clarify that I would like to do this without running the Python file being analyzed, and just scanning the code.
IMO the best way to do this is to use the http://furius.ca/snakefood/ package. The author has done all of the required work: it finds not just directly imported modules, but uses the AST to parse the code for runtime dependencies that a more static analysis would miss.
Worked up a command example to demonstrate:
sfood ./example.py | sfood-cluster > example.deps
That will generate a basic dependency file of each unique module. For even more detail use:
sfood -r -i ./example.py | sfood-cluster > example.deps
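If you would rather drive those tools from Python than the shell, here is a rough sketch of the same pipeline via subprocess - it assumes sfood and sfood-cluster are installed and on PATH, and the file names are just the ones from the commands above:
import subprocess

# Run sfood on the script and pipe its output through sfood-cluster,
# mirroring: sfood -r -i ./example.py | sfood-cluster > example.deps
sfood = subprocess.Popen(["sfood", "-r", "-i", "./example.py"],
                         stdout=subprocess.PIPE)
deps = subprocess.check_output(["sfood-cluster"], stdin=sfood.stdout)
sfood.stdout.close()
sfood.wait()

with open("example.deps", "wb") as f:
    f.write(deps)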
To walk a tree and find all imports, you can also do it in code. Please note: the AST chunks of this routine were lifted from the snakefood source, which carries this copyright: Copyright (C) 2001-2007 Martin Blais. All Rights Reserved.
import os
import compiler
from compiler.ast import Discard, Const
from compiler.visitor import ASTVisitor

def pyfiles(startPath):
    r = []
    d = os.path.abspath(startPath)
    if os.path.exists(d) and os.path.isdir(d):
        for root, dirs, files in os.walk(d):
            for f in files:
                n, ext = os.path.splitext(f)
                if ext == '.py':
                    # use the walk root, not the start dir, so files in
                    # subdirectories resolve to the right path
                    r.append([root, f])
    return r

class ImportVisitor(object):
    def __init__(self):
        self.modules = []
        self.recent = []

    def visitImport(self, node):
        self.accept_imports()
        self.recent.extend((x[0], None, x[1] or x[0], node.lineno, 0)
                           for x in node.names)

    def visitFrom(self, node):
        self.accept_imports()
        modname = node.modname
        if modname == '__future__':
            return  # Ignore these.
        for name, as_ in node.names:
            if name == '*':
                # We really don't know...
                mod = (modname, None, None, node.lineno, node.level)
            else:
                mod = (modname, name, as_ or name, node.lineno, node.level)
            self.recent.append(mod)

    def default(self, node):
        pragma = None
        if self.recent:
            if isinstance(node, Discard):
                children = node.getChildren()
                if len(children) == 1 and isinstance(children[0], Const):
                    const_node = children[0]
                    pragma = const_node.value
        self.accept_imports(pragma)

    def accept_imports(self, pragma=None):
        self.modules.extend((m, r, l, n, lvl, pragma)
                            for (m, r, l, n, lvl) in self.recent)
        self.recent = []

    def finalize(self):
        self.accept_imports()
        return self.modules

class ImportWalker(ASTVisitor):
    def __init__(self, visitor):
        ASTVisitor.__init__(self)
        self._visitor = visitor

    def default(self, node, *args):
        self._visitor.default(node)
        ASTVisitor.default(self, node, *args)

def parse_python_source(fn):
    contents = open(fn, 'rU').read()
    ast = compiler.parse(contents)
    vis = ImportVisitor()
    compiler.walk(ast, vis, ImportWalker(vis))
    return vis.finalize()

for d, f in pyfiles('/Users/bear/temp/foobar'):
    print d, f
    print parse_python_source(os.path.join(d, f))
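Note that the compiler module this code relies on is Python 2 only; it was removed in Python 3. If you need similar (module, name, asname, lineno, level) tuples on Python 3, here is a rough sketch using the stdlib ast module instead - the class and function names below are mine, not snakefood's, and the pragma handling from the original is omitted:
import ast

class Py3ImportVisitor(ast.NodeVisitor):
    # Collects (module, name, asname, lineno, level) tuples.
    def __init__(self):
        self.modules = []

    def visit_Import(self, node):
        for alias in node.names:
            self.modules.append((alias.name, None,
                                 alias.asname or alias.name, node.lineno, 0))

    def visit_ImportFrom(self, node):
        if node.module == '__future__':
            return  # Ignore these, as the original does.
        for alias in node.names:
            if alias.name == '*':
                # We really don't know what a star import pulls in.
                self.modules.append((node.module, None, None,
                                     node.lineno, node.level))
            else:
                self.modules.append((node.module, alias.name,
                                     alias.asname or alias.name,
                                     node.lineno, node.level))

def parse_python_source_py3(fn):
    with open(fn) as f:
        tree = ast.parse(f.read())
    vis = Py3ImportVisitor()
    vis.visit(tree)
    return vis.modules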
I recently needed all the dependencies for a given Python script and took a different approach than the other answers. I only cared about top-level module names (e.g., I wanted foo from import foo.bar).
This is the code using the ast module:
import ast

modules = set()

def visit_Import(node):
    for name in node.names:
        modules.add(name.name.split(".")[0])

def visit_ImportFrom(node):
    # if node.module is missing it's a "from . import ..." statement
    # if level > 0 it's a "from .submodule import ..." statement
    if node.module is not None and node.level == 0:
        modules.add(node.module.split(".")[0])

node_iter = ast.NodeVisitor()
node_iter.visit_Import = visit_Import
node_iter.visit_ImportFrom = visit_ImportFrom
Testing with a Python file foo.py that contains:
# foo.py
import sys, os
import foo1
from foo2 import bar
from foo3 import bar as che
import foo4 as boo
import foo5.zoo
from foo6 import *
from . import foo7, foo8
from .foo12 import foo13
from foo9 import foo10, foo11

def do():
    import bar1
    from bar2 import foo
    from bar3 import che as baz
I could get all the modules in foo.py by doing something like this:
with open("foo.py") as f:
node_iter.visit(ast.parse(f.read()))
print(modules)
which would give me this output:
set(['bar1', 'bar3', 'bar2', 'sys', 'foo9', 'foo4', 'foo5', 'foo6', 'os', 'foo1', 'foo2', 'foo3'])
You might want to try dis (pun intended):
import dis
from collections import defaultdict
from pprint import pprint
statements = """
from __future__ import (absolute_import,
                        division)
import os
import collections, itertools
from math import *
from gzip import open as gzip_open
from subprocess import check_output, Popen
"""
instructions = dis.get_instructions(statements)
imports = [__ for __ in instructions if 'IMPORT' in __.opname]
grouped = defaultdict(list)
for instr in imports:
    grouped[instr.opname].append(instr.argval)
pprint(grouped)
outputs
defaultdict(<class 'list'>,
{'IMPORT_FROM': ['absolute_import',
'division',
'open',
'check_output',
'Popen'],
'IMPORT_NAME': ['__future__',
'os',
'collections',
'itertools',
'math',
'gzip',
'subprocess'],
'IMPORT_STAR': [None]})
Your imported modules are in grouped['IMPORT_NAME'].
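As a side note, dis.get_instructions was added in Python 3.4, so this answer is Python 3 only. If you want just the top-level module names, mirroring the ast answer above, you could reduce the IMPORT_NAME group like this (my own follow-on, not part of the original snippet):
# Collapse dotted names ('foo.bar' -> 'foo') to get top-level modules.
top_level = sorted({name.split(".")[0] for name in grouped['IMPORT_NAME']})
print(top_level)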
It depends how thorough you want to be. Finding the modules a program actually uses is undecidable in general: some Python code imports lazily, only pulling in things it actually needs on a particular run, and some generates the names to import dynamically (e.g. plugin systems).
python -v will trace import statements - it's arguably the simplest thing to check.
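For example - note that this actually runs the script, and the exact trace format differs between Python 2 and 3:
python -v testscript.py 2>&1 | grep '^import '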
This works - using importlib to actually import the module, and inspect to get the members:
#! /usr/bin/env python
#
# test.py
#
# Find Modules
#
import inspect, importlib as implib

if __name__ == "__main__":
    mod = implib.import_module("example")
    for i in inspect.getmembers(mod, inspect.ismodule):
        print i[0]
#! /usr/bin/env python
#
# example.py
#
import sys
from os import path

if __name__ == "__main__":
    print "Hello World !!!!"
Output :
tony@laptop .../~:$ ./test.py
path
sys