1 """Find modules used by a script, using introspection."""
2 
3 import dis
4 import importlib._bootstrap_external
5 import importlib.machinery
6 import marshal
7 import os
8 import io
9 import sys
10 
# Old imp constants:
#
# Module-kind codes.  One of these is the last element of the
# ``(suffix, mode, type)`` triple returned by _find_module() and inspected
# by ModuleFinder.load_module().

_SEARCH_ERROR = 0   # returned by _find_module() for an unrecognised loader
_PY_SOURCE = 1      # Python source file
_PY_COMPILED = 2    # sourceless bytecode file
_C_EXTENSION = 3    # extension module
_PKG_DIRECTORY = 5  # package directory
_C_BUILTIN = 6      # built-in module
_PY_FROZEN = 7      # frozen module
20 
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in this
# map for a package, and they will be honored.
25 
# Note: this is a mapping from package names to lists of extra paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search path for package *packagename*.

    The path is appended to the list kept for that package in
    ``packagePathMap``; the list is created on first use.
    """
    extra_paths = packagePathMap.setdefault(packagename, [])
    extra_paths.append(path)
32 
replacePackageMap = {}

# The ReplacePackage mechanism lets modulefinder cope with a package that
# injects itself into sys.modules at runtime under the name of another
# package.  Call
#     ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be registered as *newname*.

    A later call with the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
43 
44 
def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Look *name* up with ``importlib.machinery.PathFinder`` on *path* and
    return a ``(file, pathname, (suffix, mode, type))`` triple, where *type*
    is one of the module-level ``_PY_SOURCE``-style constants:

    * built-in and frozen modules: ``(None, None, ("", "", type))``;
    * packages: ``(None, package_directory, ("", "", _PKG_DIRECTORY))``;
    * source, sourceless bytecode and extension modules: an open binary
      file (the caller must close it), its path, and
      ``(suffix, "rb", type)``;
    * any other loader: ``(None, None, ("", "", _SEARCH_ERROR))``.

    Raises ImportError when no spec is found, or when the spec describes a
    namespace package (it has no loader/origin, hence no ``__init__`` file
    to scan).
    """

    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:

    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)

    if spec is None:
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    # Some special cases:

    if spec.loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)

    if spec.loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    file_path = spec.origin

    # A namespace package comes back without a loader and without an origin.
    # Calling spec.loader.is_package() or os.path.dirname(None) on it would
    # crash with AttributeError/TypeError, so report it the way the old
    # imp.find_module() did for a directory lacking __init__: ImportError,
    # which every caller already handles.
    if spec.loader is None or file_path is None:
        raise ImportError(
            "No module named {name!r}".format(name=name), name=name)

    if spec.loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE

    elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION

    elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED

    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    file = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]

    return file, file_path, (suffix, "rb", kind)
88 
89 
class Module:
    """Bookkeeping record for one module seen by the finder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names assigned to in the module (used as a set; the keys
        # matter, not the values).  Names pulled in through star-imports
        # of Python modules are included.
        self.globalnames = dict()
        # Star-imports this module did that could not be resolved,
        # i.e. a star-import from a non-Python module.
        self.starimports = dict()

    def __repr__(self):
        # Always show the name; show file and path only when they are set.
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(%s)" % ", ".join(shown)
113 
114 class ModuleFinder:
115 
116     def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
117         if path is None:
118             path = sys.path
119         self.path = path
120         self.modules = {}
121         self.badmodules = {}
122         self.debug = debug
123         self.indent = 0
124         self.excludes = excludes if excludes is not None else []
125         self.replace_paths = replace_paths if replace_paths is not None else []
126         self.processed_paths = []   # Used in debugging only
127 
128     def msg(self, level, str, *args):
129         if level <= self.debug:
130             for i in range(self.indent):
131                 print("   ", end=' ')
132             print(str, end=' ')
133             for arg in args:
134                 print(repr(arg), end=' ')
135             print()
136 
137     def msgin(self, *args):
138         level = args[0]
139         if level <= self.debug:
140             self.indent = self.indent + 1
141             self.msg(*args)
142 
143     def msgout(self, *args):
144         level = args[0]
145         if level <= self.debug:
146             self.indent = self.indent - 1
147             self.msg(*args)
148 
149     def run_script(self, pathname):
150         self.msg(2, "run_script", pathname)
151         with io.open_code(pathname) as fp:
152             stuff = ("", "rb", _PY_SOURCE)
153             self.load_module('__main__', fp, pathname, stuff)
154 
155     def load_file(self, pathname):
156         dir, name = os.path.split(pathname)
157         name, ext = os.path.splitext(name)
158         with io.open_code(pathname) as fp:
159             stuff = (ext, "rb", _PY_SOURCE)
160             self.load_module(name, fp, pathname, stuff)
161 
162     def import_hook(self, name, caller=None, fromlist=None, level=-1):
163         self.msg(3, "import_hook", name, caller, fromlist, level)
164         parent = self.determine_parent(caller, level=level)
165         q, tail = self.find_head_package(parent, name)
166         m = self.load_tail(q, tail)
167         if not fromlist:
168             return q
169         if m.__path__:
170             self.ensure_fromlist(m, fromlist)
171         return None
172 
173     def determine_parent(self, caller, level=-1):
174         self.msgin(4, "determine_parent", caller, level)
175         if not caller or level == 0:
176             self.msgout(4, "determine_parent -> None")
177             return None
178         pname = caller.__name__
179         if level >= 1: # relative import
180             if caller.__path__:
181                 level -= 1
182             if level == 0:
183                 parent = self.modules[pname]
184                 assert parent is caller
185                 self.msgout(4, "determine_parent ->", parent)
186                 return parent
187             if pname.count(".") < level:
188                 raise ImportError("relative importpath too deep")
189             pname = ".".join(pname.split(".")[:-level])
190             parent = self.modules[pname]
191             self.msgout(4, "determine_parent ->", parent)
192             return parent
193         if caller.__path__:
194             parent = self.modules[pname]
195             assert caller is parent
196             self.msgout(4, "determine_parent ->", parent)
197             return parent
198         if '.' in pname:
199             i = pname.rfind('.')
200             pname = pname[:i]
201             parent = self.modules[pname]
202             assert parent.__name__ == pname
203             self.msgout(4, "determine_parent ->", parent)
204             return parent
205         self.msgout(4, "determine_parent -> None")
206         return None
207 
208     def find_head_package(self, parent, name):
209         self.msgin(4, "find_head_package", parent, name)
210         if '.' in name:
211             i = name.find('.')
212             head = name[:i]
213             tail = name[i+1:]
214         else:
215             head = name
216             tail = ""
217         if parent:
218             qname = "%s.%s" % (parent.__name__, head)
219         else:
220             qname = head
221         q = self.import_module(head, qname, parent)
222         if q:
223             self.msgout(4, "find_head_package ->", (q, tail))
224             return q, tail
225         if parent:
226             qname = head
227             parent = None
228             q = self.import_module(head, qname, parent)
229             if q:
230                 self.msgout(4, "find_head_package ->", (q, tail))
231                 return q, tail
232         self.msgout(4, "raise ImportError: No module named", qname)
233         raise ImportError("No module named " + qname)
234 
235     def load_tail(self, q, tail):
236         self.msgin(4, "load_tail", q, tail)
237         m = q
238         while tail:
239             i = tail.find('.')
240             if i < 0: i = len(tail)
241             head, tail = tail[:i], tail[i+1:]
242             mname = "%s.%s" % (m.__name__, head)
243             m = self.import_module(head, mname, m)
244             if not m:
245                 self.msgout(4, "raise ImportError: No module named", mname)
246                 raise ImportError("No module named " + mname)
247         self.msgout(4, "load_tail ->", m)
248         return m
249 
250     def ensure_fromlist(self, m, fromlist, recursive=0):
251         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
252         for sub in fromlist:
253             if sub == "*":
254                 if not recursive:
255                     all = self.find_all_submodules(m)
256                     if all:
257                         self.ensure_fromlist(m, all, 1)
258             elif not hasattr(m, sub):
259                 subname = "%s.%s" % (m.__name__, sub)
260                 submod = self.import_module(sub, subname, m)
261                 if not submod:
262                     raise ImportError("No module named " + subname)
263 
264     def find_all_submodules(self, m):
265         if not m.__path__:
266             return
267         modules = {}
268         # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
269         # But we must also collect Python extension modules - although
270         # we cannot separate normal dlls from Python extensions.
271         suffixes = []
272         suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
273         suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
274         suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
275         for dir in m.__path__:
276             try:
277                 names = os.listdir(dir)
278             except OSError:
279                 self.msg(2, "can't list directory", dir)
280                 continue
281             for name in names:
282                 mod = None
283                 for suff in suffixes:
284                     n = len(suff)
285                     if name[-n:] == suff:
286                         mod = name[:-n]
287                         break
288                 if mod and mod != "__init__":
289                     modules[mod] = mod
290         return modules.keys()
291 
292     def import_module(self, partname, fqname, parent):
293         self.msgin(3, "import_module", partname, fqname, parent)
294         try:
295             m = self.modules[fqname]
296         except KeyError:
297             pass
298         else:
299             self.msgout(3, "import_module ->", m)
300             return m
301         if fqname in self.badmodules:
302             self.msgout(3, "import_module -> None")
303             return None
304         if parent and parent.__path__ is None:
305             self.msgout(3, "import_module -> None")
306             return None
307         try:
308             fp, pathname, stuff = self.find_module(partname,
309                                                    parent and parent.__path__, parent)
310         except ImportError:
311             self.msgout(3, "import_module ->", None)
312             return None
313 
314         try:
315             m = self.load_module(fqname, fp, pathname, stuff)
316         finally:
317             if fp:
318                 fp.close()
319         if parent:
320             setattr(parent, partname, m)
321         self.msgout(3, "import_module ->", m)
322         return m
323 
324     def load_module(self, fqname, fp, pathname, file_info):
325         suffix, mode, type = file_info
326         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
327         if type == _PKG_DIRECTORY:
328             m = self.load_package(fqname, pathname)
329             self.msgout(2, "load_module ->", m)
330             return m
331         if type == _PY_SOURCE:
332             co = compile(fp.read(), pathname, 'exec')
333         elif type == _PY_COMPILED:
334             try:
335                 data = fp.read()
336                 importlib._bootstrap_external._classify_pyc(data, fqname, {})
337             except ImportError as exc:
338                 self.msgout(2, "raise ImportError: " + str(exc), pathname)
339                 raise
340             co = marshal.loads(memoryview(data)[16:])
341         else:
342             co = None
343         m = self.add_module(fqname)
344         m.__file__ = pathname
345         if co:
346             if self.replace_paths:
347                 co = self.replace_paths_in_code(co)
348             m.__code__ = co
349             self.scan_code(co, m)
350         self.msgout(2, "load_module ->", m)
351         return m
352 
353     def _add_badmodule(self, name, caller):
354         if name not in self.badmodules:
355             self.badmodules[name] = {}
356         if caller:
357             self.badmodules[name][caller.__name__] = 1
358         else:
359             self.badmodules[name]["-"] = 1
360 
361     def _safe_import_hook(self, name, caller, fromlist, level=-1):
362         # wrapper for self.import_hook() that won't raise ImportError
363         if name in self.badmodules:
364             self._add_badmodule(name, caller)
365             return
366         try:
367             self.import_hook(name, caller, level=level)
368         except ImportError as msg:
369             self.msg(2, "ImportError:", str(msg))
370             self._add_badmodule(name, caller)
371         except SyntaxError as msg:
372             self.msg(2, "SyntaxError:", str(msg))
373             self._add_badmodule(name, caller)
374         else:
375             if fromlist:
376                 for sub in fromlist:
377                     fullname = name + "." + sub
378                     if fullname in self.badmodules:
379                         self._add_badmodule(fullname, caller)
380                         continue
381                     try:
382                         self.import_hook(name, caller, [sub], level=level)
383                     except ImportError as msg:
384                         self.msg(2, "ImportError:", str(msg))
385                         self._add_badmodule(fullname, caller)
386 
387     def scan_opcodes(self, co):
388         # Scan the code, and yield 'interesting' opcode combinations
389         for name in dis._find_store_names(co):
390             yield "store", (name,)
391         for name, level, fromlist in dis._find_imports(co):
392             if level == 0:  # absolute import
393                 yield "absolute_import", (fromlist, name)
394             else:  # relative import
395                 yield "relative_import", (level, fromlist, name)
396 
397     def scan_code(self, co, m):
398         code = co.co_code
399         scanner = self.scan_opcodes
400         for what, args in scanner(co):
401             if what == "store":
402                 name, = args
403                 m.globalnames[name] = 1
404             elif what == "absolute_import":
405                 fromlist, name = args
406                 have_star = 0
407                 if fromlist is not None:
408                     if "*" in fromlist:
409                         have_star = 1
410                     fromlist = [f for f in fromlist if f != "*"]
411                 self._safe_import_hook(name, m, fromlist, level=0)
412                 if have_star:
413                     # We've encountered an "import *". If it is a Python module,
414                     # the code has already been parsed and we can suck out the
415                     # global names.
416                     mm = None
417                     if m.__path__:
418                         # At this point we don't know whether 'name' is a
419                         # submodule of 'm' or a global module. Let's just try
420                         # the full name first.
421                         mm = self.modules.get(m.__name__ + "." + name)
422                     if mm is None:
423                         mm = self.modules.get(name)
424                     if mm is not None:
425                         m.globalnames.update(mm.globalnames)
426                         m.starimports.update(mm.starimports)
427                         if mm.__code__ is None:
428                             m.starimports[name] = 1
429                     else:
430                         m.starimports[name] = 1
431             elif what == "relative_import":
432                 level, fromlist, name = args
433                 if name:
434                     self._safe_import_hook(name, m, fromlist, level=level)
435                 else:
436                     parent = self.determine_parent(m, level=level)
437                     self._safe_import_hook(parent.__name__, None, fromlist, level=0)
438             else:
439                 # We don't expect anything else from the generator.
440                 raise RuntimeError(what)
441 
442         for c in co.co_consts:
443             if isinstance(c, type(co)):
444                 self.scan_code(c, m)
445 
446     def load_package(self, fqname, pathname):
447         self.msgin(2, "load_package", fqname, pathname)
448         newname = replacePackageMap.get(fqname)
449         if newname:
450             fqname = newname
451         m = self.add_module(fqname)
452         m.__file__ = pathname
453         m.__path__ = [pathname]
454 
455         # As per comment at top of file, simulate runtime __path__ additions.
456         m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
457 
458         fp, buf, stuff = self.find_module("__init__", m.__path__)
459         try:
460             self.load_module(fqname, fp, buf, stuff)
461             self.msgout(2, "load_package ->", m)
462             return m
463         finally:
464             if fp:
465                 fp.close()
466 
467     def add_module(self, fqname):
468         if fqname in self.modules:
469             return self.modules[fqname]
470         self.modules[fqname] = m = Module(fqname)
471         return m
472 
473     def find_module(self, name, path, parent=None):
474         if parent is not None:
475             # assert path is not None
476             fullname = parent.__name__+'.'+name
477         else:
478             fullname = name
479         if fullname in self.excludes:
480             self.msgout(3, "find_module -> Excluded", fullname)
481             raise ImportError(name)
482 
483         if path is None:
484             if name in sys.builtin_module_names:
485                 return (None, None, ("", "", _C_BUILTIN))
486 
487             path = self.path
488 
489         return _find_module(name, path)
490 
491     def report(self):
492         """Print a report to stdout, listing the found modules with their
493         paths, as well as modules that are missing, or seem to be missing.
494         """
495         print()
496         print("  %-25s %s" % ("Name", "File"))
497         print("  %-25s %s" % ("----", "----"))
498         # Print modules found
499         keys = sorted(self.modules.keys())
500         for key in keys:
501             m = self.modules[key]
502             if m.__path__:
503                 print("P", end=' ')
504             else:
505                 print("m", end=' ')
506             print("%-25s" % key, m.__file__ or "")
507 
508         # Print missing modules
509         missing, maybe = self.any_missing_maybe()
510         if missing:
511             print()
512             print("Missing modules:")
513             for name in missing:
514                 mods = sorted(self.badmodules[name].keys())
515                 print("?", name, "imported from", ', '.join(mods))
516         # Print modules that may be missing, but then again, maybe not...
517         if maybe:
518             print()
519             print("Submodules that appear to be missing, but could also be", end=' ')
520             print("global names in the parent package:")
521             for name in maybe:
522                 mods = sorted(self.badmodules[name].keys())
523                 print("?", name, "imported from", ', '.join(mods))
524 
525     def any_missing(self):
526         """Return a list of modules that appear to be missing. Use
527         any_missing_maybe() if you want to know which modules are
528         certain to be missing, and which *may* be missing.
529         """
530         missing, maybe = self.any_missing_maybe()
531         return missing + maybe
532 
533     def any_missing_maybe(self):
534         """Return two lists, one with modules that are certainly missing
535         and one with modules that *may* be missing. The latter names could
536         either be submodules *or* just global names in the package.
537 
538         The reason it can't always be determined is that it's impossible to
539         tell which names are imported when "from module import *" is done
540         with an extension module, short of actually importing it.
541         """
542         missing = []
543         maybe = []
544         for name in self.badmodules:
545             if name in self.excludes:
546                 continue
547             i = name.rfind(".")
548             if i < 0:
549                 missing.append(name)
550                 continue
551             subname = name[i+1:]
552             pkgname = name[:i]
553             pkg = self.modules.get(pkgname)
554             if pkg is not None:
555                 if pkgname in self.badmodules[name]:
556                     # The package tried to import this module itself and
557                     # failed. It's definitely missing.
558                     missing.append(name)
559                 elif subname in pkg.globalnames:
560                     # It's a global in the package: definitely not missing.
561                     pass
562                 elif pkg.starimports:
563                     # It could be missing, but the package did an "import *"
564                     # from a non-Python module, so we simply can't be sure.
565                     maybe.append(name)
566                 else:
567                     # It's not a global in the package, the package didn't
568                     # do funny star imports, it's very likely to be missing.
569                     # The symbol could be inserted into the package from the
570                     # outside, but since that's not good style we simply list
571                     # it missing.
572                     missing.append(name)
573             else:
574                 missing.append(name)
575         missing.sort()
576         maybe.sort()
577         return missing, maybe
578 
579     def replace_paths_in_code(self, co):
580         new_filename = original_filename = os.path.normpath(co.co_filename)
581         for f, r in self.replace_paths:
582             if original_filename.startswith(f):
583                 new_filename = r + original_filename[len(f):]
584                 break
585 
586         if self.debug and original_filename not in self.processed_paths:
587             if new_filename != original_filename:
588                 self.msgout(2, "co_filename %r changed to %r" \
589                                     % (original_filename,new_filename,))
590             else:
591                 self.msgout(2, "co_filename %r remains unchanged" \
592                                     % (original_filename,))
593             self.processed_paths.append(original_filename)
594 
595         consts = list(co.co_consts)
596         for i in range(len(consts)):
597             if isinstance(consts[i], type(co)):
598                 consts[i] = self.replace_paths_in_code(consts[i])
599 
600         return co.replace(co_consts=tuple(consts), co_filename=new_filename)
601 
602 
def test():
    """Command-line driver.

    Options: -d raises the debug level, -q silences it, -m treats the
    extra arguments as module names, -p adds os.pathsep-separated search
    directories, -x excludes a module.  The first argument is the script
    to analyse ("hello.py" by default); further arguments are files to
    load, or modules to import once -m is in effect.  Prints the report
    and returns the ModuleFinder, or None on an option error.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-m':
            domods = 1
        elif o == '-p':
            addpath.extend(a.split(os.pathsep))
        elif o == '-q':
            debug = 0
        elif o == '-x':
            exclude.append(a)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print("   ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
660 
661 
# Script entry point: run the command-line driver.  The finder is kept in
# the global 'mf'; a keyboard interrupt prints a short notice instead of a
# traceback.
if __name__ == '__main__':
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")
667