xref: /aosp_15_r20/external/executorch/scripts/check_binary_dependencies.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1#!/usr/bin/env python3
2# Copyright (c) Meta Platforms, Inc. and affiliates.
3# All rights reserved.
4#
5# This source code is licensed under the BSD-style license found in the
6# LICENSE file in the root directory of this source tree.
7
8# pyre-strict
9
10"""
11A script to help check binary dependencies and disallowed symbols in intermediate build files.
12"""
13
14import argparse
15import os
16import re
17import subprocess
18import sys
19from dataclasses import dataclass
20from pathlib import Path
21from typing import Dict, Iterable, List, NoReturn, Optional, Tuple
22
# Script output statuses. The check routines return STATUS_OK or STATUS_ERROR,
# error() exits the process with STATUS_SCRIPT_ERROR, and main()'s combined
# status becomes the process exit code. When several checks report a problem,
# bubble_error() keeps the numerically smallest non-OK value.
STATUS_OK = 0
STATUS_SCRIPT_ERROR = 1
STATUS_ERROR = 2
STATUS_WARNING = 3

# Object file suffix. Only files with this suffix are scanned for symbols.
OBJECT_SUFFIX = ".o"

# Project root, assuming this script is in `<root>/scripts/`
PROJECT_ROOT = Path(__file__).parent.parent.resolve()

# Regex to strip info from nm and readelf.
# NM_REGEX: optional digits, whitespace, then a single non-space character
# captured as `status` (the nm symbol type letter) and the remainder of the
# line captured as `symbol`. It is applied with search(), so it may match
# starting in the middle of a line.
NM_REGEX = re.compile(r"\d*\s+(?P<status>\S)\s+(?P<symbol>.*)")
# READELF_DEP_REGEX: captures everything after a "(NEEDED)" tag as `so`.
READELF_DEP_REGEX = re.compile(r".*\(NEEDED\)\s+(?P<so>.*)")
# READELF_DYN_SYM_REGEX: "UND" or a decimal number (presumably the Ndx column
# of `readelf --dyn-syms` -- confirm against readelf output), followed by the
# symbol name up to the first '@', whitespace or ':', with an optional
# "@..." suffix running to the end of the line.
READELF_DYN_SYM_REGEX = re.compile(r"(UND|\d+)\s+(?P<symbol>[^@\s:]+)(@.*)?$")

# Disallow list of prefixes for standard library symbols. A symbol is flagged
# when its demangled name starts with any of these strings (plain prefix
# match, see check_disallowed_symbols).
DISALLOW_LIST = [
    "operator new",
    "operator delete",
    "std::__cxx11::basic_string",
    "std::__throw",
    "std::deque",
    "std::exception",
    "std::forward_list",
    "std::list",
    "std::map",
    "std::multimap",
    "std::multiset",
    "std::priority_queue",
    "std::queue",
    "std::set",
    "std::stack",
    "std::unordered_map",
    "std::unordered_multimap",
    "std::unordered_multiset",
    "std::unordered_set",
    "std::vector",
]
63
64
@dataclass
class Symbol:
    """Symbol scraped from ELF binary object.

    Instances are created either from nm output for an object file or from
    the dynamic symbol table of a binary, and are then updated in place by the
    demangling and disallow-list passes.
    """

    # Name exactly as reported by the tool (nm or readelf).
    mangled: str
    # Human-readable name; "" until demangle_symbols() has filled it in.
    demangled: str
    # False when nm reported the symbol with type "U"; symbols that are only
    # known from a binary's dynamic table are created with False.
    defined: bool
    # Set to True by check_disallowed_symbols() when the demangled name starts
    # with a DISALLOW_LIST prefix.
    disallowed: bool
    # Source files whose object files mention this symbol; empty when the
    # symbol was not found in any scanned object file.
    sources: List[Path]
74
75
# Cached symbols dictionary, keyed by mangled symbol name. It is None until
# get_cached_symbols() has scanned a build directory once; later calls return
# the same dictionary regardless of the build root they are given.
symbols_cache: Optional[Dict[str, Symbol]] = None
78
79
def error(message: str) -> NoReturn:
    """Print `message` and terminate the script.

    The process exits with STATUS_SCRIPT_ERROR; this function never returns.
    """
    print(message)
    raise SystemExit(STATUS_SCRIPT_ERROR)
84
85
def get_tool_output(args: List[str]) -> str:
    """Run an external tool and return what it wrote to standard output.

    The command is executed directly from the argument list; no shell is
    involved.

    Args:
        args: Program name followed by its arguments.

    Returns:
        The tool's standard output decoded as UTF-8. Bytes that are not valid
        UTF-8 are replaced with U+FFFD instead of aborting the whole run.
    """
    result = subprocess.run(args, stdout=subprocess.PIPE)
    if result.returncode != 0:
        # Keep going (callers treat tool output as best-effort), but make the
        # failure visible: an empty output would otherwise look like "nothing
        # found" and let a check pass silently.
        print(
            f"warning: {args[0]} exited with status {result.returncode}",
            file=sys.stderr,
        )
    return result.stdout.decode("utf-8", errors="replace")
91
92
def read_nm(
    nm: str, file: Path, exclude: Optional[List[str]] = None
) -> List[Tuple[str, str]]:
    """List the symbols that the nm tool reports for `file`.

    Args:
        nm: Path or name of the nm executable.
        file: File to inspect.
        exclude: Upper-case symbol type letters to drop. Defaults to ["N"].

    Returns:
        (type, symbol) pairs in nm output order, where type is the symbol
        type letter converted to upper case. Lines that NM_REGEX does not
        match are skipped.
    """
    skipped_types = ["N"] if exclude is None else exclude

    entries = []
    for raw_line in get_tool_output([nm, file]).splitlines():
        parsed = NM_REGEX.search(raw_line)
        if parsed is None:
            continue

        # Upper-casing folds nm's lower-case type letters into their
        # upper-case counterparts.
        kind = parsed.group("status").upper()
        if kind in skipped_types:
            continue
        entries.append((kind, parsed.group("symbol")))
    return entries
111
112
def get_object_symbols(
    nm: str, symbols: Dict[str, Symbol], object_file: Path, source_file: Path
) -> None:
    """Scrape symbols from a binary object into `symbols`.

    Args:
        nm: Path or name of the nm executable.
        symbols: Dictionary keyed by mangled name; updated in place.
        object_file: Object file to read with nm.
        source_file: Source the object was built from; recorded (once) in the
            `sources` list of every symbol found in the object.
    """
    for kind, name in read_nm(nm, object_file):
        is_defined = kind != "U"
        entry = symbols.get(name)
        if entry is None:
            entry = Symbol(
                mangled=name,
                demangled="",
                defined=is_defined,
                disallowed=False,
                sources=[],
            )
            symbols[name] = entry
        elif is_defined:
            # A symbol first seen as an undefined reference may be defined by
            # an object scanned later; the result must not depend on the
            # order in which object files are visited.
            entry.defined = True

        if source_file not in entry.sources:
            entry.sources.append(source_file)
130
131
def get_elf_dependencies(readelf: str, binary_file: Path) -> List[str]:
    """Return the NEEDED entries that `readelf -d` prints for `binary_file`.

    Each element is the text following the "(NEEDED)" tag on a matching line,
    in output order.
    """
    dynamic_section = get_tool_output([readelf, "-d", binary_file])
    candidates = (
        READELF_DEP_REGEX.search(entry) for entry in dynamic_section.splitlines()
    )
    return [found.group("so") for found in candidates if found]
143
144
def get_binary_dynamic_symbols(readelf: str, binary_file: Path) -> List[str]:
    """Get the dynamic symbols required by a binary executable.

    Args:
        readelf: Path or name of the readelf executable.
        binary_file: Binary whose dynamic symbol table is read with
            `readelf --dyn-syms --wide`.

    Returns:
        The distinct symbol names matched by READELF_DYN_SYM_REGEX, in the
        order they first appear in the readelf output.
    """
    output = get_tool_output([readelf, "--dyn-syms", "--wide", binary_file])
    matches = (READELF_DYN_SYM_REGEX.search(line) for line in output.splitlines())
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # report is identical from run to run (a set would reorder the names
    # depending on string hash randomization).
    return list(dict.fromkeys(match.group("symbol") for match in matches if match))
155
156
def demangle_symbols(cxxfilt: str, mangled_symbols: Iterable[Symbol]) -> None:
    """Demangle a collection of symbols using the cxxfilt tool.

    Each symbol's `demangled` attribute is overwritten with the corresponding
    line of tool output. Symbols for which the tool produced no line keep
    their previous `demangled` value.

    Args:
        cxxfilt: Path or name of the c++filt executable.
        mangled_symbols: Symbols to update in place.
    """
    # Materialize once: the collection is traversed twice (to build the
    # command line and to assign results), which would silently do nothing
    # for a one-shot iterator.
    pending = list(mangled_symbols)

    # Names are passed in bounded batches so a large build does not exceed the
    # operating system's command-line length limit. With no symbols at all the
    # loop does not run, which also avoids invoking the tool without
    # arguments (it would then wait for names on standard input).
    batch_size = 512
    for start in range(0, len(pending), batch_size):
        batch = pending[start : start + batch_size]
        output = get_tool_output([cxxfilt] + [symbol.mangled for symbol in batch])
        for symbol, demangled in zip(batch, output.splitlines()):
            symbol.demangled = demangled
162
163
def check_disallowed_symbols(cxxfilt: str, symbols: Iterable[Symbol]) -> None:
    """Flag every symbol whose demangled name starts with a disallowed prefix.

    Symbols must already be demangled. Matching symbols get `disallowed` set
    to True; others are left untouched. `cxxfilt` is accepted but not used.
    """
    banned_prefixes = tuple(DISALLOW_LIST)
    for entry in symbols:
        assert len(entry.demangled) > 0
        if entry.demangled.startswith(banned_prefixes):
            entry.disallowed = True
169            symbol.disallowed = True
170
171
def get_cached_symbols(nm: str, build_root: Path) -> Dict[str, Symbol]:
    """Return the symbols found in object files under `build_root`.

    The first call walks the directory tree, runs nm on every file whose
    suffix is OBJECT_SUFFIX and stores the result in the module-level cache;
    later calls return that cached dictionary. The script exits through
    error() when `build_root` is not a directory.
    """
    global symbols_cache

    if symbols_cache is not None:
        return symbols_cache

    if not build_root.is_dir():
        error("Specified buck-out is not a directory")

    scraped: Dict[str, Symbol] = {}
    for directory, _, file_names in os.walk(build_root):
        directory_path = Path(directory)
        for file_name in file_names:
            object_path = directory_path / file_name
            if object_path.suffix != OBJECT_SUFFIX:
                continue

            # Object file name without its suffix.
            stem = object_path.name[: -len(OBJECT_SUFFIX)]
            relative_object = Path(os.path.relpath(object_path, build_root))

            if "codegen" in str(object_path):
                # Any path containing "codegen" is labelled as generated
                # rather than mapped to a project path.
                origin = stem + " (generated)"
            else:
                # Project root + the object's relative directory with its
                # last component dropped + the object name without suffix.
                origin = PROJECT_ROOT / relative_object.parent.parent / stem
            get_object_symbols(nm, scraped, object_path, origin)

    symbols_cache = scraped
    return symbols_cache
202
203
def check_dependencies(readelf: str, binary_file: Path) -> int:
    """Report the shared objects that `binary_file` depends on.

    Returns STATUS_OK when readelf lists no NEEDED entries; otherwise prints
    them and returns STATUS_ERROR.
    """
    needed = get_elf_dependencies(readelf, binary_file)
    if len(needed) == 0:
        return STATUS_OK

    print("Found the following shared object dependencies:")
    for shared_object in needed:
        print(" *", shared_object)
    print()
    return STATUS_ERROR
214
215
def check_disallowed_symbols_build_dir(nm: str, cxxfilt: str, build_root: Path) -> int:
    """Report disallowed symbols found in intermediate build files.

    Scans (or reuses the cached scan of) `build_root`, demangles every symbol,
    and prints the disallowed ones grouped by source file. Returns
    STATUS_ERROR if any were printed, STATUS_OK otherwise.
    """
    all_symbols = list(get_cached_symbols(nm, build_root).values())
    demangle_symbols(cxxfilt, all_symbols)
    check_disallowed_symbols(cxxfilt, all_symbols)

    # Group offending symbols under each source that mentions them.
    offenders_by_file = {}
    for entry in all_symbols:
        if not entry.disallowed:
            continue
        for origin in entry.sources:
            offenders_by_file.setdefault(origin, []).append(entry)

    for origin, offenders in offenders_by_file.items():
        print(f"{origin} contains disallowed symbols:")
        for offender in offenders:
            print(" *", offender.demangled)
        print()

    return STATUS_ERROR if len(offenders_by_file) > 0 else STATUS_OK
241
242
def check_dynamic(
    nm: str, readelf: str, cxxfilt: str, binary_file: Path, build_root: Optional[Path]
) -> int:
    """Check for dynamic symbols required by an executable, categorizing them from the
    intermediate files that may have included those symbols.

    Args:
        nm: Path or name of the nm executable.
        readelf: Path or name of the readelf executable.
        cxxfilt: Path or name of the c++filt executable.
        binary_file: Executable whose dynamic symbol table is inspected.
        build_root: Build output directory used to attribute symbols to source
            files, or None to skip attribution.

    Returns:
        STATUS_ERROR when at least one dynamic symbol could be attributed to a
        scanned build file, STATUS_OK otherwise. Symbols that cannot be
        attributed are listed in the output but do not affect the status.
    """
    known_symbols = get_cached_symbols(nm, build_root) if build_root is not None else {}

    # Reuse the scanned Symbol (which carries its sources) when available;
    # otherwise create a placeholder with no sources.
    dynamic_symbols = []
    for name in get_binary_dynamic_symbols(readelf, binary_file):
        if name in known_symbols:
            dynamic_symbols.append(known_symbols[name])
        else:
            dynamic_symbols.append(Symbol(name, "", False, False, []))
    demangle_symbols(cxxfilt, dynamic_symbols)
    check_disallowed_symbols(cxxfilt, dynamic_symbols)

    dynamic_by_file = {}
    global_dynamic = []
    for symbol in dynamic_symbols:
        if len(symbol.sources) == 0:
            global_dynamic.append(symbol)
            continue

        for file in symbol.sources:
            dynamic_by_file.setdefault(file, []).append(symbol)

    print("Executable relies on the following dynamic symbols:")
    for file, file_symbols in dynamic_by_file.items():
        print(f"{file} contains dynamic symbols:")
        for symbol in file_symbols:
            print(" *", symbol.demangled)
        print()

    # Symbols with no known source used to be collected and then dropped, so a
    # run without a build directory reported nothing at all. List them; the
    # returned status deliberately stays as before so existing callers see the
    # same exit code.
    if len(global_dynamic) > 0:
        print("Dynamic symbols not found in any intermediate build file:")
        for symbol in global_dynamic:
            print(" *", symbol.demangled)
        print()

    if len(dynamic_by_file) > 0:
        return STATUS_ERROR

    return STATUS_OK
284
285
def bubble_error(program_status, routine_status) -> int:
    """Combine a routine's status with the program status accumulated so far.

    If either status is STATUS_OK the other one is returned; when both are
    non-OK the numerically smaller one wins.
    """
    routine_ok = routine_status == STATUS_OK
    program_ok = program_status == STATUS_OK

    if not (routine_ok or program_ok):
        # Two failures: keep the lower status code.
        return min(program_status, routine_status)
    return program_status if routine_ok else routine_status
295
296
def main() -> int:
    """Parse the command line, run the requested checks and return the status.

    Checks run in a fixed order (dependencies, disallowed symbols, dynamic
    symbols) and their statuses are merged with bubble_error(). A missing
    required flag terminates the script through error().
    """
    parser = argparse.ArgumentParser(
        description="A tool to help check binary dependencies and statically included symbols."
    )

    # Overridable tool executables: (flag, help text, default command).
    tool_options = (
        ("--nm", "Path of the nm tool executable", "nm"),
        ("--readelf", "Path of the readelf tool executable", "readelf"),
        ("--cxxfilt", "Path of the cxxfilt tool executable", "c++filt"),
    )
    for flag, help_text, default_command in tool_options:
        parser.add_argument(
            flag,
            metavar="executable",
            type=str,
            help=help_text,
            default=default_command,
        )

    parser.add_argument("--binary", metavar="binary", type=str, help="Binary to check")
    parser.add_argument(
        "--buck-out", metavar="dir", type=str, help="Buck output directory"
    )

    # Boolean switches selecting which checks to run.
    check_options = (
        ("--check-dependencies", "Check shared library dependencies for a binary"),
        ("--check-disallowed-symbols", "Check for usage of disallowed symbols"),
        ("--check-dynamic", "Check for usage of dynamic symbols"),
    )
    for flag, help_text in check_options:
        parser.add_argument(flag, action="store_true", help=help_text)

    args = parser.parse_args()
    binary = args.binary
    buck_out = args.buck_out

    exit_status = STATUS_OK

    if args.check_dependencies:
        if binary is None:
            error("--binary flag must be specified when checking dependencies")
        exit_status = bubble_error(
            exit_status, check_dependencies(args.readelf, Path(binary))
        )

    if args.check_disallowed_symbols:
        if buck_out is None:
            error("--buck-out flag must be specified when checking disallowed symbols")
        exit_status = bubble_error(
            exit_status,
            check_disallowed_symbols_build_dir(args.nm, args.cxxfilt, Path(buck_out)),
        )

    if args.check_dynamic:
        if binary is None:
            error("--binary flag must be specified when checking dynamic symbol usage")
        # The build directory is optional for this check.
        build_root = None if buck_out is None else Path(buck_out)
        exit_status = bubble_error(
            exit_status,
            check_dynamic(args.nm, args.readelf, args.cxxfilt, Path(binary), build_root),
        )

    return exit_status
374
375
# Run the tool only when executed as a script; the status returned by main()
# becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())
378