#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

"""
A script to help check binary dependencies and disallowed symbols in intermediate build files.
"""

import argparse
import os
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import (
    Dict,
    Iterable,
    List,
    NoReturn,
    Optional,
    Sequence,
    Tuple,
    Union,
)

# Script output statuses.
STATUS_OK = 0
STATUS_SCRIPT_ERROR = 1
STATUS_ERROR = 2
STATUS_WARNING = 3

# Object file suffix.
OBJECT_SUFFIX = ".o"

# Project root, assuming this script is in `<root>/scripts/`
PROJECT_ROOT = Path(__file__).parent.parent.resolve()

# Regex to strip info from nm and readelf.
NM_REGEX = re.compile(r"\d*\s+(?P<status>\S)\s+(?P<symbol>.*)")
READELF_DEP_REGEX = re.compile(r".*\(NEEDED\)\s+(?P<so>.*)")
READELF_DYN_SYM_REGEX = re.compile(r"(UND|\d+)\s+(?P<symbol>[^@\s:]+)(@.*)?$")

# Disallow list of prefixes for standard library symbols.
DISALLOW_LIST = [
    "operator new",
    "operator delete",
    "std::__cxx11::basic_string",
    "std::__throw",
    "std::deque",
    "std::exception",
    "std::forward_list",
    "std::list",
    "std::map",
    "std::multimap",
    "std::multiset",
    "std::priority_queue",
    "std::queue",
    "std::set",
    "std::stack",
    "std::unordered_map",
    "std::unordered_multimap",
    "std::unordered_multiset",
    "std::unordered_set",
    "std::vector",
]

# Maximum number of symbols handed to one demangler invocation. Batching keeps
# the command line below the operating system's argument-size limit.
_CXXFILT_BATCH_SIZE = 512


@dataclass
class Symbol:
    """Symbol scraped from ELF binary object."""

    # Symbol name exactly as reported by the tool that listed it.
    mangled: str
    # Demangled name; empty until `demangle_symbols` has filled it in.
    demangled: str
    # True when at least one object reported a status other than "U".
    defined: bool
    # Set by `check_disallowed_symbols` when a DISALLOW_LIST prefix matches.
    disallowed: bool
    # Source files whose objects mention this symbol, without duplicates.
    sources: List[Path]


# Cached symbols dictionary.
symbols_cache: Optional[Dict[str, Symbol]] = None


def error(message: str) -> NoReturn:
    """Emit an error message and kill the script with STATUS_SCRIPT_ERROR."""
    print(message)
    sys.exit(STATUS_SCRIPT_ERROR)


def get_tool_output(args: Sequence[Union[str, Path]]) -> str:
    """Run a command (no shell involved) and return its standard output.

    Args:
        args: Executable followed by its arguments.

    Returns:
        The captured stdout decoded as UTF-8; undecodable bytes are replaced
        rather than aborting the run.

    The exit code of the command is not inspected. If the executable cannot
    be started at all, the script exits through `error`.
    """
    try:
        # stdin is detached so a tool that falls back to reading standard
        # input can never block waiting on the caller's terminal.
        result = subprocess.run(
            args, stdout=subprocess.PIPE, stdin=subprocess.DEVNULL
        )
    except OSError as exc:
        error(f"Unable to execute {args[0]}: {exc}")
    return result.stdout.decode("utf-8", errors="replace")


def read_nm(
    nm: str, file: Path, exclude: Optional[List[str]] = None
) -> List[Tuple[str, str]]:
    """Read a set of symbols using the nm tool.

    Args:
        nm: Path of the nm executable.
        file: File to inspect.
        exclude: Upper-case status letters to drop; defaults to ["N"].

    Returns:
        A list of (status, symbol) tuples with the status upper-cased.
        Lines that do not match NM_REGEX are ignored.
    """
    if exclude is None:
        exclude = ["N"]

    entries = []
    for line in get_tool_output([nm, file]).splitlines():
        match = NM_REGEX.search(line)
        if match is None:
            continue

        status = match.group("status").upper()
        if status not in exclude:
            entries.append((status, match.group("symbol")))
    return entries


def get_object_symbols(
    nm: str, symbols: Dict[str, Symbol], object_file: Path, source_file: Path
) -> None:
    """Scrape symbols from a binary object into `symbols`.

    Args:
        nm: Path of the nm executable.
        symbols: Dictionary keyed by symbol name, updated in place.
        object_file: Object file to read.
        source_file: Source file to credit for every symbol found.
    """
    for status, name in read_nm(nm, object_file):
        is_defined = status != "U"
        entry = symbols.get(name)
        if entry is None:
            entry = Symbol(
                mangled=name,
                demangled="",
                defined=is_defined,
                disallowed=False,
                sources=[],
            )
            symbols[name] = entry
        elif is_defined:
            # A symbol first seen as an undefined reference may be defined by
            # an object scanned later; remember that a definition exists.
            entry.defined = True

        if source_file not in entry.sources:
            entry.sources.append(source_file)


def get_elf_dependencies(readelf: str, binary_file: Path) -> List[str]:
    """Get the shared object dependencies of a binary executable.

    Returns the text following each `(NEEDED)` tag in `readelf -d` output,
    in the order readelf printed them.
    """
    output = get_tool_output([readelf, "-d", binary_file])
    matches = (READELF_DEP_REGEX.search(line) for line in output.splitlines())
    return [match.group("so") for match in matches if match is not None]


def get_binary_dynamic_symbols(readelf: str, binary_file: Path) -> List[str]:
    """Get the dynamic symbols required by a binary executable.

    Returns the distinct symbol names matched by READELF_DYN_SYM_REGEX in
    `readelf --dyn-syms --wide` output, sorted so that repeated runs produce
    the same report.
    """
    output = get_tool_output([readelf, "--dyn-syms", "--wide", binary_file])
    matches = (READELF_DYN_SYM_REGEX.search(line) for line in output.splitlines())
    return sorted({match.group("symbol") for match in matches if match is not None})


def demangle_symbols(cxxfilt: str, mangled_symbols: Iterable[Symbol]) -> None:
    """Demangle a collection of symbols using the cxxfilt tool.

    Each symbol's `demangled` field is overwritten with the matching output
    line of the tool. Nothing is executed when the collection is empty.
    """
    # Materialize once: the collection is read both to build the command line
    # and to pair symbols with the output, which a one-shot iterator cannot do.
    pending = list(mangled_symbols)
    for start in range(0, len(pending), _CXXFILT_BATCH_SIZE):
        batch = pending[start : start + _CXXFILT_BATCH_SIZE]
        output = get_tool_output([cxxfilt] + [symbol.mangled for symbol in batch])
        for symbol, demangled in zip(batch, output.splitlines()):
            symbol.demangled = demangled


def check_disallowed_symbols(cxxfilt: str, symbols: Iterable[Symbol]) -> None:
    """Check a collection of symbols for disallowed prefixes.

    Sets `disallowed` on every symbol whose demangled name starts with an
    entry of DISALLOW_LIST. Symbols must already be demangled; an empty
    demangled name ends the script through `error`. The `cxxfilt` argument is
    accepted for interface compatibility and is not used.
    """
    prefixes = tuple(DISALLOW_LIST)
    for symbol in symbols:
        if not symbol.demangled:
            error(f"Symbol '{symbol.mangled}' has not been demangled")
        if symbol.demangled.startswith(prefixes):
            symbol.disallowed = True


def get_cached_symbols(nm: str, build_root: Path) -> Dict[str, Symbol]:
    """Return a dictionary of symbols scraped from build files.

    The first call walks `build_root` for files ending in OBJECT_SUFFIX and
    stores the result in `symbols_cache`; later calls return that cached
    dictionary regardless of the arguments they receive.
    """
    global symbols_cache

    if symbols_cache is not None:
        return symbols_cache

    if not build_root.is_dir():
        error("Specified buck-out is not a directory")

    symbols: Dict[str, Symbol] = {}
    for root, dirs, files in os.walk(build_root):
        # Visit directories and files in sorted order so the collected source
        # lists do not depend on filesystem enumeration order.
        dirs.sort()
        root_path = Path(root)
        for file_name in sorted(files):
            object_file_path = root_path / file_name
            if object_file_path.suffix != OBJECT_SUFFIX:
                continue

            # Name of the object file with its suffix removed.
            source_file_name = object_file_path.stem
            if "codegen" in str(object_file_path):
                source_file_path = Path(source_file_name + " (generated)")
            else:
                object_file_rel = Path(os.path.relpath(object_file_path, build_root))
                source_file_path = (
                    PROJECT_ROOT / object_file_rel.parent.parent / source_file_name
                )
            get_object_symbols(nm, symbols, object_file_path, source_file_path)

    symbols_cache = symbols
    return symbols_cache


def check_dependencies(readelf: str, binary_file: Path) -> int:
    """Check that there are no shared object dependencies of a binary executable.

    Returns STATUS_ERROR after listing the dependencies when any are found,
    otherwise STATUS_OK.
    """
    elf_dependencies = get_elf_dependencies(readelf, binary_file)
    if not elf_dependencies:
        return STATUS_OK

    print("Found the following shared object dependencies:")
    for dependency in elf_dependencies:
        print(" *", dependency)
    print()
    return STATUS_ERROR


def check_disallowed_symbols_build_dir(nm: str, cxxfilt: str, build_root: Path) -> int:
    """Check that there are no disallowed symbols used in intermediate build files.

    Prints the disallowed symbols grouped by source file and returns
    STATUS_ERROR when at least one was found, otherwise STATUS_OK.
    """
    symbol_list = list(get_cached_symbols(nm, build_root).values())
    demangle_symbols(cxxfilt, symbol_list)
    check_disallowed_symbols(cxxfilt, symbol_list)

    disallowed_by_file: Dict[Path, List[Symbol]] = {}
    for symbol in symbol_list:
        if not symbol.disallowed:
            continue
        for file in symbol.sources:
            disallowed_by_file.setdefault(file, []).append(symbol)

    for file, file_symbols in disallowed_by_file.items():
        print(f"{file} contains disallowed symbols:")
        for symbol in file_symbols:
            print(" *", symbol.demangled)
        print()

    return STATUS_ERROR if disallowed_by_file else STATUS_OK


def check_dynamic(
    nm: str, readelf: str, cxxfilt: str, binary_file: Path, build_root: Optional[Path]
) -> int:
    """Check for dynamic symbols required by an executable, categorizing them from the
    intermediate files that may have included those symbols.

    Symbols found in the build directory are printed grouped by source file.
    Symbols with no known source file are printed in a separate list and do
    not affect the result. Returns STATUS_ERROR when at least one dynamic
    symbol is attributed to a source file, otherwise STATUS_OK.
    """
    known_symbols = get_cached_symbols(nm, build_root) if build_root is not None else {}

    dynamic_symbols = []
    for name in get_binary_dynamic_symbols(readelf, binary_file):
        known = known_symbols.get(name)
        if known is None:
            known = Symbol(name, "", False, False, [])
        dynamic_symbols.append(known)
    demangle_symbols(cxxfilt, dynamic_symbols)
    check_disallowed_symbols(cxxfilt, dynamic_symbols)

    dynamic_by_file: Dict[Path, List[Symbol]] = {}
    global_dynamic = []
    for symbol in dynamic_symbols:
        if not symbol.sources:
            global_dynamic.append(symbol)
            continue

        for file in symbol.sources:
            dynamic_by_file.setdefault(file, []).append(symbol)

    print("Executable relies on the following dynamic symbols:")
    for file, file_symbols in dynamic_by_file.items():
        print(f"{file} contains dynamic symbols:")
        for symbol in file_symbols:
            print(" *", symbol.demangled)
        print()

    # Symbols that no scanned object mentions are still part of the report;
    # they are informational only and leave the returned status untouched.
    if global_dynamic:
        print("Symbols with no known source file:")
        for symbol in global_dynamic:
            print(" *", symbol.demangled)
        print()

    return STATUS_ERROR if dynamic_by_file else STATUS_OK


def bubble_error(program_status: int, routine_status: int) -> int:
    """Bubble a routine's error status up to the program status.

    A non-OK status overrides an OK status; when both are non-OK the
    numerically smaller status is kept.
    """
    if routine_status == STATUS_OK:
        return program_status
    if program_status == STATUS_OK:
        return routine_status
    return min(program_status, routine_status)


def main() -> int:
    """Parse command line arguments and execute tool.

    Returns the combined status of every requested check.
    """
    parser = argparse.ArgumentParser(
        description="A tool to help check binary dependencies and statically included symbols."
    )
    parser.add_argument(
        "--nm",
        metavar="executable",
        type=str,
        help="Path of the nm tool executable",
        default="nm",
    )
    parser.add_argument(
        "--readelf",
        metavar="executable",
        type=str,
        help="Path of the readelf tool executable",
        default="readelf",
    )
    parser.add_argument(
        "--cxxfilt",
        metavar="executable",
        type=str,
        help="Path of the cxxfilt tool executable",
        default="c++filt",
    )
    parser.add_argument("--binary", metavar="binary", type=str, help="Binary to check")
    parser.add_argument(
        "--buck-out", metavar="dir", type=str, help="Buck output directory"
    )
    parser.add_argument(
        "--check-dependencies",
        action="store_true",
        help="Check shared library dependencies for a binary",
    )
    parser.add_argument(
        "--check-disallowed-symbols",
        action="store_true",
        help="Check for usage of disallowed symbols",
    )
    parser.add_argument(
        "--check-dynamic",
        action="store_true",
        help="Check for usage of dynamic symbols",
    )

    args = parser.parse_args()

    exit_status = STATUS_OK

    if args.check_dependencies:
        if args.binary is None:
            error("--binary flag must be specified when checking dependencies")
        status = check_dependencies(args.readelf, Path(args.binary))
        exit_status = bubble_error(exit_status, status)

    if args.check_disallowed_symbols:
        if args.buck_out is None:
            error("--buck-out flag must be specified when checking disallowed symbols")
        status = check_disallowed_symbols_build_dir(
            args.nm, args.cxxfilt, Path(args.buck_out)
        )
        exit_status = bubble_error(exit_status, status)

    if args.check_dynamic:
        if args.binary is None:
            error("--binary flag must be specified when checking dynamic symbol usage")
        status = check_dynamic(
            args.nm,
            args.readelf,
            args.cxxfilt,
            Path(args.binary),
            Path(args.buck_out) if args.buck_out is not None else None,
        )
        exit_status = bubble_error(exit_status, status)

    return exit_status


if __name__ == "__main__":
    sys.exit(main())