#!/usr/bin/env python3

# Tool to bundle multiple C/C++ source files, inlining any includes.
#
# Note: there are two types of exclusion options: the '-x' flag, which besides
# excluding a file also adds an #error directive in place of the #include, and
# the '-k' flag, which keeps the #include and doesn't inline the file. The
# intended use cases are: '-x' for files that would normally be #if'd out, so
# features that 100% won't be used in the amalgamated file, for which every
# occurrence adds the error, and '-k' for headers that we wish to manually
# include, such as a project's public API, for which occurrences after the first
# are removed.
#
# Todo: the error handling could be better, which currently throws and halts
# (which is functional just not very friendly).
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)

import argparse, re, sys

from pathlib import Path
from typing import Any, List, Optional, Pattern, Set, TextIO

# Set of file roots when searching (equivalent to -I paths for the compiler).
# Filled from the '-r' arguments once they have been resolved.
roots: Set[Path] = set()

# Set of (canonical) file Path objects to exclude from inlining (and not only
# exclude but to add a compiler error directive when they're encountered).
# Filled from the '-x' arguments.
excludes: Set[Path] = set()

# Set of (canonical) file Path objects to keep as include directives. Filled
# from the '-k' arguments.
keeps: Set[Path] = set()

# Whether to keep the #pragma once directives (unlikely, since this will result
# in a warning, but the option is there). Set from the '-p' flag.
keep_pragma: bool = False

# Destination file object (or stdout if no output file was supplied).
destn: TextIO = sys.stdout

# Set of file Path objects previously inlined (and to ignore if reencountering).
# The input file itself is added first so that it is never inlined into itself.
found: Set[Path] = set()

# Compiled regex Pattern to handle "#pragma once" in various formats:
#
#   #pragma once
#   #pragma once
#   # pragma once
#   #pragma once
#   #pragma once // comment
#
# Ignoring commented versions, same as include_regex.
#
# At least one whitespace character is required between 'pragma' and 'once',
# and 'once' must end on a word boundary, so that neither '#pragmaonce' nor
# '#pragma once_more' (both different preprocessor tokens) is mistaken for the
# directive and silently dropped from the output.
#
pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s+once\b')

# Compiled regex Pattern to handle the following type of file includes:
#
#   #include "file"
#   #include "file"
#   # include "file"
#   #include "file"
#   #include "file" // comment
#   #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
#   #include <file>
#   //#include "file"
#   /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored)
#
# The name is captured non-greedily (group 1) so that a stray quote in a
# trailing comment doesn't extend it.
#
include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"')

# Simple tests to prove include_regex's cases.
79*01826a49SYabin Cui# 80*01826a49SYabin Cuidef test_match_include() -> bool: 81*01826a49SYabin Cui if (include_regex.match('#include "file"') and 82*01826a49SYabin Cui include_regex.match(' #include "file"') and 83*01826a49SYabin Cui include_regex.match('# include "file"') and 84*01826a49SYabin Cui include_regex.match('#include "file"') and 85*01826a49SYabin Cui include_regex.match('#include "file" // comment')): 86*01826a49SYabin Cui if (not include_regex.match('#include <file>') and 87*01826a49SYabin Cui not include_regex.match('//#include "file"') and 88*01826a49SYabin Cui not include_regex.match('/*#include "file"*/')): 89*01826a49SYabin Cui found = include_regex.match('#include "file" // "') 90*01826a49SYabin Cui if (found and found.group(1) == 'file'): 91*01826a49SYabin Cui print('#include match valid') 92*01826a49SYabin Cui return True 93*01826a49SYabin Cui return False 94*01826a49SYabin Cui 95*01826a49SYabin Cui# Simple tests to prove pragma_regex's cases. 96*01826a49SYabin Cui# 97*01826a49SYabin Cuidef test_match_pragma() -> bool: 98*01826a49SYabin Cui if (pragma_regex.match('#pragma once') and 99*01826a49SYabin Cui pragma_regex.match(' #pragma once') and 100*01826a49SYabin Cui pragma_regex.match('# pragma once') and 101*01826a49SYabin Cui pragma_regex.match('#pragma once') and 102*01826a49SYabin Cui pragma_regex.match('#pragma once // comment')): 103*01826a49SYabin Cui if (not pragma_regex.match('//#pragma once') and 104*01826a49SYabin Cui not pragma_regex.match('/*#pragma once*/')): 105*01826a49SYabin Cui print('#pragma once match valid') 106*01826a49SYabin Cui return True 107*01826a49SYabin Cui return False 108*01826a49SYabin Cui 109*01826a49SYabin Cui# Finds 'file'. First the list of 'root' paths are searched, followed by the 110*01826a49SYabin Cui# currently processing file's 'parent' path, returning a valid Path in 111*01826a49SYabin Cui# canonical form. If no match is found None is returned. 
#
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
    # The root paths take priority over the including file's own directory
    for root in roots:
        candidate = root.joinpath(file).resolve()
        if candidate.is_file():
            return candidate
    # Fall back to 'parent', or to the current working directory when no parent
    # was supplied. Both are resolved so the result is always canonical and can
    # be compared with the entries held in the (canonical) path sets.
    base = parent if parent is not None else Path()
    candidate = base.joinpath(file).resolve()
    return candidate if candidate.is_file() else None

# Helper to resolve lists of files. 'file_list' is passed in from the arguments
# and each entry resolved to its canonical path (like any include entry, either
# from the list of root paths or the owning file's 'parent', which in this case
# is the input file). The results are stored in 'resolved'. Entries that can't
# be found are reported on stderr and otherwise ignored.
#
def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
    # Nothing to do when the option wasn't supplied (None) or is empty
    if not file_list:
        return
    for filename in file_list:
        path = resolve_include(filename, parent)
        if path:
            resolved.add(path)
        else:
            error_line(f'Warning: excluded file not found: {filename}')

# Writes 'line' to the open 'destn' (or stdout).
#
def write_line(line: str) -> None:
    print(line, file=destn)

# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
#
def error_line(line: Any) -> None:
    print(line, file=sys.stderr)

# Inline the contents of 'file' (with any of its includes also inlined, etc.).
# 'file_name' is the name to show in the progress log, defaulting to the file's
# own name when not supplied. A 'file' that isn't a regular file is reported on
# stderr and nothing is written.
#
# Note: text encoding errors are ignored and replaced with the U+FFFD
# replacement character when reading the input files. This isn't ideal, but
# it's more than likely in the comments than code and a) the text editor has
# probably also failed to read the same content, and b) the compiler probably
# did too.
#
def add_file(file: Path, file_name: Optional[str] = None) -> None:
    if not file.is_file():
        error_line(f'Error: Invalid file: {file}')
        return
    if not file_name:
        file_name = file.name
    error_line(f'Processing: {file_name}')
    with file.open('r', errors='replace') as opened:
        for line in opened:
            line = line.rstrip('\n')
            match_include = include_regex.match(line)
            if not match_include:
                # Skip any 'pragma once' directives, otherwise write the source line
                if keep_pragma or not pragma_regex.match(line):
                    write_line(line)
                continue
            # We have a quoted include directive so grab the file
            inc_name = match_include.group(1)
            resolved = resolve_include(inc_name, file.parent)
            if not resolved:
                # The include file didn't resolve to a file
                write_line(f'#error Unable to find: {inc_name}')
                error_line(f'Error: Unable to find: {inc_name}')
            elif resolved in excludes:
                # The file was excluded so error if the compiler uses it
                write_line(f'#error Using excluded file: {inc_name} (re-amalgamate source to fix)')
                error_line(f'Excluding: {inc_name}')
            elif resolved in found:
                # The file was previously encountered (inlined or kept)
                write_line(f'/**** skipping file: {inc_name} ****/')
            else:
                # Record it before recursing so that cyclic includes terminate
                found.add(resolved)
                if resolved in keeps:
                    # The include was flagged to keep as included
                    write_line(f'/**** *NOT* inlining {inc_name} ****/')
                    write_line(line)
                    error_line(f'Not inlining: {inc_name}')
                else:
                    # The file was neither excluded nor seen before so inline it
                    write_line(f'/**** start inlining {inc_name} ****/')
                    add_file(resolved, inc_name)
                    write_line(f'/**** ended inlining {inc_name} ****/')

# Start here
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
parser.add_argument('input', type=Path, help='input file')
args = parser.parse_args()

# Fail early on an invalid input (and store it so we don't recurse)
args.input = args.input.resolve(strict=True)
found.add(args.input)

# Resolve all of the root paths upfront (we'll halt here on invalid roots)
if args.root:
    for path in args.root:
        roots.add(path.resolve(strict=True))

# The remaining params: so resolve the excluded files and #pragma once directive
resolve_excluded_files(args.exclude, excludes, args.input.parent)
resolve_excluded_files(args.keep, keeps, args.input.parent)
keep_pragma = args.pragma

# Then recursively process the input file
try:
    if args.output:
        destn = args.output
    add_file(args.input)
finally:
    # Note this also closes stdout when no output file was supplied
    if destn:
        destn.close()