#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
    clangFormat.binary
    clangFormat.commit
    clangFormat.extensions
    clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of `count` consecutive lines beginning at 1-based line `start`.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Entry point: parse the command line and format the changed lines.

    Depending on the options, the result is either printed as a diff
    (--diff) or applied to the working directory.  Exits via `die()` or
    `sys.exit(2)` on errors."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we need
    # to use some heuristics to determine whether or not <commit> was present.
    # However, to print pretty messages, we make use of metavar and help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first; otherwise it can never trigger.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) > 1 and not opts.diff:
        die('--diff is required when two commits are given')
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless they appear in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        # With --null, the key and value are separated by a newline.  A key
        # that was set without any value is printed with no newline at all;
        # git treats such a setting as boolean true.
        name, separator, value = entry.partition('\n')
        if not separator:
            value = 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used.

    Note that leading commit arguments are popped off `args` in place when no
    "--" is present."""
    if dash_dash:
        if len(args) == 0:
            commits = [default_commit]
        else:
            commits = args
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        # dash_dash[0] is the "--" itself.
        files = dash_dash[1:]
    elif args:
        commits = []
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a single
    one was specified, or the difference between both specified commits,
    filtered on `files` (if non-empty).  Zero context lines are used in the
    patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of lines (bytes or text).  The input must have
    been produced with ``-U0``, meaning unidiff format with zero lines of
    context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines (count of zero) are omitted."""
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            # Besides the line terminator, strip the TAB that git appends to
            # file names containing spaces.
            filename = match.group(1).rstrip('\r\n\t')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  A file name without any period is kept only if the
    empty string is among the allowed extensions."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a copy of the keys since entries are deleted in the loop.
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps each filename to the list of line `Range`s to format.
    If `revision` is given, the file contents and modes are taken from that
    revision; otherwise they are taken from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision:
                git_metadata_cmd = ['git', 'ls-tree',
                                    '%s:%s' % (revision,
                                               os.path.dirname(filename)),
                                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                # The first field of the ls-tree output is the octal mode.
                mode = oct(int(stdout.split()[0], 8))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the tree written from a temporary index."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: entries are NUL-terminated.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.  Only the lines in `line_ranges` (a list
    of `Range`s) are formatted.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.append('-style=' + style)
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision:
        # The contents are piped in from git, so clang-format must be told
        # which file name to assume.
        clang_format_cmd.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.append(filename)
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of clang-format's input stream.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of file names that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps file names that start with '-' from being parsed as
        # options.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented with
        # a message saying "Discard ... from worktree".  Instead, we use the
        # old tree as the index and checkout from new_tree, which gives the
        # slightly better message, "Apply ... to index and worktree".  This is
        # not quite right, since it won't be applied to the user's index, but
        # oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin:   data to send to the command's standard input (default: none).
      verbose: if true (the default), report anything the command printed to
               stderr, and report a non-zero exit status.
      strip:   if true (the default), strip trailing newlines from the output.

    Exits with status 2 if the command returns a non-zero exit status.  Raises
    TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input must be encoded first.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as bytes, encoding text as UTF-8."""
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as the native `str` type of the running Python."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes is not str.
        return bytes_input.decode('utf-8')
    # Python 2 `unicode`: encode to a native byte string.
    return bytes_input.encode('utf-8')


def convert_string(bytes_input):
    """Convert subprocess output to a native string, decoding it as UTF-8.

    Falls back to `str(bytes_input)` if the input has no `decode` method or
    is not valid UTF-8."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    main()