xref: /aosp_15_r20/external/bcc/scripts/git-clang-format (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis shown by argparse in place of its auto-generated usage.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown by `git clang-format -h`.  The git-config names listed
# at the end must match the keys that main() looks up (compared in lower case).
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A contiguous run of lines: the first line number and the number of lines.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Command-line entry point.

  Parses the command line (using git-config values as option defaults), works
  out which lines changed, runs clang-format on those lines and then either
  prints the resulting diff (--diff) or applies it to the working directory.

  Exits with status 2 (via die() or the helpers it calls) on usage errors or
  when an external command fails."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so a single signed verbosity level is used from here on.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Check the upper bound first: it must be reachable independently of the
  # two-commit --diff requirement.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) == 2 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Snapshot the file names before filtering so we can report what was
    # dropped.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    print('no modified files to format')
    return
  # The diff reports paths relative to the top of the repository, so we must
  # cd there before accessing those files.
  cd_to_toplevel()
  if len(commits) > 1:
    # Two commits: format the file contents as they are in the second commit.
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    # Zero or one commit: format the files as they are in the working directory.
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
182
183
def load_git_config(non_string_options=None):
  """Return the git configuration as a dictionary.

  Keys are the option names exactly as printed by `git config --list`.  Every
  value is returned as a string, except for options named in
  `non_string_options`: a dictionary mapping an option name (in lower case) to
  either "--bool" or "--int".  Those options are re-read through `git config`
  with that type flag so that git canonicalizes the value.

  An option that is set without any value (a bare key in the config file) is
  reported as the string 'true'."""
  if non_string_options is None:
    non_string_options = {}
  out = {}
  # With --null each entry is "<name>\n<value>" and entries end with NUL.
  for entry in run('git', 'config', '--list', '--null').split('\0'):
    if not entry:
      continue
    if '\n' in entry:
      name, value = entry.split('\n', 1)
    else:
      # A bare key has no "\n<value>" part; unpacking the split would raise
      # ValueError.  Git treats such a setting as boolean true.
      name, value = entry, 'true'
    if name in non_string_options:
      value = run('git', 'config', non_string_options[name], name)
    out[name] = value
  return out
200
201
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is treated as a commit
  and each one is validated; the files are whatever follows the "--".  When
  there is no "--", leading arguments are consumed from `args` for as long as
  they resolve to revisions and the remainder are files.  If no commit is
  found either way, `[default_commit]` is used."""
  if dash_dash:
    commits = args if len(args) != 0 else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]

  if not args:
    return [default_commit], []

  # No explicit separator: peel revisions off the front of `args` in place.
  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
238
239
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False).

  Exits the program if `value` is ambiguous or names a git object that is
  neither a commit nor a tag."""
  # `git rev-parse` fails with its own error message when `value` is
  # ambiguous (neither a commit nor a file); run() then exits for us.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' % (value, kind))
252
253
def get_object_type(value):
  """Return the type name git reports for `value`, or None when
  `git cat-file -t` fails for it (i.e. it is not a valid git object)."""
  proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured only to keep git's complaint off the terminal.
  output = proc.communicate()[0]
  if proc.returncode == 0:
    return convert_string(output.strip())
  return None
263
264
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines().

  Exits with status 2 if the diff command reports failure."""
  proc = compute_diff(commits, files)
  line_ranges = extract_lines(proc.stdout)
  proc.stdout.close()
  if proc.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return line_ranges
275
276
def compute_diff(commits, files):
  """Start git and return the subprocess object producing the diff.

  The returned process's `stdout` file object yields a patch with zero context
  lines.  With a single commit, `git diff-index` is used with that commit;
  with more than one, `git diff-tree` is used with the given commits.  The
  patch is restricted to `files` when that list is non-empty."""
  tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  argv = ['git', tool, '-p', '-U0']
  argv += commits
  argv.append('--')
  argv += files
  child = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # The diff needs no input; close stdin right away.
  child.stdin.close()
  return child
292
293
def extract_lines(patch_file):
  """Collect the changed line ranges from the patch in `patch_file`.

  `patch_file` is iterated line by line.  It must have been produced with
  ``-U0``, meaning unidiff format with zero lines of context.

  Returns a dict mapping each filename to a list of `Range`s, one per hunk
  that leaves at least one line in the new version of the file."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = file_header.search(text)
    if header:
      # Remember which file the following hunks belong to.
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(text)
    if not hunk:
      continue
    first = int(hunk.group(1))
    # An omitted count in a hunk header means exactly one line.
    count = int(hunk.group(3)) if hunk.group(3) else 1
    if count > 0:
      ranges_by_file.setdefault(filename, []).append(Range(first, count))
  return ranges_by_file
318
319
def filter_by_extension(dictionary, allowed_extensions):
  """Delete every key in `dictionary` that doesn't have an allowed extension.

  `dictionary` is modified in place; its keys are '/'-separated file paths.
  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  Matching is case-insensitive on the file's side.  The
  empty string in `allowed_extensions` allows files with no extension."""
  allowed_extensions = frozenset(allowed_extensions)
  for filename in list(dictionary):
    # Look only at the last path component, so that a dot in a directory name
    # (e.g. 'dir.d/Makefile') is not mistaken for the start of an extension.
    basename = filename.rsplit('/', 1)[-1]
    base_ext = basename.rsplit('.', 1)
    extension = base_ext[1].lower() if len(base_ext) == 2 else ''
    if extension not in allowed_extensions:
      del dictionary[filename]
331      del dictionary[filename]
332
333
334def cd_to_toplevel():
335  """Change to the top level of the git repository."""
336  toplevel = run('git', 'rev-parse', '--show-toplevel')
337  os.chdir(toplevel)
338
339
def create_tree_from_workdir(filenames):
  """Build a git tree holding `filenames` as they are in the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, '--stdin')
  return tree_id
345
346
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps each filename to the line ranges to format.  If
  `revision` is given, file modes are read from that revision with
  `git ls-tree`; otherwise they come from os.stat() on the working-directory
  file.  `revision`, `binary` and `style` are passed on to
  clang_format_to_blob().

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(path):
    # Produce the mode as an octal string with a single leading '0'.
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(path)),
                     os.path.basename(path)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      # The first whitespace-separated field of the ls-tree output is the mode.
      mode = oct(int(listing.split()[0], 8))
    else:
      mode = oct(os.stat(path).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_entries():
    try:
      pairs = changed_lines.iteritems() # Python 2
    except AttributeError:
      pairs = changed_lines.items() # Python 3
    for filename, line_ranges in pairs:
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  # The generator is consumed lazily by create_tree().
  return create_tree(index_entries(), '--index-info')
378
379
def create_tree(input_lines, mode):
  """Create a tree object from `input_lines` and return its object ID.

  `mode` selects how `git update-index` reads its input.  With '--stdin',
  `input_lines` must be an iterable of filenames.  With '--index-info', it
  must be an iterable of values suitable for "git update-index --index-info",
  such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid.

  The index is built in a temporary index file.  Exits with status 2 if
  `git update-index` fails."""
  assert mode in ('--stdin', '--index-info')
  update_index = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    child = subprocess.Popen(update_index, stdin=subprocess.PIPE)
    for entry in input_lines:
      # -z: every record is terminated by NUL rather than newline.
      child.stdin.write(to_bytes('%s\0' % entry))
    child.stdin.close()
    status = child.wait()
    if status != 0:
      die('`%s` failed' % ' '.join(update_index))
    return run('git', 'write-tree')
398
399
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Format `filename` with clang-format and store the output as a git blob.

  Only the lines in `line_ranges` (pairs of start line and line count) are
  passed to clang-format for formatting.  If `revision` is not None, the
  contents of the file in that revision are piped into clang-format;
  otherwise clang-format reads the file from the working directory.

  Returns the object ID (SHA-1) of the created blob.  Exits with status 2 if
  `binary` cannot be found or if any of the commands fails."""
  formatter_cmd = [binary]
  if style:
    formatter_cmd.append('-style='+style)
  for first, count in line_ranges:
    # clang-format takes an inclusive first:last pair.
    formatter_cmd.append('-lines=%s:%s' % (first, first+count-1))
  if revision:
    formatter_cmd.append('-assume-filename='+filename)
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    formatter_input = git_show.stdout
  else:
    formatter_cmd.append(filename)
    git_show = None
    formatter_input = subprocess.PIPE
  try:
    formatter = subprocess.Popen(formatter_cmd, stdin=formatter_input,
                                 stdout=subprocess.PIPE)
    if formatter_input == subprocess.PIPE:
      # Working-directory mode: swap the PIPE marker for the real pipe so it
      # can be closed below.
      formatter_input = formatter.stdin
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    die('cannot find executable "%s"' % binary)
  # Close our copy of the formatter's input handle.
  formatter_input.close()
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=formatter.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object now holds the read end; close our copy of it.
  formatter.stdout.close()
  blob_output = hash_object.communicate()[0]
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if formatter.wait() != 0:
    die('`%s` failed' % ' '.join(formatter_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(blob_output).rstrip('\r\n')
448
449
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a temporary index file.

  The index is created from `tree` by create_temporary_index().  On exit the
  previous value of GIT_INDEX_FILE is restored (or the variable is removed if
  it was not set) and the temporary file is deleted."""
  index_path = create_temporary_index(tree)
  saved = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = index_path
  try:
    yield
  finally:
    if saved is not None:
      os.environ['GIT_INDEX_FILE'] = saved
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(index_path)
465
466
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory; return its path.

  The index is populated from `tree` when that is not None; otherwise an
  empty index is created."""
  index_path = os.path.join(run('git', 'rev-parse', '--git-dir'),
                            temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output='+index_path, source)
  return index_path
478
479
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # a person is going to read the output, and only the porcelain command adds
  # conveniences such as color and pagination.
  #
  # `new_tree` holds just the files that were formatted, so without the
  # modified-only filter every other file would be listed as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
491
492
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Apply the changes in `new_tree` to the working directory.

  Bails (exit status 2) if any file that would be modified has unstaged
  changes, unless `force` is set.  If `patch_mode`, runs
  `git checkout --patch` to select hunks interactively; otherwise the files
  are checked out from `new_tree` unconditionally.

  Returns the list of file names that differ between `old_tree` and
  `new_tree`; the list is empty if git reports none."""
  diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                    '--name-only', old_tree, new_tree)
  # Splitting empty output yields [''], which is not a file name; drop it.
  changed_files = [name for name in diff_output.rstrip('\0').split('\0')
                   if name]
  # With no paths, diff-files would report on every tracked file, so only
  # check when there is something to check.
  if not force and changed_files:
    # '--' keeps file names that start with '-' from being read as options.
    unstaged_files = run('git', 'diff-files', '--name-status', '--',
                         *changed_files)
    if unstaged_files:
      print('The following files would be modified but '
                'have unstaged changes:', file=sys.stderr)
      print(unstaged_files, file=sys.stderr)
      print('Please commit, stage, or stash them first.', file=sys.stderr)
      sys.exit(2)
  if patch_mode:
    # In patch mode, we could just as well create an index from the new tree
    # and checkout from that, but then the user will be presented with a
    # message saying "Discard ... from worktree".  Instead, we use the old
    # tree as the index and checkout from new_tree, which gives the slightly
    # better message, "Apply ... to index and worktree".  This is not quite
    # right, since it won't be applied to the user's index, but oh well.
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
523
524
def run(*args, **kwargs):
  """Run the command `args` and return what it wrote to stdout, as a string.

  Keyword arguments:
    stdin:   data to feed to the command's standard input (text or bytes;
             text is encoded as UTF-8).  Defaults to no input.
    verbose: if true (the default), say which command wrote to stderr or
             returned a failure status before echoing its stderr.
    strip:   if true (the default), strip trailing newline characters from
             the returned output.

  On a zero exit status, anything the command wrote to stderr is echoed to
  our stderr and its stdout is returned.  On a non-zero exit status, its
  stderr is echoed and the program exits with status 2.

  Raises TypeError for any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  for name in kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" % name)
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       stdin=subprocess.PIPE)
  # The pipe is binary: on Python 3 writing text to it raises TypeError, so
  # encode whatever the caller supplied.
  stdout, stderr = p.communicate(input=to_bytes(stdin))

  stdout = convert_string(stdout)
  stderr = convert_string(stderr)

  if p.returncode == 0:
    if stderr:
      if verbose:
        print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
      print(stderr.rstrip(), file=sys.stderr)
    if strip:
      stdout = stdout.rstrip('\r\n')
    return stdout
  if verbose:
    print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
  if stderr:
    print(stderr.rstrip(), file=sys.stderr)
  sys.exit(2)
551
552
def die(message):
  """Print `message` to stderr, prefixed with 'error:', and exit with
  status 2."""
  print('error: %s' % (message,), file=sys.stderr)
  sys.exit(2)
556
557
def to_bytes(str_input):
    """Return `str_input` as binary data: bytes are passed through unchanged,
    anything else is encoded as UTF-8."""
    return (str_input if isinstance(str_input, bytes)
            else str_input.encode('utf-8'))
563
564
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A native `str` is returned unchanged.  Python 3 `bytes` are decoded as
    UTF-8.  Any other text type (Python 2 `unicode`) is encoded as UTF-8,
    which is what yields a native `str` there."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3 (on Python 2, bytes is str).  bytes has
        # no encode(), so it has to be decoded instead.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
569
570
def convert_string(bytes_input):
    """Turn `bytes_input` into a native string.

    The input is decoded as UTF-8 and passed through to_string().  If it has
    no decode() method, or is not valid UTF-8, `str(bytes_input)` is returned
    instead."""
    try:
        decoded = bytes_input.decode('utf-8')
        return to_string(decoded)
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        return str(bytes_input)
578
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
  main()
581