#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
#
# !!!!!!!!!!!! NOTE !!!!!!!!!!!!
# This is copied directly from upstream LLVM. Please make any changes upstream,
# rather than to this file directly. Once changes are made there, you're free
# to integrate them here.

"""Checks for reverts of commits across a given git commit.

To clarify the meaning of 'across' with an example, if we had the following
commit history (where `a -> b` notes that `b` is a direct child of `a`):

123abc -> 223abc -> 323abc -> 423abc -> 523abc

And where 423abc is a revert of 223abc, this revert is considered to be 'across'
323abc. More generally, a revert A of a parent commit B is considered to be
'across' a commit C if C is a parent of A and B is a parent of C.

Please note that revert detection in general is really difficult, since merge
conflicts/etc always introduce _some_ amount of fuzziness. This script just
uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
though.

This is designed to be used in one of two ways: an import in Python, or run
directly from a shell. If you want to import this, the `find_reverts`
function is the thing to look at. If you'd rather use this from a shell, have a
usage example:

```
./revert_checker.py c47f97169 origin/main origin/release/12.x
```

This checks for all reverts from the tip of origin/main to c47f97169, which are
across the latter. It then does the same for origin/release/12.x to c47f97169.
Duplicate reverts discovered when walking both roots (origin/main and
origin/release/12.x) are deduplicated in output.
"""

import argparse
import collections
import logging
import re
import subprocess
import sys
from typing import Generator, Iterable, List, NamedTuple


assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."

# People are creative with their reverts, and heuristics are a bit difficult.
# Like 90% of reverts have "This reverts commit ${full_sha}".
# Some lack that entirely, while others have many of them specified in ad-hoc
# ways, while others use short SHAs and whatever.
#
# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
# starts involving human intervention, which is probably not worth it for now.
def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
    """Returns the SHAs that `commit_message` claims to revert.

    Two forms are recognized:
      - `This reverts commit <40 hex chars>` anywhere in the message, and
      - a subject line starting with `Revert <6+ hex chars> "`.

    Body matches come first (in order of appearance), followed by the subject
    line match, if any. SHAs are returned exactly as written; the subject line
    one may be abbreviated.
    """
    if not commit_message:
        return []

    claimed_shas = [
        match.group(1)
        for match in re.finditer(
            r"This reverts commit ([a-f0-9]{40})\b", commit_message
        )
    ]

    subject = commit_message.splitlines()[0]
    subject_match = re.match(r'Revert ([a-f0-9]{6,}) "', subject)
    if subject_match is not None:
        claimed_shas.append(subject_match.group(1))
    return claimed_shas


def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
    """Runs `command`, lazily yielding its stdout one line at a time.

    Output is decoded as UTF-8, with undecodable bytes replaced. Yielded lines
    keep their trailing newline. The exit status of `command` isn't checked.
    """
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace"
    ) as proc:
        assert proc.stdout is not None  # for mypy's happiness.
        for output_line in proc.stdout:
            yield output_line


def _resolve_sha(git_dir: str, sha: str) -> str:
    """Expands `sha` to a full SHA using `git rev-parse` in `git_dir`.

    Anything that's already 40 characters long is returned untouched, without
    invoking git. Raises subprocess.CalledProcessError if git exits nonzero.
    """
    if len(sha) != 40:
        rev_parse_output = subprocess.check_output(
            ["git", "-C", git_dir, "rev-parse", sha],
            encoding="utf-8",
            stderr=subprocess.DEVNULL,
        )
        return rev_parse_output.strip()
    return sha


class _LogEntry(NamedTuple):
    """A single commit, as parsed from `git log` output."""

    # The commit's full SHA.
    sha: str
    # The commit's raw message, with trailing whitespace stripped.
    commit_message: str


def _log_stream(
    git_dir: str, root_sha: str, end_at_sha: str
) -> Iterable[_LogEntry]:
    """Yields a _LogEntry for each commit `git log ^end_at_sha root_sha` prints.

    Entries are yielded in the order git emits them.
    """
    separator = "<>" * 50
    log_lines = _stream_stdout(
        [
            "git",
            "-C",
            git_dir,
            "log",
            "^" + end_at_sha,
            root_sha,
            "--format=" + separator + "%n%H%n%B%n",
        ]
    )

    # Consume everything up to and including the first separator line. If
    # there's nothing to log, no separator exists. It also might not be the
    # very first line if git feels complainy.
    at_commit_header = any(
        output_line.rstrip() == separator for output_line in log_lines
    )

    while at_commit_header:
        # The line directly after a separator is always the commit's SHA.
        sha_line = next(log_lines, None)
        assert sha_line is not None, "git died?"

        # Everything up until the next separator (or the end of output) is the
        # commit message.
        at_commit_header = False
        message_lines = []
        for output_line in log_lines:
            stripped_line = output_line.rstrip()
            if stripped_line == separator:
                at_commit_header = True
                break
            message_lines.append(stripped_line)

        yield _LogEntry(sha_line.rstrip(), "\n".join(message_lines).rstrip())


def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lazily lists the SHAs `git rev-list --first-parent base_ref..head_ref` prints."""
    rev_list_command = [
        "git",
        "-C",
        git_dir,
        "rev-list",
        "--first-parent",
        f"{base_ref}..{head_ref}",
    ]
    return map(str.strip, _stream_stdout(rev_list_command))


def _rev_parse(git_dir: str, ref: str) -> str:
    """Resolves `ref` to a SHA using `git rev-parse` in `git_dir`.

    Raises subprocess.CalledProcessError if git exits nonzero.
    """
    rev_parse_output = subprocess.check_output(
        ["git", "-C", git_dir, "rev-parse", ref],
        encoding="utf-8",
    )
    return rev_parse_output.strip()


class Revert(NamedTuple):
    """A claim by one commit that it reverts another."""

    # SHA of the commit doing the reverting.
    sha: str
    # SHA of the commit that `sha` claims to revert.
    reverted_sha: str


def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`."""
    merge_base_output = subprocess.check_output(
        ["git", "-C", git_dir, "merge-base", ref_a, ref_b],
        encoding="utf-8",
    )
    return merge_base_output.strip()
def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
        git_dir: Path to the git checkout to run git commands in.
        across_ref: Ref or SHA which reverts must straddle. It must be an
            ancestor of `root`.
        root: Ref or SHA to start walking history from.

    Returns:
        One `Revert` for each (reverting commit, reverted commit) pair where
        the reverted SHA names a commit that isn't in the first-parent history
        between `across_ref` and `root`.

    Raises:
        ValueError: if `across_ref` isn't an ancestor of `root`.
        subprocess.CalledProcessError: if git can't resolve `across_ref` or
            `root`, or can't compute their merge base.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both halves of this message must be f-strings; adjacent string
        # literals are concatenated, but each keeps its own prefix.
        raise ValueError(
            f"{across_sha} isn't an ancestor of {root_sha} "
            f"(common ancestor: {common_ancestor})"
        )

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug(
        "%d commits appear between %s and %s",
        len(intermediate_commits),
        across_sha,
        root_sha,
    )

    all_reverts = []
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts = _try_parse_reverts_from_commit_message(commit_message)
        if not reverts:
            continue

        # Commit messages are free-form text, so an abbreviated SHA in one may
        # be unknown to (or ambiguous in) this repository. Treat that the same
        # way as any other unresolvable object below: warn and move on, rather
        # than aborting the entire scan.
        resolved_reverts = set()
        for claimed_sha in reverts:
            try:
                resolved_reverts.add(_resolve_sha(git_dir, claimed_sha))
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    claimed_sha,
                    sha,
                )

        # Sorted so that output order is deterministic.
        for reverted_sha in sorted(resolved_reverts):
            # A revert of something that landed after `across_sha` doesn't
            # cross it, so it isn't interesting.
            if reverted_sha in intermediate_commits:
                logging.debug(
                    "Commit %s reverts %s, which happened after %s",
                    sha,
                    reverted_sha,
                    across_sha,
                )
                continue

            try:
                object_type = subprocess.check_output(
                    ["git", "-C", git_dir, "cat-file", "-t", reverted_sha],
                    encoding="utf-8",
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    reverted_sha,
                    sha,
                )
                continue

            if object_type == "commit":
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert %s -- which isn't a commit -- %s",
                sha,
                object_type,
                reverted_sha,
            )

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts


def _main() -> None:
    """Parses command-line arguments, then prints all reverts found.

    Each revert is printed on its own line. Reverts reachable from more than
    one root are only printed once.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "base_ref", help="Git ref or sha to check for reverts around."
    )
    parser.add_argument(
        "-C", "--git_dir", default=".", help="Git directory to use."
    )
    parser.add_argument(
        "root", nargs="+", help="Root(s) to search for commits from."
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "-u",
        "--review_url",
        action="store_true",
        help="Format SHAs as llvm review URLs",
    )
    opts = parser.parse_args()

    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that comes in an order that's meaningful to
    # git.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    for revert in all_reverts:
        sha_fmt = (
            f"https://reviews.llvm.org/rG{revert.sha}"
            if opts.review_url
            else revert.sha
        )
        reverted_sha_fmt = (
            f"https://reviews.llvm.org/rG{revert.reverted_sha}"
            if opts.review_url
            else revert.reverted_sha
        )
        print(f"{sha_fmt} claims to revert {reverted_sha_fmt}")


if __name__ == "__main__":
    _main()