Last active
March 30, 2019 19:47
-
-
Save mossheim/cba31152d8757e957cbf013d36bee2a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function, absolute_import | |
import difflib | |
import glob | |
import os | |
import re | |
import string | |
import subprocess | |
import sys | |
import threading | |
from argparse import ArgumentParser | |
############################################################################## | |
# | |
# Constants | |
# | |
# clang-format releases whose output this script accepts.
CLANG_FORMAT_ACCEPTABLE_VERSIONS = [
    "7.0.0",
    "7.0.1",
    "8.0.0",
]

# Matches the extension of every source file we format in SC (no JS!).
FILES_REGEX = re.compile(r'\.(cpp|hpp|h|c|m|mm)$')
############################################################################## | |
def callo(args):
    """Run the command given by *args* and return what it wrote to stdout.

    The output is returned exactly as ``subprocess.check_output`` produced it.
    ``subprocess.CalledProcessError`` propagates if the command exits non-zero.
    """
    captured = subprocess.check_output(args)
    return captured
class ClangFormat(object):
    """Wrapper around one clang-format executable.

    Creating an instance runs ``<cf_cmd> --version`` and rejects any binary
    whose version is not listed in CLANG_FORMAT_ACCEPTABLE_VERSIONS.  The
    instance can then lint or reformat individual files using the style file
    found by clang-format (``--style=file``).
    """

    def __init__(self, cf_cmd):
        """Remember the clang-format command and validate its version.

        Args:
            cf_cmd: name or path of the clang-format executable.

        Raises:
            ValueError: the executable reports an unacceptable version.
        """
        self.cf_cmd = cf_cmd
        self._validate_version()

    @staticmethod
    def _as_text(raw):
        """Return subprocess output as ``str``.

        On Python 3 ``check_output`` yields ``bytes``, which can neither be
        searched with a ``str`` needle nor concatenated into a message.  On
        Python 2 ``bytes`` is ``str`` and the value is returned untouched.
        """
        if isinstance(raw, bytes) and not isinstance(raw, str):
            return raw.decode("utf-8", "replace")
        return raw

    @staticmethod
    def _reports_version(version_output, version):
        """Tell whether *version_output* names exactly *version*.

        A plain substring test would accept "17.0.0" when looking for
        "7.0.0", so the match must not be preceded by a digit or dot, nor
        followed by further version digits.
        """
        pattern = r"(?<![\d.])" + re.escape(version) + r"(?!\.?\d)"
        return re.search(pattern, version_output) is not None

    def _validate_version(self):
        """Check the executable's ``--version`` output against the allow-list.

        Raises:
            ValueError: none of the acceptable versions is reported.
        """
        cf_version = self._as_text(callo([self.cf_cmd, "--version"]))

        for version in CLANG_FORMAT_ACCEPTABLE_VERSIONS:
            if self._reports_version(cf_version, version):
                print("clang-format " + version + " found: " + cf_version)
                return

        raise ValueError("clang-format found, but incorrect version at " +
                         self.cf_cmd + " with version: " + cf_version +
                         "\nAcceptable versions: " +
                         str(CLANG_FORMAT_ACCEPTABLE_VERSIONS))

    def _lint(self, file_name, print_diff):
        """Check the specified file has the correct format.

        Args:
            file_name: path of the file to check.
            print_diff: when true, print a unified diff of the required
                changes along with the command that would fix them.

        Returns:
            True if the file already matches clang-format's output.
        """
        with open(file_name, 'rb') as original_text:
            original_file = original_text.read()

        # Get formatted file as clang-format would format the file.  Both
        # sides are raw bytes, so the comparison is encoding-independent.
        formatted_file = callo([self.cf_cmd, "--style=file", file_name])

        if original_file == formatted_file:
            return True

        if print_diff:
            # difflib needs text lines; decode only for display.
            original_lines = self._as_text(original_file).splitlines()
            formatted_lines = self._as_text(formatted_file).splitlines()
            result = difflib.unified_diff(original_lines, formatted_lines)

            print("ERROR: Found diff for " + file_name)
            print("To fix formatting errors, run %s --style=file -i %s" % (self.cf_cmd, file_name))
            for line in result:
                print(line.rstrip())

        return False

    def format(self, file_name):
        """Update the format of the specified file.

        Returns:
            True if the file was already formatted or clang-format exited
            with status 0 after rewriting it in place; False otherwise.
        """
        if self._lint(file_name, print_diff=False):
            return True

        # Update the file with clang-format
        formatted = not subprocess.call([self.cf_cmd, "--style=file", "-i", file_name])

        # Version 3.8 generates files like foo.cpp~RF83372177.TMP when it formats foo.cpp
        # on Windows, we must clean these up
        if sys.platform == "win32":
            glob_pattern = file_name + "*.TMP"
            for fglob in glob.glob(glob_pattern):
                os.unlink(fglob)

        return formatted
def get_base_dir():
    """Return the top-level directory of the git work tree containing the cwd.

    The value is the output of ``git rev-parse --show-toplevel`` with trailing
    whitespace removed, always as ``str`` so it can be compared with
    ``os.getcwd()`` and joined with other path components.

    If git cannot be started or reports an error, a message is printed and
    the process exits with status 2.
    """
    try:
        top_level = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git exited non-zero; OSError: git could not be run.
        print("This script must be running in a git repo")
        sys.exit(2)

    # Python 3 returns bytes here; on Python 2 bytes is str and this is skipped
    # (so the Python-3-only os.fsdecode is never reached there).
    if isinstance(top_level, bytes) and not isinstance(top_level, str):
        top_level = os.fsdecode(top_level)

    return top_level.rstrip()
class Repo(object):
    """Class encapsulates all knowledge about a git repository, and its metadata
    to run clang-format.

    Every git invocation is pointed at ``<path>/.git`` and ``<path>`` through
    ``--git-dir`` / ``--work-tree``.  Captured git output is returned as
    ``str`` on both Python 2 and Python 3.
    """

    def __init__(self, path):
        """Bind to the repository at *path* and look up its top-level directory."""
        # Accept bytes (raw subprocess output on Python 3) as well as text so
        # the os.path.join calls below never mix the two types.
        self.path = self._as_text(path)
        self.root = self._get_root()

    @staticmethod
    def _as_text(raw):
        """Return git output as ``str``.

        Only Python 3 ``bytes`` are decoded; "surrogateescape" keeps the
        conversion lossless for bytes that are not valid UTF-8.  On Python 2
        ``bytes`` is ``str`` and the value is returned untouched.
        """
        if isinstance(raw, bytes) and not isinstance(raw, str):
            return raw.decode("utf-8", "surrogateescape")
        return raw

    def _git_command(self, args):
        """Build the full git argument list for this repository."""
        # These two flags are the equivalent of -C in newer versions of Git
        # but we use these to support versions pre 1.8.5 but it depends on the command
        # and what the current directory is
        return ['git', '--git-dir', os.path.join(self.path, ".git"),
                '--work-tree', self.path] + args

    def _callgito(self, args):
        """Call git for this repository, and return the captured output as text."""
        return self._as_text(callo(self._git_command(args)))

    def _callgit(self, args, stdout=None):
        """Call git for this repository without capturing output.

        This is designed to be used when git returns non-zero exit codes.
        Returns git's exit status.
        """
        return subprocess.call(self._git_command(args), stdout=stdout)

    def _get_local_dir(self, path):
        """Get a directory path relative to the git root directory.

        Relative paths are returned unchanged.
        """
        if os.path.isabs(path):
            return os.path.relpath(path, self.root)
        return path

    def get_candidates(self, candidates):
        """Get the set of candidate files to check by querying the repository.

        When *candidates* is a non-empty list it is intersected with the files
        git knows about; otherwise every candidate file is used.

        Returns the full path to the file for clang-format to consume.
        """
        if candidates:
            candidates = [self._get_local_dir(f) for f in candidates]
            valid_files = list(set(candidates).intersection(self.get_candidate_files()))
        else:
            valid_files = list(self.get_candidate_files())

        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def get_root(self):
        """Return the repository's top-level directory."""
        return self.root

    def _get_root(self):
        """Ask git for the top-level directory of the work tree."""
        return self._callgito(['rev-parse', '--show-toplevel']).rstrip()

    def _git_ls_files(self, cmd):
        """Run git-ls-files and filter the list of files to a valid candidate list."""
        gito = self._callgito(cmd)

        # This allows us to pick all the interesting files
        # in the mongo and mongo-enterprise repos
        # NOTE(review): these prefixes look inherited from the mongo layout;
        # confirm they match the directories this project wants formatted.
        file_list = [line.rstrip()
                     for line in gito.splitlines()
                     if line.startswith(("jstests", "src"))
                     and not line.startswith("src/third_party")]

        return [a for a in file_list if FILES_REGEX.search(a)]

    def get_candidate_files(self):
        """Query git to get a list of all files in the repo to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached"])

    def get_working_tree_candidate_files(self):
        """Query git to get a list of all files in the working tree to consider for analysis."""
        return self._git_ls_files(["ls-files", "--cached", "--others"])

    def get_working_tree_candidates(self):
        """Get the set of candidate files to check by querying the repository.

        Returns the full path to the file for clang-format to consume.
        """
        valid_files = list(self.get_working_tree_candidate_files())

        # Get the full file name here
        return [os.path.normpath(os.path.join(self.root, f)) for f in valid_files]

    def is_detached(self):
        """Return git's exit status for ``symbolic-ref``; truthy when HEAD is detached."""
        # symbolic-ref returns 1 if the repo is in a detached HEAD state
        with open(os.devnull, 'w') as DEVNULL:
            return self._callgit(["symbolic-ref", "--quiet", "HEAD"], stdout=DEVNULL)

    def is_ancestor(self, parent, child):
        """Return True if *parent* is an ancestor of *child*."""
        # merge base returns 0 if parent is an ancestor of child
        return not self._callgit(["merge-base", "--is-ancestor", parent, child])

    def is_commit(self, sha1):
        """Return True if *sha1* names a commit in this repository."""
        # cat-file -e returns 0 if it is a valid hash
        return not self._callgit(["cat-file", "-e", "%s^{commit}" % sha1])

    def is_working_tree_dirty(self):
        """Return git's exit status for ``diff --quiet``; truthy when there are local changes."""
        # diff returns 1 if the working tree has local changes
        return self._callgit(["diff", "--quiet"])

    def does_branch_exist(self, branch):
        """Return True if *branch* resolves to an object in this repository."""
        # rev-parse returns 0 if the branch exists
        return not self._callgit(["rev-parse", "--verify", "--quiet", branch])

    def get_merge_base(self, commit):
        """Return the merge base of HEAD and *commit*."""
        return self._callgito(["merge-base", "HEAD", commit]).rstrip()

    def get_branch_name(self):
        """Get the current branch name, short form.

        This returns "master", not "refs/heads/master".

        Raises:
            ValueError: HEAD is detached, so there is no branch name.
        """
        branch = self.rev_parse(["--abbrev-ref", "HEAD"])
        if branch == "HEAD":
            raise ValueError("Branch is currently detached")

        return branch

    # Thin wrappers: each runs the named git sub-command with the extra
    # arguments in *command* and returns the captured output.

    def add(self, command):
        """Run ``git add`` with *command*."""
        return self._callgito(["add"] + command)

    def checkout(self, command):
        """Run ``git checkout`` with *command*."""
        return self._callgito(["checkout"] + command)

    def commit(self, command):
        """Run ``git commit`` with *command*."""
        return self._callgito(["commit"] + command)

    def diff(self, command):
        """Run ``git diff`` with *command*."""
        return self._callgito(["diff"] + command)

    def log(self, command):
        """Run ``git log`` with *command*."""
        return self._callgito(["log"] + command)

    def rev_parse(self, command):
        """Run ``git rev-parse`` with *command*; trailing whitespace is stripped."""
        return self._callgito(["rev-parse"] + command).rstrip()

    def rm(self, command):
        """Run ``git rm`` with *command*."""
        return self._callgito(["rm"] + command)

    def show(self, command):
        """Run ``git show`` with *command*."""
        return self._callgito(["show"] + command)
def get_list_from_lines(lines):
    """Split a multi-line string into a list with one entry per line.

    Trailing whitespace is removed from every entry.
    """
    entries = []
    for raw_line in lines.splitlines():
        entries.append(raw_line.rstrip())
    return entries
def validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch):
    """Verify the repository is in a state where a branch can be reformatted.

    The checks run in order: both commits exist, the "before" commit is an
    ancestor of the "after" commit, HEAD is on a branch, the working tree is
    clean, and the current branch is based on the "before" commit both
    directly and relative to *target_branch*.

    Returns:
        The Repo object for the repository containing the current directory.

    Raises:
        ValueError: any of the checks fails; the message says how to proceed.
    """
    repo = Repo(get_base_dir())

    if not repo.is_commit(commit_before_reformat):
        raise ValueError(
            "Commit before reformat '%s' is not a valid commit in this repo"
            % commit_before_reformat)

    if not repo.is_commit(commit_after_reformat):
        raise ValueError(
            "Commit after reformat '%s' is not a valid commit in this repo"
            % commit_after_reformat)

    if not repo.is_ancestor(commit_before_reformat, commit_after_reformat):
        ancestor_msg = ("Commit before reformat '%s' is not a valid ancestor of commit after"
                        " reformat '%s' in this repo")
        raise ValueError(ancestor_msg % (commit_before_reformat, commit_after_reformat))

    if repo.is_detached():
        raise ValueError("You must not run this script in a detached HEAD state")

    if repo.is_working_tree_dirty():
        raise ValueError(
            "Your working tree has pending changes. You must have a clean working"
            " tree before proceeding.\n\nRun `git status` to see your pending changes, and then"
            " try `git stash save`, `git reset --hard`, `git submodule update` and/or committing"
            " your changes.")

    # The branch itself must sit directly on top of the pre-reformat commit.
    branch_base = repo.get_merge_base(commit_before_reformat)
    if branch_base != repo.rev_parse([commit_before_reformat]):
        rebase_msg = ("Merge base is '%s'. Please rebase to '%s' and resolve all conflicts"
                      " before running this script.\n\nTo interactively rebase, use `git rebase -i %s`")
        raise ValueError(rebase_msg % (branch_base, commit_before_reformat, commit_before_reformat))

    # We assume the target branch is master, it could be a different branch if needed for testing
    target_base = repo.get_merge_base(target_branch)
    if target_base != repo.rev_parse([commit_before_reformat]):
        raise ValueError("This branch appears to already have advanced too far through the merge process")

    return repo
def get_branch_names(repo):
    """Return ``(current_branch, new_branch)`` for the reformatted copy.

    The new name is the current branch name with "-reformatted" appended.

    Raises:
        ValueError: a branch with the new name already exists.
    """
    # Everything looks good so lets start going through all the commits
    current = repo.get_branch_name()
    reformatted = "%s-reformatted" % current

    if not repo.does_branch_exist(reformatted):
        return (current, reformatted)

    raise ValueError("The branch '%s' already exists. Please delete the branch '%s', or rename the current branch." % (reformatted, reformatted))
def reformat_branch(clang_format, commit_before_reformat, commit_after_reformat, target_branch):
    """Reformat a branch made before a clang-format run.

    Every commit between *commit_before_reformat* and HEAD is checked out,
    its touched files are run through clang-format, and the result is
    re-committed on top of *commit_after_reformat*.  The rebuilt history is
    finally named ``<current branch>-reformatted``; the original branch is
    not modified.

    Args:
        clang_format: name or path of the clang-format executable.
        commit_before_reformat: commit immediately prior to the reformat.
        commit_after_reformat: commit that applied the reformat.
        target_branch: branch the result is eventually rebased onto.

    Raises:
        ValueError: clang-format has the wrong version, the script is not run
            from the repository root, or the repository state is unsuitable.
    """
    clang_format = ClangFormat(clang_format)

    base_dir = get_base_dir()
    # Raw git output is bytes on Python 3; os.fsdecode is only reached there.
    if isinstance(base_dir, bytes) and not isinstance(base_dir, str):
        base_dir = os.fsdecode(base_dir)

    def canonical(path):
        # git prints forward slashes and resolved paths; normalise both sides
        # so "C:/x" vs "C:\\x" or a symlinked cwd do not cause a false mismatch.
        return os.path.normcase(os.path.realpath(path))

    if canonical(os.getcwd()) != canonical(base_dir):
        raise ValueError("reformat-branch must be run from the repo root")

    repo = validate_repo_state(commit_before_reformat, commit_after_reformat, target_branch)
    old_branch, new_branch = get_branch_names(repo)

    commits = get_list_from_lines(repo.log(["--reverse", "--pretty=format:%H", "%s..HEAD" % commit_before_reformat]))

    previous_commit_base = commit_after_reformat

    # Go through all the commits the user made on the local branch and migrate to a new branch
    # that is based on post_reformat commits instead
    for idx, commit_hash in enumerate(commits):
        print("--- Formatting %s (%s of %s)" % (commit_hash, idx + 1, len(commits)))
        repo.checkout(["--quiet", "--detach", commit_hash])

        deleted_files = set()

        # Format each of the files by checking out just a single commit from the user's branch
        commit_files = get_list_from_lines(repo.diff(["HEAD~", "--name-only"]))

        for commit_file in commit_files:
            # Format each file needed if it was not deleted
            if not os.path.exists(commit_file):
                print("\tSkipping file '%s' since it has been deleted in commit '%s'" % (
                    commit_file, commit_hash))
                deleted_files.add(commit_file)
                continue

            if FILES_REGEX.search(commit_file):
                clang_format.format(commit_file)
            else:
                print("\tSkipping file '%s' since it is not a file clang_format should format" %
                      commit_file)

        # Check if anything needed reformatting, and if so amend the commit
        if not repo.is_working_tree_dirty():
            print("Commit %s needed no reformatting" % commit_hash)
        else:
            repo.commit(["--all", "--amend", "--no-edit"])

        # Rebase our new commit on top the post-reformat commit
        previous_commit = repo.rev_parse(["HEAD"])

        # Checkout the new branch with the reformatted commits
        # Note: we will not name as a branch until we are done with all commits on the local branch
        repo.checkout(["--quiet", "--detach", previous_commit_base])

        # Copy each file from the reformatted commit on top of the post reformat
        diff_files = get_list_from_lines(repo.diff(["%s~..%s" % (previous_commit, previous_commit),
                                                    "--name-only"]))

        for diff_file in diff_files:
            # If the file was deleted in the commit we are reformatting, we need to delete it again
            if diff_file in deleted_files:
                repo.rm([diff_file])
                continue

            # The file has been added or modified, continue as normal
            file_contents = repo.show(["%s:%s" % (previous_commit, diff_file)])
            if not isinstance(file_contents, bytes):
                # Text output (Python 3): turn it back into the original bytes.
                file_contents = file_contents.encode("utf-8", "surrogateescape")

            root_dir = os.path.dirname(diff_file)
            if root_dir and not os.path.exists(root_dir):
                os.makedirs(root_dir)

            # Binary mode: the blob must land on disk byte-for-byte, without
            # newline translation or a locale-dependent re-encoding.
            with open(diff_file, "wb") as new_file:
                new_file.write(file_contents)

            repo.add([diff_file])

        # Create a new commit onto clang-formatted branch
        repo.commit(["--reuse-message=%s" % previous_commit])

        previous_commit_base = repo.rev_parse(["HEAD"])

    # Create a new branch to mark the hashes we have been using.  Naming the
    # start point keeps the branch on the post-reformat history even when
    # there were no commits to migrate; otherwise HEAD is already there.
    repo.checkout(["-b", new_branch, previous_commit_base])

    print("reformat-branch is done running.\n")
    print("A copy of your branch has been made named '%s', and formatted with clang-format.\n" % new_branch)
    print("The original branch has been left unchanged.")
    print("If you have not just done so, the next step is to rebase the new branch on '%s'.\n" % target_branch)
    print("To undo this, run `git checkout %s && git branch -D %s`" % (old_branch, new_branch))
def main():
    """Main entry point.

    Parses the command line, resolves the pre-/post-reformat commits (either
    the three positional arguments or the presets selected by ``-b``) and
    runs reformat_branch.

    Exit status: 2 when the arguments are insufficient, 3 for an unknown
    ``-b`` branch, 1 when reformat_branch reports an error.
    """
    parser = ArgumentParser(
        usage='''clang_format.py -b 3.10 ~OR~ clang_format.py tag-reformat-3.10 3.10 3.10

PLEASE READ.

This script formats a branch past the great reformatting wall. It can be run two ways:
1. clang_format.py commit-right-before-reformat commit-after-reformat original-branch
2. clang_format.py -b 3.10 # or develop

The first usage is better if you think you may encounter extra merge conflicts.

This script requires:
- you have a clean working directory
- you have rebased your branch on commit-right-before-reformat (implicitly for the second usage)
- you have the branch currently checked out
- various other logical requirements for the rebase to work

If there is an issue, this script will most likely detect it and provide you with
commands to fix it.
''')
    parser.add_argument("-c", "--clang-format", dest="clang_format", default='clang-format',
                        help='Command to use for clang-format')
    parser.add_argument("-b", "--base", dest="base_branch", help='Tries to rebase on the tip of this'
                        + ' branch given a base branch name (experimental). This should be the main branch the'
                        + ' current branch is based on (3.10 or develop)')
    parser.add_argument("commit1", help="commit immediately prior to reformat", nargs='?', default='')
    parser.add_argument("commit2", help="commit after reformat", nargs='?', default='')
    parser.add_argument("target", help="target branch name (likely 3.10 or develop)", nargs='?', default='')

    options = parser.parse_args()

    # Without all three positional arguments, fall back to the -b presets.
    if not options.commit1 or not options.commit2 or not options.target:
        if not options.base_branch:
            parser.print_help()
            sys.exit(2)

        # TODO update this when formatting is done
        if options.base_branch == '3.10':
            options.commit1 = 'tag-reformat-3.10'
            options.commit2 = options.target = 'format-3.10'
        elif options.base_branch == 'develop':
            options.commit1 = 'tag-reformat-develop'
            options.commit2 = options.target = 'format-develop'
        else:
            print("Don't know how to use this base branch: %s. Try using the three-argument version of this script"
                  % options.base_branch)
            sys.exit(3)

    try:
        reformat_branch(options.clang_format, options.commit1, options.commit2, options.target)
    except ValueError as ve:
        print("*** ERROR:\n" + str(ve) + "\n")
        # Report the failure to the caller instead of exiting with status 0.
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment