Created
May 8, 2021 19:59
-
-
Save ngbrown/577308a0bfbbd35a3c565f206011f8dd to your computer and use it in GitHub Desktop.
Filter for git-filter-repo to convert all text files to LF line endings, for example after a Mercurial conversion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
This is a simple program that converts Windows (CRLF) line endings to LF in
all non-binary files in history, for example after a Mercurial conversion.
It also rewrites commit hashes in commit messages to refer to the new
commits with the rewritten files.

See https://github.com/newren/git-filter-repo/issues/45
and https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
"""

"""
Please see the
  ***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse | |
import os | |
import subprocess | |
import tempfile | |
try: | |
import git_filter_repo as fr | |
except ImportError: | |
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?") | |
# Epilog shown at the bottom of --help.  argparse expands %(prog)s to the
# name the script was invoked as.
example_text = '''EXAMPLES

  Convert CRLF line endings to LF in every text file in history:
    %(prog)s
'''

# The description states what this script actually does: it rewrites line
# endings itself and does not run any external program.
parser = argparse.ArgumentParser(
    description='Convert CRLF line endings to LF in all text files in history',
    epilog=example_text,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
    '--replace-refs', default=None,
    choices=['delete-no-add', 'delete-and-add',
             'update-no-add', 'update-or-add',
             'update-and-add'],
    help='passed through unchanged as the replace_refs option of '
         'git-filter-repo')

# Parsed once at start-up; the script body reads lint_args.replace_refs.
lint_args = parser.parse_args()
# Lower-case extensions (leading dot included) of files that are never
# examined for line-ending conversion.
binary_extensions = [
    b".exe", b".pdf", b".bmp", b".cur", b".dll", b".doc", b".docx", b".ico", b".jpg", b".ocx",
    b".xls", b".xlsx", b".png", b".gif", b".mp4", b".swf", b".o", b".od", b".gch", b".pch",
    b".a", b".gz", b".zip", b".tar", b".bin"
]


def is_relevant(filename: bytes) -> bool:
    """Return True unless *filename* ends in a known binary extension.

    Only the final path component (after the last ``/``) is examined, and
    the comparison is case-insensitive.

    Args:
        filename: Slash-separated path as a byte string.

    Returns:
        False when the last ``.``-suffix of the file name is listed in
        ``binary_extensions``, True otherwise.
    """
    just_filename = filename.rpartition(b"/")[2]
    # rpartition() yields (head, sep, tail); joining sep + tail keeps the
    # leading dot.  When the name has no dot, sep is empty and the result is
    # the whole name, which cannot match because every listed extension
    # starts with ".".
    extension = b"".join(just_filename.rpartition(b".")[1:]).lower()
    return extension not in binary_extensions
# Never read anywhere in this script; kept so the module-level name exists.
tmpdir = None
# Maps the id of a blob that has already been examined to the id that should
# be used in its place (the same id when the blob needed no change).
blobs_handled = {}
# `git cat-file --batch` subprocess; assigned by the script body before the
# filter runs.
cat_file_process = None


def lint_with_real_filenames(commit, metadata):
    """Replace CRLF line endings with LF in the blobs touched by *commit*.

    Used as the ``commit_callback`` of ``fr.RepoFilter``.  For every entry in
    ``commit.file_changes`` that is not a deletion and whose file name passes
    ``is_relevant``, the blob contents are fetched through
    ``cat_file_process``.  Blobs that look binary (a NUL byte within the first
    8192 bytes) or contain no ``\\r\\n`` are left alone.  Any other blob is
    re-inserted into the filter stream with ``\\r\\n`` replaced by ``\\n``, and
    the change is pointed at the new blob.  Results are cached in
    ``blobs_handled`` so each blob is fetched at most once.

    Args:
        commit: Object exposing ``file_changes``; each change has ``blob_id``,
            ``type`` and ``filename`` attributes (presumably a git-filter-repo
            commit -- confirm against the library).
        metadata: Unused.

    Raises:
        ValueError: If ``git cat-file`` replies with a header line that is
            neither ``<object> missing`` nor three whitespace-separated
            fields.
    """
    for change in commit.file_changes:
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D' or not is_relevant(change.filename):
            continue

        # Ask the long-running cat-file process for the old blob contents.
        cat_file_process.stdin.write(change.blob_id + b'\n')
        cat_file_process.stdin.flush()
        header = cat_file_process.stdout.readline().split()
        if len(header) == 2 and header[1] == b'missing':
            # cat-file could not resolve the id (presumably e.g. a submodule
            # entry whose commit is not in this repository -- confirm).  No
            # contents follow this reply, so leave the change untouched.
            blobs_handled[change.blob_id] = change.blob_id
            continue
        _, objtype, objsize = header
        # The contents are followed by one newline that is not part of them.
        contents_plus_newline = cat_file_process.stdout.read(int(objsize) + 1)
        blob_data = contents_plus_newline[:-1]

        # Skip anything that is not a blob, looks binary, or has no Windows
        # newlines; remember the decision so the object is not fetched again.
        if (objtype != b'blob'
                or b"\0" in blob_data[:8192]
                or b"\r\n" not in blob_data):
            blobs_handled[change.blob_id] = change.blob_id
            continue

        blob = fr.Blob(blob_data.replace(b"\r\n", b"\n"))
        # Insert the new file into the filter's stream
        filter.insert(blob)
        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
# Script body: configure git-filter-repo, start the blob reader, rewrite.
args = fr.FilteringOptions.default_options()
args.force = True
args.replace_refs = lint_args.replace_refs

# No temporary directory is created here any more: the one made previously
# was never used and was left behind on disk after every run.

# One long-running `git cat-file --batch` process serves every blob lookup
# made by the commit callback.
cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
try:
    # The callback refers to this object through the module-level name
    # `filter`, so the name must stay as it is.
    filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
    filter.run()
finally:
    # Shut the helper process down even when the rewrite fails part-way.
    cat_file_process.stdin.close()
    cat_file_process.wait()
    cat_file_process.stdout.close()
#!/usr/bin/env python3
"""
This is a simple program that converts Windows (CRLF) line endings to LF in
all non-binary files in history, for example after a Mercurial conversion.
It also rewrites commit hashes in commit messages to refer to the new
commits with the rewritten files.

See https://github.com/newren/git-filter-repo/issues/45
and https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
"""

"""
Please see the
  ***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse | |
import os | |
import subprocess | |
import tempfile | |
try: | |
import git_filter_repo as fr | |
except ImportError: | |
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?") | |
# Epilog shown at the bottom of --help.  argparse expands %(prog)s to the
# name the script was invoked as.
example_text = '''EXAMPLES

  Convert CRLF line endings to LF in every text file in history:
    %(prog)s
'''

# The description states what this script actually does: it rewrites line
# endings itself and does not run any external program.
parser = argparse.ArgumentParser(
    description='Convert CRLF line endings to LF in all text files in history',
    epilog=example_text,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
    '--replace-refs', default=None,
    choices=['delete-no-add', 'delete-and-add',
             'update-no-add', 'update-or-add',
             'update-and-add'],
    help='passed through unchanged as the replace_refs option of '
         'git-filter-repo')

# Parsed once at start-up; the script body reads lint_args.replace_refs.
lint_args = parser.parse_args()
# Lower-case extensions (leading dot included) of files that are never
# examined for line-ending conversion.
binary_extensions = [
    b".exe", b".pdf", b".bmp", b".cur", b".dll", b".doc", b".docx", b".ico", b".jpg", b".ocx",
    b".xls", b".xlsx", b".png", b".gif", b".mp4", b".swf", b".o", b".od", b".gch", b".pch",
    b".a", b".gz", b".zip", b".tar", b".bin"
]


def is_relevant(filename: bytes) -> bool:
    """Return True unless *filename* ends in a known binary extension.

    Only the final path component (after the last ``/``) is examined, and
    the comparison is case-insensitive.

    Args:
        filename: Slash-separated path as a byte string.

    Returns:
        False when the last ``.``-suffix of the file name is listed in
        ``binary_extensions``, True otherwise.
    """
    just_filename = filename.rpartition(b"/")[2]
    # rpartition() yields (head, sep, tail); joining sep + tail keeps the
    # leading dot.  When the name has no dot, sep is empty and the result is
    # the whole name, which cannot match because every listed extension
    # starts with ".".
    extension = b"".join(just_filename.rpartition(b".")[1:]).lower()
    return extension not in binary_extensions
# Never read anywhere in this script; kept so the module-level name exists.
tmpdir = None
# Maps the id of a blob that has already been examined to the id that should
# be used in its place (the same id when the blob needed no change).
blobs_handled = {}
# `git cat-file --batch` subprocess; assigned by the script body before the
# filter runs.
cat_file_process = None


def lint_with_real_filenames(commit, metadata):
    """Replace CRLF line endings with LF in the blobs touched by *commit*.

    Used as the ``commit_callback`` of ``fr.RepoFilter``.  For every entry in
    ``commit.file_changes`` that is not a deletion and whose file name passes
    ``is_relevant``, the blob contents are fetched through
    ``cat_file_process``.  Blobs that look binary (a NUL byte within the first
    8192 bytes) or contain no ``\\r\\n`` are left alone.  Any other blob is
    re-inserted into the filter stream with ``\\r\\n`` replaced by ``\\n``, and
    the change is pointed at the new blob.  Results are cached in
    ``blobs_handled`` so each blob is fetched at most once.

    Args:
        commit: Object exposing ``file_changes``; each change has ``blob_id``,
            ``type`` and ``filename`` attributes (presumably a git-filter-repo
            commit -- confirm against the library).
        metadata: Unused.

    Raises:
        ValueError: If ``git cat-file`` replies with a header line that is
            neither ``<object> missing`` nor three whitespace-separated
            fields.
    """
    for change in commit.file_changes:
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D' or not is_relevant(change.filename):
            continue

        # Ask the long-running cat-file process for the old blob contents.
        cat_file_process.stdin.write(change.blob_id + b'\n')
        cat_file_process.stdin.flush()
        header = cat_file_process.stdout.readline().split()
        if len(header) == 2 and header[1] == b'missing':
            # cat-file could not resolve the id (presumably e.g. a submodule
            # entry whose commit is not in this repository -- confirm).  No
            # contents follow this reply, so leave the change untouched.
            blobs_handled[change.blob_id] = change.blob_id
            continue
        _, objtype, objsize = header
        # The contents are followed by one newline that is not part of them.
        contents_plus_newline = cat_file_process.stdout.read(int(objsize) + 1)
        blob_data = contents_plus_newline[:-1]

        # Skip anything that is not a blob, looks binary, or has no Windows
        # newlines; remember the decision so the object is not fetched again.
        if (objtype != b'blob'
                or b"\0" in blob_data[:8192]
                or b"\r\n" not in blob_data):
            blobs_handled[change.blob_id] = change.blob_id
            continue

        blob = fr.Blob(blob_data.replace(b"\r\n", b"\n"))
        # Insert the new file into the filter's stream
        filter.insert(blob)
        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
# Script body: configure git-filter-repo, start the blob reader, rewrite.
args = fr.FilteringOptions.default_options()
args.force = True
args.replace_refs = lint_args.replace_refs

# No temporary directory is created here any more: the one made previously
# was never used and was left behind on disk after every run.

# One long-running `git cat-file --batch` process serves every blob lookup
# made by the commit callback.
cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
try:
    # The callback refers to this object through the module-level name
    # `filter`, so the name must stay as it is.
    filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
    filter.run()
finally:
    # Shut the helper process down even when the rewrite fails part-way.
    cat_file_process.stdin.close()
    cat_file_process.wait()
    cat_file_process.stdout.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment