Last active
October 8, 2016 13:09
-
-
Save larsxschneider/b587862d1044e42d9935b7646f6536a2 to your computer and use it in GitHub Desktop.
Scripts to help migrate source code to Git / Git LFS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # | |
| # Script to identify extensions in a Git repository that should go to LFS | |
| # | |
| import os | |
# Size threshold in MiB: a file counts as "large" only when it is strictly
# bigger than this value.
min_size_mb = 0.5

# Directory the scan starts from: wherever the script was launched.
cwd = os.getcwd()

# Collected statistics, keyed by label; each value is a dict of counters
# and sizes that add_file() creates and updates.
result = dict()
def is_binary(filename):
    """Return True if the given file looks binary, i.e. contains a NUL byte.

    The file is read in 1024-byte chunks and scanning stops at the first
    NUL byte, so large binary files are usually classified after one read.
    An empty file is reported as not binary.

    @raise EnvironmentError: if the file does not exist or cannot be accessed.
    @attention: found @ http://bytes.com/topic/python/answers/21222-determine-file-type-binary-text on 6/08/2010
    @author: Trent Mick
    @author: Jorge Orpinel
    """
    CHUNKSIZE = 1024
    with open(filename, 'rb') as fin:
        while True:
            chunk = fin.read(CHUNKSIZE)
            # The file is opened in binary mode, so compare against a bytes
            # literal; a text literal raises TypeError on Python 3.
            if b'\0' in chunk:
                return True
            if len(chunk) < CHUNKSIZE:
                # Short (or empty) read: end of file reached without a NUL.
                return False
def add_file(ext, size_mb):
    """Record one file of ``size_mb`` MiB under the label ``ext``.

    Updates the module-level ``result`` dict: every file bumps the overall
    count and size for the label, files strictly larger than
    ``min_size_mb`` additionally bump the "large" counters, and the
    smallest / largest size seen for the label is tracked under
    ``'min'`` / ``'max'``.
    """
    stats = result.setdefault(
        ext,
        {'count_large': 0, 'size_large': 0, 'count_all': 0, 'size_all': 0},
    )

    stats['count_all'] += 1
    stats['size_all'] += size_mb

    if size_mb > min_size_mb:
        stats['count_large'] += 1
        stats['size_large'] += size_mb

    # 'min' and 'max' do not exist until the first file for this label.
    if 'max' not in stats or stats['max'] < size_mb:
        stats['max'] = size_mb
    if 'min' not in stats or stats['min'] > size_mb:
        stats['min'] = size_mb
def print_line(ext, share_large, count_large, count_all, size_all, min, max):
    """Print one row of the report as fixed-width columns.

    ``ext`` is left-aligned in a 30-character column; the six remaining
    values are converted with ``str()`` and right-aligned in 10-character
    columns. Values longer than their column are not truncated.

    The row is written with a single-argument ``print(...)`` call, which
    produces the same output on Python 2 and Python 3.
    """
    cells = [ext.ljust(30)]
    for value in (share_large, count_large, count_all, size_all, min, max):
        cells.append(str(value).rjust(10))
    print(''.join(cells))
# Walk the tree below ``cwd`` and record every non-empty file twice: once
# under the catch-all label '** all **' and once under "<bin|txt> - <ext>".
for root, dirs, files in os.walk(cwd):
    at_top = (root == cwd)
    if at_top and '.git' in dirs:
        # Skip Git's own metadata. Pruning the directory here (instead of
        # prefix-matching each path) avoids stat-ing every object inside
        # it and no longer hides siblings such as .gitignore or .github/.
        dirs.remove('.git')

    for basename in files:
        if at_top and basename == '.git':
            # ".git" can also be a plain file at the top level; treat it
            # as metadata as well.
            continue

        filename = os.path.join(root, basename)
        try:
            size_mb = float(os.path.getsize(filename)) / 1024 / 1024
            if size_mb <= 0:
                continue  # empty files carry no weight in the report
            file_type = "bin" if is_binary(filename) else "txt"
        except EnvironmentError as e:
            # Best effort: report unreadable files (broken symlinks,
            # permission errors, ...) and carry on with the scan.
            print(e)
            continue

        add_file('** all **', size_mb)

        # The extension is whatever follows the last dot of the *file name*
        # only, so dots in directory names cannot leak into the label and
        # each file is counted under exactly one label. Files without a
        # dot only show up in the catch-all bucket.
        if '.' in basename:
            ext = basename.rsplit('.', 1)[1]
            add_file(file_type + " - " + ext, size_mb)
# Report: one row per label that has at least one "large" file, ordered by
# the accumulated size of its large files (biggest first).
#   LShare - percentage of the label's files that are large (rounded down)
#   LCount - number of large files
#   Count  - number of all files
#   Size / Min / Max - total, smallest and largest file size in MiB,
#                      truncated to whole numbers
print_line('Extension', 'LShare', 'LCount', 'Count', 'Size', 'Min', 'Max')
print_line('-------','-------','-------','-------','-------','-------','-------')
for ext in sorted(result, key=lambda x: result[x]['size_large'], reverse=True):
    stats = result[ext]
    if stats['count_large'] > 0:
        # Explicit floor division keeps the share an integer on both
        # Python 2 and Python 3 (plain "/" yields a float on Python 3).
        large_share = 100 * stats['count_large'] // stats['count_all']
        print_line(
            ext,
            str(large_share) + ' %',
            stats['count_large'],
            stats['count_all'],
            int(stats['size_all']),
            int(stats['min']),
            int(stats['max'])
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # | |
| # Script that searches the entire history of a Git repository for | |
| # (potentially) unwanted directories. E.g. 3rd party directories, | |
| # temp, build or Perforce stream directories. | |
| # The script also prints the number of files under each directory | |
| # to see the impact on the Git tree. | |
| # | |
# Collect every directory that ever contained a file, on any ref, and keep
# the ones whose path contains one of the suspicious name fragments below.
#
#  - core.quotePath=false makes git print non-ASCII paths verbatim instead
#    of C-quoting them ("\303\244..."); quoted names would never match a
#    path on disk in the existence check that follows.
#  - awk uses the trailing file name (the last run of non-slash characters)
#    as field separator, so $1 is the directory part including its trailing
#    slash; files in the repository root yield an empty line.
#  - grep matches case-insensitively anywhere in the path, so e.g. "bin"
#    also selects "cabinet/". The result is a candidate list for review.
DIRS=$(git -c core.quotePath=false log --all --name-only --pretty=format: \
    | awk -F'[^/]*$' '{print $1}' \
    | sort -u \
    | grep -i \
        -e 3p \
        -e 3rd \
        -e artifacts \
        -e assemblies \
        -e backup \
        -e bin \
        -e build \
        -e components \
        -e debug \
        -e deploy \
        -e generated \
        -e install \
        -e lib \
        -e modules \
        -e obj \
        -e output \
        -e packages \
        -e party \
        -e recycle.bin \
        -e release \
        -e resources \
        -e streams \
        -e temp \
        -e third \
        -e tmp \
        -e tools \
        -e util \
        -e vendor \
        -e x64 \
        -e x86
)
# Print "<file count> <directory>" for every candidate in $DIRS, sorted by
# count. Directories that only exist in history (not in the current
# checkout) get the sentinel count 99999 so they sort to the end.
#
# The list is read line by line instead of being word-split from an
# unquoted $DIRS: that keeps names containing spaces intact, prevents
# glob characters (*, ?, [) in directory names from being expanded, and
# leaves the shell's global IFS untouched.
while IFS= read -r dir; do
    # An empty $DIRS still produces one empty line from the here-string.
    [ -n "$dir" ] || continue
    if [ -e "$dir" ]; then
        FILE_COUNT=$(find "$dir" -type f | wc -l)
        echo "$FILE_COUNT $dir"
    else
        echo "99999 !! DELETED !! $dir"
    fi
done <<< "$DIRS" | sort -n
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment