Last active
March 15, 2018 14:20
-
-
Save AntonOsika/67bf6bccd7aede1a1d4d7d069ebd2cbb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import datetime | |
def excluded_datetimes(start_str): | |
""" | |
Creates regex matching every datetime chronologically before start_str. | |
Can be used to exclude files/folders with e.g. rsync file downloads. | |
""" | |
# Datetime format: | |
fmt = '%Y%m%d_%H%M%S' | |
in_len = len(start_str) | |
if (len(fmt) - in_len) % 2 == 0: | |
# Make input match datetime format if substring: | |
fmt = fmt[:in_len] | |
else: | |
print("start datetime given in wrong format") | |
return | |
start_dt = datetime.datetime.strptime(start_str, fmt) | |
excluded = [] | |
start_dt_c = start_dt | |
# Walk backwards in time and exclude everything in: [year 0 AC, start_str): | |
while start_dt_c.second > 0: | |
start_dt_c = start_dt_c - datetime.timedelta(seconds=1) | |
excluded.append(start_dt_c.strftime(fmt)) | |
while start_dt_c.minute > 0: | |
start_dt_c = start_dt_c - datetime.timedelta(minutes=1) | |
excluded.append(start_dt_c.strftime(fmt[:-2]) + '\d{2}') | |
while start_dt_c.hour > 0: | |
start_dt_c = start_dt_c - datetime.timedelta(hours=1) | |
excluded.append(start_dt_c.strftime(fmt[:-4]) + '\d{4}') | |
while start_dt_c.month == start_dt.month: | |
start_dt_c = start_dt_c - datetime.timedelta(days=1) | |
excluded.append(start_dt_c.strftime(fmt[:-7]) + '_\d{6}') | |
while start_dt_c.year == start_dt.year: | |
start_dt_c = start_dt_c - datetime.timedelta(weeks=1) | |
excluded.append(start_dt_c.strftime(fmt[:-9]) + '\d{2}_\d{6}') | |
excluded += [str(y) + '\d{4}_\d{6}' for y in range(2000, start_dt.year)] | |
excluded += ['1' + '\d{7}_\d{6}', '0' + '\d{7}_\d{6}'] | |
exclude_str = '.*|.*'.join(excluded) | |
exclude_str = '.*' + exclude_str + '.*' | |
return exclude_str | |
# If used to sync GCS with gsutil, this file can be run directly as a script
# (in which case it should be renamed to e.g. rsync_selection.py).
import argparse | |
from subprocess import call | |
if __name__ == '__main__':
    # Command-line entry point: sync `source` to `dest` with gsutil rsync,
    # excluding everything timestamped before `start`.
    parser = argparse.ArgumentParser()
    parser.add_argument('source')
    parser.add_argument('dest')
    parser.add_argument('start')
    parser.add_argument('--exclude', default='')
    flags = parser.parse_args()

    exclude_str = excluded_datetimes(flags.start)
    if exclude_str is None:
        # excluded_datetimes signals a badly formatted start value with None;
        # stop here instead of passing a bogus pattern to gsutil.
        parser.error("invalid start datetime: {!r}".format(flags.start))

    # Any user-supplied exclusion is added as one more regex alternative.
    if flags.exclude:
        exclude_str += '|' + flags.exclude

    # Build the argument list directly so that whitespace inside a path or
    # pattern cannot split it into several arguments.
    cmd = ['gsutil', '-m', 'rsync', '-r', '-x',
           exclude_str, flags.source, flags.dest]

    print("Running rsync command:")
    print(' '.join(cmd))
    # Propagate gsutil's exit status as this script's exit status.
    raise SystemExit(call(cmd))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment