Created
May 17, 2024 18:20
-
-
Save sr105/c99cc6e85a45e9e0fdaee19d6c70af08 to your computer and use it in GitHub Desktop.
Python method to keep two folders in sync (in one direction)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from pathlib import Path | |
import shutil | |
# I needed a utility on Windows to sync log files from a very slow network drive to a local drive
# for faster analysis. I'm not sure why Windows doesn't support this. This is pretty quick. I run it
# just before analysis, and it grabs all the newer files and any that have changed.
def sync_dirs(src: Path, dst: Path) -> None:
    """Copy new or changed files from src to dst (one direction, top level only).

    A file is skipped when dst already holds a file with the same name, size,
    and modification time. Entries in src that are not files (for example
    subdirectories) are ignored, and nothing is ever removed from dst.

    Args:
        src: Directory to copy from. Must exist.
        dst: Directory to copy into. Created (including parents) if missing.

    Raises:
        OSError: If src cannot be scanned or a file cannot be copied.
    """
    # Accept anything Path() accepts so callers may also pass plain strings.
    src, dst = Path(src), Path(dst)

    print("Sync Dirs:")
    print(f"\tSrc: {src.as_posix()}")
    print(f"\tDst: {dst.as_posix()}")

    def metadata_match(stat1: os.stat_result, stat2: os.stat_result) -> bool:
        # I'm on Windows, and it can't sync atime and ctime, so only size and
        # mtime are compared.
        return (stat1.st_size, stat1.st_mtime) == (stat2.st_size, stat2.st_mtime)

    # It is *so* much faster to use os.scandir(): https://stackoverflow.com/a/2485843/47078
    # The context manager releases the directory handle deterministically.
    with os.scandir(src) as entries:
        # Only reached when src could be opened, so a bad src never creates dst.
        dst.mkdir(parents=True, exist_ok=True)

        for entry in entries:
            if not entry.is_file():
                continue

            src_stat = entry.stat()
            dest = dst / entry.name

            # One stat() call instead of exists() + stat(): fewer round-trips
            # and no window between the check and the read.
            try:
                up_to_date = metadata_match(src_stat, dest.stat())
            except FileNotFoundError:
                up_to_date = False

            if up_to_date:
                continue

            print(f"\t{entry.name}")
            # copy2 also copies the modification time, which is what lets the
            # next run recognise this file as unchanged.
            shutil.copy2(src / entry.name, dest)
if __name__ == "__main__":
    # Example run: pull files from the slow network share into the local copy.
    src, dst = Path('I:/slow/network/path'), Path('C:/fast/local/path')
    sync_dirs(src=src, dst=dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment