Created
December 12, 2023 01:43
-
-
Save pszemraj/41079a0123ed84b6bef5c2e68cac150b to your computer and use it in GitHub Desktop.
The script is designed to monitor a specified directory for any file system changes (like additions, deletions, or modifications of files and subdirectories) and automatically upload the changes to a specified repository on the Hugging Face Hub.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Monitor a directory for file system changes (additions, deletions, or modifications of files and subdirectories) and automatically upload the folder to a repository on the Hugging Face Hub whenever a change is detected.

Requirements:
    pip install huggingface-hub watchdog
""" | |
import argparse
import logging
import threading
import time
from pathlib import Path
from typing import Optional

from huggingface_hub import upload_folder
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
def validate_inputs(
    repo_id: str, folder_path: Path, path_in_repo: Optional[str]
) -> None:
    """
    Validates the input parameters.

    Args:
        repo_id (str): The repository ID in 'username/repository' format.
        folder_path (Path): The path to the folder that will be uploaded.
        path_in_repo (Optional[str]): The path within the repository where the
            folder will be uploaded. ``None`` or an empty string skips the check.

    Raises:
        ValueError: If repo_id is not exactly '<username>/<repository>' with
            both parts non-empty.
        FileNotFoundError: If the folder_path does not exist or is not a directory.
        ValueError: If path_in_repo starts or ends with '/'.
    """
    # A bare "'/' in repo_id" test would accept "/", "user/", "/repo" and
    # "a/b/c"; require exactly one separator with a non-empty name on each side.
    namespace, separator, repo_name = repo_id.partition("/")
    if not (separator and namespace and repo_name) or "/" in repo_name:
        raise ValueError(
            "Invalid repo_id format. It should be in 'username/repository' format."
        )

    # is_dir() is already False for a missing path, so no separate exists() check.
    if not folder_path.is_dir():
        raise FileNotFoundError(
            f"The folder path '{folder_path}' does not exist or is not a directory."
        )

    if path_in_repo and (path_in_repo.startswith("/") or path_in_repo.endswith("/")):
        raise ValueError("path_in_repo should not start or end with '/'.")
def upload_to_huggingface(
    repo_id: str, folder_path: Path, path_in_repo: Optional[str]
) -> None:
    """
    Push the contents of a local folder to a Hugging Face Hub repository.

    Files matching ``*.pt*`` are excluded from the upload. Any exception raised
    while uploading is logged and swallowed, so this function never raises.

    Args:
        repo_id (str): The repository ID in 'username/repository' format.
        folder_path (Path): The local folder to upload.
        path_in_repo (Optional[str]): Destination path inside the repository.
    """
    try:
        upload_kwargs = dict(
            repo_id=repo_id,
            folder_path=str(folder_path),
            path_in_repo=path_in_repo,
            ignore_patterns="*.pt*",
            commit_message="Automated upload due to directory change",
        )
        upload_folder(**upload_kwargs)
        logging.info("Upload completed successfully.")
    except Exception as err:
        # Best effort: a failed upload is reported but must not propagate.
        logging.error(f"An error occurred during upload: {err}")
class ChangeHandler(FileSystemEventHandler):
    """
    Handler for file system changes, triggering upload to Hugging Face Hub.

    Events are debounced: each relevant event (re)starts a timer of ``delay``
    seconds, and a single upload runs once no further event has arrived within
    that window. A burst of changes therefore produces one upload instead of
    one sleep-plus-upload cycle per event, and the event callback returns
    immediately instead of sleeping.

    Attributes:
        repo_id (str): The repository ID.
        folder_path (Path): The path to the folder that will be uploaded.
        path_in_repo (Optional[str]): The path within the repository where the folder will be uploaded.
        delay (float): Quiet period, in seconds, to wait after the last event before uploading.
    """

    # Event types that only signal a file being read. The upload itself reads
    # every file, so reacting to these would make each upload schedule another.
    # NOTE(review): these names follow watchdog's "opened" / "closed_no_write"
    # event types; older watchdog releases never emit them.
    _READ_ONLY_EVENT_TYPES = frozenset({"opened", "closed_no_write"})

    def __init__(
        self,
        repo_id: str,
        folder_path: Path,
        path_in_repo: Optional[str],
        delay: float = 15.0,
    ) -> None:
        """
        Initializes the ChangeHandler with repository information.

        Args:
            repo_id (str): The repository ID in 'username/repository' format.
            folder_path (Path): The path to the folder that will be uploaded.
            path_in_repo (Optional[str]): The path within the repository where the folder will be uploaded.
            delay (float): Seconds to wait after the most recent event before uploading (default: 15.0).
        """
        super().__init__()
        self.repo_id = repo_id
        self.folder_path = folder_path
        self.path_in_repo = path_in_repo
        self.delay = delay
        # Pending debounce timer, if any, and the lock guarding its replacement.
        self._timer: Optional[threading.Timer] = None
        self._timer_lock = threading.Lock()
        # Serializes uploads in case one is still running when the next timer fires.
        self._upload_lock = threading.Lock()

    def on_any_event(self, event) -> None:
        """
        Responds to a file system event by scheduling a debounced upload.

        Args:
            event: The event object representing the file system event.
        """
        if getattr(event, "event_type", None) in self._READ_ONLY_EVENT_TYPES:
            return

        logging.info(f"Change detected: {event}")
        with self._timer_lock:
            # Restart the quiet-period countdown on every new event.
            if self._timer is not None:
                self._timer.cancel()
            self._timer = threading.Timer(self.delay, self._run_upload)
            self._timer.start()

    def _run_upload(self) -> None:
        """Uploads the folder; runs on the timer thread once the quiet period ends."""
        with self._upload_lock:
            upload_to_huggingface(self.repo_id, self.folder_path, self.path_in_repo)
def main() -> None:
    """
    Main function to set up the folder monitoring and upload process.

    Parses the command line, validates the arguments, starts a recursive
    watchdog observer on the folder, and blocks until the observer stops or
    the user presses Ctrl-C. The observer is always stopped and joined on the
    way out, including when an unexpected exception occurs.

    Raises:
        ValueError: If repo_id or path_in_repo is malformed (from validate_inputs).
        FileNotFoundError: If folder_path is not an existing directory.
        SystemExit: If the command line is invalid (raised by argparse).
    """
    parser = argparse.ArgumentParser(
        description="Monitor a folder and upload to Hugging Face Hub on changes."
    )
    parser.add_argument(
        "repo_id",
        type=str,
        help="Repository ID on Hugging Face (e.g., 'username/repo_name')",
    )
    parser.add_argument(
        "folder_path", type=Path, help="Path to the folder to be monitored"
    )
    parser.add_argument(
        "-p",
        "--path_in_repo",
        type=str,
        default=None,
        help="Path in the repository where the folder will be uploaded (default: None)",
    )
    parser.add_argument(
        "-f",
        "--check_freq",
        type=int,
        default=30,
        # Changes are pushed by the observer as they happen; this value only
        # controls how often the main loop wakes up.
        help=(
            "Interval (in seconds) at which the main loop wakes to check that "
            "the observer is still running (default: 30)"
        ),
    )
    args = parser.parse_args()

    # A zero or negative interval would turn the wait loop into a busy loop.
    if args.check_freq <= 0:
        parser.error("--check_freq must be a positive number of seconds")

    validate_inputs(args.repo_id, args.folder_path, args.path_in_repo)

    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )

    event_handler = ChangeHandler(args.repo_id, args.folder_path, args.path_in_repo)
    observer = Observer()
    observer.schedule(event_handler, path=str(args.folder_path), recursive=True)
    observer.start()
    logging.info(f"Monitoring folder:\t{args.folder_path}")

    try:
        # Leave the loop if the observer thread dies instead of sleeping forever.
        while observer.is_alive():
            observer.join(args.check_freq)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to end monitoring; fall through to cleanup.
        pass
    finally:
        observer.stop()
        observer.join()
    logging.info(f"Stopping monitoring:\t{args.folder_path}")
# Run the watcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment