Skip to content

Instantly share code, notes, and snippets.

@pszemraj
Created December 12, 2023 01:43
Show Gist options
  • Save pszemraj/41079a0123ed84b6bef5c2e68cac150b to your computer and use it in GitHub Desktop.
Save pszemraj/41079a0123ed84b6bef5c2e68cac150b to your computer and use it in GitHub Desktop.
The script is designed to monitor a specified directory for any file system changes (like additions, deletions, or modifications of files and subdirectories) and automatically upload the changes to a specified repository on the Hugging Face Hub.
"""
Monitor a directory for file system changes (additions, deletions, or
modifications of files and subdirectories) and automatically upload the folder
to a specified repository on the Hugging Face Hub whenever a change is detected.

Requirements:
    pip install huggingface-hub watchdog
"""
import argparse
import logging
import threading
import time
from pathlib import Path
from typing import Optional

from huggingface_hub import upload_folder
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
def validate_inputs(
    repo_id: str, folder_path: Path, path_in_repo: Optional[str]
) -> None:
    """
    Validates the input parameters.

    Args:
        repo_id (str): The repository ID in 'username/repository' format.
        folder_path (Path): The path to the folder that will be uploaded.
        path_in_repo (Optional[str]): The path within the repository where the
            folder will be uploaded. ``None`` or an empty string is accepted
            without further checks.

    Raises:
        ValueError: If repo_id is not exactly two non-empty parts separated by
            a single '/'.
        FileNotFoundError: If the folder_path does not exist or is not a directory.
        ValueError: If path_in_repo starts or ends with '/'.
    """
    # Require exactly "<owner>/<name>". A plain "'/' in repo_id" test would
    # also let through malformed IDs such as "/repo", "user/" or "a/b/c".
    owner, separator, name = repo_id.partition("/")
    if not separator or not owner.strip() or not name.strip() or "/" in name:
        raise ValueError(
            "Invalid repo_id format. It should be in 'username/repository' format."
        )

    # Path.is_dir() is already False for a missing path, so one check covers
    # both "does not exist" and "exists but is not a directory".
    if not folder_path.is_dir():
        raise FileNotFoundError(
            f"The folder path '{folder_path}' does not exist or is not a directory."
        )

    if path_in_repo and (path_in_repo.startswith("/") or path_in_repo.endswith("/")):
        raise ValueError("path_in_repo should not start or end with '/'.")
def upload_to_huggingface(
    repo_id: str, folder_path: Path, path_in_repo: Optional[str]
) -> None:
    """
    Uploads the folder to Hugging Face Hub.

    Files matching ``*.pt*`` are excluded from the upload. Failures are logged
    (including the traceback) and not re-raised, so a failed upload never
    propagates to the caller.

    Args:
        repo_id (str): The repository ID in 'username/repository' format.
        folder_path (Path): The path to the folder that will be uploaded.
        path_in_repo (Optional[str]): The path within the repository where the folder will be uploaded.
    """
    try:
        upload_folder(
            repo_id=repo_id,
            folder_path=str(folder_path),
            path_in_repo=path_in_repo,
            ignore_patterns="*.pt*",
            commit_message="Automated upload due to directory change",
        )
    except Exception as e:
        # Deliberate best-effort boundary: log and carry on. logging.exception
        # keeps the same message but also records the traceback, which a plain
        # error(str(e)) call would discard.
        logging.exception("An error occurred during upload: %s", e)
    else:
        logging.info("Upload completed successfully.")
class ChangeHandler(FileSystemEventHandler):
    """
    Handler for file system changes, triggering upload to Hugging Face Hub.

    Events are coalesced instead of being handled one by one: the first event
    schedules a single upload ``delay`` seconds later on a timer thread, and
    every further event that arrives while that upload is still pending is
    absorbed by it. The event callback therefore returns immediately instead
    of sleeping and uploading once per event.

    Attributes:
        repo_id (str): The repository ID.
        folder_path (Path): The path to the folder that will be uploaded.
        path_in_repo (Optional[str]): The path within the repository where the folder will be uploaded.
        delay (float): Seconds to wait after the first change before uploading.
    """

    # Event types assumed to mean "file was only opened / closed without being
    # written". Reading the folder during an upload may produce such events;
    # reacting to them would re-trigger uploads without any real change.
    # NOTE(review): literal values taken from watchdog's event-type constants;
    # confirm against the installed watchdog version.
    _READ_ONLY_EVENT_TYPES = frozenset({"opened", "closed_no_write"})

    def __init__(
        self,
        repo_id: str,
        folder_path: Path,
        path_in_repo: Optional[str],
        delay: float = 15.0,
    ) -> None:
        """
        Initializes the ChangeHandler with repository information.

        Args:
            repo_id (str): The repository ID in 'username/repository' format.
            folder_path (Path): The path to the folder that will be uploaded.
            path_in_repo (Optional[str]): The path within the repository where the folder will be uploaded.
            delay (float): Seconds to wait after the first detected change
                before the upload starts (default: 15.0).
        """
        super().__init__()
        self.repo_id = repo_id
        self.folder_path = folder_path
        self.path_in_repo = path_in_repo
        self.delay = delay
        # Timer for the upload that is scheduled but has not started yet.
        self._pending_timer: Optional[threading.Timer] = None
        # Guards _pending_timer, which is touched by the event callback and by
        # the timer thread.
        self._state_lock = threading.Lock()
        # Serializes uploads in case one is still running when the next fires.
        self._upload_lock = threading.Lock()

    def on_any_event(self, event) -> None:
        """
        Responds to a file system event by scheduling an upload.

        Args:
            event: The event object representing the file system event.
        """
        if getattr(event, "event_type", None) in self._READ_ONLY_EVENT_TYPES:
            return

        logging.info(f"Change detected: {event}")
        with self._state_lock:
            if self._pending_timer is not None:
                # An upload is already scheduled; it uploads the whole folder
                # and will therefore include this change as well.
                return
            # Non-daemon timer: a scheduled upload still runs to completion
            # before the interpreter exits.
            self._pending_timer = threading.Timer(self.delay, self._run_upload)
            self._pending_timer.start()

    def _run_upload(self) -> None:
        """Runs the scheduled upload on the timer thread."""
        # Clear the marker first so that changes made while this upload is in
        # progress schedule a follow-up upload instead of being dropped.
        with self._state_lock:
            self._pending_timer = None
        with self._upload_lock:
            upload_to_huggingface(self.repo_id, self.folder_path, self.path_in_repo)
def main() -> None:
    """
    Main function to set up the folder monitoring and upload process.

    Parses the command line, validates the inputs, starts a recursive watchdog
    observer on the folder and then blocks until the observer stops or the
    user presses Ctrl+C. The observer is always stopped and joined on exit.
    """
    parser = argparse.ArgumentParser(
        description="Monitor a folder and upload to Hugging Face Hub on changes."
    )
    parser.add_argument(
        "repo_id",
        type=str,
        help="Repository ID on Hugging Face (e.g., 'username/repo_name')",
    )
    parser.add_argument(
        "folder_path", type=Path, help="Path to the folder to be monitored"
    )
    parser.add_argument(
        "-p",
        "--path_in_repo",
        type=str,
        default=None,
        help="Path in the repository where the folder will be uploaded (default: None)",
    )
    parser.add_argument(
        "-f",
        "--check_freq",
        type=int,
        default=30,
        help="Frequency (in seconds) to check for changes (default: 30)",
    )
    parser.add_argument(
        "-d",
        "--delay",
        type=float,
        default=15.0,
        help="Seconds to wait after a detected change before uploading (default: 15)",
    )
    args = parser.parse_args()

    # A zero interval would turn the wait loop below into a busy loop, and a
    # negative one is not a meaningful timeout; reject both up front.
    if args.check_freq <= 0:
        parser.error("--check_freq must be a positive number of seconds")
    if args.delay < 0:
        parser.error("--delay must not be negative")

    validate_inputs(args.repo_id, args.folder_path, args.path_in_repo)
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )

    event_handler = ChangeHandler(
        args.repo_id, args.folder_path, args.path_in_repo, delay=args.delay
    )
    observer = Observer()
    observer.schedule(event_handler, path=str(args.folder_path), recursive=True)
    observer.start()
    logging.info(f"Monitoring folder:\t{args.folder_path}")
    try:
        # Wake up every check_freq seconds to see whether the observer thread
        # is still running, so the process exits if the observer dies instead
        # of idling forever.
        while observer.is_alive():
            observer.join(args.check_freq)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop monitoring.
        pass
    finally:
        # Shut the observer down on every exit path, not only on Ctrl+C.
        observer.stop()
        observer.join()
        logging.info(f"Stopping monitoring:\t{args.folder_path}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment