Skip to content

Instantly share code, notes, and snippets.

@SqrtRyan
Created November 26, 2025 07:16
Show Gist options
  • Select an option

  • Save SqrtRyan/7a433ec1ee985b6127b7bc5262d08a67 to your computer and use it in GitHub Desktop.

Select an option

Save SqrtRyan/7a433ec1ee985b6127b7bc5262d08a67 to your computer and use it in GitHub Desktop.
# Locate (cloning first if necessary) the dataset at `url`, then group its
# .mp4 files into pairs that are later stitched side by side.
url = "https://huggingface.co/datasets/OneOverZero/Calvin__task_ABC_D_h5__training_s224_h"
dataset_parent = "/"

# Shell script that clones the dataset repository into dataset_parent.
clone_command = f"""
cd {dataset_parent}
git lfs install
git clone {url}
""" # sh

dataset_folder = path_join(dataset_parent, get_folder_name(url))

# Skip the clone when the dataset folder is already on disk.
if not folder_exists(dataset_folder):
    r._run_sys_command(clone_command)

# Keep only .mp4 files whose file name contains "_" (needed for the split
# below) and whose path does not mention "gripper".
# (Use `"gripper" in x` instead to select only the gripper videos.)
dataset_files = [
    x
    for x in _get_all_paths_fast(dataset_folder)
    if "_" in get_file_name(x) and x.endswith(".mp4") and "gripper" not in x
]

# Group files by the second "_"-separated token of their file name, keep only
# the groups with exactly two members, and sort each so its order is stable.
video_pairs = cluster_by_key(dataset_files, lambda x: get_file_name(x).split("_")[1])
video_pairs = [sorted(x) for x in video_pairs if len(x) == 2]

# Folder that receives the stitched videos.
output_dataset = dataset_folder + "__processed"
def process(pair):
    """Stitch one pair of videos side by side and save the result as an mp4.

    Args:
        pair: Two video file paths, unpacked as (fa, fb).

    Returns:
        Whatever save_video_mp4 returns (presumably the written path --
        TODO confirm against the rp documentation).
    """
    fa, fb = pair
    va, vb = load_videos(fa, fb, show_progress=False)
    v = horizontally_concatenated_videos(va, vb)

    # The output name joins both source file names with "____".
    # NOTE(review): if get_file_name keeps the extension, names will look
    # like "a.mp4____b.mp4" -- confirm this is intended.
    name = get_file_name(fa) + "____" + get_file_name(fb)
    path = path_join(output_dataset, name)
    return save_video_mp4(v, path, show_progress=False)
# Apply `process` to every video pair, with a progress display.
# NOTE(review): strict=True presumably makes any failed pair raise instead of
# being skipped -- confirm against the rp.load_files documentation.
ans = load_files(process, video_pairs, show_progress=True, strict=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment