Created
November 26, 2025 07:16
-
-
Save SqrtRyan/7a433ec1ee985b6127b7bc5262d08a67 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source dataset (a Hugging Face git repository) and the directory it is cloned into.
url = "https://huggingface.co/datasets/OneOverZero/Calvin__task_ABC_D_h5__training_s224_h"
dataset_parent = "/"

# Shell script that fetches the dataset with git-lfs enabled.
clone_command = f"""
cd {dataset_parent}
git lfs install
git clone {url}
"""  # sh

# Local checkout location. Presumably get_folder_name(url) yields the last path
# component of the URL, i.e. the directory `git clone` creates -- TODO confirm.
dataset_folder = path_join(dataset_parent, get_folder_name(url))

# Combined videos are written next to the checkout, in a "__processed" sibling.
output_dataset = dataset_folder + "__processed"

# Only download when the checkout is not already present.
if not folder_exists(dataset_folder):
    r._run_sys_command(clone_command)

# Keep the .mp4 files whose file name contains an underscore and whose path
# does not mention "gripper". To select the gripper videos instead, flip the
# last test to `"gripper" in video_path`.
dataset_files = [
    video_path
    for video_path in _get_all_paths_fast(dataset_folder)
    if "_" in get_file_name(video_path)
    and video_path.endswith(".mp4")
    and "gripper" not in video_path
]

# Group files by the second underscore-separated token of their file name,
# keep only the groups with exactly two members, and sort each group so the
# order inside a pair is deterministic.
video_pairs = [
    sorted(group)
    for group in cluster_by_key(
        dataset_files,
        lambda video_path: get_file_name(video_path).split("_")[1],
    )
    if len(group) == 2
]
def process(pair):
    """Concatenate the two videos of ``pair`` horizontally and save one mp4.

    ``pair`` is a two-element sequence of video file paths. The result is
    written inside ``output_dataset`` under a name built from both input file
    names joined by "____". Returns whatever ``save_video_mp4`` returns
    (presumably the saved path -- TODO confirm).
    """
    first_path, second_path = pair
    first_video, second_video = load_videos(
        first_path,
        second_path,
        show_progress=False,
    )
    combined = horizontally_concatenated_videos(first_video, second_video)
    output_name = "____".join(
        [get_file_name(first_path), get_file_name(second_path)]
    )
    return save_video_mp4(
        combined,
        path_join(output_dataset, output_name),
        show_progress=False,
    )
# Run `process` over every video pair, showing a progress indicator.
# NOTE(review): strict=True presumably makes a failing pair raise instead of
# being skipped -- confirm against load_files' documentation.
ans = load_files(process, video_pairs, show_progress=True, strict=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment