Last active
March 19, 2023 00:31
-
-
Save mka142/c363c9acca4ff41fa792a668527206d9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on
# @immuntasir
# immuntasir/drive_script_gen.ipynb
# Script that generates a .sh file which downloads all files under the given Google Drive parent folder id and saves them in the same folder tree.
# Usage:
# python gdrive_download_folder.py -s <folder_id> -f output_file_name
# Import the libraries
import argparse
from collections import deque

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Command-line interface: which Drive folder to mirror, the base name used for
# the generated script, and the directory under which the tree is created.
parser = argparse.ArgumentParser(
    description="Generate a bash script that downloads a Google Drive folder tree with wget."
)
# Set the id of the Google Drive folder. You can find it in the URL of the google drive folder.
# Required: without it the Drive query would look for the literal parent 'None'.
parser.add_argument(
    "-s",
    "--source",
    type=str,
    required=True,
    help="id of the Google Drive folder to download",
)
# Required: the value is concatenated into the script name, the wget log file
# name and the download directory, so a missing value would fail with a
# TypeError further down instead of a usage message.
parser.add_argument(
    "-f",
    "--file",
    type=str,
    required=True,
    help="base name for the generated .sh script, the wget log file and the download directory",
)
# The default is the literal text "$PWD": it is written into the generated
# script inside double quotes, so bash expands it when the script runs.
parser.add_argument(
    "-o",
    "--output",
    type=str,
    default="$PWD",
    help='directory under which the folder tree is created (default: "$PWD")',
)
args = parser.parse_args()
parent_folder_id = args.source
file_name = args.file
# Set the parent folder, where you want to store the contents of the google drive folder
parent_folder_dir = args.output
# Authenticate against Google Drive, reusing the credentials cached in
# "mycreds.txt" when there are any. When nothing is cached a browser window
# will open; log in using the appropriate account.
gauth = GoogleAuth()
gauth.LoadCredentialsFile("mycreds.txt")
if gauth.credentials is None:
    # No saved credentials: run the interactive local-webserver flow.
    gauth.LocalWebserverAuth()
elif not gauth.access_token_expired:
    # Saved credentials are still valid: just initialise them.
    gauth.Authorize()
else:
    # Saved credentials exist but the access token has expired.
    gauth.Refresh()
# Persist whatever credentials we ended up with for the next run.
gauth.SaveCredentialsFile("mycreds.txt")
drive = GoogleDrive(gauth)
# Build the download root "<output>/<file_name>/".
# endswith() replaces indexing with [-1] so that an empty --output no longer
# raises IndexError; an empty value is treated as "relative to the current
# directory" and therefore gets no leading "/".
if parent_folder_dir and not parent_folder_dir.endswith("/"):
    parent_folder_dir += "/"
parent_folder_dir += file_name + "/"
# This is the base wget command that we will use. This might change in the future due to changes in Google drive
# FILE_ID and FILE_NAME are placeholders substituted per file when the script
# is written. The template is deliberately wrapped in one leading and one
# trailing double quote: the writer strips them again with wget_text[1:-1].
# wget appends its output to the log file "logfile<file_name>".
# (The former trailing .replace("&", "&") was a no-op and has been dropped;
# the resulting string is unchanged.)
wget_text = (
    '"wget -a logfile' + file_name
    + " --load-cookies /tmp/cookies.txt "
    + '"https://docs.google.com/uc?export=download&confirm='
    + "$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies "
    + "--no-check-certificate 'https://docs.google.com/uc?export=download&id=FILE_ID' -O- "
    + "| sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')"
    + '&id=FILE_ID" -O "FILE_NAME" && rm -rf /tmp/cookies.txt"'
)
def _escape_for_double_quotes(text):
    """Backslash-escape the characters bash treats specially inside "..."."""
    # The backslash must be handled first so the backslashes added for the
    # other characters are not escaped a second time.
    for special in ("\\", '"', "$", "`"):
        text = text.replace(special, "\\" + special)
    return text


# Get the folder structure
# Breadth-first walk of the Drive folder tree. file_dict maps a running index
# (0, 1, 2, ...) to one record per Drive entry with the keys "id", "title",
# "dir" (target path used in the generated script) and "type" ("folder" or
# "file"). Parents are always recorded before their children.
file_dict = dict()
# Each queue item pairs a Drive folder id with the local directory it maps to.
pending_folders = deque([(parent_folder_id, parent_folder_dir)])
while pending_folders:
    current_folder_id, current_parent = pending_folders.popleft()
    file_list = drive.ListFile(
        {"q": "'{}' in parents and trashed=false".format(current_folder_id)}
    ).GetList()
    print(current_parent, current_folder_id)
    for file1 in file_list:
        entry = {
            "id": file1["id"],
            "title": file1["title"],
            # Titles come from Drive and end up inside double-quoted strings
            # of the generated bash script, so they are escaped here. The
            # prefix is left alone on purpose: it may contain "$PWD", which
            # must still be expanded by the shell.
            # NOTE(review): titles containing "/" are not handled and would
            # be interpreted as extra path components.
            "dir": current_parent + _escape_for_double_quotes(file1["title"]),
        }
        if file1["mimeType"] == "application/vnd.google-apps.folder":
            entry["type"] = "folder"
            entry["dir"] += "/"
            # Visit the sub-folder later, rooted at its own directory.
            pending_folders.append((file1["id"], entry["dir"]))
        else:
            entry["type"] = "file"
        # Keys stay consecutive integers in discovery order.
        file_dict[len(file_dict)] = entry
# Write the bash script
# The script is written to "<file_name>.sh". It creates the download root,
# defines a progress helper (call_completed) and a download_tree function
# holding one mkdir/wget command per collected entry, then calls download_tree.
all_files = len(file_dict)
# "with" guarantees the file is closed even if a write fails; UTF-8 keeps
# non-ASCII Drive titles intact regardless of the platform default encoding.
with open(file_name + ".sh", "w", encoding="utf-8") as f:
    f.write(f'#!/bin/bash\nmkdir -p "{parent_folder_dir}"\n')
    f.write(
        """function call_completed () {
echo -ne "$(( 100*$1/$2))% | Downloaded $1 of $2 ... \\r"
}
function download_tree {
"""
    )
    # Entries were inserted parent-first, so iterating in insertion order
    # creates every directory before anything is downloaded into it.
    for current_file, entry in enumerate(file_dict.values(), start=1):
        if entry["type"] == "folder":
            # -p keeps the generated script re-runnable when the directory
            # already exists.
            f.write('mkdir -p "' + entry["dir"] + '"\n')
        else:
            # wget_text is wrapped in one pair of double quotes; strip them
            # and fill in the per-file placeholders.
            f.write(
                wget_text[1:-1]
                .replace("FILE_ID", entry["id"])
                .replace("FILE_NAME", entry["dir"])
                + "\n"
            )
        # Progress line after every entry: "<done> of <total>".
        f.write(f"call_completed {current_file} {all_files}\n")
    f.write(
        """echo -ne '\\r\\n'
echo -ne "Downloaded!\\n"
}\n"""
    )
    f.write("download_tree\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment