Last active
February 5, 2023 16:19
-
-
Save prerakmody/41833c6f8cd6afb98b26e53cb7235784 to your computer and use it in GitHub Desktop.
Download open files from GDrive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, time | |
import requests | |
def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file to a local path.

    A first request is made to the Drive download endpoint. If the response
    sets a cookie whose name starts with ``download_warning``, its value is
    sent back as the ``confirm`` parameter in a second request and that
    response body is streamed to ``destination``. If no such cookie is
    present, nothing is downloaded and a diagnostic message is printed.

    Args:
        id: The Drive file id (taken from the shareable link).
        destination: Local file path the content is written to.
    """
    def get_confirm_token(response):
        """Return the value of the ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination``, printing progress."""
        CHUNK_SIZE = 32768
        progress_size = 0
        with open(destination, "wb") as f:
            start_time = time.time()
            for chunk in response.iter_content(CHUNK_SIZE):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                f.write(chunk)
                # Count the bytes actually written: the last chunk is usually
                # shorter than CHUNK_SIZE, and the chunk just written must be
                # included in the reported progress.
                progress_size += len(chunk)
                duration = time.time() - start_time
                if duration == 0:
                    duration = 0.1  # avoid division by zero on the first chunk
                speed = int(progress_size / (1024 * duration))
                sys.stdout.write("\r...%d MB, %d KB/s, %d seconds passed" % (progress_size / (1024 * 1024), speed, duration))

    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print ('Response : ', response)
        token = get_confirm_token(response)
        print ('Token : ', token)
        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
            print ('URL : ', response.url)
            print ('----------------------------------')
            # NOTE: the previous unused lookup of the
            # 'Access-Control-Allow-Headers' response header was removed; it
            # raised KeyError whenever the header was absent.
            save_response_content(response, destination)
        else:
            print ('Something is amiss')
if __name__ == "__main__":
    # Exactly two arguments are expected after the script name. Compare by
    # value (!=): identity comparison against an int literal is unreliable
    # and triggers a SyntaxWarning on modern Python.
    if len(sys.argv) != 3:
        print ("Usage: python google_drive.py <drive_file_id> <destination_file_path>")
    else:
        # TAKE ID FROM SHAREABLE LINK
        file_id = sys.argv[1]
        # DESTINATION FILE ON YOUR DISK
        destination = sys.argv[2]
        download_file_from_google_drive(file_id, destination)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ref 1: https://www.geeksforgeeks.org/uploading-files-on-google-drive-using-python/ | |
# Ref 2: https://pythonhosted.org/PyDrive/filemanagement.html#upload-and-update-file-content | |
# pip install pydrive | |
# pip install httplib2==0.15.0 | |
# You need a file called client_secrets.json; to create it, follow https://pythonhosted.org/PyDrive/quickstart.html
from pydrive.drive import GoogleDrive | |
from pydrive.auth import GoogleAuth | |
# Local file to upload; the same string is used as the file's title on Drive.
filename = 'trial8-zeus-simco30-train-2023-01-22-19-40-14.zip'

# Authenticate with PyDrive's local-webserver flow, then build the Drive client.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# Create the Drive file entry, attach the local file's content and upload it.
f = drive.CreateFile(dict(title=filename))
f.SetContentFile(filename)
f.Upload()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Goal: One-time upload of a large file (as the GDrive UI hangs)

Step 1 - Create an OAuth 2.0 Client ID + Client Secret
    - by following the "Authentication" part of https://pythonhosted.org/PyDrive/quickstart.html
Step 2 - Get an access token
    - from the OAuth playground -> https://developers.google.com/oauthplayground/
        --> Select Drive API v3 -> www.googleapis.com/auth/drive --> Click on "Authorize APIs"
        --> Click on "Exchange authorization code for tokens" --> Copy the access token
        --> Use it in the script below
Step 3 - Run the script as a background process
    - nohup python -u upload_gdrive.py > upload_gdrive.log 2>&1 &
    - tail -f upload_gdrive.log
""" | |
import sys | |
import json | |
import requests | |
from tqdm import tqdm | |
import requests_toolbelt # pip install requests_toolbelt | |
from requests.exceptions import JSONDecodeError | |
import collections | |
class ProgressBar(tqdm):
    """tqdm progress bar that can be driven with absolute positions."""

    def update_to(self, n: int) -> None:
        """Move the bar so that its counter equals ``n``.

        ``update`` takes an increment, so the difference between the
        requested position and the current counter ``self.n`` is passed.
        """
        increment = n - self.n
        self.update(increment)
def upload_file(access_token:str, filename:str, filepath:str):
    """Upload a local file to Google Drive with a multipart request.

    The file is sent together with a small JSON metadata part to the Drive v3
    multipart upload endpoint, and a progress bar on stdout follows the number
    of bytes read from the multipart body.

    Args:
        access_token: OAuth 2.0 access token, sent as a Bearer token.
        filename: Name placed in the metadata part of the upload.
        filepath: Path of the local file whose content is uploaded.

    Returns:
        The decoded JSON body of the server's response (also printed).

    Raises:
        SystemExit: If the response body is not valid JSON; the raw response
            text is used as the exit message.
    """
    metadata = {
        "name": filename,
    }
    # Both the HTTP session and the file handle are released on exit.
    with requests.Session() as session, open(filepath, "rb") as fp:
        # Ordered so the metadata part precedes the file part in the body.
        files = collections.OrderedDict(
            data=("metadata", json.dumps(metadata), "application/json"),
            file=fp,
        )
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor invokes the callback as the body is read, which
            # moves the bar to the absolute byte count.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda m: bar.update_to(m.bytes_read)
            )
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={
                    "Authorization": "Bearer " + access_token,
                    "Content-Type": monitor.content_type,
                },
            )
    try:
        resp = r.json()
    except JSONDecodeError:
        sys.exit(r.text)
    print(resp)
    return resp
# Replace the placeholders with the OAuth access token, the name the upload
# should carry, and the path of the local file before running the script.
upload_file(
    access_token="<access_token>",
    filename="<upload_filename>",
    filepath="<path_to_file>",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment