Skip to content

Instantly share code, notes, and snippets.

@prerakmody
Last active February 5, 2023 16:19
Show Gist options
  • Save prerakmody/41833c6f8cd6afb98b26e53cb7235784 to your computer and use it in GitHub Desktop.
Save prerakmody/41833c6f8cd6afb98b26e53cb7235784 to your computer and use it in GitHub Desktop.
Download open files from GDrive
import sys, time
import requests
def download_file_from_google_drive(id, destination):
    """Download a shared Google Drive file to ``destination``.

    A first request is sent to the Drive download endpoint. If the response
    carries a ``download_warning*`` cookie, its value is sent back as the
    ``confirm`` parameter in a second request and that response body is
    streamed to disk. If no such cookie is present nothing is written and
    a diagnostic message is printed instead.

    Args:
        id: The Drive file id (taken from the shareable link).
        destination: Local path the downloaded bytes are written to.
    """

    def get_confirm_token(response):
        """Return the value of the ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination``, printing progress."""
        CHUNK_SIZE = 32768
        progress_size = 0  # bytes actually written so far
        with open(destination, "wb") as f:
            start_time = time.time()
            for chunk in response.iter_content(CHUNK_SIZE):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                f.write(chunk)
                # Count real bytes: the last chunk is usually shorter than
                # CHUNK_SIZE, and the current chunk must be included.
                progress_size += len(chunk)
                duration = time.time() - start_time
                if duration == 0:
                    duration = 0.1  # avoid division by zero on the first chunk
                speed = int(progress_size / (1024 * duration))
                sys.stdout.write("\r...%d MB, %d KB/s, %d seconds passed" % (progress_size / (1024 * 1024), speed, duration))

    URL = "https://docs.google.com/uc?export=download"
    # The session carries cookies from the first request to the second and
    # is closed on exit so pooled connections are released.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print('Response : ', response)
        token = get_confirm_token(response)
        print('Token : ', token)
        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
            print('URL : ', response.url)
            print('----------------------------------')
            save_response_content(response, destination)
        else:
            print('Something is amiss')
if __name__ == "__main__":
    import sys
    # Expect exactly two arguments after the script name. Compare by value:
    # `is not` tests object identity and is not a reliable integer comparison.
    if len(sys.argv) != 3:
        print ("Usage: python google_drive.py <drive_file_id> <destination_file_path>")
    else:
        # TAKE ID FROM SHAREABLE LINK
        file_id = sys.argv[1]
        # DESTINATION FILE ON YOUR DISK
        destination = sys.argv[2]
        download_file_from_google_drive(file_id, destination)
# Ref 1: https://www.geeksforgeeks.org/uploading-files-on-google-drive-using-python/
# Ref 2: https://pythonhosted.org/PyDrive/filemanagement.html#upload-and-update-file-content
# pip install pydrive
# pip install httplib2==0.15.0
# You need a file called client_secrets.json --> Follow this URL https://pythonhosted.org/PyDrive/quickstart.html
from pydrive.drive import GoogleDrive
from pydrive.auth import GoogleAuth
# Authenticate through PyDrive's local-webserver flow (needs client_secrets.json,
# see the quickstart link above).
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
# Drive client bound to the authenticated session.
drive = GoogleDrive(gauth)
# Local file to upload; the same string is used as the Drive file title.
filename = 'trial8-zeus-simco30-train-2023-01-22-19-40-14.zip'
f = drive.CreateFile({'title': filename})
# Attach the local file's content to the Drive file object, then upload it.
f.SetContentFile(filename)
f.Upload()
"""
Goal: One-time upload of a large file (because the GDrive UI hangs)
Step 1 - Create OAuth 2.0 Client ID + Client Secret
- by following the "Authentication" part of https://pythonhosted.org/PyDrive/quickstart.html
Step 2 - Get Access Token
- from the OAuth playground -> https://developers.google.com/oauthplayground/
--> Select Drive API v3 -> www.googleapis.com/auth/drive --> Click on "Authorize APIs"
--> Click on "Exchange authorization code for tokens" --> "Copy paste the access token"
--> Use it in the script below
Step 3 - Run file as daemon process
- nohup python -u upload_gdrive.py > upload_gdrive.log 2>&1 &
- tail -f upload_gdrive.log
"""
import sys
import json
import requests
from tqdm import tqdm
import requests_toolbelt # pip install requests_toolbelt
from requests.exceptions import JSONDecodeError
import collections
class ProgressBar(tqdm):
    """tqdm progress bar that can be driven with an absolute position."""

    def update_to(self, n: int) -> None:
        """Advance the bar so that its counter reaches ``n``.

        ``update`` takes an increment, so the difference between the
        requested position and the current counter ``self.n`` is passed.
        """
        delta = n - self.n
        self.update(delta)
def upload_file(access_token:str, filename:str, filepath:str):
    """Upload a local file to Google Drive with a multipart request.

    The file is streamed through a ``MultipartEncoder`` so it is not loaded
    into memory at once, and a progress bar is updated as bytes are read.

    Args:
        access_token: OAuth 2.0 access token sent as a Bearer token.
        filename: Value sent as the ``name`` field of the upload metadata.
        filepath: Path of the local file whose bytes are uploaded.

    Returns:
        The JSON-decoded response body. It is also printed to stdout.

    Raises:
        SystemExit: If the response body is not valid JSON; the raw response
            text is used as the exit message.
    """
    metadata = {
        "name": filename,
    }
    # Both the HTTP session and the file handle are released on exit.
    with requests.Session() as session, open(filepath, "rb") as fp:
        # Ordered so the JSON metadata part precedes the file part.
        files = collections.OrderedDict(
            data=("metadata", json.dumps(metadata), "application/json"),
            file=fp,
        )
        encoder = requests_toolbelt.MultipartEncoder(files)
        with ProgressBar(
            total=encoder.len,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
            file=sys.stdout,
        ) as bar:
            # The monitor invokes the callback as the encoder is read, which
            # moves the bar to the absolute number of bytes read so far.
            monitor = requests_toolbelt.MultipartEncoderMonitor(
                encoder, lambda monitor: bar.update_to(monitor.bytes_read)
            )
            r = session.post(
                "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart",
                data=monitor,
                allow_redirects=False,
                headers={
                    "Authorization": "Bearer " + access_token,
                    "Content-Type": monitor.content_type,
                },
            )
    try:
        resp = r.json()
    except JSONDecodeError:
        sys.exit(r.text)
    print(resp)
    return resp
# Replace the three placeholders below before running the script.
upload_file(
    access_token="<access_token>",
    filename="<upload_filename>",
    filepath="<path_to_file>",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment