giuliano-macedo · September 7, 2022 14:43
diff --git a/gdown_folder_regex.py b/gdown_folder_regex.py
 import re
 from bs4 import BeautifulSoup
 import requests
 import gdown
 import json

 import argparse

 # Download every file listed on a Google Drive folder page.
 # Usage: python gdown_folder_regex.py <folder-url>
 parser=argparse.ArgumentParser()
 parser.add_argument("url")
 args=parser.parse_args()

 r=requests.get(args.url)
 r.raise_for_status() # fail fast on an HTTP error instead of parsing an error page
 soup=BeautifulSoup(r.text,"lxml")

 # Naive single-quoted string matcher; it does not understand escaped quotes (\').
 string_regex=re.compile(r"\'([^\']+)\'")

 encoded_data=None
 for script in soup.select("script"):
 	if "_DRIVE_ivd" not in script.text: # hacky script tag search
 		continue
 	strings=string_regex.findall(script.text)
 	# The first quoted string is expected to be the '_DRIVE_ivdc' key and the
 	# second one the payload; skip tags that do not have both instead of
 	# crashing with an IndexError.
 	if len(strings)<2:
 		continue
 	encoded_data=strings[1]
 	break
 if encoded_data is None:
 	raise RuntimeError("Didn't found script tag")

 # The payload is backslash-escaped. "unicode_escape" decodes bytes as Latin-1,
 # so encode with Latin-1 (escaping anything outside it) rather than UTF-8,
 # which would garble non-ASCII characters.
 decoded=encoded_data.encode("latin-1","backslashreplace").decode("unicode_escape")

 data=json.loads(decoded)

 # The first element of each entry in data[0] is used as the file id
 # (layout found empirically by the original author - TODO confirm).
 ids=[elem[0] for elem in data[0]]

 for id_ in ids:
 	gdown.download('https://drive.google.com/uc?id='+id_)
	import re
	from bs4 import BeautifulSoup
	import requests
	import gdown
	import json

	import argparse

	# Download every file listed on a Google Drive folder page.
	# Usage: python gdown_folder_regex.py <folder-url>
	parser=argparse.ArgumentParser()
	parser.add_argument("url")
	args=parser.parse_args()

	r=requests.get(args.url)
	r.raise_for_status() # fail fast on an HTTP error instead of parsing an error page
	soup=BeautifulSoup(r.text,"lxml")

	# Naive single-quoted string matcher; it does not understand escaped quotes (\').
	string_regex=re.compile(r"\'([^\']+)\'")

	encoded_data=None
	for script in soup.select("script"):
		if "_DRIVE_ivd" not in script.text: # hacky script tag search
			continue
		strings=string_regex.findall(script.text)
		# The first quoted string is expected to be the '_DRIVE_ivdc' key and the
		# second one the payload; skip tags that do not have both instead of
		# crashing with an IndexError.
		if len(strings)<2:
			continue
		encoded_data=strings[1]
		break
	if encoded_data is None:
		raise RuntimeError("Didn't found script tag")

	# The payload is backslash-escaped. "unicode_escape" decodes bytes as Latin-1,
	# so encode with Latin-1 (escaping anything outside it) rather than UTF-8,
	# which would garble non-ASCII characters.
	decoded=encoded_data.encode("latin-1","backslashreplace").decode("unicode_escape")

	data=json.loads(decoded)

	# The first element of each entry in data[0] is used as the file id
	# (layout found empirically by the original author - TODO confirm).
	ids=[elem[0] for elem in data[0]]

	for id_ in ids:
		gdown.download('https://drive.google.com/uc?id='+id_)