Created
April 7, 2025 12:13
-
-
Save 0187773933/0c57108e8759afb694d12c5726f7c1ac to your computer and use it in GitHub Desktop.
FSL Course PDF Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import requests | |
from requests.adapters import HTTPAdapter | |
from urllib3.util.retry import Retry | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
import time | |
import json | |
from tqdm import tqdm | |
import base64 | |
from pathlib import Path | |
from pdfmerge import pdfmerge | |
from pypdf import PdfReader, PdfWriter | |
def decode_base64_json( encoded_str ):
    """Decode a base64-encoded UTF-8 JSON string into a Python object.

    Parameters:
        encoded_str: base64 text, e.g. produced by the companion browser
            snippet that collects the lecture PDF URLs.

    Returns:
        The decoded JSON value (here, a list of URL strings), or None when
        the input is not valid base64, UTF-8, or JSON.
    """
    try:
        utf8_bytes = base64.b64decode( encoded_str )
        json_str = utf8_bytes.decode( 'utf-8' )
        return json.loads( json_str )
    except ( ValueError, TypeError ) as e:
        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
        # ValueError subclasses; TypeError covers non-str/bytes input.
        # Narrowed from a bare `except Exception` so unrelated bugs surface.
        print( "Decode error:", e )
        return None
def create_session():
    """Build a requests.Session that retries transient HTTP failures.

    Retries up to 5 times with exponential backoff on connection errors and
    on 429/5xx responses, for GET requests only. `raise_on_status=False`
    leaves status handling to the caller (via raise_for_status).
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
        raise_on_status=False,
    )
    http_adapter = HTTPAdapter( max_retries=retry_policy )
    session = requests.Session()
    # Same adapter serves both schemes.
    for scheme in ( "http://", "https://" ):
        session.mount( scheme, http_adapter )
    return session
def download_file(url, save_path, session, try_stream=False):
    """Download `url` to `save_path` using the shared retrying session.

    Parameters:
        url: file URL to fetch.
        save_path: pathlib.Path destination.
        session: requests.Session from create_session().
        try_stream: when True, stream with a tqdm progress bar and fall back
            to a plain full-body GET if streaming fails.

    FIX: a failed or incomplete download previously left a partial file on
    disk, which download_all()'s `save_path.exists()` check would then treat
    as complete on the next run. Partial files are now removed.
    """
    try:
        if not try_stream:
            # Quick, simple full-body request
            r = session.get(url, timeout=30)
            r.raise_for_status()
            save_path.write_bytes(r.content)
            print(f"✅ Downloaded (no stream): {save_path.name}")
            return
        # Streamed download (for big files / progress)
        with session.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            total_size = int(r.headers.get('content-length', 0))
            block_size = 1024
            t = tqdm(total=total_size, unit='iB', unit_scale=True, desc=save_path.name)
            with open(save_path, 'wb') as f:
                for data in r.iter_content(block_size):
                    t.update(len(data))
                    f.write(data)
            t.close()
            if total_size != 0 and t.n != total_size:
                print(f"❌ Incomplete download: {url}")
                save_path.unlink(missing_ok=True)  # don't leave a partial file behind
    except Exception as e:
        save_path.unlink(missing_ok=True)  # don't leave a partial file behind
        if try_stream:
            print(f"⚠️ Stream failed, retrying without stream: {url} ({e})")
            download_file(url, save_path, session, try_stream=False)
        else:
            print(f"❌ Final failure: {url} ({e})")
def download_all( urls , output_dir="downloads" , max_workers=5 ):
    """Download every URL into `output_dir` concurrently.

    Parameters:
        urls: iterable of file URLs; the final path segment becomes the
            local filename.
        output_dir: destination directory, created if missing.
        max_workers: thread-pool size.

    Returns:
        dict mapping url -> True on success / already-present, False on
        failure raised out of the worker.
    """
    session = create_session()
    out_dir = Path( output_dir )
    out_dir.mkdir( parents=True, exist_ok=True )
    results = {}
    with ThreadPoolExecutor( max_workers=max_workers ) as executor:
        future_to_url = {}
        for url in urls:
            save_path = out_dir / url.split( "/" )[-1]
            if save_path.exists():
                print( f"✅ Already exists: {save_path.name}" )
                results[ url ] = True
                continue
            future = executor.submit( download_file, url, save_path, session )
            future_to_url[ future ] = url
        # FIX: total was len(urls), which overcounts when files were skipped
        # as already present, leaving the bar stuck below 100%. Only the
        # actually-submitted futures are awaited here.
        for future in tqdm( as_completed( future_to_url ) , total=len( future_to_url ) , desc="Downloading all" ):
            url = future_to_url[ future ]
            try:
                future.result()
                results[ url ] = True
            except Exception as e:
                print( f"⚠️ {url} failed: {e}" )
                results[ url ] = False
    return results
def merge_pdfs( pdf_paths , output_path ):
    """Concatenate PDFs into one file with a top-level bookmark per input.

    Parameters:
        pdf_paths: ordered iterable of PDF file paths.
        output_path: where the merged PDF is written.

    Each source contributes one outline entry, titled with its filename stem
    and pointing at the first page of that source in the merged document.
    """
    writer = PdfWriter()
    first_page = 0
    for source in map( Path, pdf_paths ):
        reader = PdfReader( str( source ) )
        page_count = len( reader.pages )
        # Append all pages, then bookmark where this document begins.
        writer.append( reader )
        writer.add_outline_item(
            title=source.stem ,
            page_number=first_page
        )
        print( f"✅ Added {source.name} ({page_count} pages) at page offset {first_page}" )
        first_page += page_count
    with open( output_path, "wb" ) as out_f:
        writer.write( out_f )
    print( f"\n🎉 Merged PDF with bookmarks saved to: {output_path}" )
if __name__ == "__main__":
    # Usage: script.py <base64-encoded JSON list of PDF URLs>
    # (produced by the companion browser console snippet)
    # FIX: guard against a missing argument (IndexError) and against a
    # failed decode, which previously passed None into download_all().
    if len( sys.argv ) < 2:
        sys.exit( "Usage: python downloader.py <base64-json-url-list>" )
    urls = decode_base64_json( sys.argv[ 1 ] )
    if not urls:
        sys.exit( "Could not decode URL list from argument 1" )
    download_all( urls )
    save_paths = [ str( Path("downloads") / url.split("/")[-1] ) for url in urls ]
    # pdfmerge( save_paths , "merged_output.pdf" )
    merge_pdfs( save_paths , "merged_output.pdf" )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(() => {
    // target = https://open.win.ox.ac.uk/pages/fslcourse/website/online_materials.html
    // example = https://open.win.ox.ac.uk/pages/fslcourse/lectures/Reg_P1E1.pdf
    // <a class="card-link" href="../lectures/Struc_P1E4.pdf" target="_blank" style="font-family: Arial;">PDF slides</a>

    // Serialize a value to JSON (circular references become "[Circular]"),
    // then base64-encode its UTF-8 bytes for pasting into the Python script.
    function base64_encode( js_obj ) {
        const seen = new WeakSet();
        const replacer = ( key , value ) => {
            if ( typeof value !== "object" || value === null ) return value;
            if ( seen.has( value ) ) return "[Circular]";
            seen.add( value );
            return value;
        };
        const json_text = JSON.stringify( js_obj , replacer );
        const utf8_bytes = new TextEncoder().encode( json_text );
        let binary_str = "";
        for ( const byte of utf8_bytes ) binary_str += String.fromCharCode( byte );
        return btoa( binary_str );
    }

    // Collect every lecture-PDF link on the page and print the encoded list.
    const anchors = document.querySelectorAll( 'a.card-link[href^="../lectures/"][href$=".pdf"]' );
    const pdf_urls = Array.from( anchors , ( a ) => a.href );
    console.log( base64_encode( pdf_urls ) );
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment