Skip to content

Instantly share code, notes, and snippets.

@i80and
Last active October 11, 2024 20:57
Show Gist options
  • Select an option

  • Save i80and/496ab45980aefe24822c1564990b9cc4 to your computer and use it in GitHub Desktop.

Select an option

Save i80and/496ab45980aefe24822c1564990b9cc4 to your computer and use it in GitHub Desktop.
import boto3
import sys
from pathlib import Path
import json
# S3 bucket that the script lists objects from (under "docs/<version>/" prefixes).
BUCKET_NAME = "docs-mongodb-org-dotcomprd"
# Version names to process: every command-line argument after the script name.
versions = sys.argv[1:]
def remove_prefix(s: str, version: str) -> str:
    """Return the part of *s* that follows the first ``docs/<version>/``.

    If *s* does not contain that marker at all, *s* is returned unchanged.
    """
    marker = f"docs/{version}/"
    _, found, remainder = s.partition(marker)
    if not found:
        return s
    return remainder
s3 = boto3.resource("s3")
my_bucket = s3.Bucket(BUCKET_NAME)

# Maps each version name to {key relative to "docs/<version>/": redirect target or None}.
# "manual" is always loaded in addition to the versions given on the command line.
version_db = {}
for version in [*versions, "manual"]:
    # "<version>.txt" in the working directory acts as a cache of the bucket listing.
    output_pathname = Path(version + ".txt")

    if not output_pathname.exists():
        print("Downloading " + version)
        files = []
        for obj in my_bucket.objects.filter(Prefix=f"docs/{version}/"):
            if obj.key.endswith("index.html"):
                if obj.size == 0:
                    # Zero-byte objects: look the object up individually and
                    # record its website redirect location.
                    redirect_target = s3.Object(
                        BUCKET_NAME, obj.key
                    ).website_redirect_location
                else:
                    redirect_target = None
                files.append([obj.key, redirect_target])
        # Persist the [key, redirect] pairs so later runs skip the download.
        output_pathname.write_text(json.dumps(files, indent=2))
    else:
        # Cache hit: reuse the previously saved [key, redirect] pairs.
        files = json.loads(output_pathname.read_text())

    version_db[version] = {remove_prefix(k, version): v for k, v in files}
# Print one "source<TAB>target" line for every index page of each requested
# version, pointing it at the corresponding location under docs/manual.
manual_pages = version_db["manual"]
for version in versions:
    for key in version_db[version]:
        fully_qualified_key = f"docs/{version}/{key}"

        if key not in manual_pages:
            # No page with this relative key in "manual": send it to the manual
            # root. The source column uses the fully qualified key, like every
            # other branch, so the line still identifies the version.
            print(f"{fully_qualified_key}\thttps://www.mongodb.com/docs/manual/")
            continue

        redirect_target = manual_pages[key]
        if redirect_target:
            # The manual page is itself a redirect: reuse its target.
            print(f"{fully_qualified_key}\t{redirect_target}")
            continue

        if key == "index.html":
            # Root page of the version: there is no "/index.html" suffix to
            # strip, so map it straight to the manual root instead of
            # producing ".../docs/manual/index.html/".
            key_without_indexhtml = ""
        else:
            key_without_indexhtml = key.removesuffix("/index.html") + "/"
        print(f"{fully_qualified_key}\thttps://www.mongodb.com/docs/manual/{key_without_indexhtml}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment