Skip to content

Instantly share code, notes, and snippets.

@ian-r-rose
Last active April 4, 2022 22:35
Show Gist options
  • Save ian-r-rose/f7af486f5c1e32a60d1047b8ad28cfdd to your computer and use it in GitHub Desktop.
Save ian-r-rose/f7af486f5c1e32a60d1047b8ad28cfdd to your computer and use it in GitHub Desktop.
import contextlib
import os
import subprocess
import time
import s3fs
# S3 bucket and object key of the file that every benchmark downloads.
BUCKET = "ursa-labs-taxi-data"
KEY = "2009/01/data.parquet"

# The same object addressed two ways: as an s3:// URI and as a plain
# HTTPS URL on the bucket's regional endpoint.
URL = "s3://{}/{}".format(BUCKET, KEY)
HTTP_URL = "https://{}.s3.us-east-2.amazonaws.com/{}".format(BUCKET, KEY)

# Local path each download is written to.
OUT = "out.parquet"
@contextlib.contextmanager
def timer(label="Block"):
    """Time the enclosed block, report it, then delete the downloaded file.

    When the block completes normally, prints
    ``"{label} took {seconds} seconds"``. The file at ``OUT`` is removed
    afterwards so the next benchmark does not find a leftover download;
    the removal happens after the clock is stopped, so it is not part of
    the reported time.

    Args:
        label: Name shown in the printed timing line.

    Yields:
        None. Control passes to the body of the ``with`` statement.

    Raises:
        Any exception raised by the enclosed block is propagated
        unchanged. No timing line is printed in that case, but the
        output file is still cleaned up.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock is
    # adjusted mid-run.
    start = time.perf_counter()
    try:
        yield
        elapsed = time.perf_counter() - start
        print(f"{label} took {elapsed} seconds")
    finally:
        # A failed download may never have created OUT. A missing file
        # must not mask the real error or abort the remaining benchmarks.
        with contextlib.suppress(FileNotFoundError):
            os.remove(OUT)
# Benchmark 1: download through the s3fs Python library. The filesystem
# object is created inside the timed region, so its setup cost is part of
# the reported time. anon=True requests unauthenticated access, mirroring
# --no-sign-request in the AWS CLI run below.
with timer("s3fs"):
    fs = s3fs.S3FileSystem(anon=True)
    fs.download(URL, OUT)

# Benchmarks 2 and 3: the same download through external command-line tools.
cli_benchmarks = (
    ("aws cli", ["aws", "s3", "cp", URL, OUT, "--no-sign-request"]),
    # --fail makes curl exit non-zero on an HTTP error response instead of
    # saving the error page to OUT and reporting success.
    ("curl", ["curl", "--fail", "-o", OUT, HTTP_URL]),
)
for label, command in cli_benchmarks:
    with timer(label):
        # check=True turns a failed command into CalledProcessError (which
        # carries the captured stderr) rather than letting a failure be
        # reported as a suspiciously fast download.
        subprocess.run(command, capture_output=True, check=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment