Last active
July 20, 2024 19:04
-
-
Save rajivnarayan/c38f01b89de852b3e7d459cfde067f3f to your computer and use it in GitHub Desktop.
A Pydantic model for validating S3 URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

from pydantic import BaseModel, Field, ValidationError, computed_field
class S3Path(BaseModel):
    """Pydantic model that validates an ``s3://<bucket>[/<key>]`` URL.

    ``url`` is checked against ``_s3_pattern``, which encodes a subset of the
    S3 bucket naming rules plus a restricted key alphabet.  ``bucket`` and
    ``key`` are computed fields derived from ``url``.
    """

    # Strip leading/trailing whitespace from ``url`` before the length and
    # pattern checks.  ``Field`` has no ``strip_whitespace`` parameter in
    # Pydantic v2, so the option has to be set through the model config.
    model_config = {'str_strip_whitespace': True}

    # https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
    # NOTE(review): the lookaheads need Python's ``re`` engine, which is why a
    # compiled pattern (not a string) is handed to ``Field`` -- confirm the
    # installed pydantic version accepts compiled patterns.
    _s3_pattern = re.compile(
        r'^s3://'
        # Bucket name must not start with xn-- or sthree- (the latter also
        # covers sthree-configurator).
        r'(?!xn--|sthree-)'
        # Bucket name must not contain two adjacent periods.  ``[^/]*`` keeps
        # the check inside the bucket segment so keys are unaffected.
        r'(?![^/]*\.\.)'
        # Bucket name must not end with -s3alias.
        r'(?![^/]*-s3alias(?:/|\Z))'
        # 3-63 chars of [a-z0-9.-], starting and ending with a letter or digit
        # (which also rules out a trailing period or hyphen).
        r'[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]'
        # Optional key: "/"-separated segments of [a-zA-Z0-9._-], no empty
        # segment, optional trailing slash.  Written without nested
        # quantifiers so a non-matching key cannot trigger exponential
        # backtracking.
        r'(?:/(?:[a-zA-Z0-9._-]+(?:/[a-zA-Z0-9._-]+)*/?)?)?'
        # ``\Z`` instead of ``$`` so a trailing newline is not accepted.
        r'\Z'
    )

    # Shortest valid value is "s3://" plus a 3-character bucket name.
    url: str = Field(
        ...,
        pattern=_s3_pattern,
        min_length=8,
        max_length=1023,
    )

    def s3_url_parts(self):
        """Split ``url`` into its bucket and key.

        Returns:
            A ``(bucket, key)`` tuple.  ``key`` is the text after the first
            ``/`` that follows the bucket, or ``''`` when there is none.
        """
        # Drop the 5-character "s3://" scheme, then split on the first slash.
        bucket, _, key = self.url[len('s3://'):].partition('/')
        return (bucket, key)

    @computed_field
    @property
    def bucket(self) -> str:
        """Bucket name parsed from ``url``."""
        return self.s3_url_parts()[0]

    @computed_field
    @property
    def key(self) -> str:
        """Object key parsed from ``url`` (empty string when absent)."""
        return self.s3_url_parts()[1]
# Ad-hoc self-check: every URL in ``good_urls`` should validate and every URL
# in ``bad_urls`` should be rejected by ``S3Path``.
good_urls = [
    's3://my-bucket/',
    's3://my.bucket.name/my-key',
    's3://123-bucket/another/key/path',
    's3://bucket/this/is/a/folder/',
    's3://my-bucket/some/key/path/file.txt',
    's3://10-bucket-01/nested.folder/structure/file.jpg',
    's3://my-bucket/with-key',
]

bad_urls = [
    's3://',
    's3://my',
    'https://my-bucket/',
    's3://bucket-name/with/white space/in/key',
    's3:/my-bucket/',
    's3://my_bucket/',
    's3://My-Bucket/',
    's3://123.bucketname./invalid',
    's3://123.bucketname.-/invalid',
    's3://123.bucket..name/invalid',
    's3://-bucket-starts-with-hyphen/',
    's3://bucket-ends-with-hyphen-/',
    's3://my-bucket/ends/with/slash//',
    's3://toolongnameofthebucketwhichexceedssixtyfourcharacterslongforthisexample/',
    's3://my-bucket/?query=parameters',
    's3://my-bucket/this/path/has/a/tab/char\there',
]

print(f'Validating {len(good_urls)} good urls')
good_fails = []
for url in good_urls:
    try:
        parsed = S3Path(url=url)
    except ValidationError as e:
        # Only a validation failure counts as a rejection; any other
        # exception is a bug in the model and should surface.
        print(f"Error for good {url}: {e}")
        good_fails.append(url)
    else:
        print(parsed.model_dump())

print(f'Validating {len(bad_urls)} bad urls')
bad_fails = []
for url in bad_urls:
    try:
        parsed = S3Path(url=url)
    except ValidationError:
        # Expected outcome for a bad URL.
        continue
    # Reaching here means a URL that should be invalid was accepted.
    print(parsed.model_dump())
    print(f'Error bad url passes : {url}')
    bad_fails.append(url)

print(f'Good urls that failed validation: {good_fails}')
# ``bad_fails`` collects bad URLs that were wrongly accepted.
print(f'Bad urls that passed validation: {bad_fails}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See pydantic/pydantic#4271 for discussions on this issue