Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save CypherpunkSamurai/07f7519f28c22b3a28a68eaae2abd4f5 to your computer and use it in GitHub Desktop.
Save CypherpunkSamurai/07f7519f28c22b3a28a68eaae2abd4f5 to your computer and use it in GitHub Desktop.
storage.googleapis.com storage bucket client
"""
Google Cloud Storage Bucket Browser Library
A Python library for browsing Google Cloud Storage buckets using the public API.
Provides an S3-like interface for listing and navigating bucket contents.
"""
import logging
from dataclasses import dataclass
from typing import Dict, Iterator, List, Optional, Union
from urllib.parse import quote, urlencode
import requests
# Set up logging
# NOTE(review): basicConfig() runs at import time and, if the root logger has
# no handlers yet, configures it at INFO level for every program that imports
# this module. Library code usually leaves that choice to the application --
# confirm this side effect is intended.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@dataclass
class StorageObject:
    """A single object (file) listed from the storage bucket."""

    name: str  # full object name, including any '/'-separated path
    size: int
    updated: str
    media_link: str
    kind: str = "storage#object"
    metadata: Optional[Dict] = None

    @property
    def filename(self) -> str:
        """Return the part of ``name`` after the last '/' (the bare filename)."""
        _, _, tail = self.name.rpartition('/')
        return tail

    @property
    def is_file(self) -> bool:
        """Always ``True``: a StorageObject is a file, never a prefix/folder."""
        return True
@dataclass
class StoragePrefix:
    """A common prefix ("folder") listed from the storage bucket."""

    name: str  # full prefix, e.g. "Win_x64/1000027/"

    @property
    def folder_name(self) -> str:
        """Return the last path component of the prefix, without slashes."""
        trimmed = self.name.rstrip('/')
        return trimmed.rpartition('/')[2]

    @property
    def is_file(self) -> bool:
        """Always ``False``: a StoragePrefix is a folder, never a file."""
        return False
class StorageBucketBrowser:
"""
A browser for Google Cloud Storage buckets using the public API.
Example usage:
browser = StorageBucketBrowser("chromium-browser-snapshots")
# List root contents
contents = browser.list_contents()
# Navigate to a folder
win_contents = browser.list_contents("Win_x64/")
# List with pagination
for page in browser.list_contents_paginated("Win_x64/"):
for item in page:
print(f"{item.name} ({'folder' if isinstance(item, StoragePrefix) else 'file'})")
"""
BASE_URL = "https://www.googleapis.com/storage/v1/b"
def __init__(self, bucket_name: str, timeout: int = 30):
"""
Initialize the storage bucket browser.
Args:
bucket_name: Name of the Google Storage bucket
timeout: Request timeout in seconds
"""
self.bucket_name = bucket_name
self.timeout = timeout
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'StorageBucketBrowser/1.0'
})
def _build_url(self, prefix: str = "", page_token: Optional[str] = None) -> str:
"""Build the API URL for listing bucket contents."""
url = f"{self.BASE_URL}/{self.bucket_name}/o"
params = {
'delimiter': '/',
'prefix': prefix,
'fields': 'items(kind,mediaLink,metadata,name,size,updated),kind,prefixes,nextPageToken'
}
if page_token:
params['pageToken'] = page_token
return f"{url}?{urlencode(params)}"
def _make_request(self, url: str) -> Dict:
"""Make a request to the storage API."""
try:
logger.debug(f"Making request to: {url}")
response = self.session.get(url, timeout=self.timeout)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
logger.error(f"Request failed: {e}")
raise StorageBrowserError(f"Failed to fetch bucket contents: {e}")
except ValueError as e:
logger.error(f"Invalid JSON response: {e}")
raise StorageBrowserError(
f"Invalid response from storage API: {e}")
def list_contents(self, prefix: str = "", max_items: Optional[int] = None) -> List[Union[StorageObject, StoragePrefix]]:
"""
List contents of the bucket at the given prefix.
Args:
prefix: Path prefix to list (e.g., "Win_x64/" for a folder)
max_items: Maximum number of items to return (None for all)
Returns:
List of StorageObject and StoragePrefix instances
"""
contents = []
page_token = None
while True:
url = self._build_url(prefix, page_token)
data = self._make_request(url)
# Add prefixes (folders)
for prefix_name in data.get('prefixes', []):
contents.append(StoragePrefix(name=prefix_name))
if max_items and len(contents) >= max_items:
return contents[:max_items]
# Add items (files)
for item in data.get('items', []):
storage_obj = StorageObject(
name=item['name'],
size=int(item['size']),
updated=item['updated'],
media_link=item['mediaLink'],
kind=item['kind'],
metadata=item.get('metadata')
)
contents.append(storage_obj)
if max_items and len(contents) >= max_items:
return contents[:max_items]
# Check for next page
page_token = data.get('nextPageToken')
if not page_token:
break
return contents
def list_contents_paginated(self, prefix: str = "") -> Iterator[List[Union[StorageObject, StoragePrefix]]]:
"""
List contents with pagination, yielding one page at a time.
Args:
prefix: Path prefix to list (e.g., "Win_x64/" for a folder)
Yields:
Pages of StorageObject and StoragePrefix instances
"""
page_token = None
while True:
url = self._build_url(prefix, page_token)
data = self._make_request(url)
page_contents = []
# Add prefixes (folders)
for prefix_name in data.get('prefixes', []):
page_contents.append(StoragePrefix(name=prefix_name))
# Add items (files)
for item in data.get('items', []):
storage_obj = StorageObject(
name=item['name'],
size=int(item['size']),
updated=item['updated'],
media_link=item['mediaLink'],
kind=item['kind'],
metadata=item.get('metadata')
)
page_contents.append(storage_obj)
if page_contents:
yield page_contents
# Check for next page
page_token = data.get('nextPageToken')
if not page_token:
break
def list_files_only(self, prefix: str = "", max_items: Optional[int] = None) -> List[StorageObject]:
"""
List only files (not folders) at the given prefix.
Args:
prefix: Path prefix to list
max_items: Maximum number of files to return
Returns:
List of StorageObject instances
"""
contents = self.list_contents(prefix, max_items)
return [item for item in contents if isinstance(item, StorageObject)]
def list_folders_only(self, prefix: str = "", max_items: Optional[int] = None) -> List[StoragePrefix]:
"""
List only folders (prefixes) at the given prefix.
Args:
prefix: Path prefix to list
max_items: Maximum number of folders to return
Returns:
List of StoragePrefix instances
"""
contents = self.list_contents(prefix, max_items)
return [item for item in contents if isinstance(item, StoragePrefix)]
def get_object_info(self, object_name: str) -> Optional[StorageObject]:
"""
Get information about a specific object.
Args:
object_name: Full path to the object
Returns:
StorageObject if found, None otherwise
"""
# Extract prefix from object name
parts = object_name.split('/')
if len(parts) > 1:
prefix = '/'.join(parts[:-1]) + '/'
filename = parts[-1]
else:
prefix = ""
filename = object_name
contents = self.list_contents(prefix)
for item in contents:
if isinstance(item, StorageObject) and item.filename == filename:
return item
return None
def download_url(self, object_name: str) -> Optional[str]:
"""
Get the download URL for a specific object.
Args:
object_name: Full path to the object
Returns:
Download URL if object exists, None otherwise
"""
obj = self.get_object_info(object_name)
return obj.media_link if obj else None
def exists(self, path: str) -> bool:
"""
Check if a file or folder exists at the given path.
Args:
path: Path to check
Returns:
True if exists, False otherwise
"""
if path.endswith('/'):
# Check for folder
folders = self.list_folders_only(path.rsplit(
'/', 2)[0] + '/' if '/' in path.rstrip('/') else "")
return any(folder.name == path for folder in folders)
else:
# Check for file
return self.get_object_info(path) is not None
def walk(self, prefix: str = "") -> Iterator[tuple[str, List[str], List[StorageObject]]]:
"""
Walk through the bucket structure like os.walk().
Args:
prefix: Starting prefix/path
Yields:
Tuples of (current_path, folder_names, files)
"""
def _walk_recursive(current_prefix: str):
contents = self.list_contents(current_prefix)
folders = [item.folder_name for item in contents if isinstance(
item, StoragePrefix)]
files = [item for item in contents if isinstance(
item, StorageObject)]
yield (current_prefix, folders, files)
# Recursively walk subfolders
for item in contents:
if isinstance(item, StoragePrefix):
yield from _walk_recursive(item.name)
yield from _walk_recursive(prefix)
def search(self, pattern: str, prefix: str = "", case_sensitive: bool = False) -> List[Union[StorageObject, StoragePrefix]]:
"""
Search for files and folders matching a pattern.
Args:
pattern: Pattern to search for (simple string matching)
prefix: Prefix to search within
case_sensitive: Whether search should be case sensitive
Returns:
List of matching StorageObject and StoragePrefix instances
"""
if not case_sensitive:
pattern = pattern.lower()
results = []
for _, folders, files in self.walk(prefix):
# Search in folder names
for folder in folders:
search_target = folder if case_sensitive else folder.lower()
if pattern in search_target:
# Find the full StoragePrefix object
contents = self.list_contents(prefix)
for item in contents:
if isinstance(item, StoragePrefix) and item.folder_name == folder:
results.append(item)
break
# Search in file names
for file_obj in files:
search_target = file_obj.filename if case_sensitive else file_obj.filename.lower()
if pattern in search_target:
results.append(file_obj)
return results
class StorageBrowserError(Exception):
    """Raised when a storage API request fails or returns an unusable response."""
# Convenience functions
def list_bucket_contents(bucket_name: str, prefix: str = "", max_items: Optional[int] = None) -> List[Union[StorageObject, StoragePrefix]]:
    """
    One-shot helper: list a bucket's contents using a throwaway browser.

    Args:
        bucket_name: Name of the storage bucket
        prefix: Path prefix to list
        max_items: Maximum number of items to return

    Returns:
        List of StorageObject and StoragePrefix instances
    """
    return StorageBucketBrowser(bucket_name).list_contents(prefix, max_items)
def download_file_url(bucket_name: str, object_name: str) -> Optional[str]:
    """
    One-shot helper: look up the download URL of a single object.

    Args:
        bucket_name: Name of the storage bucket
        object_name: Full path to the object

    Returns:
        Download URL if object exists, None otherwise
    """
    return StorageBucketBrowser(bucket_name).download_url(object_name)
# Example usage
if __name__ == "__main__":
    # Demo against the public Chromium snapshots bucket (needs network access).
    browser = StorageBucketBrowser("chromium-browser-snapshots")

    # 1) First ten entries at the bucket root.
    print("=== Root contents ===")
    for item in browser.list_contents(max_items=10):
        item_type = "folder" if isinstance(item, StoragePrefix) else "file"
        print(f"{item.name} ({item_type})")

    # 2) Only the first page of Win_x64/, and only its first five entries.
    print("\n=== Win_x64 contents (first page) ===")
    first_page = next(browser.list_contents_paginated("Win_x64/"), [])
    for item in first_page[:5]:  # Show first 5 items
        item_type = "folder" if isinstance(item, StoragePrefix) else "file"
        if not isinstance(item, StorageObject):
            print(f"{item.name} ({item_type})")
        else:
            print(f"{item.name} ({item_type}, {item.size} bytes)")

    # 3) Every file of one specific snapshot, with its download link.
    print("\n=== Files in Win_x64/1000027/ ===")
    for file_obj in browser.list_files_only("Win_x64/1000027/"):
        print(f"{file_obj.filename} - {file_obj.size} bytes - {file_obj.updated}")
        print(f" Download: {file_obj.media_link}")
# === Root contents ===
# Android/ (folder)
# AndroidDesktop_arm64/ (folder)
# AndroidDesktop_x64/ (folder)
# Android_Arm64/ (folder)
# Arm/ (folder)
# Linux/ (folder)
# LinuxGit/ (folder)
# LinuxGit_x64/ (folder)
# Linux_ARM_Cross-Compile/ (folder)
# Linux_ChromiumOS/ (folder)
# === Win_x64 contents (first page) ===
# Win_x64/1000027/ (folder)
# Win_x64/1000052/ (folder)
# Win_x64/1000083/ (folder)
# Win_x64/1000111/ (folder)
# Win_x64/1000141/ (folder)
# === Files in Win_x64/1000027/ ===
# REVISIONS - 726 bytes - 2022-05-05T19:58:53.133Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2FREVISIONS?generation=1651780733113894&alt=media
# chrome-win.zip - 191056509 bytes - 2022-05-05T19:58:48.357Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchrome-win.zip?generation=1651780728332948&alt=media
# chrome-win32-syms.zip - 529859865 bytes - 2022-05-05T20:01:38.594Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchrome-win32-syms.zip?generation=1651780898562649&alt=media
# chromedriver_win32-syms.zip - 30738408 bytes - 2022-05-05T20:01:50.893Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchromedriver_win32-syms.zip?generation=1651780910873002&alt=media
# chromedriver_win32.zip - 6315326 bytes - 2022-05-05T20:01:56.619Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchromedriver_win32.zip?generation=1651780916599219&alt=media
# content-shell.zip - 66516348 bytes - 2022-05-05T20:02:16.469Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fcontent-shell.zip?generation=1651780936445045&alt=media
# devtools-frontend.zip - 25572995 bytes - 2022-05-05T20:03:22.706Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fdevtools-frontend.zip?generation=1651781002684463&alt=media
# gcapi.zip - 436767 bytes - 2022-05-05T20:03:27.862Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fgcapi.zip?generation=1651781007842617&alt=media
# metrics-metadata.zip - 2707705 bytes - 2022-05-05T20:03:34.096Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fmetrics-metadata.zip?generation=1651781014075851&alt=media
# mini_installer.exe - 73218560 bytes - 2022-05-05T20:03:36.786Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fmini_installer.exe?generation=1651781016766106&alt=media
# pnacl.zip - 12625268 bytes - 2022-05-05T20:03:45.646Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fpnacl.zip?generation=1651781025618228&alt=media
# updater-syms.zip - 17662096 bytes - 2022-05-05T20:03:57.290Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fupdater-syms.zip?generation=1651781037269021&alt=media
# updater.zip - 10447483 bytes - 2022-05-05T20:03:48.417Z
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fupdater.zip?generation=1651781028397359&alt=media
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment