Last active
June 11, 2025 08:10
-
-
Save CypherpunkSamurai/07f7519f28c22b3a28a68eaae2abd4f5 to your computer and use it in GitHub Desktop.
storage.googleapis.com storage bucket client
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Google Cloud Storage Bucket Browser Library

A Python library for browsing Google Cloud Storage buckets using the public API.
Provides an S3-like interface for listing and navigating bucket contents.
"""
import logging | |
from dataclasses import dataclass | |
from typing import Dict, Iterator, List, Optional, Union | |
from urllib.parse import quote, urlencode | |
import requests | |
# Set up logging.
# NOTE: basicConfig() configures the root logger as an import-time side
# effect; it is kept here so existing users keep seeing INFO-level output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class StorageObject:
    """Represents a file/object in the storage bucket."""

    # Full object name inside the bucket, including any "folder" path.
    name: str
    # Object size in bytes.
    size: int
    # Last-update timestamp, kept as the string the API returned.
    updated: str
    # Direct download link for the object's content.
    media_link: str
    # Resource kind reported by the API.
    kind: str = "storage#object"
    # Optional user-defined metadata attached to the object.
    metadata: Optional[Dict] = None

    @property
    def filename(self) -> str:
        """Get just the filename without path."""
        return self.name.rsplit('/', 1)[-1]

    @property
    def is_file(self) -> bool:
        """Check if this is a file (not a prefix/folder). Always True."""
        return True
@dataclass
class StoragePrefix:
    """Represents a folder/prefix in the storage bucket."""

    # Full prefix as returned by the API, e.g. "Win_x64/1000027/".
    name: str

    @property
    def folder_name(self) -> str:
        """Get just the folder name without path or trailing slash."""
        return self.name.rstrip('/').rsplit('/', 1)[-1]

    @property
    def is_file(self) -> bool:
        """Check if this is a file (not a prefix/folder). Always False."""
        return False
class StorageBucketBrowser:
    """
    A browser for Google Cloud Storage buckets using the public API.

    Listings are requested with '/' as the delimiter, so object names are
    presented as a folder hierarchy: "folders" are StoragePrefix instances
    and files are StorageObject instances.

    The browser owns a requests.Session; call close() (or use the browser as
    a context manager) to release it when done.

    Example usage:
        browser = StorageBucketBrowser("chromium-browser-snapshots")

        # List root contents
        contents = browser.list_contents()

        # Navigate to a folder
        win_contents = browser.list_contents("Win_x64/")

        # List with pagination
        for page in browser.list_contents_paginated("Win_x64/"):
            for item in page:
                print(f"{item.name} ({'folder' if isinstance(item, StoragePrefix) else 'file'})")
    """

    BASE_URL = "https://www.googleapis.com/storage/v1/b"

    def __init__(self, bucket_name: str, timeout: int = 30):
        """
        Initialize the storage bucket browser.

        Args:
            bucket_name: Name of the Google Storage bucket
            timeout: Request timeout in seconds
        """
        self.bucket_name = bucket_name
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'StorageBucketBrowser/1.0'
        })

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "StorageBucketBrowser":
        """Return the browser itself so it can be used in a with-statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the HTTP session when the with-block exits."""
        self.close()

    def _build_url(self, prefix: str = "", page_token: Optional[str] = None) -> str:
        """
        Build the API URL for listing bucket contents.

        Args:
            prefix: Only names starting with this prefix are listed
            page_token: Continuation token from a previous page, if any

        Returns:
            Fully encoded listing URL
        """
        # Quote the bucket name so an unexpected character cannot alter the
        # path of the request.
        url = f"{self.BASE_URL}/{quote(self.bucket_name, safe='')}/o"
        params = {
            'delimiter': '/',
            'prefix': prefix,
            'fields': 'items(kind,mediaLink,metadata,name,size,updated),kind,prefixes,nextPageToken'
        }
        if page_token:
            params['pageToken'] = page_token
        return f"{url}?{urlencode(params)}"

    def _make_request(self, url: str) -> Dict:
        """
        Make a request to the storage API and decode the JSON response.

        Args:
            url: Fully built request URL

        Returns:
            Decoded JSON payload

        Raises:
            StorageBrowserError: If the request fails or the body is not JSON
        """
        logger.debug("Making request to: %s", url)
        try:
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error("Request failed: %s", e)
            raise StorageBrowserError(
                f"Failed to fetch bucket contents: {e}") from e

        # Decode separately from the transport step: a requests JSON decode
        # error can also be a RequestException, and would otherwise be
        # reported as a transport failure.
        try:
            return response.json()
        except ValueError as e:
            logger.error("Invalid JSON response: %s", e)
            raise StorageBrowserError(
                f"Invalid response from storage API: {e}") from e

    @staticmethod
    def _parse_page(data: Dict) -> List[Union[StorageObject, StoragePrefix]]:
        """Convert one listing response into entries, folders first, then files."""
        entries: List[Union[StorageObject, StoragePrefix]] = [
            StoragePrefix(name=prefix_name)
            for prefix_name in data.get('prefixes', [])
        ]
        entries.extend(
            StorageObject(
                name=item['name'],
                size=int(item['size']),
                updated=item['updated'],
                media_link=item['mediaLink'],
                kind=item['kind'],
                metadata=item.get('metadata'),
            )
            for item in data.get('items', [])
        )
        return entries

    def _list_of_type(self, item_type: type, prefix: str, max_items: Optional[int]) -> list:
        """List entries of one type at prefix, applying max_items after filtering."""
        if max_items is not None and max_items <= 0:
            return []
        matches = []
        for page in self.list_contents_paginated(prefix):
            matches.extend(
                entry for entry in page if isinstance(entry, item_type))
            # Stop fetching pages as soon as enough matching entries exist.
            if max_items is not None and len(matches) >= max_items:
                return matches[:max_items]
        return matches

    def _walk_entries(self, prefix: str) -> Iterator[tuple]:
        """Walk depth-first, yielding (path, StoragePrefix list, StorageObject list)."""
        contents = self.list_contents(prefix)
        subfolders = [
            entry for entry in contents if isinstance(entry, StoragePrefix)]
        files = [
            entry for entry in contents if isinstance(entry, StorageObject)]
        yield prefix, subfolders, files
        for folder in subfolders:
            yield from self._walk_entries(folder.name)

    def list_contents(self, prefix: str = "", max_items: Optional[int] = None) -> List[Union[StorageObject, StoragePrefix]]:
        """
        List contents of the bucket at the given prefix.

        Args:
            prefix: Path prefix to list (e.g., "Win_x64/" for a folder)
            max_items: Maximum number of items to return (None for all;
                zero or a negative value returns an empty list)

        Returns:
            List of StorageObject and StoragePrefix instances

        Raises:
            StorageBrowserError: If a request to the storage API fails
        """
        if max_items is not None and max_items <= 0:
            return []
        contents: List[Union[StorageObject, StoragePrefix]] = []
        for page in self.list_contents_paginated(prefix):
            contents.extend(page)
            # Stop fetching further pages once the limit has been reached.
            if max_items is not None and len(contents) >= max_items:
                return contents[:max_items]
        return contents

    def list_contents_paginated(self, prefix: str = "") -> Iterator[List[Union[StorageObject, StoragePrefix]]]:
        """
        List contents with pagination, yielding one page at a time.

        Empty pages are skipped.

        Args:
            prefix: Path prefix to list (e.g., "Win_x64/" for a folder)

        Yields:
            Pages of StorageObject and StoragePrefix instances

        Raises:
            StorageBrowserError: If a request to the storage API fails
        """
        page_token = None
        while True:
            data = self._make_request(self._build_url(prefix, page_token))
            page_contents = self._parse_page(data)
            if page_contents:
                yield page_contents
            # Check for next page
            page_token = data.get('nextPageToken')
            if not page_token:
                break

    def list_files_only(self, prefix: str = "", max_items: Optional[int] = None) -> List[StorageObject]:
        """
        List only files (not folders) at the given prefix.

        Args:
            prefix: Path prefix to list
            max_items: Maximum number of files to return; folders are not
                counted against this limit

        Returns:
            List of StorageObject instances
        """
        return self._list_of_type(StorageObject, prefix, max_items)

    def list_folders_only(self, prefix: str = "", max_items: Optional[int] = None) -> List[StoragePrefix]:
        """
        List only folders (prefixes) at the given prefix.

        Args:
            prefix: Path prefix to list
            max_items: Maximum number of folders to return; files are not
                counted against this limit

        Returns:
            List of StoragePrefix instances
        """
        return self._list_of_type(StoragePrefix, prefix, max_items)

    def get_object_info(self, object_name: str) -> Optional[StorageObject]:
        """
        Get information about a specific object.

        Args:
            object_name: Full path to the object

        Returns:
            StorageObject if found, None otherwise
        """
        if not object_name:
            return None
        # Use the full object name as the listing prefix so only names that
        # start with it are returned, instead of the whole parent folder.
        for page in self.list_contents_paginated(object_name):
            for entry in page:
                if isinstance(entry, StorageObject) and entry.name == object_name:
                    return entry
        return None

    def download_url(self, object_name: str) -> Optional[str]:
        """
        Get the download URL for a specific object.

        Args:
            object_name: Full path to the object

        Returns:
            Download URL if object exists, None otherwise
        """
        obj = self.get_object_info(object_name)
        return obj.media_link if obj else None

    def exists(self, path: str) -> bool:
        """
        Check if a file or folder exists at the given path.

        A path ending in '/' is treated as a folder, anything else as a file.

        Args:
            path: Path to check

        Returns:
            True if exists, False otherwise
        """
        if path.endswith('/'):
            # A folder exists exactly when something is stored under its
            # prefix, so one listing of the folder itself is enough; there is
            # no need to enumerate every sibling in the parent folder.
            return bool(self.list_contents(path, max_items=1))
        return self.get_object_info(path) is not None

    def walk(self, prefix: str = "") -> Iterator[tuple[str, List[str], List[StorageObject]]]:
        """
        Walk through the bucket structure like os.walk().

        Folders are visited depth-first; each folder is yielded before its
        subfolders.

        Args:
            prefix: Starting prefix/path

        Yields:
            Tuples of (current_path, folder_names, files)
        """
        for current_path, subfolders, files in self._walk_entries(prefix):
            yield (
                current_path,
                [folder.folder_name for folder in subfolders],
                files,
            )

    def search(self, pattern: str, prefix: str = "", case_sensitive: bool = False) -> List[Union[StorageObject, StoragePrefix]]:
        """
        Search for files and folders matching a pattern.

        The whole tree below prefix is walked; an entry matches when pattern
        is a substring of its folder name or filename (not of its full path).

        Args:
            pattern: Pattern to search for (simple string matching)
            prefix: Prefix to search within
            case_sensitive: Whether search should be case sensitive

        Returns:
            List of matching StorageObject and StoragePrefix instances
        """
        needle = pattern if case_sensitive else pattern.lower()

        def matches(name: str) -> bool:
            return needle in (name if case_sensitive else name.lower())

        results: List[Union[StorageObject, StoragePrefix]] = []
        for _, subfolders, files in self._walk_entries(prefix):
            # The walk already supplies the StoragePrefix objects of the
            # directory being visited, so matches at any depth are returned
            # directly without re-listing.
            results.extend(
                folder for folder in subfolders if matches(folder.folder_name))
            results.extend(
                file_obj for file_obj in files if matches(file_obj.filename))
        return results
class StorageBrowserError(Exception):
    """Exception raised for storage browser errors."""
# Convenience functions
def list_bucket_contents(bucket_name: str, prefix: str = "", max_items: Optional[int] = None) -> List[Union[StorageObject, StoragePrefix]]:
    """
    Convenience function to list bucket contents.

    Creates a temporary StorageBucketBrowser for the call and closes its
    HTTP session before returning.

    Args:
        bucket_name: Name of the storage bucket
        prefix: Path prefix to list
        max_items: Maximum number of items to return

    Returns:
        List of StorageObject and StoragePrefix instances
    """
    browser = StorageBucketBrowser(bucket_name)
    try:
        return browser.list_contents(prefix, max_items)
    finally:
        # The browser is throwaway, so release its session here.
        browser.session.close()
def download_file_url(bucket_name: str, object_name: str) -> Optional[str]:
    """
    Convenience function to get download URL for a file.

    Creates a temporary StorageBucketBrowser for the call and closes its
    HTTP session before returning.

    Args:
        bucket_name: Name of the storage bucket
        object_name: Full path to the object

    Returns:
        Download URL if object exists, None otherwise
    """
    browser = StorageBucketBrowser(bucket_name)
    try:
        return browser.download_url(object_name)
    finally:
        # The browser is throwaway, so release its session here.
        browser.session.close()
# Example usage
if __name__ == "__main__":
    # Example: Browse Chromium snapshots
    browser = StorageBucketBrowser("chromium-browser-snapshots")

    print("=== Root contents ===")
    root_contents = browser.list_contents(max_items=10)
    for item in root_contents:
        item_type = "folder" if isinstance(item, StoragePrefix) else "file"
        print(f"{item.name} ({item_type})")

    print("\n=== Win_x64 contents (first page) ===")
    # Only the first page is needed; next() pulls exactly one page from the
    # generator (or an empty list when the folder has no contents).
    first_page = next(browser.list_contents_paginated("Win_x64/"), [])
    for item in first_page[:5]:  # Show first 5 items
        item_type = "folder" if isinstance(item, StoragePrefix) else "file"
        if isinstance(item, StorageObject):
            print(f"{item.name} ({item_type}, {item.size} bytes)")
        else:
            print(f"{item.name} ({item_type})")

    print("\n=== Files in Win_x64/1000027/ ===")
    files = browser.list_files_only("Win_x64/1000027/")
    for file_obj in files:
        print(f"{file_obj.filename} - {file_obj.size} bytes - {file_obj.updated}")
        print(f" Download: {file_obj.media_link}")
# === Root contents === | |
# Android/ (folder) | |
# AndroidDesktop_arm64/ (folder) | |
# AndroidDesktop_x64/ (folder) | |
# Android_Arm64/ (folder) | |
# Arm/ (folder) | |
# Linux/ (folder) | |
# LinuxGit/ (folder) | |
# LinuxGit_x64/ (folder) | |
# Linux_ARM_Cross-Compile/ (folder) | |
# Linux_ChromiumOS/ (folder) | |
# === Win_x64 contents (first page) === | |
# Win_x64/1000027/ (folder) | |
# Win_x64/1000052/ (folder) | |
# Win_x64/1000083/ (folder) | |
# Win_x64/1000111/ (folder) | |
# Win_x64/1000141/ (folder) | |
# === Files in Win_x64/1000027/ === | |
# REVISIONS - 726 bytes - 2022-05-05T19:58:53.133Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2FREVISIONS?generation=1651780733113894&alt=media | |
# chrome-win.zip - 191056509 bytes - 2022-05-05T19:58:48.357Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchrome-win.zip?generation=1651780728332948&alt=media | |
# chrome-win32-syms.zip - 529859865 bytes - 2022-05-05T20:01:38.594Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchrome-win32-syms.zip?generation=1651780898562649&alt=media | |
# chromedriver_win32-syms.zip - 30738408 bytes - 2022-05-05T20:01:50.893Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchromedriver_win32-syms.zip?generation=1651780910873002&alt=media | |
# chromedriver_win32.zip - 6315326 bytes - 2022-05-05T20:01:56.619Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fchromedriver_win32.zip?generation=1651780916599219&alt=media | |
# content-shell.zip - 66516348 bytes - 2022-05-05T20:02:16.469Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fcontent-shell.zip?generation=1651780936445045&alt=media | |
# devtools-frontend.zip - 25572995 bytes - 2022-05-05T20:03:22.706Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fdevtools-frontend.zip?generation=1651781002684463&alt=media | |
# gcapi.zip - 436767 bytes - 2022-05-05T20:03:27.862Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fgcapi.zip?generation=1651781007842617&alt=media | |
# metrics-metadata.zip - 2707705 bytes - 2022-05-05T20:03:34.096Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fmetrics-metadata.zip?generation=1651781014075851&alt=media | |
# mini_installer.exe - 73218560 bytes - 2022-05-05T20:03:36.786Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fmini_installer.exe?generation=1651781016766106&alt=media | |
# pnacl.zip - 12625268 bytes - 2022-05-05T20:03:45.646Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fpnacl.zip?generation=1651781025618228&alt=media | |
# updater-syms.zip - 17662096 bytes - 2022-05-05T20:03:57.290Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fupdater-syms.zip?generation=1651781037269021&alt=media | |
# updater.zip - 10447483 bytes - 2022-05-05T20:03:48.417Z | |
# Download: https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Win_x64%2F1000027%2Fupdater.zip?generation=1651781028397359&alt=media |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment