MaximRouiller · July 19, 2024 16:18
diff --git a/UrlToAzureStorage.py b/UrlToAzureStorage.py
 import os
 from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
 from datetime import datetime
 from urllib.parse import urlparse

 # Required package: azure-storage-blob
 # API Docs: https://docs.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url

 # Capture the start time once; it is used below to name the destination container.
 now = datetime.now()

 # The storage account connection string is read from the environment.
 connection_string = os.getenv('AZURE_CONNECTION_STRING')
 if not connection_string:
     # Fail fast with a readable message instead of handing None (or an empty
     # string) to the SDK's connection-string parser.
     raise SystemExit('AZURE_CONNECTION_STRING environment variable is not set.')

 # Source URLs whose contents are copied into blob storage.
 files_to_process = [
     "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz",
     "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION.csv",
     "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/All_Beauty_5.json.gz",
 ]

 print(f'Copying {len(files_to_process)} Blobs from URL')

 blob_service_client = BlobServiceClient.from_connection_string(connection_string)

 # Every run creates a new container named data-<YYYYmmddHHMMSS>.
 timestamp = now.strftime("%Y%m%d%H%M%S")
 container_name = f'data-{timestamp}'
 container_client = blob_service_client.create_container(container_name)

 for file_to_process in files_to_process:
     # The blob is named after the last path segment of the source URL.
     url = urlparse(file_to_process)
     filename = os.path.basename(url.path)
     blob_client = container_client.get_blob_client(filename)
     # Only starts the copy; this script does not poll for completion afterwards.
     blob_client.start_copy_from_url(file_to_process, metadata=None, incremental_copy=False)

 print("Operation started. Closing application.")
	import os
	from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
	from datetime import datetime
	from urllib.parse import urlparse

	# Required package: azure-storage-blob
	# API Docs: https://docs.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url

	# Capture the start time once; it is used below to name the destination container.
	now = datetime.now()

	# The storage account connection string is read from the environment.
	connection_string = os.getenv('AZURE_CONNECTION_STRING')
	if not connection_string:
		# Fail fast with a readable message instead of handing None (or an empty
		# string) to the SDK's connection-string parser.
		raise SystemExit('AZURE_CONNECTION_STRING environment variable is not set.')

	# Source URLs whose contents are copied into blob storage.
	files_to_process = [
		"http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz",
		"http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION.csv",
		"http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/All_Beauty_5.json.gz",
	]

	print(f'Copying {len(files_to_process)} Blobs from URL')

	blob_service_client = BlobServiceClient.from_connection_string(connection_string)

	# Every run creates a new container named data-<YYYYmmddHHMMSS>.
	timestamp = now.strftime("%Y%m%d%H%M%S")
	container_name = f'data-{timestamp}'
	container_client = blob_service_client.create_container(container_name)

	for file_to_process in files_to_process:
		# The blob is named after the last path segment of the source URL.
		url = urlparse(file_to_process)
		filename = os.path.basename(url.path)
		blob_client = container_client.get_blob_client(filename)
		# Only starts the copy; this script does not poll for completion afterwards.
		blob_client.start_copy_from_url(file_to_process, metadata=None, incremental_copy=False)

	print("Operation started. Closing application.")