Skip to content

Instantly share code, notes, and snippets.

@MaximRouiller
Created December 3, 2020 21:32
Show Gist options
  • Save MaximRouiller/04078c7e567a5f9d5211b16702b8f884 to your computer and use it in GitHub Desktop.
Save MaximRouiller/04078c7e567a5f9d5211b16702b8f884 to your computer and use it in GitHub Desktop.
Automatically download files to Azure Blob Storage using Copy From URL API with node
// Required packages: @azure/storage-blob and dotenv
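// API Docs: https://docs.microsoft.com/en-us/rest/api/storageservices/copy-blob (asynchronous copy, used by beginCopyFromURL)
// See also: https://docs.microsoft.com/en-us/rest/api/storageservices/copy-blob-from-url (synchronous variant)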
require('dotenv').config();
const { BlobServiceClient } = require('@azure/storage-blob');
const path = require('path');
// Captured once at load time; main() derives the container name from this value.
const now = new Date();
// Storage account connection string, read from the environment
// (dotenv's config() call above runs before this line reads process.env).
const AZURE_STORAGE_CONNECTION_STRING = process.env.AZURE_STORAGE_CONNECTION_STRING;
/**
 * Formats a date as a fixed-width `YYYYMMDDHHmmss` string in local time.
 *
 * Every component is zero-padded so that names built from it sort
 * chronologically and two different instants never produce the same digits.
 *
 * @param {Date} date - The instant to format.
 * @returns {string} A 14-digit timestamp.
 */
function formatTimestamp(date) {
  const pad = (value) => String(value).padStart(2, '0');
  return [
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is 0-indexed
    pad(date.getDate()), // day of month (getDay() would be the day of week)
    pad(date.getHours()),
    pad(date.getMinutes()),
    pad(date.getSeconds()),
  ].join('');
}

/**
 * Creates a new `data-<timestamp>` container and asks the storage service to
 * copy each source URL into a block blob named after the URL's file name.
 *
 * The returned promise settles once every copy request has been issued; it
 * does not poll the copies to completion.
 * NOTE(review): relies on beginCopyFromURL resolving when the copy has been
 * accepted by the service rather than when it finishes; confirm against the
 * SDK documentation.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the connection string is not configured, or if creating
 *   the container or starting any copy fails.
 */
async function main() {
  if (!AZURE_STORAGE_CONNECTION_STRING) {
    throw new Error('AZURE_STORAGE_CONNECTION_STRING is not set.');
  }

  const filesToProcess = [
    "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION_5.json.gz",
    "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/AMAZON_FASHION.csv",
    "http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/All_Beauty_5.json.gz",
  ];
  console.log(`Copying ${filesToProcess.length} Blobs from URL`);

  const blobServiceClient = BlobServiceClient.fromConnectionString(AZURE_STORAGE_CONNECTION_STRING);
  const containerName = `data-${formatTimestamp(now)}`;
  const { containerClient } = await blobServiceClient.createContainer(containerName);

  // Issue all copy requests in parallel and await them, so a failure to start
  // any copy rejects main() instead of becoming an unhandled rejection.
  await Promise.all(
    filesToProcess.map((fileToProcess) => {
      const { pathname } = new URL(fileToProcess);
      const blobClient = containerClient.getBlockBlobClient(path.basename(pathname));
      return blobClient.beginCopyFromURL(fileToProcess);
    }),
  );
}
// Script entry point: run main(), report the outcome, and signal failure to
// the calling shell through a non-zero exit code.
main()
  .then(() => console.log('Operation started. Closing application.'))
  .catch((ex) => {
    // Errors go to stderr; exitCode (rather than process.exit) lets pending
    // I/O flush before the process ends.
    console.error(ex.message);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment