Skip to content

Instantly share code, notes, and snippets.

@hodzanassredin
Created April 8, 2021 07:48
Show Gist options
  • Save hodzanassredin/61c44d9947dd826033f2f7d0060fe5a8 to your computer and use it in GitHub Desktop.
Save hodzanassredin/61c44d9947dd826033f2f7d0060fe5a8 to your computer and use it in GitHub Desktop.
Calculate MD5 checksums for files in blob storage (Azure Data Lake Gen2)
using Azure.Storage;
using Azure.Storage.Files.DataLake;
using Azure.Storage.Files.DataLake.Models;
using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
using System.Threading.Tasks;
namespace CopyToBlob
{
    /// <summary>
    /// Console tool that writes one "name TAB md5" line per file listed under a
    /// Data Lake directory, together with small helpers for listing local files,
    /// building a service client and uploading a stream.
    /// </summary>
    class Program
    {
        /// <summary>Maximum size, in bytes, of a single transfer used by <see cref="UploadFileBulk"/>.</summary>
        private const int MaximumUploadTransferSize = 100003838;

        /// <summary>
        /// Lists every file below <paramref name="path"/>, including files in sub-directories.
        /// </summary>
        /// <param name="path">Root directory to scan.</param>
        /// <returns>A lazily evaluated sequence of file paths.</returns>
        public static IEnumerable<string> GetDirFiles(string path) =>
            // EnumerateFiles streams results instead of building the whole array up front.
            Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories);

        /// <summary>
        /// Creates a <see cref="DataLakeServiceClient"/> authenticated with a shared account key.
        /// </summary>
        /// <param name="accountName">Storage account name; also used to build the DFS endpoint host.</param>
        /// <param name="accountKey">Shared key for the storage account.</param>
        /// <returns>A client bound to <c>https://{accountName}.dfs.core.windows.net</c>.</returns>
        public static DataLakeServiceClient GetDataLakeServiceClient(string accountName, string accountKey)
        {
            var sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
            var dfsUri = new Uri($"https://{accountName}.dfs.core.windows.net");
            return new DataLakeServiceClient(dfsUri, sharedKeyCredential);
        }

        /// <summary>
        /// Uploads the contents of <paramref name="fileStream"/> to
        /// <paramref name="dirName"/>/<paramref name="fileName"/> in the given file system.
        /// </summary>
        /// <param name="fileStream">Open stream whose content is uploaded; the caller keeps ownership.</param>
        /// <param name="dirName">Target directory inside the file system.</param>
        /// <param name="fileName">Target file name inside <paramref name="dirName"/>.</param>
        /// <param name="fileSystemClient">Client for the destination file system.</param>
        /// <returns>A task that completes when the upload call has finished.</returns>
        public static async Task UploadFileBulk(FileStream fileStream, string dirName, string fileName, DataLakeFileSystemClient fileSystemClient)
        {
            DataLakeDirectoryClient directoryClient = fileSystemClient.GetDirectoryClient(dirName);
            DataLakeFileClient fileClient = directoryClient.GetFileClient(fileName);

            // NOTE(review): the non-generic Progress type is not defined in the visible source;
            // it is constructed with the stream length and passed as the progress handler.
            var progress = new Progress(fileStream.Length);

            var options = new DataLakeFileUploadOptions
            {
                ProgressHandler = progress,
                TransferOptions = new StorageTransferOptions
                {
                    MaximumTransferSize = MaximumUploadTransferSize
                }
            };

            await fileClient.UploadAsync(fileStream, options);
        }

        /// <summary>
        /// Walks the entries of <c>/cs/ScoringData</c>, computes the MD5 of each file and appends
        /// "name TAB hex-md5" to <c>ScoringData-datalake.chk</c>, echoing each line to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            // The account name, account key and file-system name are empty strings here;
            // they must be filled in before the program can reach a real account.
            var ds = GetDataLakeServiceClient("", "");
            var fs = ds.GetFileSystemClient("");
            var dir = fs.GetDirectoryClient("/cs/ScoringData");

            // using blocks guarantee the checksum file and the hash object are released
            // even when a listing, download or hash call throws part-way through.
            using (var appender = File.AppendText("ScoringData-datalake.chk"))
            using (var md5 = MD5.Create())
            {
                foreach (var path in dir.GetPaths())
                {
                    // NOTE(review): assumes the listing can contain directory entries;
                    // those have no content to hash, so they are skipped.
                    if (path.IsDirectory == true)
                    {
                        continue;
                    }

                    var file = fs.GetFileClient(path.Name);
                    using (var stream = file.OpenRead())
                    {
                        byte[] hash = md5.ComputeHash(stream);
                        string sum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
                        string line = $"{path.Name}\t{sum}";

                        // Flush after every entry so progress survives an interrupted run.
                        await appender.WriteLineAsync(line);
                        await appender.FlushAsync();
                        Console.WriteLine(line);
                    }
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment