Created
April 8, 2021 07:48
-
-
Save hodzanassredin/61c44d9947dd826033f2f7d0060fe5a8 to your computer and use it in GitHub Desktop.
Calculate MD5 sums for files in blob storage (Azure Data Lake Gen2).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Azure.Storage; | |
using Azure.Storage.Files.DataLake; | |
using Azure.Storage.Files.DataLake.Models; | |
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Security.Cryptography; | |
using System.Threading.Tasks; | |
namespace CopyToBlob | |
{ | |
/// <summary>
/// Console utility that writes an MD5 checksum line for each file listed in an
/// Azure Data Lake Gen2 directory, plus helpers for building a service client,
/// walking a local directory tree, and uploading a single file.
/// </summary>
class Program
{
    // Connection settings used by Main. They are empty placeholders in this file and
    // must be filled in before the tool can reach a storage account.
    private const string AccountName = "";
    private const string AccountKey = "";
    private const string FileSystemName = "";

    // Data Lake directory whose entries are hashed by Main.
    private const string SourceDirectory = "/cs/ScoringData";

    // Local file that receives one "<path>\t<md5>" line per hashed file (opened in append mode).
    private const string ChecksumFilePath = "ScoringData-datalake.chk";

    // Upper bound, in bytes, handed to StorageTransferOptions.MaximumTransferSize on upload.
    // NOTE(review): the exact value (100003838) is carried over unchanged; confirm it is intentional.
    private const int MaximumUploadTransferSize = 100003838;

    /// <summary>
    /// Lazily enumerates every file under <paramref name="path"/>, including all subdirectories.
    /// </summary>
    /// <param name="path">Root directory to search.</param>
    /// <returns>A deferred sequence of file paths; the directory is not touched until enumeration starts.</returns>
    public static IEnumerable<string> GetDirFiles(string path)
    {
        // EnumerateFiles streams results instead of materializing the whole tree in an array first.
        foreach (string file in Directory.EnumerateFiles(path, "*", SearchOption.AllDirectories))
        {
            yield return file;
        }
    }

    /// <summary>
    /// Creates a <see cref="DataLakeServiceClient"/> authenticated with a shared account key.
    /// </summary>
    /// <param name="accountName">Storage account name; also used to build the "dfs.core.windows.net" endpoint URI.</param>
    /// <param name="accountKey">Shared key for the storage account.</param>
    /// <returns>A client bound to <c>https://{accountName}.dfs.core.windows.net</c>.</returns>
    public static DataLakeServiceClient GetDataLakeServiceClient(string accountName, string accountKey)
    {
        var sharedKeyCredential = new StorageSharedKeyCredential(accountName, accountKey);
        var dfsUri = new Uri($"https://{accountName}.dfs.core.windows.net");

        return new DataLakeServiceClient(dfsUri, sharedKeyCredential);
    }

    /// <summary>
    /// Uploads the contents of <paramref name="fileStream"/> to <paramref name="fileName"/> inside
    /// <paramref name="dirName"/> of the given file system.
    /// </summary>
    /// <param name="fileStream">Open stream to upload; its length is passed to the progress handler.</param>
    /// <param name="dirName">Target directory within the file system.</param>
    /// <param name="fileName">Target file name within <paramref name="dirName"/>.</param>
    /// <param name="fileSystemClient">Client for the destination file system.</param>
    /// <returns>A task that completes when the upload call has finished.</returns>
    public static async Task UploadFileBulk(FileStream fileStream, string dirName, string fileName, DataLakeFileSystemClient fileSystemClient)
    {
        DataLakeDirectoryClient directoryClient = fileSystemClient.GetDirectoryClient(dirName);

        // NOTE(review): Progress is not declared in this file; presumably a project-local
        // IProgress<long> implementation that takes the total byte count - confirm.
        var progress = new Progress(fileStream.Length);
        var options = new DataLakeFileUploadOptions
        {
            ProgressHandler = progress,
            TransferOptions = new StorageTransferOptions
            {
                MaximumTransferSize = MaximumUploadTransferSize
            }
        };

        DataLakeFileClient fileClient = directoryClient.GetFileClient(fileName);
        await fileClient.UploadAsync(fileStream, options);
    }

    /// <summary>
    /// Lists the entries of <see cref="SourceDirectory"/>, computes the MD5 hash of each file's
    /// content, and appends "<c>path&lt;TAB&gt;md5</c>" lines to <see cref="ChecksumFilePath"/>
    /// while echoing the same lines to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        DataLakeServiceClient service = GetDataLakeServiceClient(AccountName, AccountKey);
        DataLakeFileSystemClient fileSystem = service.GetFileSystemClient(FileSystemName);
        DataLakeDirectoryClient directory = fileSystem.GetDirectoryClient(SourceDirectory);

        // Both disposables are scoped with using so the log file handle and the hash
        // algorithm are released even when a download or hash computation throws.
        using (StreamWriter checksumLog = File.AppendText(ChecksumFilePath))
        using (MD5 md5 = MD5.Create())
        {
            // NOTE(review): GetPaths() is called with its default arguments; confirm whether
            // a recursive listing is wanted for nested directories.
            foreach (PathItem path in directory.GetPaths())
            {
                // Directory entries have no content to hash; only files are opened for reading.
                if (path.IsDirectory == true)
                {
                    continue;
                }

                DataLakeFileClient file = fileSystem.GetFileClient(path.Name);

                string sum;
                using (Stream stream = file.OpenRead())
                {
                    byte[] hash = md5.ComputeHash(stream);
                    // Invariant casing: the checksum is machine-readable, not culture-dependent text.
                    sum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
                }

                string line = $"{path.Name}\t{sum}";

                // Flush after every entry so completed results survive a later failure.
                await checksumLog.WriteLineAsync(line);
                await checksumLog.FlushAsync();
                Console.WriteLine(line);
            }
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment