# How to upload 501M SHA1 hashes into a SQL database
#
# Based on:
# https://blog.netnerds.net/2015/01/powershell-high-performance-techniques-for-importing-csv-to-sql-server/
# https://gallery.technet.microsoft.com/scriptcenter/Import-Large-CSVs-into-SQL-216223d9
# 
# Database variables
$sqlserver = "YOUR_SERVER"
$database = "YOUR_DATABASE"
$table = "YOUR_TABLE"
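
# The destination table is assumed to exist already, with a binary hash column and an
# integer count column matching the DataTable built further down. A minimal T-SQL sketch
# (the table name and exact types here are assumptions - adjust to your own schema):
#
#   CREATE TABLE dbo.PwnedPasswords (
#       SHA1 binary(20) NOT NULL,  -- raw 20-byte SHA-1 hash
#       Freq int        NOT NULL   -- prevalence count from the source file
#   );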
 
# CSV variables
$csvfile = ".\pwned-passwords-ordered-2.0.txt" # downloaded from https://haveibeenpwned.com/Passwords
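
# Each line of the ordered-by-hash file is "<40-char uppercase SHA-1 hex>:<prevalence count>",
# e.g. (illustrative values only) 0123456789ABCDEF0123456789ABCDEF01234567:42
# which is why the loop below splits on ":" and hex-decodes the first field.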

################################# Helper function in C# ###############################
#
# Damn you PowerShell, it all starts so easy - and then you end up writing code like this.
# 

$code = @"
// 
// https://stackoverflow.com/questions/321370/how-can-i-convert-a-hex-string-to-a-byte-array
//
namespace Helper {
    using System;
    public class Convert { 
         public static byte[] StringToByteArrayFastest(string hex) 
         {
                if (hex.Length % 2 == 1) 
                {
                    throw new Exception("The binary key cannot have an odd number of digits");
                }
                byte[] arr = new byte[hex.Length >> 1];
                for (int i = 0; i < hex.Length >> 1; ++i)
                {
                    arr[i] = (byte)((GetHexVal(hex[i << 1]) << 4) + (GetHexVal(hex[(i << 1) + 1])));
                }
                return arr;
         }
         private static int GetHexVal(char hex) 
         {
                int val = (int)hex;
                //For uppercase A-F letters:
                return val - (val < 58 ? 48 : 55);
                //For lowercase a-f letters:
                //return val - (val < 58 ? 48 : 87);
                //Or the two combined, but a bit slower:
                //return val - (val < 58 ? 48 : (val < 97 ? 55 : 87));
         }
    }
}
"@;


Add-Type -TypeDefinition $code -Language CSharp
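
# Optional sanity check of the compiled helper before chewing through the whole file:
# "ABCD" should decode to the two bytes AB CD (uppercase hex only, per GetHexVal above).
# [Helper.Convert]::StringToByteArrayFastest("ABCD") | ForEach-Object { $_.ToString("X2") }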

$elapsed = [System.Diagnostics.Stopwatch]::StartNew() 
Add-Type -AssemblyName System.Data   # the System.Data.SqlClient types live in this assembly
$batchsize = 50000

# Connect to SQL Server and set up the bulk copy
$connectionstring = "Data Source=$sqlserver;Integrated Security=true;Initial Catalog=$database;"
$bulkcopy = New-Object Data.SqlClient.SqlBulkCopy($connectionstring, [System.Data.SqlClient.SqlBulkCopyOptions]::TableLock)
$bulkcopy.DestinationTableName = $table
$bulkcopy.BulkCopyTimeout = 0    # 0 = no timeout; loading half a billion rows takes a while
$bulkcopy.BatchSize = $batchsize
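
# With no explicit column mappings, SqlBulkCopy maps source columns to destination columns
# by ordinal, so the column order of the DataTable below must match the table. To map by
# name instead (assuming the destination columns are indeed called SHA1 and Freq), uncomment:
# [void]$bulkcopy.ColumnMappings.Add("SHA1", "SHA1")
# [void]$bulkcopy.ColumnMappings.Add("Freq", "Freq")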
 
# Create the datatable. Add the columns by hand, since we have custom datatypes
$datatable = New-Object System.Data.DataTable
[void]$datatable.Columns.Add("SHA1",[System.Byte[]])
[void]$datatable.Columns.Add("Freq",[System.Int32])
 
$i = 0

# Read in the data, line by line. No headers in our case.
$reader = New-Object System.IO.StreamReader($csvfile)
while (($line = $reader.ReadLine()) -ne $null)  
{
    $fields = $line.Split(":")
    $row = $datatable.NewRow()
    $row.SHA1 = [Helper.Convert]::StringToByteArrayFastest($fields[0])
    $row.Freq = [int]$fields[1]
    [void]$datatable.Rows.Add($row)
    $i++; 
    if (($i % $batchsize) -eq 0) 
    { 
        $bulkcopy.WriteToServer($datatable) 
        $datatable.Clear() 
        Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
    } 
} 
 
# Add in all the remaining rows since the last .Clear()
if($datatable.Rows.Count -gt 0) 
{
    Write-Host "flushing last chunk of $($datatable.Rows.Count) rows"
    $bulkcopy.WriteToServer($datatable)
    $datatable.Clear()
    Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
}
 
# Clean Up
$reader.Close(); $reader.Dispose()
$bulkcopy.Close(); $bulkcopy.Dispose()
$datatable.Dispose()
 
Write-Host "Script complete. $i rows have been inserted into the database."
Write-Host "Total Elapsed Time: $($elapsed.Elapsed.ToString())"

[System.GC]::Collect()