# How to upload 501M SHA1 hashes into a SQL database
#
# Based on:
# https://blog.netnerds.net/2015/01/powershell-high-performance-techniques-for-importing-csv-to-sql-server/
# https://gallery.technet.microsoft.com/scriptcenter/Import-Large-CSVs-into-SQL-216223d9
#

# Database variables
$sqlserver = "YOUR_SERVER"
$database = "YOUR_DATABASE"
$table = "YOUR_TABLE"

# CSV variables
$csvfile = ".\pwned-passwords-ordered-2.0.txt" # downloaded from https://haveibeenpwned.com/Passwords

################################# Helper function in C# ###############################
#
# Damn you PowerShell, it all starts so easy - and then you end up writing code like this.
#
$code = @"
//
// https://stackoverflow.com/questions/321370/how-can-i-convert-a-hex-string-to-a-byte-array
//
namespace Helper
{
    using System;

    public class Convert
    {
        public static byte[] StringToByteArrayFastest(string hex)
        {
            if (hex.Length % 2 == 1)
            {
                throw new Exception("The binary key cannot have an odd number of digits");
            }

            byte[] arr = new byte[hex.Length >> 1];

            for (int i = 0; i < hex.Length >> 1; ++i)
            {
                arr[i] = (byte)((GetHexVal(hex[i << 1]) << 4) + (GetHexVal(hex[(i << 1) + 1])));
            }

            return arr;
        }

        private static int GetHexVal(char hex)
        {
            int val = (int)hex;
            // For uppercase A-F letters:
            return val - (val < 58 ? 48 : 55);
            // For lowercase a-f letters:
            // return val - (val < 58 ? 48 : 87);
            // Or the two combined, but a bit slower:
            // return val - (val < 58 ? 48 : (val < 97 ? 55 : 87));
        }
    }
}
"@

Add-Type -TypeDefinition $code -Language CSharp

$elapsed = [System.Diagnostics.Stopwatch]::StartNew()
[void][Reflection.Assembly]::LoadWithPartialName("System.Data")
[void][Reflection.Assembly]::LoadWithPartialName("System.Data.SqlClient")

$batchsize = 50000

# Connect to SQL
$connectionstring = "Data Source=$sqlserver;Integrated Security=true;Initial Catalog=$database;"
$bulkcopy = New-Object Data.SqlClient.SqlBulkCopy($connectionstring, [System.Data.SqlClient.SqlBulkCopyOptions]::TableLock)
$bulkcopy.DestinationTableName = $table
$bulkcopy.bulkcopyTimeout = 0
$bulkcopy.batchsize = $batchsize

# Create the datatable. Add the columns by hand, since we have custom datatypes.
$datatable = New-Object System.Data.DataTable
[void]$datatable.Columns.Add("SHA1", [System.Byte[]])
[void]$datatable.Columns.Add("Freq", [System.Int32])

$i = 0

# Read in the data, line by line. No headers in our case.
$reader = New-Object System.IO.StreamReader($csvfile)
while (($line = $reader.ReadLine()) -ne $null) {
    $fields = $line.Split(":")
    $row = $datatable.NewRow()
    $row.SHA1 = [Helper.Convert]::StringToByteArrayFastest($fields[0])
    $row.Freq = [int]$fields[1]
    [void]$datatable.Rows.Add($row)

    $i++
    if (($i % $batchsize) -eq 0) {
        $bulkcopy.WriteToServer($datatable)
        $datatable.Clear()
        Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
    }
}

# Add in all the remaining rows since the last .Clear()
if ($datatable.Rows.Count -gt 0) {
    Write-Host "flushing last chunk of $($datatable.Rows.Count) rows"
    $bulkcopy.WriteToServer($datatable)
    $datatable.Clear()
    Write-Host "row count: $i `t elapsed: $($elapsed.Elapsed)"
}

# Clean up
$reader.Close(); $reader.Dispose()
$bulkcopy.Close(); $bulkcopy.Dispose()
$datatable.Dispose()

Write-Host "Script complete. $i rows have been inserted into the database."
Write-Host "Total Elapsed Time: $($elapsed.Elapsed.ToString())"

[System.GC]::Collect()
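
################################# Optional: destination table ##########################
#
# The bulk copy above assumes $table already exists with columns matching the
# DataTable (SHA1 as raw bytes, Freq as an integer). The snippet below is a
# minimal sketch of such a table -- the column types and the IF-NOT-EXISTS guard
# are assumptions, not part of the original import. Run it once, before the
# import, against the same server/database.
#
$createTableSql = @"
IF OBJECT_ID(N'$table', N'U') IS NULL
CREATE TABLE $table (
    SHA1 binary(20) NOT NULL,  -- a SHA1 digest is exactly 20 bytes
    Freq int        NOT NULL   -- prevalence count from the HIBP file
);
"@
$connection = New-Object System.Data.SqlClient.SqlConnection($connectionstring)
$connection.Open()
$command = $connection.CreateCommand()
$command.CommandText = $createTableSql
[void]$command.ExecuteNonQuery()
$connection.Close()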