Skip to content

Instantly share code, notes, and snippets.

@Hashbrown777
Created January 16, 2023 08:52
Show Gist options
  • Save Hashbrown777/b08e947001dcf6d322d147e2ca507401 to your computer and use it in GitHub Desktop.
Save Hashbrown777/b08e947001dcf6d322d147e2ca507401 to your computer and use it in GitHub Desktop.
Sees if there are any differences between two folder structures and file contents therein using a multithreaded process
#---------------------------------------------------------------------------
# User configuration: edit the values below before running the script.
#---------------------------------------------------------------------------
#folders you want checked that are found in each root
$paths = 'Pics of cats', 'Pics of dogs'
#roots to compare, must include terminating slash
$roots = '//?/D:/PicsBackup/', '//?/C:/Users/Hashbrown/Desktop/'
#any files that are permitted to differ and won't be checked
$skip = `
    'Pics of cats/test.ps1',
    'Pics of cats/tabby/download.log'
#periodically the script outputs the last successfully checked path.
#upon failure you can start from exactly where you were, skipping all previously checked files
#by putting that path here:
$startAfter = $False
#default is to stop on failure; you can change this and [manually] log output to check all differences later yourself
$ErrorActionPreference = 'Stop'
#maximum number of checking threads to run simultaneously
#[Environment]::ProcessorCount is always a single integer; (Get-ComputerInfo).CsProcessors is an
#array with one entry per processor, so reading NumberOfLogicalProcessors from it yields an array
#(not a number) on multi-socket machines and breaks the `-lt $parallel` comparison further down
$parallel = [Environment]::ProcessorCount
#maximum number of paths to check given to each thread as they spawn
$batch = 64
#Turn the configured folder list into a FIFO work queue.
#@() guarantees a collection is handed to the Queue constructor even when a single folder
#(a bare string) was configured; casting a lone string to Queue would fail.
$paths = [System.Collections.Queue]@($paths)
#Turn the skip list into a hashtable (path -> $True) so membership can be tested by key lookup.
#@() together with the emptiness check lets `$skip = $NULL` or an empty list mean "skip nothing"
#instead of failing on a null hashtable key.
$skip = @($skip) `
| &{
    Begin {
        $lookup = @{}
    }
    Process {
        if ($_) {
            $lookup[$_] = $True
        }
    }
    End {
        $lookup
    }
}
#Scriptblock run inside each background job.
#  $roots : the root folders to compare (passed as the job's argument)
#  input  : the relative paths assigned to this job
#For every path the item is looked up under each root and the results of a series of checks are
#compared; the first mismatch throws. For directories the relative paths of their children are
#written to the output; for files the size and MD5 hash are compared as well.
$test = { Param($roots)
    #checks applied to every item, whatever its kind
    $genericTests = @(
        {
            #does the item exist at all
            [bool]$_
        },
        {
            #$_.Attributes.HasFlag([System.IO.FileAttributes]::Directory)
            if ($_.Attributes -match '(^|,)\s*Directory\s*(,|$)') {
                'Directory'
            }
            else {
                'File'
            }
        }
    )
    #checks applied only when the item is not a directory
    $fileTests = @(
        {
            $_.Length
        },
        {
            ($_ | Get-FileHash -Algorithm MD5).Hash
        }
    )
    #Receives one check (a scriptblock) per pipeline object, runs it against every candidate and
    #throws as soon as a result differs from the result obtained for the first candidate.
    Filter Test { Param($candidates, $label)
        $baseline = $NULL
        $seen = 0
        $candidates `
        | ForEach-Object $_ `
        | ForEach-Object {
            if ($seen -eq 0) {
                $baseline = $_
            }
            elseif ($baseline -ne $_) {
                throw "$label`n`t$($baseline)`t$($roots[0])`n`t$($_)`t$($roots[$seen])`n."
            }
            ++$seen
        }
    }
    #`Process {` doesn't work
    $Input.GetEnumerator() `
    | ForEach-Object {
        $relative = $_
        #one entry per root; $NULL stands in for an item that could not be fetched
        $found = $roots `
        | ForEach-Object {
            try {
                #Get-Item won't error or return null when non-existent if path is UNC..
                (Get-Item -LiteralPath ($_ + $relative) -ErrorAction Stop)[0]
            }
            catch {
                $NULL
            }
        }
        $genericTests | Test $found $relative
        if ($found[0] -isnot [System.IO.DirectoryInfo]) {
            $fileTests | Test $found $relative
        }
        else {
            #emit the combined, de-duplicated child listing of every root for the caller to queue
            $found `
            | Get-ChildItem `
            | ForEach-Object { "$relative/$($_.Name)" } `
            | Sort-Object -Unique
        }
    }
}
#Scheduler: hands batches of paths to background jobs, feeds the child paths they return back
#into the work queue, and periodically outputs the last path up to which everything has finished.
#jobs that have been started and not yet seen to finish, keyed by job id
$running = @{}
#(job id, last path of that job's batch) pairs in launch order, used for progress reporting
$ran = [System.Collections.Queue]@()
try {
    while ($paths.Count -or $running.Count) {
        #top the pool up to $parallel jobs while there is work left
        while ($running.Count -lt $parallel) {
            $queue = [System.Collections.ArrayList]@()
            $last = $NULL
            while ($queue.Count -lt $batch -and $paths.Count -gt 0) {
                $path = $paths.Dequeue()
                if ($startAfter) {
                    #resuming: nothing is checked until $startAfter has been passed, but
                    #directories are still expanded so the walk reaches the remaining paths
                    if ($startAfter -eq $path) {
                        $startAfter = $False
                    }
                    $items = $roots `
                    | %{
                        (Get-Item -LiteralPath ($_ + $path) -ErrorAction Stop)[0]
                    }
                    if ($items[0] -is [System.IO.DirectoryInfo]) {
                        $items `
                        | Get-ChildItem `
                        | %{ "$path/$($_.Name)" } `
                        | Sort-Object -Unique `
                        | %{ $paths.Enqueue($_) }
                    }
                    continue
                }
                if ($skip[$path]) {
                    continue
                }
                $queue.Add(($last = $path)) | Out-Null
            }
            #nothing was queued, so there is no work to hand out right now
            if (!$last) {
                break
            }
            $run = Start-Job `
                -ScriptBlock $test `
                -InputObject $queue `
                -ArgumentList @(,$roots)
            $ran.Enqueue(@($run.Id, $last))
            $running[$run.Id] = $run
        }
        Sleep 1
        #iterate over a snapshot because entries are removed from $running inside the loop
        @($running.Values) `
        | %{
            if ($_.State -ne 'Running') {
                $running.Remove($_.Id)
            }
            #a job just dropped from $running is drained completely and deleted;
            #one that is still running only hands over what it has produced so far
            Receive-Job `
                $_ `
                -Wait:(!$running[$_.Id]) `
                -AutoRemoveJob:(!$running[$_.Id]) `
            | %{
                $paths.Enqueue($_)
            }
            #progress is only reported up to the oldest job that has not finished yet
            $last = $NULL
            while ($ran.Count -and !$running[$ran.Peek()[0]]) {
                $last = $ran.Dequeue()[1]
            }
            $last
        }
    }
}
finally {
    #After a normal run $ran is empty and nothing happens here. When the loop is left early
    #(a difference was reported under 'Stop', or the script was interrupted) $ran still lists
    #every job that has not been reported yet; stop and discard those so that no background
    #job is left behind in the session.
    $pending = @(foreach ($entry in $ran) { $entry[0] })
    if ($pending.Count) {
        Get-Job `
        | Where-Object { $pending -contains $_.Id } `
        | Remove-Job -Force
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment