# Last active December 24, 2024 19:50
# Bulk NOAA AWS to SatDump Processor
# BulkAWS2SatDump.ps1
# See the param() block for all supported parameters
# Supported Satellite/Region Parameters:
# -Satellite goes16, -Satellite goes17, -Satellite goes18, or -Satellite goes19
# -Region FD, -Region CONUS, -Region PACUS, -Region M1, or -Region M2
# -Satellite gk2a
# -Region FD or -Region LA
# Notes:
# - SatDump 1.2.3 must be installed and in your system PATH (otherwise, modify the `satdump off2pro` invocation near the end of this script).
# - Be careful with MaxParallel! For full disk images, you need roughly 8GB RAM
# per parallel thread. Regional images require less memory and can be parallelized
# better.
# Example Use: .\BulkAWS2SatDump.ps1 -Satellite goes16 -Region CONUS -OutDir "C:\Users\jvita\Desktop\CONUS" -StartTime "December 16, 2024 5:00 AM" -EndTime "December 23, 2024 4:59 AM" -MaxParallel 4
# NOTE(review): $Satellite, $Region, $OutDir, $StartTime, $EndTime, $MaxParallel and
# $KeepSource are expected to be supplied by the script's param() block, which must
# precede everything below. $StartTime/$EndTime are used as [DateTime] values.

# Force TLS 1.2 for every web request made by this process (including the worker runspaces).
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12

# Lists every object key returned by an S3 "list-type=2" query, following
# NextContinuationToken until the listing is exhausted.
#   -ListUrl : full listing URL including the "?list-type=2&prefix=..." query string.
# Emits one key (string) per object. Throws when a request does not return HTTP 200
# or the response is not a ListBucketResult document.
function Get-S3ObjectKeys
{
    param([Parameter(Mandatory)][string]$ListUrl)

    $token_suffix = ""
    do
    {
        $response = Invoke-WebRequest -Uri ($ListUrl + $token_suffix) -UseBasicParsing
        if($response.StatusCode -ne 200)
        {
            throw "Error querying $ListUrl!"
        }

        # Parse the response body explicitly rather than casting the response object.
        $listing = [xml]$response.Content
        if($listing.GetElementsByTagName("ListBucketResult").Count -eq 0)
        {
            throw "No Data returned by AWS!"
        }

        foreach($entry in $listing.ListBucketResult.Contents)
        {
            $entry.Key
        }

        # Check if AWS has more data for us
        $token_suffix = ""
        if($listing.ListBucketResult.GetElementsByTagName("NextContinuationToken").Count -gt 0)
        {
            $token_suffix = "&continuation-token=" + [uri]::EscapeDataString($listing.ListBucketResult.NextContinuationToken)
        }
    } while($token_suffix -ne "")
}

if($StartTime -gt $EndTime)
{
    throw "Start time cannot be after the end time!"
}
if(-not [System.IO.Path]::IsPathRooted($OutDir))
{
    throw "OutDir must be an absolute path"
}

$baseURL = "https://noaa-"
$invariant = [System.Globalization.CultureInfo]::InvariantCulture

# Maps scan start time -> array of file URLs belonging to that scan
$files_to_download = @{}

# Number of calendar days between the two dates. Using the dates (not the raw
# TimeSpan) makes sure a range such as 23:00 -> 01:00 still queries both days.
$day_count = ($EndTime.Date - $StartTime.Date).Days

# Pull files for GOES
if($Satellite -like "goes*")
{
    if($Region -eq "FD")
    {
        $urlRegion = "ABI-L1b-RadF"
        $urlSubregion = "RadF"
    }
    elseif($Region -eq "CONUS" -or $Region -eq "PACUS")
    {
        $urlRegion = "ABI-L1b-RadC"
        $urlSubregion = "RadC"
    }
    elseif($Region -eq "M1")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM1"
    }
    elseif($Region -eq "M2")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM2"
    }
    else
    {
        throw "Unknown region for $Satellite $Region"
    }

    # NOTE(review): bucket host follows NOAA's public naming (noaa-goes16, noaa-goes18, ...) - confirm.
    $baseURL += "$($Satellite.ToLower()).s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=$urlRegion%2F"

    # Get all GOES Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."
    for($i = 0; $i -le $day_count; $i++)
    {
        $this_day = $StartTime.Date.AddDays($i)

        # Keys are laid out as <product>/<year>/<day-of-year>/...; the day of year must be
        # zero-padded to three digits or days 1-99 match the wrong prefix (or nothing).
        $day_url = $regionURL + $this_day.Year + "%2F" + ("{0:D3}" -f $this_day.DayOfYear) + "%2F"

        foreach($key in @(Get-S3ObjectKeys -ListUrl $day_url))
        {
            # Sanity Checks
            $file_name = $key.Split("/")[-1].Split(".")[0]
            $split1 = $file_name.Split("_")
            if($split1.Count -ne 6)
            {
                Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                continue
            }
            $split2 = $split1[1].Split("-")
            if($split2.Count -ne 4)
            {
                Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                continue
            }

            # Skip files for other sub-regions (e.g. M2 when M1 was requested)
            if($split2[2] -ne $urlSubregion) {continue}

            # Scan start field: 's' + yyyy + ddd (day of year) + HH + mm + ss (+ extra digits)
            $stamp = $split1[3]
            if($stamp -notmatch '^s\d{13}')
            {
                Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                continue
            }
            $this_datetime = New-Object DateTime -ArgumentList @([int]$stamp.Substring(1, 4), 1, 1, `
                [int]$stamp.Substring(8, 2), [int]$stamp.Substring(10, 2), [int]$stamp.Substring(12, 2))
            $this_datetime = $this_datetime.AddDays([int]$stamp.Substring(5, 3) - 1)

            # Skip files outside of the time filter (values are compared as-is, no time zone conversion)
            if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

            # Add files to download
            if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
            $files_to_download[$this_datetime] += $baseURL + $key
        }
    }
}

# Pull files for GK-2A
elseif($Satellite.Replace('-', '') -eq "GK2A")
{
    # Checks, and ensure case is correct
    if($Region -eq "FD")
    {
        $urlRegion = "FD"
    }
    elseif($Region -eq "LA")
    {
        $urlRegion = "LA"
    }
    else
    {
        throw "Unknown region for $Satellite $Region"
    }

    # NOTE(review): bucket host follows NOAA's public naming (noaa-gk2a-pds) - confirm.
    $baseURL += "gk2a-pds.s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=AMI%2FL1B%2F$urlRegion"

    # Get all GK-2A Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."
    for($i = 0; $i -le $day_count; $i++)
    {
        $this_day = $StartTime.Date.AddDays($i)

        # Prefix is <yyyyMM>/<dd>/; month and day must be zero-padded to match the keys.
        $day_url = "$($regionURL)%2F$($this_day.ToString('yyyyMM', $invariant))%2F$($this_day.ToString('dd', $invariant))%2F"

        foreach($key in @(Get-S3ObjectKeys -ListUrl $day_url))
        {
            $file_name = $key.Split("/")[-1].Split(".")[0]
            $split = $file_name.Split("_")
            if($split.Count -ne 6)
            {
                Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                continue
            }

            # The last field of the file name is the scan time (yyyyMMddHHmm)
            $this_datetime = [DateTime]::MinValue
            if(-not [DateTime]::TryParseExact($split[5], "yyyyMMddHHmm", $invariant, `
                [System.Globalization.DateTimeStyles]::None, [ref]$this_datetime))
            {
                Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                continue
            }

            # Skip files outside of the time filter
            if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

            # Add files to download
            if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
            $files_to_download[$this_datetime] += $baseURL + $key
        }
    }
}

#TODO: Handle pulling files from Himawari
else
{
    throw "Unknown satellite $Satellite"
}

# Download and parse files
if(-not $(Test-Path $OutDir -ErrorAction SilentlyContinue)) {New-Item -ItemType Directory -Path $OutDir -Force | Out-Null}
if(-not $(Test-Path "$OutDir/_source" -ErrorAction SilentlyContinue)) {New-Item -ItemType Directory -Path "$OutDir/_source" -Force | Out-Null}

# Work executed in each runspace: download every file of one scan, then run SatDump on it.
$worker = {
    param($OutDir, $this_time_files)

    # The download progress bar is never shown from a runspace; disabling it avoids its overhead.
    $ProgressPreference = 'SilentlyContinue'

    # Compute Output Folder (yyyy-MM-dd_HH-mm-ss of the scan time)
    $scan_time = $this_time_files.Key
    $this_path = "$OutDir/" + $scan_time.ToString("yyyy-MM-dd_HH-mm-ss", [System.Globalization.CultureInfo]::InvariantCulture)

    # Skip if already done
    if(Test-Path $this_path -ErrorAction SilentlyContinue) {return}

    # Download Files
    $last_file = $null
    foreach($download_url in $this_time_files.Value)
    {
        $last_file = "$OutDir/_source/$($download_url.Split("/")[-1])"
        Invoke-WebRequest -Uri $download_url -OutFile $last_file -UseBasicParsing
    }
    if($null -eq $last_file) {return}

    # Process. SatDump is handed the last downloaded file of the scan, as before.
    satdump off2pro file $last_file $this_path 2>&1 | Out-Null
}

$runspaces = @()
$pool = [runspacefactory]::CreateRunspacePool(1, [Math]::Max(1, [int]$MaxParallel))
$pool.Open()    # the pool must be opened before any instance can be invoked on it

foreach($this_time_files in $files_to_download.GetEnumerator())
{
    $PSInstance = [powershell]::Create().AddScript($worker).AddParameter("OutDir", $OutDir).AddParameter("this_time_files", $this_time_files)
    $PSInstance.RunspacePool = $pool
    $runspaces += [pscustomobject]@{Instance=$PSInstance; Result=$PSInstance.BeginInvoke()}
}

# Wait for all processing to finish
while(@($runspaces | Where-Object { -not $_.Result.IsCompleted }).Count -gt 0)
{
    $completed = @($runspaces | Where-Object { $_.Result.IsCompleted }).Count
    Write-Output "[$(Get-Date -Format 'G')] Completed processing $completed/$($runspaces.Count) products..."
    Start-Sleep -Seconds 60
}

# Done, clean up
foreach($job in $runspaces)
{
    try
    {
        $job.Instance.EndInvoke($job.Result) | Out-Null

        # Surface errors recorded inside the worker instead of dropping them silently
        foreach($worker_error in $job.Instance.Streams.Error)
        {
            Write-Warning "Worker error: $worker_error"
        }
    }
    catch
    {
        Write-Warning "Worker failed: $_"
    }
    finally
    {
        $job.Instance.Dispose()
    }
}
$pool.Close()
$pool.Dispose()

if($KeepSource -eq $false)
{
    Remove-Item -Recurse -Force "$OutDir/_source"
}
