Last active
December 24, 2024 19:50
-
-
Save JVital2013/93245384eca11f85fc5646b950c0e9be to your computer and use it in GitHub Desktop.
Bulk NOAA AWS to SatDump Processor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# BulkAWS2SatDump.ps1
#
# See the param() block for all supported parameters
#
# Supported Satellite/Region Parameters:
# -Satellite goes16, -Satellite goes17, -Satellite goes18, or -Satellite goes19
# -Region FD, -Region CONUS, -Region PACUS, -Region M1, or -Region M2
#
# -Satellite gk2a
# -Region FD or -Region LA
#
# Notes:
# - SatDump 1.2.3 must be installed and in your system PATH (otherwise, modify the
# "satdump off2pro" call in the processing script block near the end of this file).
# - Be careful with MaxParallel! For full disk images, you need roughly 8GB RAM
# per parallel thread. Regional images require less memory and can be parallelized
# better.
#
# Example Use: .\BulkAWS2SatDump.ps1 -Satellite goes16 -Region CONUS -OutDir "C:\Users\jvita\Desktop\CONUS" -StartTime "December 16, 2024 5:00 AM" -EndTime "December 23, 2024 4:59 AM" -MaxParallel 4
param(
    # Directory that receives one output folder per timestamp. Validated below to be an
    # absolute path. The parameter is mandatory, so PowerShell prompts for it when omitted.
    [Parameter(Mandatory)]
    [string] $OutDir = "C:\output",

    # Satellite to pull from; the script handles names starting with "goes" and "gk2a"/"gk-2a".
    [string] $Satellite = "goes16",

    # Region/scene to pull; the accepted values depend on the satellite branch below.
    [string] $Region = "FD",

    # Start of the time window (inclusive). Defaults to 24 hours ago.
    [DateTime] $StartTime = (Get-Date).AddDays(-1),

    # End of the time window (inclusive). Defaults to now.
    [DateTime] $EndTime = (Get-Date),

    # When set, the downloaded files under "$OutDir/_source" are kept after processing.
    [switch] $KeepSource = $false,

    # Upper bound on the number of runspaces processing timestamps at the same time.
    [int] $MaxParallel = 2
)
# Use TLS 1.2 for the web requests made by this session
[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12

# State shared with the satellite-specific sections below:
#   $baseURL            - bucket URL, completed once the satellite is known
#   $files_to_download  - maps a scan timestamp to the list of file URLs for that timestamp
$baseURL = "https://noaa-"
$files_to_download = @{}

# Reject an inverted time window
if($EndTime -lt $StartTime)
{
    Write-Error "Start time cannot be after the end time!"
    return
}

# Only absolute output paths are accepted
$outdir_is_rooted = [System.IO.Path]::IsPathRooted($OutDir)
if(-not $outdir_is_rooted)
{
    Write-Error "OutDir must be an absolute path"
    return
}
# Pull files for GOES
# -like is used instead of Substring(0, 4), which throws when -Satellite is shorter
# than four characters.
if($Satellite -like "goes*")
{
    # Map the requested region to the ABI L1b product folder ($urlRegion) and to the
    # scene token that appears inside each file name ($urlSubregion).
    if($Region -eq "FD")
    {
        $urlRegion = "ABI-L1b-RadF"
        $urlSubregion = "RadF"
    }
    elseif($Region -eq "CONUS" -or $Region -eq "PACUS")
    {
        $urlRegion = "ABI-L1b-RadC"
        $urlSubregion = "RadC"
    }
    elseif($Region -eq "M1")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM1"
    }
    elseif($Region -eq "M2")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM2"
    }
    else
    {
        Write-Error "Unknown region for $Satellite $Region"
        return
    }

    $baseURL += "$Satellite.s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=$urlRegion%2F"

    # Get all GOES Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."

    # Walk calendar days rather than elapsed 24-hour periods: a window such as
    # 23:00 -> 01:00 spans two day folders even though its TimeSpan.Days is 0.
    $last_day = $EndTime.Date
    for($this_day = $StartTime.Date; $this_day -le $last_day; $this_day = $this_day.AddDays(1))
    {
        # Day folders in the bucket are keyed by a three-digit day of year (e.g. 005).
        # Without the padding and the trailing slash, a prefix such as "35" would
        # list days 350-359 instead of day 035.
        $day_url = $regionURL + $this_day.Year + "%2F" + ("{0:D3}" -f $this_day.DayOfYear) + "%2F"
        $continuation = $true
        $continuation_token = ""

        # One request per result page; AWS hands back a continuation token while more pages remain
        while($continuation)
        {
            $aws_result = Invoke-WebRequest -Uri $($day_url + $continuation_token)
            if($aws_result.StatusDescription -ne "OK")
            {
                Write-Error "Error querying $day_url!"
                return
            }

            $aws_xml = [xml]$aws_result
            if($aws_xml.GetElementsByTagName("ListBucketResult").Count -eq 0)
            {
                Write-Error "No Data returned by AWS!"
                return
            }

            # Loop through request results
            foreach($content_result in $aws_xml.ListBucketResult.Contents)
            {
                # Sanity checks: the file name must have 6 "_"-separated fields, and its
                # second field must have 4 "-"-separated parts
                $file_name = $content_result.Key.Split("/")[-1].split(".")[0]
                $split1 = $file_name.Split("_")
                if($split1.Count -ne 6)
                {
                    Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                    continue
                }

                $split2 = $split1[1].split("-")
                if($split2.Count -ne 4)
                {
                    Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                    continue
                }

                # Skip files that belong to a different scene (e.g. M2 when M1 was requested)
                if($split2[2] -ne $urlSubregion) {continue}

                # Fourth field is read as: 1 prefix character, 4-digit year, 3-digit day of
                # year, then 2 digits each for hour, minute and second
                $start_stamp = $split1[3]
                $this_datetime = New-Object DateTime -ArgumentList ([int]$start_stamp.Substring(1, 4), 1, 1, [int]$start_stamp.Substring(8, 2), [int]$start_stamp.Substring(10, 2), [int]$start_stamp.Substring(12, 2))
                $this_datetime = $this_datetime.AddDays([int]$start_stamp.Substring(5, 3) - 1)

                # Skip files outside of the time filter
                if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

                # Add files to download, grouped by timestamp
                if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
                $files_to_download[$this_datetime] += $baseURL + $content_result.Key
            }

            # Check if AWS has more data for us
            if($aws_xml.ListBucketResult.GetElementsByTagName("NextContinuationToken").Count -gt 0)
            {
                $continuation_token = "&continuation-token=" + [uri]::EscapeDataString($aws_xml.ListBucketResult.NextContinuationToken)
            }
            else
            {
                $continuation = $false
            }
        }
    }
}
# Pull files for GK-2A
elseif($Satellite.Replace('-', '') -eq "GK2A")
{
    # Checks, and ensure case is correct (-eq ignores case; the URL gets the upper-case form)
    if($Region -eq "FD")
    {
        $urlRegion = "FD"
    }
    elseif($Region -eq "LA")
    {
        $urlRegion = "LA"
    }
    else
    {
        Write-Error "Unknown region for $Satellite $Region"
        return
    }

    $baseURL += "gk2a-pds.s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=AMI%2FL1B%2F$urlRegion"

    # Get all GK-2A Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."

    # Walk calendar days rather than elapsed 24-hour periods: a window such as
    # 23:00 -> 01:00 spans two day folders even though its TimeSpan.Days is 0.
    $last_day = $EndTime.Date
    for($this_day = $StartTime.Date; $this_day -le $last_day; $this_day = $this_day.AddDays(1))
    {
        # Folders are keyed as yyyyMM/dd with zero padding. Unpadded values break the
        # prefix for months and days 1-9 (e.g. "20241" also matches 202410-202412).
        # The trailing slash pins the prefix to exactly one day.
        $month_folder = "{0:D4}{1:D2}" -f $this_day.Year, $this_day.Month
        $day_folder = "{0:D2}" -f $this_day.Day
        $day_url = "$($regionURL)%2F$($month_folder)%2F$($day_folder)%2F"
        $continuation = $true
        $continuation_token = ""

        # One request per result page; AWS hands back a continuation token while more pages remain
        while($continuation)
        {
            $aws_result = Invoke-WebRequest -Uri $($day_url + $continuation_token)
            if($aws_result.StatusDescription -ne "OK")
            {
                Write-Error "Error querying $day_url!"
                return
            }

            $aws_xml = [xml]$aws_result
            if($aws_xml.GetElementsByTagName("ListBucketResult").Count -eq 0)
            {
                Write-Error "No Data returned by AWS!"
                return
            }

            # Loop through request results
            foreach($content_result in $aws_xml.ListBucketResult.Contents)
            {
                # Sanity check: the file name must have 6 "_"-separated fields
                $file_name = $content_result.Key.Split("/")[-1].split(".")[0]
                $split = $file_name.Split("_")
                if($split.Count -ne 6)
                {
                    Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                    continue
                }

                # The last field is the timestamp. TryParseExact is used so a malformed
                # name is skipped instead of throwing and leaving a stale $this_datetime.
                $this_datetime = [DateTime]::MinValue
                $stamp_ok = [DateTime]::TryParseExact($split[5], "yyyyMMddHHmm", [System.Globalization.CultureInfo]::InvariantCulture, [System.Globalization.DateTimeStyles]::None, [ref]$this_datetime)
                if(-not $stamp_ok)
                {
                    Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                    continue
                }

                # Skip files outside of the time filter
                if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

                # Add files to download, grouped by timestamp
                if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
                $files_to_download[$this_datetime] += $baseURL + $content_result.Key
            }

            # Check if AWS has more data for us
            if($aws_xml.ListBucketResult.GetElementsByTagName("NextContinuationToken").Count -gt 0)
            {
                $continuation_token = "&continuation-token=" + [uri]::EscapeDataString($aws_xml.ListBucketResult.NextContinuationToken)
            }
            else
            {
                $continuation = $false
            }
        }
    }
}
#TODO: Handle pulling files from Himawari
else
{
    Write-Error "Unknown satellite $Satellite"
    return
}
# Download and parse files
# Make sure the output folder and its "_source" download folder exist
foreach($needed_dir in @($OutDir, "$OutDir/_source"))
{
    if(-not $(Test-Path $needed_dir -ErrorAction SilentlyContinue)) {mkdir $needed_dir | Out-Null}
}

# Work done for one timestamp: download every file of the group, then run SatDump on it
$worker_script = {
    param($OutDir, $this_time_files)

    # Compute Output Folder: <OutDir>/yyyy-MM-dd_HH-mm-ss, built from the group's timestamp key
    $stamp = $this_time_files.Name
    $date_part = "$($stamp.Year)-$("{0:D2}" -f $stamp.Month)-$("{0:D2}" -f $stamp.Day)"
    $time_part = "$("{0:D2}" -f $stamp.Hour)-$("{0:D2}" -f $stamp.Minute)-$("{0:D2}" -f $stamp.Second)"
    $this_path = "$OutDir/$($date_part)_" + $time_part

    # Skip if already done
    if(Test-Path $this_path -ErrorAction SilentlyContinue) {return}

    # Download Files
    foreach($download_url in $this_time_files.Value)
    {
        $local_file = "$OutDir/_source/$($download_url.Split("/")[-1])"
        Invoke-WebRequest -Uri $download_url -OutFile $local_file
    }

    # Process
    # NOTE: $download_url still holds the last URL of the loop above, so SatDump is
    # pointed at the last file downloaded for this timestamp.
    satdump off2pro file "$OutDir/_source/$($download_url.Split("/")[-1])" $this_path 2>&1 | Out-Null
}

# Queue one PowerShell instance per timestamp on a pool capped at $MaxParallel runspaces
$runspaces = @()
$pool = [runspacefactory]::CreateRunspacePool(1, $MaxParallel)
$pool.Open()
foreach($this_time_files in $files_to_download.GetEnumerator())
{
    $worker = [powershell]::Create().AddScript($worker_script)
    $worker = $worker.AddParameter("OutDir", $OutDir)
    $worker = $worker.AddParameter("this_time_files", $this_time_files)
    $worker.RunspacePool = $pool

    # Keep the instance together with its async handle so completion can be polled later
    $async_handle = $worker.BeginInvoke()
    $runspaces += [pscustomobject]@{Instance=$worker; Result=$async_handle}
}
# Wait for all processing to finish, reporting progress once per minute
while($runspaces | ? { -not $_.Result.IsCompleted })
{
    # @() keeps .Count well-defined when zero or one job has completed
    $completed_count = @($runspaces.Result | where IsCompleted -eq $true).Count
    Write-Output "[$(Get-Date -Format 'G')] Completed processing $completed_count/$($runspaces.Count) products..."
    Start-Sleep -Seconds 60
}

# Done, clean up
foreach($runspace in $runspaces)
{
    # Collect (and discard) each job's output, then release the PowerShell instance
    $runspace.Instance.EndInvoke($runspace.Result) | Out-Null
    $runspace.Instance.Dispose()
}

# Close the pool before disposing it; calling Close() on an already-disposed pool is invalid
$pool.Close()
$pool.Dispose()

# Remove the downloaded source files unless the caller asked to keep them.
# Remove-Item is used instead of the "rm" alias, which resolves to the native rm
# binary on non-Windows PowerShell, where -Recurse/-Force are not valid options.
if(-not $KeepSource)
{
    Remove-Item -Recurse -Force "$OutDir/_source"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment