Skip to content

Instantly share code, notes, and snippets.

@JVital2013
Last active December 24, 2024 19:50
Show Gist options
  • Save JVital2013/93245384eca11f85fc5646b950c0e9be to your computer and use it in GitHub Desktop.
Save JVital2013/93245384eca11f85fc5646b950c0e9be to your computer and use it in GitHub Desktop.
Bulk NOAA AWS to SatDump Processor
# BulkAWS2SatDump.ps1
#
# See the param() block for all supported parameters
#
# Supported Satellite/Region Parameters:
# -Satellite goes16, -Satellite goes17, -Satellite goes18, or -Satellite goes19
# -Region FD, -Region CONUS, -Region PACUS, -Region M1, or -Region M2
#
# -Satellite gk2a
# -Region FD or -Region LA
#
# Notes:
# - SatDump 1.2.3 must be installed and in your system PATH (otherwise, edit the
# "satdump off2pro" command near the end of this script to use the full path).
# - Be careful with MaxParallel! For full disk images, you need roughly 8GB RAM
# per parallel thread. Regional images require less memory and can be parallelized
# better.
#
# Example Use: .\BulkAWS2SatDump.ps1 -Satellite goes16 -Region CONUS -OutDir "C:\Users\jvita\Desktop\CONUS" -StartTime "December 16, 2024 5:00 AM" -EndTime "December 23, 2024 4:59 AM" -MaxParallel 4
param(
    # Absolute path of the folder that receives the processed products. A temporary
    # "_source" subfolder is created inside it for the raw downloads.
    # No default: the parameter is mandatory, so a default value could never be used.
    [Parameter(Mandatory)][string]$OutDir,

    # Satellite to pull from: goes16, goes17, goes18, goes19, or gk2a
    [string]$Satellite="goes16",

    # Region/scene to pull: FD, CONUS, PACUS, M1, M2 (GOES) or FD, LA (GK-2A)
    [string]$Region="FD",

    # Start of the time range (inclusive). It is compared directly against the timestamp
    # embedded in each file name, with no time zone conversion.
    [DateTime]$StartTime=(Get-Date).AddDays(-1),

    # End of the time range (inclusive); compared the same way as StartTime
    [DateTime]$EndTime=(Get-Date),

    # Keep the downloaded source files instead of deleting "_source" when finished
    [switch]$KeepSource,

    # Maximum number of products downloaded/processed at the same time. Must be at least 1,
    # since it is used as the upper bound of the runspace pool.
    [ValidateRange(1, [int]::MaxValue)][int]$MaxParallel=2
)
# Make sure TLS 1.2 is enabled for the HTTPS requests below. The flag is OR-ed into the
# current setting so that protocols which are already enabled are not switched off.
[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

# Validate the arguments before doing any network work
if($StartTime -gt $EndTime)
{
    Write-Error "Start time cannot be after the end time!"
    return
}
if(-not [System.IO.Path]::IsPathRooted($OutDir))
{
    Write-Error "OutDir must be an absolute path"
    return
}

# Common prefix of the bucket URL; the satellite-specific host name is appended later
$baseURL = "https://noaa-"

# Maps each image timestamp (DateTime) to the array of file URLs that belong to it
$files_to_download = @{}
# Pull files for GOES
# -like is used instead of Substring(0, 4), which throws for names shorter than four characters
if($Satellite -like "goes*")
{
    # Translate the requested region into the product folder used in the bucket ($urlRegion)
    # and the scene identifier expected inside each file name ($urlSubregion)
    if($Region -eq "FD")
    {
        $urlRegion = "ABI-L1b-RadF"
        $urlSubregion = "RadF"
    }
    elseif($Region -eq "CONUS" -or $Region -eq "PACUS")
    {
        $urlRegion = "ABI-L1b-RadC"
        $urlSubregion = "RadC"
    }
    elseif($Region -eq "M1")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM1"
    }
    elseif($Region -eq "M2")
    {
        $urlRegion = "ABI-L1b-RadM"
        $urlSubregion = "RadM2"
    }
    else
    {
        Write-Error "Unknown region for $Satellite $Region"
        return
    }

    $baseURL += "$Satellite.s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=$urlRegion%2F"

    # Get all GOES Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."

    # Walk calendar days rather than elapsed 24-hour periods, so that a range which crosses
    # midnight (e.g. 23:00 to 01:00) still lists the folder of the final day
    $last_day = $EndTime.Date
    for($this_day = $StartTime.Date; $this_day -le $last_day; $this_day = $this_day.AddDays(1))
    {
        # Day-of-year folders in the bucket are three digits wide, so the value must be
        # zero-padded; the trailing slash keeps the prefix from matching any other day
        $day_url = $regionURL + $this_day.Year + "%2F" + ("{0:D3}" -f $this_day.DayOfYear) + "%2F"

        # A listing can span several pages; keep requesting until no continuation token is returned
        $continuation_token = ""
        do
        {
            $aws_result = Invoke-WebRequest -Uri ($day_url + $continuation_token) -UseBasicParsing
            if($aws_result.StatusCode -ne 200)
            {
                Write-Error "Error querying $day_url!"
                return
            }

            $aws_xml = [xml]$aws_result
            if($aws_xml.GetElementsByTagName("ListBucketResult").Count -eq 0)
            {
                Write-Error "No Data returned by AWS!"
                return
            }

            # Loop through request results
            foreach($content_result in $aws_xml.ListBucketResult.Contents)
            {
                # Sanity checks: the file name (without extension) must have six "_"-separated
                # fields, and the second field must have four "-"-separated parts
                $file_name = $content_result.Key.Split("/")[-1].Split(".")[0]
                $split1 = $file_name.Split("_")
                if($split1.Count -ne 6)
                {
                    Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                    continue
                }
                $split2 = $split1[1].Split("-")
                if($split2.Count -ne 4)
                {
                    Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                    continue
                }

                # Skip files that belong to a different scene (e.g. M2 when M1 was requested)
                if($split2[2] -ne $urlSubregion) {continue}

                # The fourth field holds the start time as "s" + YYYY + DDD + HH + MM + SS (+ extra digits).
                # Verify its shape first so the Substring calls below cannot throw.
                $start_field = $split1[3]
                if($start_field -notmatch '^s\d{13}')
                {
                    Write-Warning "Skipping invalid GOES-R ABI file $file_name"
                    continue
                }

                # Build January 1st of the year at the given time of day, then add the day of year
                $this_datetime = New-Object DateTime -ArgumentList ([int]$start_field.Substring(1, 4), 1, 1, `
                    [int]$start_field.Substring(8, 2), [int]$start_field.Substring(10, 2), [int]$start_field.Substring(12, 2))
                $this_datetime = $this_datetime.AddDays([int]$start_field.Substring(5, 3) - 1)

                # Skip files outside of the time filter
                if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

                # Add files to download, grouped by their start time
                if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
                $files_to_download[$this_datetime] += $baseURL + $content_result.Key
            }

            # Check if AWS has more data for us
            $more_pages = $aws_xml.ListBucketResult.GetElementsByTagName("NextContinuationToken").Count -gt 0
            if($more_pages)
            {
                $continuation_token = "&continuation-token=" + [uri]::EscapeDataString($aws_xml.ListBucketResult.NextContinuationToken)
            }
        } while($more_pages)
    }
}
# Pull files for GK-2A
elseif($Satellite.Replace('-', '') -eq "GK2A")
{
    # Checks, and ensure case is correct (-eq ignores case, the URL gets the upper-case form)
    if($Region -eq "FD")
    {
        $urlRegion = "FD"
    }
    elseif($Region -eq "LA")
    {
        $urlRegion = "LA"
    }
    else
    {
        Write-Error "Unknown region for $Satellite $Region"
        return
    }

    $baseURL += "gk2a-pds.s3.amazonaws.com/"
    $regionURL = $baseURL + "?list-type=2&prefix=AMI%2FL1B%2F$urlRegion"

    # Get all GK-2A Files in the date range
    Write-Output "[$(Get-Date -Format 'G')] Querying files to download..."

    # Walk calendar days rather than elapsed 24-hour periods, so that a range which crosses
    # midnight (e.g. 23:00 to 01:00) still lists the folder of the final day
    $last_day = $EndTime.Date
    for($this_day = $StartTime.Date; $this_day -le $last_day; $this_day = $this_day.AddDays(1))
    {
        # Month and day must be zero-padded to two digits. Unpadded, a prefix such as "20241"
        # would match October through December instead of January. The trailing slash keeps
        # the prefix from matching any other day.
        $month_folder = "{0:D4}{1:D2}" -f $this_day.Year, $this_day.Month
        $day_folder = "{0:D2}" -f $this_day.Day
        $day_url = "$($regionURL)%2F$($month_folder)%2F$($day_folder)%2F"

        # A listing can span several pages; keep requesting until no continuation token is returned
        $continuation_token = ""
        do
        {
            $aws_result = Invoke-WebRequest -Uri ($day_url + $continuation_token) -UseBasicParsing
            if($aws_result.StatusCode -ne 200)
            {
                Write-Error "Error querying $day_url!"
                return
            }

            $aws_xml = [xml]$aws_result
            if($aws_xml.GetElementsByTagName("ListBucketResult").Count -eq 0)
            {
                Write-Error "No Data returned by AWS!"
                return
            }

            # Loop through request results
            foreach($content_result in $aws_xml.ListBucketResult.Contents)
            {
                # Sanity check: the file name (without extension) must have six "_"-separated fields
                $file_name = $content_result.Key.Split("/")[-1].Split(".")[0]
                $split = $file_name.Split("_")
                if($split.Count -ne 6)
                {
                    Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                    continue
                }

                # The last field is the image time as yyyyMMddHHmm. TryParseExact is used so a
                # single malformed name is skipped instead of aborting the whole listing.
                $this_datetime = [DateTime]::MinValue
                $parsed = [DateTime]::TryParseExact($split[5], "yyyyMMddHHmm", [System.Globalization.CultureInfo]::InvariantCulture, `
                    [System.Globalization.DateTimeStyles]::None, [ref]$this_datetime)
                if(-not $parsed)
                {
                    Write-Warning "Skipping invalid GK-2A AMI file $file_name"
                    continue
                }

                # Skip files outside of the time filter
                if($this_datetime -lt $StartTime -or $this_datetime -gt $EndTime) {continue}

                # Add files to download, grouped by their image time
                if(-not $files_to_download.ContainsKey($this_datetime)) {$files_to_download.Add($this_datetime, @())}
                $files_to_download[$this_datetime] += $baseURL + $content_result.Key
            }

            # Check if AWS has more data for us
            $more_pages = $aws_xml.ListBucketResult.GetElementsByTagName("NextContinuationToken").Count -gt 0
            if($more_pages)
            {
                $continuation_token = "&continuation-token=" + [uri]::EscapeDataString($aws_xml.ListBucketResult.NextContinuationToken)
            }
        } while($more_pages)
    }
}
#TODO: Handle pulling files from Himawari
else
{
    Write-Error "Unknown satellite $Satellite"
    return
}
# Download and parse files
if($files_to_download.Count -eq 0)
{
    Write-Warning "No files matched the requested satellite, region, and time range"
}

# -Force creates any missing parent folder and succeeds when the folder already exists,
# so this one call covers both $OutDir and its "_source" subfolder
New-Item -ItemType Directory -Path "$OutDir/_source" -Force | Out-Null

# Work performed for one timestamp: download every file of the set, then run SatDump on it.
# $this_time_files is one entry of $files_to_download (Key = timestamp, Value = URL array).
$worker = {
    param($OutDir, $this_time_files)

    # Progress output is never displayed from a background runspace, so do not generate it
    $ProgressPreference = "SilentlyContinue"

    # Compute Output Folder: <OutDir>/yyyy-MM-dd_HH-mm-ss
    $stamp = $this_time_files.Key
    $folder_name = "{0}-{1:D2}-{2:D2}_{3:D2}-{4:D2}-{5:D2}" -f $stamp.Year, $stamp.Month, $stamp.Day, $stamp.Hour, $stamp.Minute, $stamp.Second
    $this_path = "$OutDir/$folder_name"

    # Skip if already done
    if(Test-Path $this_path -ErrorAction SilentlyContinue) {return}

    # Download Files. A failed download stops this product before SatDump runs, so no
    # partial output folder is left behind to be mistaken for a finished product later.
    $source_file = $null
    foreach($download_url in $this_time_files.Value)
    {
        $source_file = "$OutDir/_source/$($download_url.Split('/')[-1])"
        Invoke-WebRequest -Uri $download_url -OutFile $source_file -UseBasicParsing -ErrorAction Stop
    }

    # Process. SatDump is handed the last file downloaded for this timestamp.
    # NOTE(review): presumably SatDump locates the other files of the set in the same folder - confirm
    satdump off2pro file $source_file $this_path 2>&1 | Out-Null
}

$pool = [runspacefactory]::CreateRunspacePool(1, $MaxParallel)
$pool.Open()
try
{
    # Queue one PowerShell instance per timestamp, oldest first; the pool limits how many run at once
    $runspaces = @(foreach($this_time_files in ($files_to_download.GetEnumerator() | Sort-Object Key))
    {
        $PSInstance = [powershell]::Create().AddScript($worker).AddParameter("OutDir", $OutDir).AddParameter("this_time_files", $this_time_files)
        $PSInstance.RunspacePool = $pool
        [pscustomobject]@{Timestamp=$this_time_files.Key; Instance=$PSInstance; Result=$PSInstance.BeginInvoke()}
    })

    # Wait for all processing to finish
    while($runspaces | Where-Object { -not $_.Result.IsCompleted })
    {
        $completed = @($runspaces | Where-Object { $_.Result.IsCompleted }).Count
        Write-Output "[$(Get-Date -Format 'G')] Completed processing $completed/$($runspaces.Count) products..."
        Start-Sleep -Seconds 60
    }

    # Done: collect each result, report anything that went wrong, and release the instance
    foreach($job in $runspaces)
    {
        try
        {
            $job.Instance.EndInvoke($job.Result) | Out-Null
        }
        catch
        {
            Write-Warning "Processing $($job.Timestamp) failed: $($_.Exception.Message)"
        }
        foreach($worker_error in $job.Instance.Streams.Error)
        {
            Write-Warning "Processing $($job.Timestamp) reported: $worker_error"
        }
        $job.Instance.Dispose()
    }
}
finally
{
    # Close before Dispose, and do both even when the script is interrupted
    $pool.Close()
    $pool.Dispose()
}

if(-not $KeepSource)
{
    Remove-Item -Recurse -Force "$OutDir/_source"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment