Skip to content

Instantly share code, notes, and snippets.

@sean-m
Last active September 19, 2023 15:27
Show Gist options
  • Save sean-m/7dd2d6f044f935c84791147cc244f9df to your computer and use it in GitHub Desktop.
Save sean-m/7dd2d6f044f935c84791147cc244f9df to your computer and use it in GitHub Desktop.
PowerShell module with functions to assist reading and writing GZip compressed files.
#Requires -Version 4
#Load Types
{
    # C# source for the CompressHelper static class. CopyStream blocks on the
    # task returned by CopyStreamAsync, which itself just forwards to
    # Stream.CopyToAsync. The here-string body must stay at column 0.
    $helperSource = @"
using System;
using System.IO;
using System.Threading.Tasks;
public static class CompressHelper {
public static void CopyStream (Stream src, Stream dest) {
CopyStreamAsync(src, dest).Wait(); // Faster than just using CopyTo()
}
public static Task CopyStreamAsync (Stream src, Stream dest) {
return src.CopyToAsync(dest);
}
}
"@

    # Compile the helper only once per session; Add-Type fails if the type
    # is already defined.
    $alreadyLoaded = $null -ne ('CompressHelper' -as [type])
    if (-not $alreadyLoaded) {
        Add-Type -Language CSharp -TypeDefinition $helperSource
    }
}.Invoke()
<# Copy-Stream
.SYNOPSIS
Copies all remaining data from one stream into another.
.DESCRIPTION
Validates that the source is readable and the destination is writable, then
hands both streams to [CompressHelper]::CopyStream, which blocks on
Stream.CopyToAsync until the copy has finished. The original author reports
that, for large files combined with compression, this can run about twice as
fast as a plain CopyTo() even though the call blocks.
.PARAMETER SrcStream
Stream the data is read from.
.PARAMETER DestStream
Stream the data is written to.
#>
function Copy-Stream {
    param (
        [IO.Stream]$SrcStream,
        [IO.Stream]$DestStream
    )

    # Guard clauses: the source is checked first, then the destination.
    $sourceReadable = [bool]$SrcStream.CanRead
    if (!$sourceReadable) {
        throw [System.IO.IOException] "Cannot read from source stream."
    }

    $destWritable = [bool]$DestStream.CanWrite
    if (!$destWritable) {
        throw [System.IO.IOException] "Cannot write to destination stream."
    }

    Write-Debug "Copying streams"
    [CompressHelper]::CopyStream($SrcStream, $DestStream)
}
Export-ModuleMember -Function Copy-Stream
<# Out-GzipFile
.SYNOPSIS
Writes incoming data to a gzip compressed file. Note: CompressionLevel=Optimal,
the slowest compression but the best space savings. GZip semi-analog to Out-File, sans encoding.
.DESCRIPTION
Writes incoming data to a gzip compressed file using CompressionLevel=Optimal.
Incoming data may be an object/object[] (the .ToString() value of each item is
written as one line) or a Stream (copied byte for byte via Copy-Stream).
Pipeline input of the data itself is not supported: the output streams are
opened once in the begin block and closed at the end of the first process
iteration.
.PARAMETER InputData
Data to compress. A Stream is copied through Copy-Stream; an array is written
item by item as lines of text; any other object is written as a single line
using its .ToString() value.
.PARAMETER FilePath
Path of the GZip compressed file to write. Relative paths are resolved against
the current PowerShell location.
.PARAMETER Force
Overwrite the destination file if it already exists.
.PARAMETER Verify
For Stream input, compute a SHA256 hash of the data while it is being written,
then decompress the finished file and compare hashes. The function returns the
string "Verified: <FilePath> - True" or "Verified: <FilePath> - False".
Non-stream input cannot be verified and is reported as False with a warning.
.EXAMPLE
Out-GzipFile -InputData ([System.IO.File]::OpenRead((Resolve-Path "Example.txt").Path)) -FilePath "Example.txt.gz" -Force
.EXAMPLE
Out-GzipFile -InputData @(Get-Content "Example.txt") -FilePath "Example.txt.gz" -Force
.EXAMPLE
$request = [Net.FtpWebRequest][Net.WebRequest]::Create($serverUri)
$request.Method = [Net.WebRequestMethods+Ftp]::DownloadFile
[Net.FtpWebResponse]$response = $request.GetResponse()
Out-GzipFile -InputData ($response.GetResponseStream()) -FilePath "ftp_data.txt.gz"
#>
function Out-GzipFile {
    [CmdletBinding()]
    [Alias()]
    [OutputType([string])]
    param (
        # Data to compress
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$false,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        $InputData,

        # File to write to
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$false,
                   ValueFromPipelineByPropertyName=$true,
                   Position=1)]
        [string]$FilePath,

        # Overwrite destination if it exists
        [Parameter(Mandatory=$false,
                   ValueFromPipeline=$false,
                   ValueFromPipelineByPropertyName=$true,
                   Position=2)]
        [switch]$Force,

        # Verify stream input with a SHA256 checksum
        [Parameter(Mandatory=$false,
                   ValueFromPipeline=$false,
                   ValueFromPipelineByPropertyName=$true,
                   Position=4)]
        [switch]$Verify
    )

    begin {
        # Resolve against the PowerShell location. .NET file APIs resolve
        # relative paths against the process working directory, which is
        # usually a different folder.
        $destPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($FilePath)
        $exists = Test-Path -LiteralPath $destPath

        # Pick the file mode. The earlier 'Append' branches were removed: no
        # -Append parameter is declared, so they could only be triggered by a
        # variable leaking in from a caller's scope.
        if ($exists -and -not $Force) {
            throw [System.IO.IOException] "File exists and `$Force flag not passed. Quiting.."
        }
        $mode = if ($exists) { 'Truncate' } else { 'CreateNew' }

        ## Setup streams: file <- gzip <- text writer
        $fs = New-Object System.IO.FileStream $destPath, $mode, 'Write', 'None'
        try {
            $gz = New-Object System.IO.Compression.GZipStream $fs, ([System.IO.Compression.CompressionLevel]::Optimal)
            $sw = New-Object System.IO.StreamWriter $gz
        }
        catch {
            # Do not leave the destination locked if the wrappers cannot be built.
            $fs.Close()
            throw
        }
    }

    process {
        $hashAlg = $null
        $sourceHash = ""
        $destHash = ""
        $verified = $false

        # Outermost stream of the chain; closing it flushes and closes every
        # stream beneath it (writer -> gzip -> file).
        $outermost = $sw

        try {
            # -is honours the whole inheritance chain, so streams that derive
            # from Stream indirectly are still treated as streams.
            if ($InputData -is [IO.Stream]) {
                if ($Verify) {
                    # A hash algorithm used as a CryptoStream transform passes
                    # the bytes through unchanged while keeping a running hash.
                    $hashAlg = New-Object System.Security.Cryptography.SHA256Cng
                    $cryptStream = New-Object System.Security.Cryptography.CryptoStream $gz, $hashAlg, 'Write'
                    $outermost = $cryptStream

                    Copy-Stream $InputData $cryptStream
                    $cryptStream.FlushFinalBlock()
                    $sourceHash = [System.BitConverter]::ToString($hashAlg.Hash)
                }
                else {
                    ## Do not compute hash, just copy stream data
                    Copy-Stream $InputData $gz
                }
            }
            elseif ($InputData -is [Array]) {
                ## One line per array element
                foreach ($item in $InputData) {
                    $sw.WriteLine($item.ToString())
                }
            }
            else {
                $sw.WriteLine($InputData.ToString())
            }
        }
        finally {
            # Closing the outermost stream flushes buffered text, writes the
            # gzip footer and releases the exclusive file handle.
            $outermost.Close()
            if ($hashAlg) { $hashAlg.Dispose() }
        }

        if ($Verify) {
            if ($sourceHash) {
                # The file is complete and unlocked now, so it can be read back.
                $readBack = Get-GzipContent -ReturnStream -FilePath $destPath
                $destAlg = New-Object System.Security.Cryptography.SHA256Cng
                try {
                    $destHash = [System.BitConverter]::ToString($destAlg.ComputeHash($readBack))
                }
                finally {
                    $readBack.Close()
                    $destAlg.Dispose()
                }
                $verified = ($sourceHash -eq $destHash)
            }
            else {
                Write-Warning "Verify is only supported for Stream input; no checksum was computed for $FilePath."
            }
            return "Verified: $FilePath - $verified"
        }
    }
}
Export-ModuleMember -Function Out-GzipFile
<# Decompress-GzipFile
.SYNOPSIS
Decompress a GZip compressed file into a new file.
.DESCRIPTION
Opens FilePath through Get-GzipContent -ReturnStream and copies the
decompressed data into OutFile using Copy-Stream. Both streams are closed when
the copy finishes or fails.
.PARAMETER FilePath
Path to the GZip compressed source file.
.PARAMETER OutFile
Path of the decompressed file to create. Relative paths are resolved against
the current PowerShell location.
.PARAMETER Force
Overwrite OutFile if it already exists.
.EXAMPLE
Decompress-GzipFile -FilePath ".\Example.txt.gz" -OutFile ".\Example.txt" -Force
#>
function Decompress-GzipFile {
    [CmdletBinding()]
    [Alias()]
    [OutputType([void])]
    Param
    (
        [Parameter(Mandatory=$true,
                   Position=0)]
        [string]$FilePath,

        [Parameter(Mandatory=$true,
                   Position=1)]
        [string]$OutFile,

        [switch]$Force
    )
    process {
        $src = $null
        $out = $null
        try {
            # Resolve every relative form ("out.txt", ".\out.txt", "..\out.txt")
            # against the PowerShell location; File.Create would otherwise use
            # the process working directory.
            $OutFile = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($OutFile)

            # Check the destination before opening anything.
            if ((Test-Path -LiteralPath $OutFile) -and -not $Force) {
                throw "File exists and -Force switch not set."
            }

            $src = Get-GzipContent -FilePath (Resolve-Path $FilePath) -ReturnStream
            $out = [System.IO.File]::Create($OutFile)

            Write-Host "Decompressing: $FilePath -> $OutFile"
            Copy-Stream -SrcStream $src -DestStream $out
        }
        finally {
            if ($out) { $out.Close() }
            if ($src) { $src.Close() }
        }
    }
}
Export-ModuleMember -Function Decompress-GzipFile
<# Get-GzipContent
.Synopsis
Read data from a GZip compressed file.
.DESCRIPTION
Opens a GZip compressed file and either emits its decompressed text one line
at a time, or, with -ReturnStream, returns the open GZip decompression stream
itself so the caller can read raw bytes.
.PARAMETER FilePath
Path to the GZip compressed file. It is passed through Resolve-Path.
.PARAMETER ReturnStream
Return the decompression stream instead of text lines. The caller is
responsible for closing the returned stream.
.EXAMPLE
Get-GzipContent -FilePath ".\NYSE-2000-2001.tsv.gz" | ConvertFrom-Csv -Delimiter "`t"
.EXAMPLE
Get-GzipContent -FilePath ".\NYSE-2000-2001.tsv.gz" -ReturnStream
#>
function Get-GzipContent
{
    [CmdletBinding()]
    [Alias()]
    [OutputType([int])]
    Param
    (
        [Parameter(Mandatory=$true,
                   Position=0)]
        $FilePath,

        [switch]$ReturnStream
    )

    Process
    {
        $resolved = Resolve-Path $FilePath
        $fileStream = [IO.File]::OpenRead($resolved.Path)
        $gzipStream = New-Object IO.Compression.GZipStream $fileStream, ([System.IO.Compression.CompressionMode]::Decompress)

        if (-not $ReturnStream) {
            # Text mode: emit each line, then dispose the reader, which also
            # closes the streams underneath it.
            $reader = New-Object IO.StreamReader $gzipStream
            try {
                while ($true) {
                    $line = $reader.ReadLine()
                    if ($null -eq $line) { break }
                    $line
                }
            }
            catch { throw $_ }
            finally { $reader.Dispose() }
            return
        }

        # Stream mode: ownership of the open stream passes to the caller.
        Write-Debug "Returned stream does not close or dispose automatically."
        return $gzipStream
    }
}
Export-ModuleMember -Function Get-GzipContent
<# Copy-ToGzipFile
.Synopsis
Copy an existing file into a GZip compressed file.
.DESCRIPTION
Copies an existing file (or an open Stream) into a GZip compressed file by
calling Out-GzipFile, with the option to overwrite an existing destination
and/or remove the source file. Good for archiving existing files.
When -Verify is used, an object is emitted with the properties Source,
Destination, VerifySuccess (the message returned by Out-GzipFile) and
Verified (a boolean that is $true only when that message reports True).
.PARAMETER Source
Source file stream, FileInfo object or file path. Relative paths are resolved
against the current PowerShell location. A Stream passed here is closed when
the copy completes.
.PARAMETER Destination
Destination GZip file. Defaults to the source path with ".gz" appended;
required when Source is a Stream.
.PARAMETER RemoveSource
Delete the source file once the copy has been verified. Ignored (with a
warning) unless -Verify is also given.
.PARAMETER Force
Overwrite destination if exists
.PARAMETER Verify
Compute SHA256 checksum of incoming file and verify against destination
.EXAMPLE
Get-ChildItem *.txt | Copy-ToGzipFile -RemoveSource -Verify
.EXAMPLE
".\BigCsvFile.csv" | Copy-ToGzipFile -Destination .\LittleCsvFile.csv.gz
#>
function Copy-ToGzipFile
{
    [CmdletBinding()]
    [Alias()]
    [OutputType([psobject])]
    Param
    (
        # Source file stream or file path
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true,
                   Position=0)]
        $Source,

        # Destination GZip file
        [Parameter(Mandatory=$false,
                   ValueFromPipelineByPropertyName=$true,
                   Position=1)]
        [string]$Destination,

        # Check source and destination data with SHA256 checksum
        [switch]$Verify,

        # Delete source file if completed
        [switch]$RemoveSource,

        # Overwrite destination if exists
        [switch]$Force
    )

    Begin {
        if ($RemoveSource -and -not $Verify) {
            Write-Warning "RemoveSource must be used with data validation or it may result in data loss."
            $RemoveSource = $false
        }
    }
    Process
    {
        # -is honours the whole inheritance chain; comparing only the direct
        # base type misclassifies streams that derive from Stream indirectly.
        $sourceIsStream = $Source -is [IO.Stream]
        $srcPath = ""
        $fs = $null

        if ($sourceIsStream) {
            $fs = $Source
        }
        else {
            if ($Source -is [System.IO.FileSystemInfo]) {
                $srcPath = $Source.FullName
            }
            else {
                # Resolve against the PowerShell location, which is what
                # Test-Path uses, rather than the process working directory
                # that File.OpenRead would use.
                $srcPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath([string]$Source)
            }
            if (-not (Test-Path -LiteralPath $srcPath -PathType Leaf)) {
                throw [System.IO.FileNotFoundException] "Cannot find `$Source: $srcPath"
            }
        }

        # Work out the destination before opening the source file, so a
        # failure here cannot leak an open handle.
        if ([String]::IsNullOrEmpty($Destination)) {
            if ($sourceIsStream) {
                throw "Source is Stream and destination not specified, cannot infer destination path."
            }
            $destPath = "$srcPath.gz"
        }
        else {
            $destPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($Destination)
        }

        Write-Verbose "Source file: $srcPath"
        Write-Verbose "Creating compressed file: $destPath"

        $result = $null
        $verified = $false
        try {
            if (-not $sourceIsStream) {
                $fs = [System.IO.File]::OpenRead($srcPath)
            }

            $result = Out-GzipFile -InputData $fs -FilePath $destPath -Force:$Force -Verify:$Verify

            if ($Verify -and ($null -ne $result)) {
                # Out-GzipFile returns "Verified: <path> - True|False". That
                # string is always truthy, so the outcome has to be parsed.
                $verified = ([string]$result) -match ' - True$'
                New-Object PSObject -Property @{
                    "Source"        = $srcPath
                    "Destination"   = $destPath
                    "VerifySuccess" = $result
                    "Verified"      = $verified
                }
            }
        }
        finally {
            if ($null -ne $fs) { $fs.Close() }
        }

        if ($RemoveSource) {
            if ($sourceIsStream) {
                Write-Warning "Input type is Stream and `$RemoveSource flag used, cannot infer source file from Stream type."
            }
            elseif ($verified) {
                Write-Verbose 'Checksum verified, removing source file.'
                # -LiteralPath: names containing [ or ] must not be treated as wildcards.
                Remove-Item -LiteralPath $srcPath -Force:$Force
            }
            else {
                Write-Warning "Checksum verification failed; source file was not removed: $srcPath"
            }
        }
        Write-Verbose "Complete"
    }
}
Export-ModuleMember -Function Copy-ToGzipFile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment