Last active
June 7, 2020 09:50
-
-
Save Bidthedog/22411b458113fdbdaf16731d56fe0232 to your computer and use it in GitHub Desktop.
SQS Notes ETL Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Don't push anything that isn't specified in this file | |
* | |
!.gitignore | |
!.gitattributes | |
!etl.ps1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Clear-Host; | |
$ErrorActionPreference = "Stop"; | |
$VerbosePreference = "Continue"; | |
$outputFile = Join-Path $PSScriptRoot "notes.sql"; | |
$outputRejectionFile = Join-Path $PSScriptRoot "notes-rejected.csv"; | |
"" | Set-Content -Path $outputRejectionFile -Encoding utf8NoBOM; | |
$sql = "INSERT INTO notes (timestamp, user_id, channel_id, nick, message)`r`nVALUES`r`n"; | |
function Format-SqlString([string]$inputString) { | |
return $inputString -replace "\\", "\\" -replace "'", "\'"; | |
} | |
Get-Content -Path "notes.csv" | ConvertFrom-Csv -Delimiter "," -Header path, filename, size, type, date1, date2, date3, int1, int2, note | Select-Object -Property filename, note | % { | |
$rejected = $False; | |
$time = ""; | |
$nick = ""; | |
$message = ""; | |
$fnSplit = $_.filename -split '\.'; | |
$dateFromlogName = $fnSplit[1]; | |
$pTime = "\((\d{1,2}:\d{2}\.\d{2})\)"; | |
$pNick = "(?:(?:\s+?|\s+?->\s+?))+?(?:[<|\*|\-]{1}(.+?)[>|\*|\-]{1})"; | |
$pMsg = "\s+(?:!notes )?(.+)"; | |
$pattern = "$pTime$pNick$pMsg"; | |
if ($_.note -imatch $pattern -and $matches) { | |
$time = $matches[1]; | |
$date = [DateTime]::ParseExact("$dateFromLogName $time", "yyyyMMdd H:mm.ss", $null); | |
$nick = $matches[2]; | |
$message = $matches[3]; | |
if ($null -eq $message) { | |
$rejected = $True; | |
} | |
} | |
else { | |
$rejected = $True; | |
} | |
if (!$rejected) { | |
@{ | |
Date = $date; | |
Nick = $nick; | |
Message = $message.Trim(); | |
} | |
} | |
else { | |
# Output to rejected file | |
$_ | ConvertTo-Csv -NoTypeInformation -UseQuotes Never | Select-Object -Skip 1 | Add-Content -Path $outputRejectionFile -Encoding utf8NoBOM | Out-Null; | |
} | |
} | Group-Object -Property Message | % { | |
$_.Group | Select-Object -Property Nick, Message, Date -First 1 | Sort-Object -Property Date | |
} | ` | |
Select-Object Date, Nick, Message -Skip 1 | ` | |
Sort-Object -Property Date| ` | |
ForEach-Object { | |
# 07/02/2000 15:21:00 | |
$sql += " (STR_TO_DATE('$($_.Date)', '%m/%d/%Y %H:%i:%s'), NULL, NULL, '$(Format-SqlString $_.Nick)', '$(Format-SqlString $_.Message)'),`r`n"; | |
}; | |
# EOF clean up | |
$sql = $sql.TrimEnd(",`r`n"); | |
$sql += ";"; | |
$sql | Set-Content -Path $outputFile -Encoding utf8NoBOM; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment