Skip to content

Instantly share code, notes, and snippets.

@bielawb
Created March 13, 2015 04:31
Show Gist options
  • Save bielawb/cce7c437a1793c254745 to your computer and use it in GitHub Desktop.
Save bielawb/cce7c437a1793c254745 to your computer and use it in GitHub Desktop.
Module to read Excel as if it was just XML. Needs work (e.g. some cleanup after data is read).
<#
List taken from SO answer:
http://stackoverflow.com/questions/4730152/what-indicates-an-office-open-xml-cell-contains-a-date-time-value
#>
$ExCellStyles = @{
0 = 'General'
1 = '0'
2 = '0.00'
3 = '#,##0'
4 = '#,##0.00'
9 = '0%'
10 = '0.00%'
11 = '0.00E+00'
12 = '# ?/?'
13 = '# ??/??'
14 = 'mm-dd-yy'
15 = 'd-mmm-yy'
16 = 'd-mmm'
17 = 'mmm-yy'
18 = 'h:mm AM/PM'
19 = 'h:mm:ss AM/PM'
20 = 'h:mm'
21 = 'h:mm:ss'
22 = 'm/d/yy h:mm'
37 = '#,##0 (#,##0)'
38 = '#,##0 [Red](#,##0)'
39 = '#,##0.00(#,##0.00)'
40 = '#,##0.00[Red](#,##0.00)'
44 = '_("$"* #,##0.00_)_("$"* \(#,##0.00\)_("$"* "-"??_)_(@_)'
45 = 'mm:ss'
46 = '[h]:mm:ss'
47 = 'mmss.0'
48 = '##0.0E+0'
49 = '@'
27 = '[$-404]e/m/d'
30 = 'm/d/yy'
36 = '[$-404]e/m/d'
50 = '[$-404]e/m/d'
57 = '[$-404]e/m/d'
59 = 't0'
60 = 't0.00'
61 = 't#,##0'
62 = 't#,##0.00'
67 = 't0%'
68 = 't0.00%'
69 = 't# ?/?'
70 = 't# ??/??'
}
function Get-ExcelData {
[CmdletBinding()]
param(
$Path,
$sheet,
$HeaderRow = 1
)
$fullName = Resolve-Path -Path $Path | ForEach-Object { $_.ProviderPath }
$zip = [System.IO.Path]::GetTempFileName() |
Rename-Item -NewName { $_ -replace 'tmp$', 'zip' } -PassThru
Get-Item -LiteralPath $fullName | Copy-Item -Destination $zip.FullName
$folder = Split-Path -Leaf $fullName
$shell = New-Object -ComObject Shell.Application
$source = $shell.Namespace($zip.FullName)
$destinationFolder = (mkdir -Path ([System.IO.Path]::GetTempPath() + $folder) -Force).FullName
Remove-Item -Path $destinationFolder\* -Force -Recurse
$destination = $shell.Namespace($destinationFolder)
$destination.CopyHere($source.Items())
$xmlNs = @{
Namespace = @{
d = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
}
}
$sheets = Select-Xml -Path "$destinationFolder\xl\workbook.xml" -XPath //d:sheets @xmlNs |
ForEach-Object {
$_.Node.sheet
} | Where-Object {
$_.Name -match $sheet
} | ForEach-Object {
$_.id -replace 'rId(\d+)', 'sheet$1.xml'
}
$Strings = Select-Xml -Path "$destinationFolder\xl\sharedStrings.xml" -XPath //d:sst @xmlNs |
foreach { $_.Node }
# $Strings
$addedStyles = @{}
Select-Xml -Path "$destinationFolder\xl\styles.xml" -XPath //d:numFmts @xmlNs |
foreach { $_.Node.numFmt } | foreach {
$addedStyles[[int]$_.numFmtId] = $_.formatCode
}
$styles = $Script:ExCellStyles + $addedStyles
$formats = @(Select-Xml -Path "$destinationFolder\xl\styles.xml" @xmlNs -XPath //d:cellXfs |
foreach { $_.Node.xf } | foreach { $styles[[int]$_.numFmtId]})
foreach ($sheet in $sheets) {
$data = Select-Xml -Path "$destinationFolder\xl\worksheets\$sheet" -XPath //d:sheetData @xmlNs |
foreach { $_.Node.row } | foreach { $_.c } | foreach {
New-Object PSObject -Property @{
Column = $_.r -replace '\d'
Row = $_.r -replace '\D'
Value = if ($_.t) {
$Strings.Si[$_.v].t
} else {
$_.v
}
Style = if ($_.s) {
$formats[$_.s]
} else {
'General'
}
}
}
$headers = @{}
$data | Where-Object {
$_.Row -eq $HeaderRow
} | ForEach-Object {
if ($_.Value) {
$headers[$_.Column] = $_.Value
}
}
$rows = $data | Where-Object {
$_.Row -gt $HeaderRow
} | Group-Object -Property Row -AsHashTable
foreach ($row in $($rows.Keys | Sort-Object { [int]$_ })) {
$properties = @{}
$noValue = $true
foreach ($Key in $headers.Keys) {
$Cell = $rows[$row] | Where-Object { $_.Column -eq $Key }
if ($value = $Cell.Value) {
$value = $value |
Add-Member -MemberType NoteProperty -Value $Cell.Style -Name Style -PassThru
}
$properties[$headers[$Key]] = $value
if ($value) {
$noValue = $false
}
}
if (-not $noValue) {
New-Object PSObject -Property $properties
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment