-
-
Save Wimpje/a796ba134d61552587a7 to your computer and use it in GitHub Desktop.
<#
.SYNOPSIS
Splits an XML file into several smaller files, grouping sibling elements by name.

.DESCRIPTION
Streams the input file with System.Xml.XmlReader, so the whole document is never
loaded at once. Starting at the first element named $splitOnNode, matching sibling
elements are copied verbatim into output files written next to the input file and
named "<name>.<n><extension>". Each output file wraps its elements in a
<split cnt="..."> root element.

.PARAMETER file
Path of the XML file to split (required).

.PARAMETER matchesPerSplit
Maximum number of matching elements written to one output file.

.PARAMETER maxFiles
Maximum number of output files to generate.

.PARAMETER splitOnNode
Name of the element to split on (required).

.PARAMETER offset
The file counter is advanced to this value before the first file is written, so the
"cnt" attribute starts at $offset while the number in the file name starts at 0.
NOTE(review): no input elements are skipped for the offset - confirm whether
skipping the first $offset chunks was the intent.
#>
param(
    [string]$file = $(throw "file is required"),
    $matchesPerSplit = 50,
    $maxFiles = [Int32]::MaxValue,
    $splitOnNode = $(throw "splitOnNode is required"),
    $offset = 0
)
# with a little help of https://gist.github.com/awayken/5861923
$ErrorActionPreference = "Stop";
trap {
    $ErrorActionPreference = "Continue"
    # PowerShell escapes use the backtick; "\r\n" would be printed literally.
    write-error "Script failed: $_ `r`n $($_.ScriptStackTrace)"
    exit (1);
}
$file = (resolve-path $file).path
$fileNameExt = [IO.Path]::GetExtension($file)
$fileNameWithoutExt = [IO.Path]::GetFileNameWithoutExtension($file)
$fileNameDirectory = [IO.Path]::GetDirectoryName($file)
$reader = [System.Xml.XmlReader]::Create($file)
$matchesCount = $idx = 0
try {
    "Splitting $file on node name='$splitOnNode', with a max of $matchesPerSplit matches per file. Max of $maxFiles files will be generated."
    # Position the reader on the first matching element (false when there is none).
    $result = $reader.ReadToFollowing($splitOnNode)
    $hasNextSibling = $true
    while (-not($reader.EOF) -and $result -and $hasNextSibling -and ($idx -lt $maxFiles + $offset)) {
        if ($matchesCount -lt $matchesPerSplit) {
            # Advance the counter up to $offset before writing the first file.
            if($offset -gt $idx) {
                $idx++
                continue
            }
            $to = [IO.Path]::Combine($fileNameDirectory, "$fileNameWithoutExt.$($idx -$offset)$fileNameExt")
            "Writing to $to"
            $toXml = New-Object System.Xml.XmlTextWriter($to, $null)
            $toXml.Formatting = 'Indented'
            $toXml.Indentation = 2
            try {
                $toXml.WriteStartElement("split")
                $toXml.WriteAttributeString("cnt", $null, "$idx")
                do {
                    $toXml.WriteRaw($reader.ReadOuterXml())
                    $matchesCount++;
                    # ReadOuterXml() has already moved the reader to the node after the
                    # element's end tag. When the XML has no whitespace between siblings
                    # that node IS the next matching element, and ReadToNextSibling()
                    # would skip over it. Only search further when we are not on a match.
                    # -ceq: XML names are case-sensitive.
                    if (($reader.NodeType -eq [System.Xml.XmlNodeType]::Element) -and ($reader.Name -ceq $splitOnNode)) {
                        $hasNextSibling = $true
                    }
                    else {
                        $hasNextSibling = $reader.ReadToNextSibling($splitOnNode)
                    }
                } while($hasNextSibling -and ($matchesCount -lt $matchesPerSplit))
                $toXml.WriteEndElement();
            }
            finally {
                # Always release the output file, even when copying fails.
                $toXml.Flush()
                $toXml.Close()
            }
            $idx++
            $matchesCount = 0;
        }
    }
}
finally {
    $reader.Close()
}
The code does not work on all files. According to the documentation, ReadOuterXml advances the reader to the next tag. What I don't understand is why it sometimes works. https://docs.microsoft.com/en-us/dotnet/api/system.xml.xmlreader.readouterxml?view=net-6.0
This workaround seems to work. I have not been able to find a better solution.
This seems to work in both situations, which I also cannot explain: if ($reader.Name -eq $splitOnNode) { $hasNextSibling = 1 } else { $hasNextSibling = $reader.ReadToNextSibling($splitOnNode) }
I think the script only works correctly on XML with "CR"?
Please compare output of sample1 & sample2
# Minimal reproduction: writes two sample documents with the same elements and then
# prints every <item> element of one of them using the same reader calls as the split script.
$global:ErrorActionPreference = "Stop"
# sample1: all elements on one line, with nothing between sibling <item> elements.
$content1 = '<test><list_items><item><id>A</id></item><item><id>B</id></item></list_items></test>' | out-file -Force -filepath D:\test\sample1.xml
# sample2: the same elements, separated by [char]13 (carriage return) characters.
$content2 = '<test>' + [char]13 + '<list_items>' + [char]13 +'<item> '+ [char]13 +'<id>A</id>' + [char]13 + '</item>' + [char]13 + '<item>' + [char]13 + '<id>B</id>' + [char]13 + '</item>' + [char]13 + '</list_items>' + [char]13 + '</test>' | out-file -Force -filepath D:\test\sample2.xml
# Swap the two lines below to choose which sample is read.
$file = (resolve-path D:\test\sample2.xml).path
#$file = (resolve-path D:\test\sample1.xml).path
$reader = [System.Xml.XmlReader]::Create($file)
$matchesCount = $idx = 0
try {
# Move to the first <item> element.
$result = $reader.ReadToFollowing("item")
$hasNextSibling = $true
while (-not($reader.EOF) -and $result -and $hasNextSibling) { #JONVIK
# Print the current element, then look for the next <item> sibling.
# NOTE(review): the thread reports that the output differs between sample1 and sample2.
write-host $reader.ReadOuterXml()
$hasNextSibling = $reader.ReadToNextSibling("item")
}
}
finally {
$reader.Close()
}
Hi, many thanks for this code, brilliant!
Could you help me with a line of code that extracts the id of the node being written, so that I can use that id in the file name rather than the incremental $idx, please?
Many thanks!
I don't have time to test right now, but the node should be in here:
$reader.ReadOuterXml()
Not sure if you can extract it directly or if you need to load the content into another object first. It needs to be done without moving the reader's position.
The code does not work on all files.
According to the documentation, ReadOuterXml advances the reader to the next tag. What I don't understand is why it sometimes works.
https://docs.microsoft.com/en-us/dotnet/api/system.xml.xmlreader.readouterxml?view=net-6.0
This workaround seems to work. I have not been able to find a better solution.
This seems to work in both situations, which I also cannot explain:
# ReadOuterXml() leaves the reader on the node that follows the element it just read.
# If that node is already the next matching element, keep it: calling
# ReadToNextSibling() here would skip over it. Otherwise search for the next sibling.
# The node-type check keeps an end tag with the same name from being taken for a match;
# -ceq is used because XML names are case-sensitive.
if (($reader.NodeType -eq [System.Xml.XmlNodeType]::Element) -and ($reader.Name -ceq $splitOnNode)) {
    $hasNextSibling = $true
} else {
    $hasNextSibling = $reader.ReadToNextSibling($splitOnNode)
}