Created
October 11, 2016 14:24
-
-
Save duncansmart/d2c77ac8eaeda5cd929492b196c44f89 to your computer and use it in GitHub Desktop.
Streaming XML parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Xml; | |
using System.Diagnostics; | |
/// <summary>
/// Forward-only XML reader that flattens a document into a lazy sequence of
/// <see cref="ElementInfo"/> records, one per element, without loading the
/// whole document into memory.
/// </summary>
class StreamingXmlParser
{
    /// <summary>
    /// Snapshot of a single XML element as seen by <see cref="StreamElements"/>.
    /// </summary>
    public class ElementInfo
    {
        /// <summary>Qualified name of the element as reported by the reader.</summary>
        public string Name { get; set; }

        /// <summary>
        /// Names of the enclosing elements, nearest parent first and the
        /// document root last. Empty for the root element.
        /// </summary>
        public List<string> ParentNames { get; set; }

        /// <summary>Attributes of the element keyed by qualified attribute name.</summary>
        public Dictionary<string, string> Attributes { get; set; }

        /// <summary>
        /// The first text (or CDATA) node that is a direct child of the element and
        /// precedes any child element; <c>null</c> when there is none.
        /// </summary>
        public string Text { get; set; }
    }

    /// <summary>
    /// Lazily enumerates every element of the XML document in <paramref name="file"/>
    /// in document order.
    /// </summary>
    /// <param name="file">Stream positioned at the start of an XML document.</param>
    /// <returns>
    /// A deferred sequence of elements. The stream is only read while the sequence
    /// is being enumerated, so it must stay open until enumeration finishes.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    public static IEnumerable<ElementInfo> StreamElements(Stream file)
    {
        // Validate here, outside the iterator, so a null stream fails at the call
        // site instead of on the first MoveNext().
        if (file == null)
            throw new ArgumentNullException(nameof(file));

        return StreamElementsIterator(file);
    }

    // Iterator behind StreamElements: walks the reader and yields one ElementInfo per element.
    private static IEnumerable<ElementInfo> StreamElementsIterator(Stream file)
    {
        using (var rdr = XmlReader.Create(file))
        {
            // Names of the currently open ancestors, innermost on top.
            var parentNames = new Stack<string>();

            // Element that has been read but not yet yielded; it is held back so
            // that a directly following text node can still be attached to it.
            ElementInfo pending = null;
            int pendingDepth = 0;

            while (rdr.Read())
            {
                if (rdr.NodeType == XmlNodeType.Element)
                {
                    // Capture name/depth first: iterating the attributes below
                    // repositions the reader onto attribute nodes.
                    var elementName = rdr.Name;
                    var elementDepth = rdr.Depth;

                    // An element at depth d has exactly d ancestors. Dropping the
                    // surplus entries handles siblings (one pop) as well as a jump
                    // back up across several levels (many pops).
                    while (parentNames.Count > elementDepth)
                        parentNames.Pop();

                    // Stack<T> enumerates from the top, so this is nearest-parent-first.
                    var ancestors = parentNames.ToList();
                    parentNames.Push(elementName);

                    var attributes = new Dictionary<string, string>();
                    while (rdr.MoveToNextAttribute())
                        attributes[rdr.Name] = rdr.Value;

                    // The previous element had no direct text before this one started.
                    if (pending != null)
                        yield return pending;

                    pending = new ElementInfo
                    {
                        Name = elementName,
                        ParentNames = ancestors,
                        Attributes = attributes,
                        Text = null,
                    };
                    pendingDepth = elementDepth;
                }
                else if ((rdr.NodeType == XmlNodeType.Text || rdr.NodeType == XmlNodeType.CDATA)
                    && pending != null
                    && rdr.Depth == pendingDepth + 1)
                {
                    // Only text that is a direct child of the pending element belongs
                    // to it. Text at any other depth trails an already-closed element
                    // (e.g. "tail" in <a><b/>tail</a>) and must not be attached to it.
                    pending.Text = rdr.Value;
                    yield return pending;
                    pending = null;
                }
            }

            // Flush the last element if it never received text.
            if (pending != null)
                yield return pending;
        }
    }

    // Ad-hoc manual check: dumps every element of a local sample file to the debug output.
    static void streamElements_TEST()
    {
        using (var file = System.IO.File.OpenRead(@"C:\Temp\test.xml"))
        {
            var elements = StreamElements(file);
            var stuff = from e in elements
                        //where e.ParentNames.Count == 2
                        select e;

            foreach (var item in stuff)
            {
                // ParentNames is nearest-parent-first; reverse it to print the path root-first.
                // The cast selects LINQ's Reverse() rather than List<T>.Reverse(), which is in-place and returns void.
                Debug.WriteLine(string.Join(" / ", ((IEnumerable<string>)item.ParentNames).Reverse()));
                Debug.Write(" " + item.Name);
                if (item.Text != null)
                    Debug.Write(" = '" + item.Text.Trim() + "'");
                Debug.WriteLine("");

                foreach (var attr in item.Attributes)
                {
                    Debug.WriteLine($" * {attr}");
                }
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment