Created
May 21, 2015 10:03
-
-
Save tobiasviehweger/e13c858c57e0a5965471 to your computer and use it in GitHub Desktop.
Parsing Outlook WordOpenXML to OOXML SDK objects
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DocumentFormat.OpenXml.Packaging; | |
using DocumentFormat.OpenXml.Wordprocessing; | |
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.IO.Packaging; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.Xml; | |
using System.Xml.XPath; | |
namespace OOXMLTest | |
{ | |
/// <summary>
/// Console sample that reads a WordOpenXML string (as exposed by Outlook) from
/// "source.xml", rebuilds it as an in-memory OPC package, opens it with the
/// Open XML SDK and dumps each body paragraph and its child elements to the console.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: loads "source.xml" from the working directory, converts it to a
    /// package and prints paragraph style ids, run formatting flags and text.
    /// Waits for a character on standard input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var wordXml = File.ReadAllText("source.xml");

        // Both the package and the SDK document are IDisposable; dispose them
        // deterministically once the dump is finished.
        using (var package = createPackageFromWordOpenXML(wordXml))
        using (var doc = WordprocessingDocument.Open(package))
        {
            var mainPart = doc.MainDocumentPart;
            if (mainPart == null || mainPart.Document == null || mainPart.Document.Body == null)
            {
                Console.Error.WriteLine("The package does not contain a main document body.");
            }
            else
            {
                // The styles part is optional; style lookup is skipped when it is absent.
                var stylesPart = mainPart.StyleDefinitionsPart;
                Styles styles = stylesPart != null ? stylesPart.Styles : null;

                foreach (var child in mainPart.Document.Body.ChildElements)
                {
                    // Only strongly-typed paragraphs are dumped; anything else is skipped.
                    var para = child as Paragraph;
                    if (para == null)
                    {
                        continue;
                    }

                    Console.WriteLine("p -------------------------");
                    writeParagraphStyle(styles, para);

                    foreach (var element in para.ChildElements)
                    {
                        Console.WriteLine(" " + element.LocalName + " ---------------------");

                        // A run without w:rPr has RunProperties == null, so guard before
                        // inspecting the formatting children.
                        // NOTE(review): this only reports that the element is present; an
                        // explicit "off" value on the element is still reported as set.
                        var run = element as Run;
                        if (run != null && run.RunProperties != null)
                        {
                            var runProps = run.RunProperties;
                            if (runProps.Bold != null)
                            {
                                Console.WriteLine("Bold");
                            }
                            if (runProps.Underline != null)
                            {
                                Console.WriteLine("Underline");
                            }
                            if (runProps.Italic != null)
                            {
                                Console.WriteLine("Italic");
                            }
                        }

                        Console.WriteLine(element.InnerText);
                        Console.WriteLine(" /" + element.LocalName + " ---------------------");
                    }

                    Console.WriteLine("/p -------------------------");
                }
            }
        }

        Console.In.Read();
    }

    /// <summary>
    /// Prints "Style: &lt;id&gt;" when the paragraph references a style id that is
    /// defined in <paramref name="styles"/>; prints nothing otherwise.
    /// </summary>
    /// <param name="styles">Style definitions of the document, or null when the document has none.</param>
    /// <param name="para">Paragraph whose style reference is resolved.</param>
    private static void writeParagraphStyle(Styles styles, Paragraph para)
    {
        var props = para.ParagraphProperties;
        if (styles == null || props == null || props.ParagraphStyleId == null || props.ParagraphStyleId.Val == null)
        {
            return;
        }

        string styleId = props.ParagraphStyleId.Val.Value;
        var style = styles.Elements<Style>()
            .FirstOrDefault(s => s.StyleId != null && s.StyleId.Value == styleId);
        if (style != null)
        {
            Console.WriteLine("Style: " + style.StyleId);
        }
    }

    /// <summary>
    /// Builds an in-memory package from a WordOpenXML string by creating one package
    /// part for every pkg:part element found in the XML.
    /// </summary>
    /// <param name="wordOpenXML">
    /// XML text containing pkg:part elements in the
    /// http://schemas.microsoft.com/office/2006/xmlPackage namespace.
    /// </param>
    /// <returns>
    /// An open package backed by a MemoryStream; the caller is responsible for disposing it.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="wordOpenXML"/> is null.</exception>
    /// <remarks>
    /// Any failure while parsing the XML or creating parts closes the partially built
    /// package and rethrows the original exception instead of returning a broken package.
    /// </remarks>
    private static Package createPackageFromWordOpenXML(string wordOpenXML)
    {
        if (wordOpenXML == null)
        {
            throw new ArgumentNullException("wordOpenXML");
        }

        const string packageXmlns = "http://schemas.microsoft.com/office/2006/xmlPackage";

        MemoryStream stream = new MemoryStream();
        Package newPkg = Package.Open(stream, FileMode.Create);
        try
        {
            XPathDocument xpDocument;
            using (StringReader reader = new StringReader(wordOpenXML))
            {
                xpDocument = new XPathDocument(reader);
            }

            XPathNavigator xpNavigator = xpDocument.CreateNavigator();
            XmlNamespaceManager nsManager = new XmlNamespaceManager(xpNavigator.NameTable);
            nsManager.AddNamespace("pkg", packageXmlns);

            XPathNodeIterator xpIterator = xpNavigator.Select("//pkg:part", nsManager);
            while (xpIterator.MoveNext())
            {
                XPathNavigator partNode = xpIterator.Current;
                Uri partUri = new Uri(partNode.GetAttribute("name", packageXmlns), UriKind.Relative);
                PackagePart pkgPart = newPkg.CreatePart(partUri, partNode.GetAttribute("contentType", packageXmlns));

                byte[] buffer = readPartContent(partNode, nsManager);

                // Close the part stream so the written bytes are committed to the part.
                using (Stream partStream = pkgPart.GetStream())
                {
                    partStream.Write(buffer, 0, buffer.Length);
                }
            }

            newPkg.Flush();
        }
        catch
        {
            // Do not hand back a half-built package: release it and surface the real error.
            newPkg.Close();
            throw;
        }

        return newPkg;
    }

    /// <summary>
    /// Extracts the payload bytes of a pkg:part element: the inner XML of its
    /// pkg:xmlData child encoded as UTF-8, or the decoded base64 text of its
    /// pkg:binaryData child. Returns an empty array when neither child exists.
    /// </summary>
    /// <param name="partNode">Navigator positioned on a pkg:part element.</param>
    /// <param name="nsManager">Namespace manager that maps the "pkg" prefix.</param>
    private static byte[] readPartContent(XPathNavigator partNode, XmlNamespaceManager nsManager)
    {
        // Select the wrapper element itself instead of stripping it with string
        // replacement, which breaks as soon as the wrapper carries attributes or
        // is serialized with a different prefix.
        XPathNavigator xmlData = partNode.SelectSingleNode("pkg:xmlData", nsManager);
        if (xmlData != null)
        {
            return Encoding.UTF8.GetBytes(xmlData.InnerXml);
        }

        XPathNavigator binaryData = partNode.SelectSingleNode("pkg:binaryData", nsManager);
        if (binaryData != null)
        {
            return Convert.FromBase64String(binaryData.Value);
        }

        return new byte[0];
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment