Last active
August 2, 2021 02:02
-
-
Save ryanohs/795caa5e3be0f8c9d9e0c6d1efd88989 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Converts the paragraphs of a .docx document into a tab-indented, bulleted
/// plain-text outline.
/// </summary>
/// <remarks>
/// Paragraph styles are mapped to outline levels (Heading1 = 1, Heading2 = 2,
/// Normal = 3, ListParagraph = 4 + list indent level). Paragraphs that have a
/// positive <c>IndentationBefore</c> are not turned into nodes; their text is
/// appended as "block text" to the most recently created node.
/// </remarks>
public class DocxToJournalTransformer
{
    /// <summary>
    /// Sentinel returned by <see cref="GetIndentLevel"/> for a paragraph that
    /// continues the previous node as block text instead of starting a new node.
    /// </summary>
    private const int BlockTextIndent = -2;

    /// <summary>Depth passed to <see cref="WriteNode"/> for the synthetic root node.</summary>
    private const int RootDepth = -1;

    /// <summary>Buffer size, in bytes, handed to the <see cref="StreamWriter"/>.</summary>
    private const int WriterBufferSize = 1024;

    /// <summary>
    /// Loads <paramref name="docxFilename"/>, builds the outline tree and writes
    /// it as UTF-8 text to <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="docxFilename">Path of the .docx file to read.</param>
    /// <param name="outputStream">
    /// Destination stream. It is left open so the caller can rewind and read it.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="docxFilename"/> or <paramref name="outputStream"/> is null.
    /// </exception>
    /// <exception cref="NotSupportedException">
    /// A non-blank, non-indented paragraph uses a style that has no outline level.
    /// </exception>
    public void Transform(string docxFilename, Stream outputStream)
    {
        // Fail before any document parsing instead of after it.
        if (docxFilename == null)
        {
            throw new ArgumentNullException(nameof(docxFilename));
        }

        if (outputStream == null)
        {
            throw new ArgumentNullException(nameof(outputStream));
        }

        var root = BuildTree(docxFilename);

        // leaveOpen: true - disposing the writer flushes it but must not close the caller's stream.
        using (var writer = new StreamWriter(outputStream, Encoding.UTF8, WriterBufferSize, true))
        {
            WriteNode(root, RootDepth, writer);
        }
    }

    /// <summary>
    /// Reads every non-blank paragraph of the document and arranges them into a
    /// tree hanging off a synthetic root node.
    /// </summary>
    private static Node BuildTree(string docxFilename)
    {
        var root = new Node();

        using (var document = DocX.Load(docxFilename))
        {
            var lastNode = root;

            foreach (var p in document.Paragraphs)
            {
                if (string.IsNullOrWhiteSpace(p.Text))
                {
                    continue;
                }

                var currIndent = GetIndentLevel(p);
                if (currIndent == BlockTextIndent)
                {
                    AppendBlockText(lastNode, p.Text);
                    continue;
                }

                var parent = GetParent(currIndent, lastNode);
                var node = new Node()
                {
                    Id = p.Xml.Attributes().FirstOrDefault(a => a.Name.LocalName == "paraId")?.Value,
                    Parent = parent,
                    Indent = currIndent,
                    Text = p.Text,
                    Type = GetNodeType(p),
                    StyleId = p.StyleId
                };

                parent.Children.Add(node);
                lastNode = node;
            }
        }

        return root;
    }

    /// <summary>
    /// Appends <paramref name="text"/> to the block text of <paramref name="owner"/>,
    /// separating successive paragraphs with a single '\n'.
    /// </summary>
    private static void AppendBlockText(Node owner, string text)
    {
        owner.BlockText = string.IsNullOrEmpty(owner.BlockText)
            ? text
            : owner.BlockText + "\n" + text;
    }

    /// <summary>
    /// Classifies a paragraph by highlight colour: any green run makes it a
    /// <see cref="NodeType.Task"/>, otherwise any yellow run makes it a
    /// <see cref="NodeType.Inspiration"/>, otherwise it is a <see cref="NodeType.Note"/>.
    /// </summary>
    private static NodeType GetNodeType(Paragraph paragraph)
    {
        // Green is tested first, so it wins when both colours are present.
        if (HasHighlight(paragraph, Highlight.green))
        {
            return NodeType.Task;
        }

        if (HasHighlight(paragraph, Highlight.yellow))
        {
            return NodeType.Inspiration;
        }

        return NodeType.Note;
    }

    /// <summary>
    /// Returns true when at least one formatted run of the paragraph carries
    /// the given highlight; a null run list or null formatting counts as no match.
    /// </summary>
    private static bool HasHighlight(Paragraph paragraph, Highlight highlight)
    {
        return paragraph.MagicText?.Any(t => t.formatting?.Highlight == highlight) ?? false;
    }

    /// <summary>
    /// Walks up from <paramref name="lastNode"/> to the nearest ancestor whose
    /// indent is strictly smaller than <paramref name="currIndent"/>.
    /// </summary>
    /// <returns>
    /// The node the new paragraph should be attached to; the root is returned
    /// when no ancestor has a smaller indent.
    /// </returns>
    private static Node GetParent(int currIndent, Node lastNode)
    {
        var parent = lastNode;

        // Stop at the root (Parent == null) rather than dereferencing past it.
        while (parent.Parent != null && currIndent <= parent.Indent)
        {
            parent = parent.Parent;
        }

        return parent;
    }

    /// <summary>
    /// Maps a paragraph to its outline level, or to <see cref="BlockTextIndent"/>
    /// when the paragraph has a positive <c>IndentationBefore</c>.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The paragraph style is not one of Heading1, Heading2, Normal or ListParagraph.
    /// </exception>
    private static int GetIndentLevel(Paragraph paragraph)
    {
        // The indentation check runs before the style lookup, so an indented
        // paragraph is block text whatever its style is.
        if (paragraph.IndentationBefore > 0)
        {
            return BlockTextIndent;
        }

        switch (paragraph.StyleId)
        {
            case "Heading1":
                return 1;
            case "Heading2":
                return 2;
            case "Normal":
                return 3;
            case "ListParagraph":
                // Nested list items go deeper; a missing list level counts as 0.
                return 4 + (paragraph.IndentLevel ?? 0);
            default:
                throw new NotSupportedException($"Unknown StyleId {paragraph.StyleId} at text {paragraph.Text}");
        }
    }

    /// <summary>
    /// Writes <paramref name="node"/> and, recursively, its children.
    /// </summary>
    /// <param name="node">Node to write.</param>
    /// <param name="indentLevel">
    /// Number of leading tabs for the node's own line; -1 marks the synthetic
    /// root, which has no line of its own.
    /// </param>
    /// <param name="streamWriter">Destination writer.</param>
    private static void WriteNode(Node node, int indentLevel, StreamWriter streamWriter)
    {
        if (indentLevel > -1)
        {
            var tabs = new string('\t', indentLevel);
            var bullet = GetBullet(node.Type);
            streamWriter.WriteLine($"{tabs}{bullet}{node.Text}");
        }

        if (node.BlockText != null)
        {
            // Block text sits one tab deeper than its owner. This also covers the
            // root (depth -1 -> zero tabs), so indented paragraphs that appear
            // before the first heading are written instead of being dropped.
            var blockTabs = new string('\t', indentLevel + 1);
            foreach (var line in node.BlockText.Split('\n'))
            {
                streamWriter.WriteLine($"{blockTabs}{line}");
            }
        }

        foreach (var child in node.Children)
        {
            WriteNode(child, indentLevel + 1, streamWriter);
        }
    }

    /// <summary>Returns the line prefix used for the given node type.</summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="nodeType"/> is not a defined <see cref="NodeType"/> value.
    /// </exception>
    private static string GetBullet(NodeType nodeType)
    {
        switch (nodeType)
        {
            case NodeType.Title:
                return "";
            case NodeType.Note:
                return "- ";
            case NodeType.Task:
                return "* ";
            case NodeType.Event:
                return "o ";
            case NodeType.Inspiration:
                return "! ";
            default:
                throw new ArgumentOutOfRangeException(nameof(nodeType), nodeType, null);
        }
    }
}
/// <summary>
/// One entry of the outline tree built by <c>DocxToJournalTransformer</c>.
/// </summary>
public class Node
{
    /// <summary>Creates a node with an empty child list; all other members keep their defaults.</summary>
    public Node()
    {
        Children = new List<Node>();
    }

    /// <summary>Value of the source paragraph's "paraId" attribute, or null when it has none.</summary>
    public string Id { get; set; }

    /// <summary>Kind of entry; selects the bullet written in front of <see cref="Text"/>.</summary>
    public NodeType Type { get; set; }

    /// <summary>Text of the source paragraph.</summary>
    public string Text { get; set; }

    /// <summary>Style id of the source paragraph.</summary>
    public string StyleId { get; set; }

    /// <summary>Outline level of the node; used to locate the parent of later nodes.</summary>
    public int Indent { get; set; }

    /// <summary>
    /// Free text attached to this node, one paragraph per '\n'-separated line;
    /// null when nothing was attached.
    /// </summary>
    public string BlockText { get; set; }

    /// <summary>Node this one hangs under; stays null unless assigned.</summary>
    public Node Parent { get; set; }

    /// <summary>Nodes nested directly under this one, in insertion order.</summary>
    public List<Node> Children { get; set; }
}
/// <summary>
/// Kind of outline entry. <c>DocxToJournalTransformer.GetBullet</c> maps each
/// value to the prefix written in front of the entry's text.
/// </summary>
public enum NodeType
{
    /// <summary>Default value; written without a bullet.</summary>
    Title = 0,

    /// <summary>Written with the "- " bullet.</summary>
    Note = 1,

    /// <summary>Written with the "* " bullet.</summary>
    Task = 2,

    /// <summary>Written with the "o " bullet.</summary>
    Event = 3,

    /// <summary>Written with the "! " bullet.</summary>
    Inspiration = 4
}
/// <summary>
/// xUnit harness that runs <see cref="DocxToJournalTransformer"/> against a
/// document on disk and echoes the generated outline to the test output.
/// </summary>
public class DocXTesting
{
    private readonly ITestOutputHelper _output;

    /// <summary>Stores the output helper used to print the outline.</summary>
    public DocXTesting(ITestOutputHelper testOutputHelper)
    {
        _output = testOutputHelper;
    }

    /// <summary>
    /// Transforms the document into an in-memory stream and prints it line by line.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the input is a hard-coded absolute path and nothing is
    /// asserted, so this only passes on a machine that has that file.
    /// </remarks>
    [Fact]
    public void TransformDocument()
    {
        var buffer = new MemoryStream();
        new DocxToJournalTransformer().Transform(@"C:\code\docx\All Notes.docx", buffer);

        // Rewind so the reader sees what the transformer just wrote.
        buffer.Seek(0, SeekOrigin.Begin);

        using (var reader = new StreamReader(buffer))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                _output.WriteLine(line);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment