Created
November 16, 2014 23:21
-
-
Save iskeld/2ac47becea86fdeba87d to your computer and use it in GitHub Desktop.
Initial works on a simple Ms Word templating engine, using Office Open XML SDK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System; | |
open DocumentFormat.OpenXml; | |
open DocumentFormat.OpenXml.Packaging; | |
open DocumentFormat.OpenXml.Wordprocessing; | |
[<Literal>] | |
let openingBracket = "<#" | |
[<Literal>] | |
let closingBracket = "#>" | |
let indexOf (str:string) (value:string) startIndex = | |
match str.IndexOf(value, startIndex, StringComparison.Ordinal) with | |
| index when index >= 0 -> Some index | |
| _ -> None | |
type BracketMatch = | |
| Opening of index : int | |
| Full of openingIndex : int * closingIndex : int | |
type ExpressionLocation = | |
{ | |
openingBracketIndex: int; | |
closingBracketIndex: int; | |
openingNode: Text; | |
closingNode: Text | |
} | |
type PartialExpression = { node: Text; index: int } | |
type FoundExpressions = { complete: ExpressionLocation list; opened: PartialExpression option } | |
type System.Text.StringBuilder with | |
member this.AppendMany(items: string list) = | |
items |> List.iter (fun str -> this.Append(str) |> ignore) | |
this | |
let fetchUntilNull fetcher entity = | |
let rec fetchRec current acc = | |
match fetcher current with | |
| null -> acc | |
| item -> item::acc |> fetchRec item | |
fetchRec entity [] |> List.rev | |
let allFollowingSiblings<'T when 'T :> OpenXmlElement and 'T : null> node = fetchUntilNull (fun (n:'T) -> n.NextSibling<'T>()) node | |
let allPreviousSiblings<'T when 'T :> OpenXmlElement and 'T : null> node = fetchUntilNull (fun (n:'T) -> n.PreviousSibling<'T>()) node | |
let rec followingSiblingsUntil<'T when 'T :> OpenXmlElement and 'T : null> (endNode:'T) (currentNode:'T) = | |
let rec innerFetcher (currentNode:'T) acc = | |
match currentNode.NextSibling<'T>() with | |
| null -> failwith "nodes are not siblings" | |
| sibling when Object.ReferenceEquals(sibling, endNode) -> acc | |
| sibling -> innerFetcher sibling (sibling::acc) | |
innerFetcher currentNode [] |> List.rev | |
let getTextNodesBetween (node1:Text) node2 = | |
match node1, node2 with | |
| (n1, n2) when n1 = n2 -> [] // text node level | |
| (n1, n2) when n1.Parent = n2.Parent -> followingSiblingsUntil n2 n1 |> List.rev // run level | |
| (n1, n2) when n1.Parent.Parent = n2.Parent.Parent -> // run's parent level | |
let cousins = followingSiblingsUntil n2.Parent n1.Parent |> List.collect (fun node -> node.Descendants<Text>() |> List.ofSeq) | |
(allFollowingSiblings n1) @ cousins @ (allPreviousSiblings n2) | |
| _ -> failwith "nodes have no common ancestor" | |
let getExpressionText exprLocation = | |
let { openingBracketIndex = startIndex; closingBracketIndex = endIndex; openingNode = startNode; closingNode = endNode } = exprLocation | |
let startNodeText = startNode.Text | |
if startNode = endNode then startNodeText.Substring(startIndex + openingBracket.Length, endIndex - startIndex - openingBracket.Length) | |
else | |
let builder = new System.Text.StringBuilder() | |
builder.Append(startNodeText.Substring(startIndex + openingBracket.Length)) | |
.AppendMany(getTextNodesBetween startNode endNode |> List.map (fun txtNode -> txtNode.Text)) | |
.Append(endNode.Text.Substring(0, endIndex)) |> ignore | |
builder.ToString() | |
let simpleMatcher startIndex (node:Text) = | |
let text = node.Text | |
let matchExpressionInString startIndex = | |
match indexOf text openingBracket startIndex with | |
| Some openingIndex -> | |
match indexOf text closingBracket (openingIndex + closingBracket.Length) with | |
| Some closingIndex -> Some (Full (openingIndex, closingIndex)) | |
| None -> Some (Opening openingIndex) | |
| None -> None | |
let toExpressionLocation a b = { openingBracketIndex = a; closingBracketIndex = b; openingNode = node; closingNode = node } | |
let rec matchMany index acc = | |
match matchExpressionInString index with | |
| Some matchType -> | |
let accList = fst acc; | |
match matchType with | |
| Opening index -> accList, Some index | |
| Full (opening, closing) -> ((toExpressionLocation opening closing)::accList, None) |> matchMany (closing + closingBracket.Length) | |
| None -> acc | |
match matchMany startIndex ([], None) with | |
| (list, index) -> { complete = list; opened = Option.map (fun i -> { node = node; index = i}) index } | |
let completingMatcher (partial:PartialExpression) (currentNode:Text) = | |
let matchClosing (startNode:Text) (currentNode:Text) = | |
let haveSameAncestor (node1:Text) node2 = | |
node1 = node2 // text node | |
|| node1.Parent = node2.Parent // run node | |
|| node1.Parent.Parent = node2.Parent.Parent // run's parent node | |
match indexOf currentNode.Text closingBracket 0 with | |
| Some index -> if haveSameAncestor startNode currentNode then Some index else None | |
| None -> None | |
match matchClosing partial.node currentNode with | |
| Some closingIndex -> | |
let completedExpression = | |
{ | |
openingBracketIndex = partial.index; openingNode = partial.node; | |
closingBracketIndex = closingIndex; closingNode = currentNode | |
} | |
let remainingExpressions = simpleMatcher (closingIndex + closingBracket.Length) currentNode | |
{ complete = completedExpression::remainingExpressions.complete; opened = remainingExpressions.opened } | |
| None -> { complete = []; opened = Some partial} | |
let matchExpressions (textNodes: Text list) = | |
let rec matchRec matcher (elements: Text list) acc = | |
match elements with | |
| [] -> acc | |
| node::rest -> | |
let { complete = completeExpressions; opened = openedExpression } = matcher node | |
match openedExpression with | |
| Some opened -> matchRec (completingMatcher opened) rest completeExpressions@acc | |
| None -> matchRec (simpleMatcher 0) rest completeExpressions@acc | |
matchRec (simpleMatcher 0) textNodes [] | |
let doJob () = | |
use wordDoc = WordprocessingDocument.Open(@"G:\Var\kwiczolek.docx", true) | |
let textElements = wordDoc.MainDocumentPart.Document.Body.Descendants<Text>() |> List.ofSeq | |
let matchedExpressions = matchExpressions textElements | |
let puup = matchedExpressions |> List.map (fun expr -> getExpressionText expr) | |
() | |
[<EntryPoint>] | |
let main argv = | |
doJob() | |
printfn "%A" argv | |
0 // return an integer exit code |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment