Created
May 7, 2020 14:11
-
-
Save zgordan-vv/487d6d5143df61d120d71914cccbbfd0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strconv" | |
"github.com/unidoc/unioffice/document" | |
"github.com/unidoc/unioffice/schema/soo/wml" | |
"github.com/unidoc/unioffice/schema/soo/ofc/sharedTypes" | |
) | |
type KeyValuePair struct { | |
Key []Paragraph | |
Value []Paragraph | |
} | |
type Paragraph struct { | |
IsListItem bool | |
Content []Span | |
Styles StylesT | |
} | |
type Span struct { | |
Text string | |
Styles StylesT | |
} | |
type StylesT struct { | |
RStyle string | |
Font string | |
EastAsiaFont string | |
HexColor string | |
Spacing string | |
FontSize string | |
FontSizeComplex string | |
UnderlineType wml.ST_Underline | |
UnderlineColor string | |
Bold bool | |
Italic bool | |
Caps bool | |
Strike bool | |
DoubleStrike bool | |
Outline bool | |
Shadow bool | |
Emboss bool | |
RightToLeft bool | |
VerticalAlign sharedTypes.ST_VerticalAlignRun | |
} | |
func main() { | |
doc, err := document.Open("tables.docx") | |
if err != nil { | |
panic(err) | |
} | |
extracted := ExtractFromDocTables(doc) | |
fmt.Println(extracted) | |
} | |
func ExtractFromDocTables(doc *document.Document) []KeyValuePair { | |
result := []KeyValuePair{} | |
for _, tbl := range doc.Tables() { | |
for _, crc := range tbl.X().EG_ContentRowContent { | |
for _, tr := range crc.Tr { | |
ccc := tr.EG_ContentCellContent | |
keyPars := extractFromCell(ccc[0]) | |
valuePars := extractFromCell(ccc[1]) | |
result = append(result, KeyValuePair{ | |
Key: keyPars, | |
Value: valuePars, | |
}) | |
} | |
} | |
} | |
return result | |
} | |
func extractFromCell(ccc *wml.EG_ContentCellContent) []Paragraph { | |
paragraphs := []Paragraph{} | |
for _, ble := range ccc.Tc[0].EG_BlockLevelElts { | |
for _, cbc := range ble.EG_ContentBlockContent { | |
for _, sourcePar := range cbc.P { | |
paragraph := Paragraph{ | |
Content: getContent(sourcePar.EG_PContent), | |
Styles: pPr2Styles(sourcePar.PPr), | |
IsListItem: sourcePar.PPr.NumPr != nil, | |
} | |
paragraphs = append(paragraphs, paragraph) | |
} | |
} | |
} | |
return paragraphs | |
} | |
func getContent(pcs []*wml.EG_PContent) []Span { | |
content := []Span{} | |
for _, pc := range pcs { | |
for _, crc := range pc.EG_ContentRunContent { | |
r := crc.R | |
rStyles := rPr2Styles(r.RPr) | |
text := "" | |
for _, ic := range r.EG_RunInnerContent { | |
text += ic.T.Content | |
} | |
content = append(content, Span{ | |
Text: text, | |
Styles: rStyles, | |
}) | |
} | |
} | |
return content | |
} | |
func pPr2Styles(ppr *wml.CT_PPr) StylesT { | |
pr := ppr.RPr | |
styles := StylesT{ | |
Bold: getBool(pr.B) || getBool(pr.BCs), | |
Italic: getBool(pr.I) || getBool(pr.ICs), | |
Caps: getBool(pr.Caps), | |
Strike: getBool(pr.Strike), | |
DoubleStrike: getBool(pr.Dstrike), | |
Outline: getBool(pr.Outline), | |
Shadow: getBool(pr.Shadow), | |
Emboss: getBool(pr.Emboss), | |
RightToLeft: getBool(pr.Rtl), | |
} | |
if pr.RStyle != nil { | |
styles.RStyle = pr.RStyle.ValAttr | |
} | |
if fonts := pr.RFonts; fonts != nil { | |
font := "" | |
if fonts.AsciiAttr != nil { | |
font = *fonts.AsciiAttr | |
} else if fonts.HAnsiAttr != nil { | |
font = *fonts.HAnsiAttr | |
} else if fonts.CsAttr != nil { | |
font = *fonts.CsAttr | |
} | |
styles.Font = font | |
if fonts.EastAsiaAttr != nil { | |
styles.EastAsiaFont = *fonts.EastAsiaAttr | |
} | |
} | |
if color := pr.Color; color != nil { | |
valAttr := color.ValAttr | |
if valAttr.ST_HexColorRGB != nil { | |
styles.HexColor = *valAttr.ST_HexColorRGB | |
} | |
} | |
if spacing := pr.Spacing; spacing != nil { | |
spacingResult := "" | |
valAttr := spacing.ValAttr | |
if valAttr.Int64 != nil { | |
spacingResult += strconv.FormatInt(*valAttr.Int64, 10) | |
} | |
if valAttr.ST_UniversalMeasure != nil { | |
spacingResult += *valAttr.ST_UniversalMeasure | |
} | |
styles.Spacing = spacingResult | |
} | |
if sz := pr.Sz; sz != nil { | |
size := "" | |
valAttr := sz.ValAttr | |
if valAttr.ST_UnsignedDecimalNumber != nil { | |
size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10) | |
} | |
if valAttr.ST_PositiveUniversalMeasure != nil { | |
size += *valAttr.ST_PositiveUniversalMeasure | |
} | |
styles.FontSize = size | |
} | |
if sz := pr.SzCs; sz != nil { | |
size := "" | |
valAttr := sz.ValAttr | |
if valAttr.ST_UnsignedDecimalNumber != nil { | |
size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10) | |
} | |
if valAttr.ST_PositiveUniversalMeasure != nil { | |
size += *valAttr.ST_PositiveUniversalMeasure | |
} | |
styles.FontSizeComplex = size | |
} | |
if underline := pr.U; underline != nil { | |
styles.UnderlineType = underline.ValAttr | |
color := underline.ColorAttr | |
if color.ST_HexColorRGB != nil { | |
styles.UnderlineColor = *color.ST_HexColorRGB | |
} | |
} | |
if vertAlign := pr.VertAlign; vertAlign != nil { | |
styles.VerticalAlign = vertAlign.ValAttr | |
} | |
return styles | |
} | |
func rPr2Styles(pr *wml.CT_RPr) StylesT { | |
styles := StylesT{ | |
Bold: getBool(pr.B) || getBool(pr.BCs), | |
Italic: getBool(pr.I) || getBool(pr.ICs), | |
Caps: getBool(pr.Caps), | |
Strike: getBool(pr.Strike), | |
DoubleStrike: getBool(pr.Dstrike), | |
Outline: getBool(pr.Outline), | |
Shadow: getBool(pr.Shadow), | |
Emboss: getBool(pr.Emboss), | |
RightToLeft: getBool(pr.Rtl), | |
} | |
if pr.RStyle != nil { | |
styles.RStyle = pr.RStyle.ValAttr | |
} | |
if fonts := pr.RFonts; fonts != nil { | |
font := "" | |
if fonts.AsciiAttr != nil { | |
font = *fonts.AsciiAttr | |
} else if fonts.HAnsiAttr != nil { | |
font = *fonts.HAnsiAttr | |
} else if fonts.CsAttr != nil { | |
font = *fonts.CsAttr | |
} | |
styles.Font = font | |
if fonts.EastAsiaAttr != nil { | |
styles.EastAsiaFont = *fonts.EastAsiaAttr | |
} | |
} | |
if color := pr.Color; color != nil { | |
valAttr := color.ValAttr | |
if valAttr.ST_HexColorRGB != nil { | |
styles.HexColor = *valAttr.ST_HexColorRGB | |
} | |
} | |
if spacing := pr.Spacing; spacing != nil { | |
spacingResult := "" | |
valAttr := spacing.ValAttr | |
if valAttr.Int64 != nil { | |
spacingResult += strconv.FormatInt(*valAttr.Int64, 10) | |
} | |
if valAttr.ST_UniversalMeasure != nil { | |
spacingResult += *valAttr.ST_UniversalMeasure | |
} | |
styles.Spacing = spacingResult | |
} | |
if sz := pr.Sz; sz != nil { | |
size := "" | |
valAttr := sz.ValAttr | |
if valAttr.ST_UnsignedDecimalNumber != nil { | |
size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10) | |
} | |
if valAttr.ST_PositiveUniversalMeasure != nil { | |
size += *valAttr.ST_PositiveUniversalMeasure | |
} | |
styles.FontSize = size | |
} | |
if sz := pr.SzCs; sz != nil { | |
size := "" | |
valAttr := sz.ValAttr | |
if valAttr.ST_UnsignedDecimalNumber != nil { | |
size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10) | |
} | |
if valAttr.ST_PositiveUniversalMeasure != nil { | |
size += *valAttr.ST_PositiveUniversalMeasure | |
} | |
styles.FontSizeComplex = size | |
} | |
if underline := pr.U; underline != nil { | |
styles.UnderlineType = underline.ValAttr | |
color := underline.ColorAttr | |
if color.ST_HexColorRGB != nil { | |
styles.UnderlineColor = *color.ST_HexColorRGB | |
} | |
} | |
if vertAlign := pr.VertAlign; vertAlign != nil { | |
styles.VerticalAlign = vertAlign.ValAttr | |
} | |
return styles | |
} | |
func getBool(onOff *wml.CT_OnOff) bool { | |
return onOff != nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment