Last active
November 9, 2023 01:29
-
-
Save niski84/d0a9c61b44c10b11540aee15a6fbdc5e to your computer and use it in GitHub Desktop.
custom html parsing for health check report
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"fmt" | |
"golang.org/x/net/html" | |
"strings" | |
) | |
// parseHTMLData finds the `tr` containing a `td` with exact text match for serviceName, | |
// then extracts the health status, lines between, and timestamp from the `tr`. | |
func parseHTMLData(htmlContent, serviceName string) (healthStatus string, linesBetween []string, ts string, err error) { | |
doc, err := html.Parse(strings.NewReader(htmlContent)) | |
if err != nil { | |
return "", nil, "", err | |
} | |
// Find the tr element that contains the td with the service name | |
trNode := findTRNode(doc, serviceName) | |
if trNode == nil { | |
return "", nil, "", fmt.Errorf("service name '%s' not found", serviceName) | |
} | |
// Extract the health status, linesBetween, and timestamp | |
healthStatus = extractHealthStatus(trNode) | |
linesBetween = extractLinesBetween(trNode) | |
ts = extractTimestamp(trNode) | |
if healthStatus == "" { | |
err = fmt.Errorf("health status not found") | |
} | |
if ts == "" { | |
err = fmt.Errorf("%v, timestamp not found", err) | |
} | |
return healthStatus, linesBetween, ts, err | |
} | |
// findTRNode traverses the HTML node tree and returns the `tr` node that contains the specified service name. | |
func findTRNode(n *html.Node, serviceName string) *html.Node { | |
if n.Type == html.ElementNode && n.Data == "td" && getTextFromNode(n) == serviceName { | |
return n.Parent | |
} | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
if trNode := findTRNode(c, serviceName); trNode != nil { | |
return trNode | |
} | |
} | |
return nil | |
} | |
// extractHealthStatus extracts the health status from the first td element in the tr. | |
func extractHealthStatus(trNode *html.Node) string { | |
for c := trNode.FirstChild; c != nil; c = c.NextSibling { | |
if c.Type == html.ElementNode && c.Data == "td" { | |
return getTextFromNode(c) | |
} | |
} | |
return "" | |
} | |
// Markup matched by extractLinesBetween: <div class="line-between">
// extractTimestamp searches for the td element with class "ts" and returns its text content. | |
func extractTimestamp(trNode *html.Node) string { | |
for c := trNode.FirstChild; c != nil; c = c.NextSibling { | |
if c.Type == html.ElementNode && c.Data == "td" { | |
for _, a := range c.Attr { | |
if a.Key == "class" && a.Val == "ts" { | |
return getTextFromNode(c) | |
} | |
} | |
} | |
} | |
return "" | |
} | |
// getTextFromNode extracts and returns the concatenated text content of a node. | |
func getTextFromNode(n *html.Node) string { | |
var buf bytes.Buffer | |
var f func(*html.Node) | |
f = func(n *html.Node) { | |
if n.Type == html.TextNode { | |
buf.WriteString(n.Data) | |
} | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
f(c) | |
} | |
} | |
f(n) | |
return strings.TrimSpace(buf.String()) | |
} | |
// extractLinesBetween finds all div elements with class "line-between" within the tr node and returns their text content. | |
func extractLinesBetween(trNode *html.Node) []string { | |
var lines []string | |
// Traverse the tr node to find the td element that contains div with class "line-between". | |
for c := trNode.FirstChild; c != nil; c = c.NextSibling { | |
if c.Type == html.ElementNode && c.Data == "td" { | |
// Now look for div elements with the specific class inside the td. | |
for div := c.FirstChild; div != nil; div = div.NextSibling { | |
if div.Type == html.ElementNode && div.Data == "div" { | |
for _, a := range div.Attr { | |
if a.Key == "class" && a.Val == "line-between" { | |
lines = append(lines, getTextFromNode(div)) | |
} | |
} | |
} | |
} | |
} | |
} | |
return lines | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment