Skip to content

Instantly share code, notes, and snippets.

@mmirolim
Last active March 15, 2016 10:28
Show Gist options
  • Save mmirolim/ae03a4ccd95672584b02 to your computer and use it in GitHub Desktop.
Save mmirolim/ae03a4ccd95672584b02 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"log"
"net/http"
"strings"
"sync/atomic"
"time"
"unicode/utf8"
"golang.org/x/net/html"
)
// proxyClient performs the upstream fetches. Unlike the default client used
// by http.Get it carries a timeout, so a stalled upstream cannot hold a
// handler goroutine forever.
var proxyClient = &http.Client{Timeout: 30 * time.Second}

// punctuationStripper deletes the punctuation characters that must not be
// counted as part of a word's length. It is built once because a
// strings.Replacer is immutable and safe for concurrent use.
var punctuationStripper = strings.NewReplacer(".", "", ",", "", ":", "", "'", "", "`", "")

// markSixRuneWords splits text on whitespace, appends the trade mark sign
// (U+2122) to every word that is exactly six runes long once the punctuation
// handled by punctuationStripper is removed, and joins the words back
// together with single spaces.
func markSixRuneWords(text string) string {
	words := strings.Fields(text)
	for i, raw := range words {
		bare := punctuationStripper.Replace(raw)
		if utf8.RuneCountInString(bare) != 6 {
			continue
		}
		// Replace inside the raw word so that leading/trailing punctuation
		// stays where it was ("abcdef," becomes "abcdef\u2122,"). When the
		// punctuation sits in the middle of the word, bare is not a
		// substring of raw and the word is left unchanged.
		words[i] = strings.Replace(raw, bare, bare+"\u2122", 1)
	}
	return strings.Join(words, " ")
}

// insideRawText reports whether n is a direct child of a <script> or <style>
// element, whose text is code rather than page content and must not be
// rewritten. A node without a parent is never considered raw text.
func insideRawText(n *html.Node) bool {
	if n.Parent == nil {
		return false
	}
	switch n.Parent.Data {
	case "script", "style":
		return true
	}
	return false
}

// proxy is the HTTP handler. It fetches the requested path from
// habrahabr.ru, parses the response as HTML, marks every six-rune word in the
// page's text nodes with a trade mark sign and renders the modified document
// to w. If the upstream page cannot be fetched or parsed the client receives
// 502 Bad Gateway.
func proxy(w http.ResponseWriter, r *http.Request) {
	// build the upstream URL from the incoming request's URL
	url := "http://habrahabr.ru" + r.URL.String()
	log.Println("url to fetch", url)

	res, err := proxyClient.Get(url)
	if err != nil {
		log.Println("err while fetching url", err)
		http.Error(w, "upstream fetch failed", http.StatusBadGateway)
		return
	}
	// Close the body on every path from here on, including a parse failure.
	defer res.Body.Close()

	doc, err := html.Parse(res.Body)
	if err != nil {
		log.Println("err while parsing html", err)
		http.Error(w, "upstream response could not be parsed", http.StatusBadGateway)
		return
	}

	// nodeCounter counts the text nodes that were rewritten. The walk below
	// runs on this goroutine only; the atomic add is not needed for safety.
	var nodeCounter int64

	// walk visits n and all of its descendants depth-first.
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		// Only text nodes outside <script>/<style> whose trimmed content is
		// longer than five runes can contain a six-rune word worth marking.
		if n.Type == html.TextNode && !insideRawText(n) &&
			utf8.RuneCountInString(strings.TrimSpace(n.Data)) > 5 {
			atomic.AddInt64(&nodeCounter, 1)
			n.Data = markSixRuneWords(n.Data)
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}

	start := time.Now()
	walk(doc)
	log.Println("html parse and modification time", time.Since(start))

	// write the modified document to the response (data received by browser)
	start = time.Now()
	if err := html.Render(w, doc); err != nil {
		// Part of the response may already have been written, so the status
		// can no longer be changed; just record the failure.
		log.Println("err while rendering data", err)
		return
	}
	log.Println("node counter", nodeCounter)
	log.Println("modified dom render time", time.Since(start))
}
// measureExec decorates h so that every invocation logs how long the wrapped
// handler took to run. The returned handler forwards the response writer and
// request to h unchanged.
func measureExec(h http.HandlerFunc) http.HandlerFunc {
	timed := func(w http.ResponseWriter, r *http.Request) {
		// remember when the wrapped handler was entered
		began := time.Now()
		// delegate to the wrapped handler
		h.ServeHTTP(w, r)
		// report the elapsed wall-clock time on the console
		elapsed := time.Since(began)
		log.Println("handler execution time =>", elapsed)
	}
	return http.HandlerFunc(timed)
}
// main registers the timed proxy handler for every path and serves HTTP on
// localhost:4000 until the server fails.
func main() {
	// register the handler called for "/" (and every path below it)
	http.HandleFunc("/", measureExec(proxy))
	// announce on the console that the server is starting
	fmt.Println("Starting Server ...")
	// ListenAndServe blocks while the server runs and returns an error when
	// it cannot start (for example the port is busy) or stops serving.
	if err := http.ListenAndServe("localhost:4000", nil); err != nil {
		// Use a format verb: log.Fatal would glue the message and the error
		// together without any separator.
		log.Fatalf("server could not be started: %v", err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment