// Source: GitHub gist mmirolim/ae03a4ccd95672584b02 (last active March 15, 2016 10:28).
// GitHub flagged this file as containing bidirectional Unicode text that may
// be interpreted or compiled differently than it appears; review it in an
// editor that reveals hidden Unicode characters.

package main
import (
	"fmt"
	"log"
	"net/http"
	"strings"
	"sync/atomic"
	"time"
	"unicode/utf8"

	"golang.org/x/net/html"
)
// proxy handler | |
func proxy(w http.ResponseWriter, r *http.Request) { | |
// get url to proxy | |
url := "http://habrahabr.ru" + r.URL.String() | |
log.Println("url to fetch", url) | |
// get page | |
res, err := http.Get(url) | |
if err != nil { | |
log.Println("err while fetching url", err) | |
return | |
} | |
doc, err := html.Parse(res.Body) | |
if err != nil { | |
log.Println("err while parsing html", err) | |
return | |
} | |
defer res.Body.Close() | |
var f func(*html.Node) | |
var nodeCounter int64 | |
// define recursive dom walker | |
f = func(n *html.Node) { | |
// we need TextNode it should not be script text and should be longer than 5 rune | |
if n.Type == html.TextNode && n.Parent.Data != "script" && utf8.RuneCountInString(strings.TrimSpace(n.Data)) > 5 { | |
atomic.AddInt64(&nodeCounter, 1) | |
// split text to words on spaces | |
words := strings.Fields(n.Data) | |
// range over words to find appropriate one | |
for k, w := range words { | |
// define Replacer, we do not commas or dottes | |
// counted as rune | |
r := strings.NewReplacer(".", "", ",", "", ":", "", "'", "", "`", "") | |
// get word mostly letter runes | |
word := r.Replace(w) | |
// count it | |
if utf8.RuneCountInString(word) == 6 { | |
// and replace it | |
words[k] = strings.Replace(w, word, word+"\u2122", 1) | |
} | |
} | |
// join and assign it back to Node Data | |
n.Data = strings.Join(words, " ") | |
} | |
// recursively range over all children in siblings | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
f(c) | |
} | |
} | |
// walk all nodes | |
start := time.Now() | |
f(doc) | |
log.Println("html parse and modification time", time.Since(start)) | |
// write to our response (data received by Browser) | |
start = time.Now() | |
err = html.Render(w, doc) | |
if err != nil { | |
log.Println("err while rendering data", err) | |
return | |
} | |
log.Println("node counter", nodeCounter) | |
log.Println("modified dom render time", time.Since(start)) | |
} | |
// measureExec wraps h in a handler that logs how long each call to h took.
// The duration is logged after h returns; the request and response are
// passed through to h unchanged.
func measureExec(h http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Record the start time so the elapsed time can be computed once the
		// wrapped handler has finished.
		start := time.Now()
		h(w, r)
		log.Println("handler execution time =>", time.Since(start))
	}
}
func main() { | |
// register url and handlerFunc called for "/" url | |
http.HandleFunc("/", measureExec(proxy)) | |
// write that server starting in console | |
fmt.Println("Starting Server ...") | |
// start server and check for errors, maybe port is busy and it could | |
// not start | |
err := http.ListenAndServe("localhost:4000", nil) | |
// fatal (stop program) on errors | |
if err != nil { | |
// fatal and log error | |
log.Fatal("Server could not started", err) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment