Skip to content

Instantly share code, notes, and snippets.

@PirosB3
Created November 28, 2014 19:27
Show Gist options
  • Save PirosB3/c438a46093487165decc to your computer and use it in GitHub Desktop.
Save PirosB3/c438a46093487165decc to your computer and use it in GitHub Desktop.
package main
import (
"strings"
"bytes"
"fmt"
"time"
"net/http"
"golang.org/x/net/html"
)
// START_WEBSITE is presumably meant to be the URL the crawl starts from.
// NOTE(review): nothing in the visible source references this constant; main
// seeds the work channel with a different hard-coded URL. Confirm whether the
// constant is still needed or whether main should use it.
const (
START_WEBSITE = "http://google.com"
)
// Node is the element type stored in a Queue.
type Node struct {
	Value int
}

// Queue is a FIFO queue of *Node values backed by a ring buffer that grows
// on demand.
//
// The zero value is an empty queue that is ready to use.
type Queue struct {
	nodes []*Node // ring buffer storage
	size  int     // slots added per growth; when <= 0 the buffer doubles instead
	head  int     // index of the next node to pop
	tail  int     // index of the next free slot
	count int     // number of nodes currently stored
}

// Push adds a node to the queue.
//
// The buffer is grown first when it has no storage yet (zero-value Queue) or
// when it is full, so Push never indexes outside the buffer.
func (q *Queue) Push(n *Node) {
	if len(q.nodes) == 0 || (q.head == q.tail && q.count > 0) {
		q.grow()
	}
	q.nodes[q.tail] = n
	q.tail = (q.tail + 1) % len(q.nodes)
	q.count++
}

// grow replaces the ring buffer with a larger one, unrolling the stored
// nodes so that the oldest sits at index 0.
func (q *Queue) grow() {
	step := q.size
	if step <= 0 {
		// No growth increment configured: double the buffer.
		step = len(q.nodes)
	}
	if step == 0 {
		// No storage at all yet: start with a single slot.
		step = 1
	}

	nodes := make([]*Node, len(q.nodes)+step)
	copied := copy(nodes, q.nodes[q.head:])
	copy(nodes[copied:], q.nodes[:q.head])

	q.head = 0
	q.tail = len(q.nodes) // the old buffer was full (or empty), so this equals count
	q.nodes = nodes
}

// Pop removes and returns a node from the queue in first to last order.
// It returns nil when the queue is empty.
func (q *Queue) Pop() *Node {
	if q.count == 0 {
		return nil
	}
	node := q.nodes[q.head]
	// Clear the slot so the buffer does not keep the popped node reachable.
	q.nodes[q.head] = nil
	q.head = (q.head + 1) % len(q.nodes)
	q.count--
	return node
}
// TryInserting sends value on ch from a new goroutine and returns a channel
// that receives 1 once the send has completed.
//
// The returned channel has a buffer of one, so the helper goroutine can
// finish as soon as the send succeeds even if the caller has stopped
// listening (for example after its own timeout fired). The goroutine still
// blocks for as long as ch itself cannot accept the value.
func TryInserting(ch chan string, value string) chan int {
	nc := make(chan int, 1)
	go func() {
		ch <- value
		nc <- 1
	}()
	return nc
}
// SpinupWorker runs one crawl worker until it has been idle for 10 seconds.
//
// It repeatedly takes a URL from in, fetches the page and feeds every
// "http://" link found in an <a href="..."> attribute back into in. When no
// URL arrives within 10 seconds it sends true on quit exactly once and
// returns, so the caller can count one quit signal per worker.
//
// out is accepted for interface compatibility but is not used.
func SpinupWorker(in chan string, out chan string, quit chan bool) {
	// One client per worker, with a timeout so a stalled server cannot
	// block the worker forever.
	client := &http.Client{Timeout: 30 * time.Second}
	for {
		select {
		case site := <-in:
			crawlPage(client, site, in)
		case <-time.After(10 * time.Second):
			quit <- true
			return
		}
	}
}

// crawlPage fetches site and queues each absolute http:// link it contains.
// It lives in its own function so the deferred Body.Close runs after every
// page instead of piling up inside the worker's endless loop.
func crawlPage(client *http.Client, site string, in chan string) {
	resp, err := client.Get(site)
	if err != nil {
		// Crawling is best effort: unreachable pages are skipped.
		return
	}
	defer resp.Body.Close()

	anchor := []byte{'a'}
	page := html.NewTokenizer(resp.Body)
	for {
		tt := page.Next()
		if tt == html.ErrorToken {
			// End of document (io.EOF) or a read/parse error.
			return
		}
		if tt != html.StartTagToken {
			continue
		}
		tag, hasAttr := page.TagName()
		if !hasAttr || !bytes.Equal(anchor, tag) {
			continue
		}
		link, ok := anchorHref(page)
		if !ok || !strings.HasPrefix(link, "http://") {
			continue
		}
		// Send directly inside the select: if the queue stays full for 10
		// seconds the link is dropped, and no helper goroutine is left behind.
		select {
		case in <- link:
			fmt.Println(link)
		case <-time.After(10 * time.Second):
			fmt.Println("Queue full..")
		}
	}
}

// anchorHref scans the attributes of the tokenizer's current tag and returns
// the value of its href attribute, if it has one.
func anchorHref(page *html.Tokenizer) (string, bool) {
	for {
		key, val, more := page.TagAttr()
		if string(key) == "href" {
			return string(val), true
		}
		if !more {
			return "", false
		}
	}
}
// main starts a fixed pool of crawl workers, seeds the shared work channel
// with a single URL, and then blocks until it has received one value on the
// quit channel for every worker it started.
func main() {
	const workerCount = 20

	var (
		in   = make(chan string, 10000) // pending URLs, shared by all workers
		out  = make(chan string)        // handed to the workers; never read here
		quit = make(chan bool)          // idle notifications from the workers
	)

	for started := 0; started < workerCount; started++ {
		go SpinupWorker(in, out, quit)
	}

	in <- "http://webchick.net/node/99"

	for remaining := workerCount; remaining > 0; remaining-- {
		<-quit
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment