Skip to content

Instantly share code, notes, and snippets.

@jbowles
Created July 27, 2013 15:50
Show Gist options
  • Save jbowles/6095234 to your computer and use it in GitHub Desktop.
Save jbowles/6095234 to your computer and use it in GitHub Desktop.
Simple web crawler using simpletransport
package main
import (
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"time"

	"github.com/pkulak/simpletransport/simpletransport"
)
// lotso_urls is the fixed list of pages that main hands to the crawler.
// Entries are fetched in the order they appear here.
var lotso_urls = []string{
	"http://golang.org/",
	"https://en.wikipedia.org/wiki/Natural_language_processing",
	"http://golang.org/doc/faq#What_is_the_purpose_of_the_project",
	"https://github.com/yarlett/corpustools",
	"https://github.com/angeloskath/nlp-maxent-optimizer",
	"https://code.google.com/p/mlgo/",
	"http://en.wikipedia.org/wiki/Howdy_Doody",
	"https://news.ycombinator.com/news",
	"http://rubydoc.info/stdlib",
	"http://www.geeksforgeeks.org/",
	"https://github.com/",
	"http://www.regexper.com/",
	"http://www.letour.fr/le-tour/2013/us/",
	"http://www.codeschool.com/courses/real-time-web-with-nodejs",
	"http://balderdashy.github.io/sails/#npm-install",
	"http://projecteuler.net/about",
}
// Crawler bundles the settings for one crawl: two transport timeouts and the
// list of addresses to fetch.
type Crawler struct {
	// read_timeout and req_timeout are multiplied by time.Second in AsyncGet
	// before being passed to the transport, so they hold a plain number of
	// seconds (e.g. 10) rather than a ready-made duration.
	read_timeout, req_timeout time.Duration

	// uris lists the URLs to fetch, in order.
	uris []string
}
// AsyncGet fetches every URL in c.uris and prints each response body to
// standard output.
//
// Despite its name the function is synchronous: requests are issued one after
// another from the calling goroutine, in slice order.
//
// c.read_timeout and c.req_timeout are multiplied by time.Second before being
// handed to the transport, so they are expected to hold a plain number of
// seconds (e.g. 10), not a ready-made duration such as 10*time.Second.
//
// A URL that cannot be fetched, or whose body cannot be read, is reported on
// standard error and skipped; the remaining URLs are still processed.
func AsyncGet(c *Crawler) {
	client := &http.Client{
		Transport: &simpletransport.SimpleTransport{
			ReadTimeout:    c.read_timeout * time.Second,
			RequestTimeout: c.req_timeout * time.Second,
		},
	}

	for _, url := range c.uris {
		resp, err := client.Get(url)
		if err != nil {
			// resp is nil when Get fails; touching resp.Body would panic.
			fmt.Fprintf(os.Stderr, "fetching %q: %v\n", url, err)
			continue
		}

		body, err := ioutil.ReadAll(resp.Body)
		// Close right away instead of deferring: a defer inside the loop
		// would keep every body open until the function returns.
		resp.Body.Close()
		if err != nil {
			fmt.Fprintf(os.Stderr, "reading body of %q: %v\n", url, err)
			continue
		}

		fmt.Println(string(body))
	}
}
// main crawls the built-in URL list, passing 10 for both the read timeout and
// the request timeout.
func main() {
	settings := &Crawler{
		read_timeout: 10,
		req_timeout:  10,
		uris:         lotso_urls,
	}
	AsyncGet(settings)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment