Skip to content

Instantly share code, notes, and snippets.

@linxlunx
Created May 8, 2015 06:39
Show Gist options
  • Save linxlunx/865766e713e5b5b467dc to your computer and use it in GitHub Desktop.
Save linxlunx/865766e713e5b5b467dc to your computer and use it in GitHub Desktop.
Detikcom Crawler With Golang
package main
import (
"fmt"
"log"
"io/ioutil"
"net/http"
"encoding/xml"
"sync"
"encoding/json"
)
// Con is the decoded form of the feed's top-level <rss> element.
type Con struct {
	// XMLName pins the expected root element name to "rss".
	XMLName xml.Name `xml:"rss"`
	// Channel holds the contents of the nested <channel> element.
	Channel Channel `xml:"channel"`
}
// Channel is the decoded <channel> element of the feed.
type Channel struct {
	// XMLName pins the expected element name to "channel".
	XMLName xml.Name `xml:"channel"`
	// Items collects every <item> child, in document order.
	Items []Items `xml:"item"`
}
// Items is a single <item> entry of the feed; only its link is decoded.
type Items struct {
	// Link is the text of the item's <link> child element.
	Link string `xml:"link"`
}
// JSONdata is the target for decoding the article JSON document.
//
// The fields carry no json tags, so encoding/json matches them against the
// document's keys by field name, ignoring case.
type JSONdata struct {
	// Content mirrors the document's "content" object.
	Content struct {
		// Titles mirrors the nested "titles" object.
		Titles struct {
			Title string
		}
		Url    string
		Resume string
		Data   string
	}
}
// wg counts the get_content goroutines started by main: main adds one per
// feed item and waits on it, and each get_content call marks itself done.
var wg sync.WaitGroup
// get_data downloads the detik RSS feed and returns the raw response body.
//
// Every failure is fatal: a transport error, a non-200 status or a read error
// is logged and the process exits, so a returned slice always comes from a
// successful response.
func get_data() []byte {
	const feedURL = "http://detik.feedsportal.com/c/33613/f/656082/index.rss"

	resp, err := http.Get(feedURL)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Reject error pages here instead of returning their body as if it were
	// the feed.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected status %s", feedURL, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	return body
}
// get_link decodes the XML document in all_content into a Con and returns a
// pointer to it. If decoding fails the error is logged and the process exits.
func get_link(all_content []byte) *Con {
	var feed Con
	if err := xml.Unmarshal(all_content, &feed); err != nil {
		log.Fatal(err)
	}
	return &feed
}
func get_content(linker string) {
resp, err := http.Get(linker)
if err != nil {
log.Fatal(err)
}
finalUrl := resp.Request.URL.String()
tempUrl := "http://ipad.detik.com/search/new_search_content.php?format=json&compress=1&url="
tempUrl += finalUrl
tempUrl += "&uuid=12345"
client := &http.Client{}
req, err := http.NewRequest("GET", tempUrl, nil)
if err != nil {
log.Fatal(err)
}
req.Header.Set("User-Agent", "detikcom/iPad (detikcom on iPad Apps)")
data_resp, err := client.Do(req)
if err != nil {
log.Fatal(err)
}
defer data_resp.Body.Close()
news_body, err := ioutil.ReadAll(data_resp.Body)
if err != nil {
log.Fatal(err)
}
j := &JSONdata{}
err = json.Unmarshal([]byte(news_body), &j)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Title: %s\n", j.Content.Titles.Title)
fmt.Printf("Url: %s\n", j.Content.Url)
fmt.Printf("Data: %s\n", j.Content.Data)
fmt.Printf("----------------\n")
wg.Done()
}
func main() {
data := get_data()
link := get_link(data)
for _, post := range link.Channel.Items {
wg.Add(1)
go get_content(post.Link)
}
wg.Wait()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment