Created
May 8, 2015 06:39
-
-
Save linxlunx/865766e713e5b5b467dc to your computer and use it in GitHub Desktop.
Detikcom Crawler With Golang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"io/ioutil" | |
"net/http" | |
"encoding/xml" | |
"sync" | |
"encoding/json" | |
) | |
type Con struct { | |
XMLName xml.Name `xml:"rss"` | |
Channel Channel `xml:"channel"` | |
} | |
type Channel struct { | |
XMLName xml.Name `xml:"channel"` | |
Items []Items `xml:"item"` | |
} | |
type Items struct { | |
Link string `xml:"link"` | |
} | |
type JSONdata struct { | |
Content struct { | |
Titles struct { | |
Title string | |
} | |
Url string | |
Resume string | |
Data string | |
} | |
} | |
var wg sync.WaitGroup | |
func get_data() []byte { | |
client := &http.Client{} | |
req, err := http.NewRequest("GET", "http://detik.feedsportal.com/c/33613/f/656082/index.rss", nil) | |
if err != nil { | |
log.Fatal(err) | |
} | |
resp, err := client.Do(req) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer resp.Body.Close() | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
log.Fatal(err) | |
} | |
return body | |
} | |
func get_link(all_content []byte) *Con{ | |
c := &Con{} | |
err := xml.Unmarshal(all_content, &c) | |
if err != nil { | |
log.Fatal(err) | |
} | |
return c | |
} | |
func get_content(linker string) { | |
resp, err := http.Get(linker) | |
if err != nil { | |
log.Fatal(err) | |
} | |
finalUrl := resp.Request.URL.String() | |
tempUrl := "http://ipad.detik.com/search/new_search_content.php?format=json&compress=1&url=" | |
tempUrl += finalUrl | |
tempUrl += "&uuid=12345" | |
client := &http.Client{} | |
req, err := http.NewRequest("GET", tempUrl, nil) | |
if err != nil { | |
log.Fatal(err) | |
} | |
req.Header.Set("User-Agent", "detikcom/iPad (detikcom on iPad Apps)") | |
data_resp, err := client.Do(req) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer data_resp.Body.Close() | |
news_body, err := ioutil.ReadAll(data_resp.Body) | |
if err != nil { | |
log.Fatal(err) | |
} | |
j := &JSONdata{} | |
err = json.Unmarshal([]byte(news_body), &j) | |
if err != nil { | |
log.Fatal(err) | |
} | |
fmt.Printf("Title: %s\n", j.Content.Titles.Title) | |
fmt.Printf("Url: %s\n", j.Content.Url) | |
fmt.Printf("Data: %s\n", j.Content.Data) | |
fmt.Printf("----------------\n") | |
wg.Done() | |
} | |
func main() { | |
data := get_data() | |
link := get_link(data) | |
for _, post := range link.Channel.Items { | |
wg.Add(1) | |
go get_content(post.Link) | |
} | |
wg.Wait() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment