Created
May 5, 2017 07:04
-
-
Save swt02026/24c992046d997816c117659609bde25e to your computer and use it in GitHub Desktop.
kktix_crawler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
"log" | |
"os" | |
"bufio" | |
"github.com/PuerkitoBio/goquery" | |
) | |
func getNewEventTotalPage(i *int) { | |
for *i = 1; ; *i++ { | |
getURL := fmt.Sprintf("https://kktix.com/events?page=%v", *i) | |
html, err := goquery.NewDocument(getURL) | |
if err != nil { | |
log.Fatal("error") | |
} | |
item := html.Find("li.clearfix h2 a") | |
if item.Length() < 1 { | |
return | |
} | |
} | |
} | |
func getNewEvent(totalPage int) <-chan *goquery.Selection { | |
out := make(chan *goquery.Selection) | |
for i := 1; i < totalPage; i++ { | |
getURL := fmt.Sprintf("https://kktix.com/events?page=%v", i) | |
html, err := goquery.NewDocument(getURL) | |
go func() { | |
if err != nil { | |
log.Fatal("error") | |
} else { | |
item := html.Find("li.clearfix h2 a") | |
if item.Length() >= 1 { | |
out <- item | |
} | |
} | |
}() | |
} | |
return out | |
} | |
// readEventFile loads an event list from filename and returns a pointer to a
// map from event href to event title.
//
// Each line is expected to have the form "<href> ||| <title>". Only the first
// separator splits the line, so a title that itself contains " ||| " is kept
// intact. Lines without a separator (for example blank lines) are skipped
// rather than causing an index-out-of-range panic.
//
// The file is created if it does not exist. If it cannot be opened, or an
// error occurs while scanning, the problem is logged and the entries read so
// far (possibly none) are returned; the returned pointer is never nil.
func readEventFile(filename string) *map[string]string {
	m := make(map[string]string)

	// O_CREATE keeps the first run working when no file exists yet. 0644 is
	// used because a data file needs neither execute nor world-write bits.
	f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		log.Printf("opening %s: %v", filename, err)
		return &m
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), " ||| ", 2)
		if len(parts) != 2 {
			// Malformed or empty line: nothing usable to record.
			continue
		}
		m[parts[0]] = parts[1]
	}
	if err := scanner.Err(); err != nil {
		log.Printf("reading %s: %v", filename, err)
	}
	return &m
}
func getOldEvent() *map[string]string { | |
return readEventFile("oldDatas.txt") | |
} | |
func getDiffEvent() *map[string]string { | |
return readEventFile("diff.txt") | |
} | |
type EventDataHandler struct { | |
oldDatas map[string]string | |
newDatas <-chan *goquery.Selection | |
diffDatas map[string]string | |
totalPage int | |
} | |
func (eventData *EventDataHandler) reflashEventInfo() { | |
eventData.totalPage = 0 | |
getNewEventTotalPage(&eventData.totalPage) | |
eventData.newDatas = getNewEvent(eventData.totalPage) | |
eventData.oldDatas = *getOldEvent() | |
eventData.diffDatas = *getDiffEvent() | |
} | |
func (eventData *EventDataHandler) writeEventInfoDiffToFile() { | |
f, _ := os.OpenFile("oldDatas.txt", os.O_RDWR|os.O_TRUNC, 0777) | |
defer f.Close() | |
diff, _ := os.OpenFile("diff.txt", os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0777) | |
defer diff.Close() | |
for i := 1; i < eventData.totalPage; i++ { | |
(<-eventData.newDatas).Each(func(_ int, s *goquery.Selection) { | |
title := s.Text() | |
href, _ := s.Attr("href") | |
_, ok := eventData.oldDatas[href] | |
data := href + " ||| " + title | |
if !ok { | |
fmt.Fprintln(diff, data) | |
} | |
fmt.Fprintln(f, data) | |
}) | |
} | |
} | |
func main() { | |
var e EventDataHandler | |
e.reflashEventInfo() | |
e.writeEventInfoDiffToFile() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment