Skip to content

Instantly share code, notes, and snippets.

@swt02026
Created May 5, 2017 07:04
Show Gist options
  • Save swt02026/24c992046d997816c117659609bde25e to your computer and use it in GitHub Desktop.
Save swt02026/24c992046d997816c117659609bde25e to your computer and use it in GitHub Desktop.
kktix_crawler
package main
import (
"fmt"
"strings"
"log"
"os"
"bufio"
"github.com/PuerkitoBio/goquery"
)
// getNewEventTotalPage walks the KKTIX event listing one page at a time,
// starting at page 1, and stores in *i the number of the first page that
// contains no event links matching "li.clearfix h2 a" — that is, the last
// page with events plus one.
//
// The result is an exclusive upper bound: the rest of this file iterates
// pages with `for p := 1; p < totalPage; p++`.
//
// The process is terminated through log.Fatalf when a page cannot be fetched
// or parsed.
func getNewEventTotalPage(i *int) {
	for *i = 1; ; *i++ {
		getURL := fmt.Sprintf("https://kktix.com/events?page=%v", *i)
		html, err := goquery.NewDocument(getURL)
		if err != nil {
			// Say which page failed and why instead of a bare "error".
			log.Fatalf("fetching %s: %v", getURL, err)
		}
		if html.Find("li.clearfix h2 a").Length() < 1 {
			return
		}
	}
}
// getNewEvent downloads listing pages 1 through totalPage-1 and returns a
// channel carrying, in page order, the selection of event links
// ("li.clearfix h2 a") found on each page.
//
// Exactly one value is delivered per page, even when the page has no
// matching links (the selection is then empty). A consumer that performs
// totalPage-1 receives therefore never blocks forever on a page that came
// back empty.
//
// All pages are fetched before the function returns, so the channel is
// buffered to hold every page and is closed once filled: no goroutines are
// left behind, and callers may either receive a fixed number of values or
// range over the channel.
//
// The process is terminated through log.Fatalf when a page cannot be fetched
// or parsed.
func getNewEvent(totalPage int) <-chan *goquery.Selection {
	pages := totalPage - 1
	if pages < 0 {
		pages = 0
	}
	out := make(chan *goquery.Selection, pages)
	for i := 1; i < totalPage; i++ {
		getURL := fmt.Sprintf("https://kktix.com/events?page=%v", i)
		html, err := goquery.NewDocument(getURL)
		if err != nil {
			log.Fatalf("fetching %s: %v", getURL, err)
		}
		// Never blocks: the buffer has room for one selection per page.
		out <- html.Find("li.clearfix h2 a")
	}
	close(out)
	return out
}
// readEventFile loads event records from filename into a map keyed by href.
//
// Each record is one line of the form "href ||| title". Only the first
// separator splits the line, so a title may itself contain " ||| ". Lines
// without a separator (blank or otherwise malformed) are skipped.
//
// The file is created empty when it does not exist. If it cannot be opened
// or read, the error is logged and the records parsed so far (possibly none)
// are returned; the returned pointer and map are never nil.
func readEventFile(filename string) *map[string]string {
	m := make(map[string]string)

	f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		log.Printf("opening %s: %v", filename, err)
		return &m
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := strings.SplitN(scanner.Text(), " ||| ", 2)
		if len(fields) < 2 {
			// Not a record; indexing fields[1] here would panic.
			continue
		}
		m[fields[0]] = fields[1]
	}
	if err := scanner.Err(); err != nil {
		log.Printf("reading %s: %v", filename, err)
	}
	return &m
}
// getOldEvent returns the href-to-title map that readEventFile parses from
// "oldDatas.txt".
func getOldEvent() *map[string]string {
	const path = "oldDatas.txt"
	return readEventFile(path)
}
// getDiffEvent returns the href-to-title map that readEventFile parses from
// "diff.txt".
func getDiffEvent() *map[string]string {
	const path = "diff.txt"
	return readEventFile(path)
}
// EventDataHandler holds the state of one crawl: the records loaded from the
// two state files, the freshly fetched listing pages, and the page bound.
type EventDataHandler struct {
	// oldDatas and diffDatas map an event href to its title, as loaded from
	// oldDatas.txt and diff.txt respectively.
	oldDatas, diffDatas map[string]string
	// newDatas delivers the event-link selections returned by getNewEvent.
	newDatas <-chan *goquery.Selection
	// totalPage is the exclusive page bound set by getNewEventTotalPage.
	totalPage int
}
// reflashEventInfo refreshes every field of the handler: it recomputes the
// page bound, starts the fetch of the current listing pages, and then reloads
// the records stored in oldDatas.txt and diff.txt.
func (eventData *EventDataHandler) reflashEventInfo() {
	pages := 0
	getNewEventTotalPage(&pages)
	eventData.totalPage = pages

	eventData.newDatas = getNewEvent(pages)

	previous := getOldEvent()
	eventData.oldDatas = *previous

	changed := getDiffEvent()
	eventData.diffDatas = *changed
}
// writeEventInfoDiffToFile drains the fetched listing pages from
// eventData.newDatas and rewrites both state files from scratch:
//
//   - oldDatas.txt receives every event seen in this crawl;
//   - diff.txt receives only the events whose href is absent from
//     eventData.oldDatas.
//
// Each record is written as one line, "href ||| title". Whitespace inside a
// title (including newlines) is collapsed to single spaces so a record can
// never span several lines. Anchors without an href attribute are skipped.
//
// The process is terminated through log.Fatalf when a file cannot be opened
// or written.
func (eventData *EventDataHandler) writeEventInfoDiffToFile() {
	// O_CREATE on both files: do not depend on an earlier read having
	// created oldDatas.txt.
	f, err := os.OpenFile("oldDatas.txt", os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0644)
	if err != nil {
		log.Fatalf("opening oldDatas.txt: %v", err)
	}
	defer f.Close()
	diff, err := os.OpenFile("diff.txt", os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0644)
	if err != nil {
		log.Fatalf("opening diff.txt: %v", err)
	}
	defer diff.Close()

	// Buffer the many small line writes; write errors surface at Flush.
	allOut := bufio.NewWriter(f)
	diffOut := bufio.NewWriter(diff)

	for i := 1; i < eventData.totalPage; i++ {
		page, ok := <-eventData.newDatas
		if !ok {
			// The producer closed the channel before the expected count.
			break
		}
		if page == nil {
			continue
		}
		page.Each(func(_ int, s *goquery.Selection) {
			href, hasHref := s.Attr("href")
			if !hasHref {
				return
			}
			title := strings.Join(strings.Fields(s.Text()), " ")
			data := href + " ||| " + title
			if _, seen := eventData.oldDatas[href]; !seen {
				fmt.Fprintln(diffOut, data)
			}
			fmt.Fprintln(allOut, data)
		})
	}

	if err := allOut.Flush(); err != nil {
		log.Fatalf("writing oldDatas.txt: %v", err)
	}
	if err := diffOut.Flush(); err != nil {
		log.Fatalf("writing diff.txt: %v", err)
	}
}
// main runs one crawl: it refreshes the handler's state and then rewrites
// the state files with the results.
func main() {
	handler := &EventDataHandler{}
	handler.reflashEventInfo()
	handler.writeEventInfoDiffToFile()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment