Created
January 29, 2018 14:03
-
-
Save kisPocok/45367b38e5c614b302e57e288093910d to your computer and use it in GitHub Desktop.
go build . && ./grabber -q "nintendo switch" -top "mario,sonic" -price 60000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"crypto/tls" | |
"flag" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"regexp" | |
"strconv" | |
"strings" | |
"github.com/PuerkitoBio/goquery" | |
"github.com/mmcdole/gofeed" | |
) | |
// Search configuration: the default price limit and the URL templates of
// the crawled sites. Every template takes the search query as %s.
const (
	// highPriceLimit is the price above which isExpensive reports a product
	// as expensive; it is also the default value of the -price flag.
	highPriceLimit = 60000

	// teszveszfeed is the RSS search URL of teszvesz.hu.
	teszveszfeed = "http://www.teszvesz.hu/listings/index.php?ob=16&obd=2&q=%s&rss=1"

	// jofogasfeed is the HTML search URL of jofogas.hu; besides the query it
	// takes a %d value, which fetchJofogas fills with the page number.
	// Original note: add `pf=b` to search in the description as well.
	jofogasfeed = "https://www.jofogas.hu/budapest?q=%s&o=%d"

	// vaterafeed is the RSS search URL of vatera.hu.
	vaterafeed = "http://www.vatera.hu/rss/?ob=16&obd=2&q=%s"

	// hardverapro is the RSS URL of the hardverapro.hu software/games section.
	hardverapro = "https://hardverapro.hu/aprok/szoftver_jatek/%s/?rss=true"
)
func main() { | |
q, price, top := flagship() | |
fmt.Println("Keresendő kifejezés:", q) | |
// Crawling | |
prodList := NewProductList(q, price) | |
prodList = fetchHardverapro(prodList) | |
prodList = fetchVatera(prodList) | |
prodList = fetchTeszvesz(prodList) | |
prodList = fetchJofogas(prodList) | |
// High price products first | |
if price > 0 { | |
fmt.Println("\nMagasabb aru termekek:") | |
prodList.filterConsole().show() | |
} | |
// Highlights | |
if top != "" { | |
prodList.highlights(strings.Split(top, ",")) | |
} | |
// All of | |
fmt.Println("\nMinden:") | |
prodList.show() | |
} | |
// item is one product listing collected from a marketplace.
type item struct {
	// title is the listing title; show prints it.
	title string
	// info is the text findText searches for highlighted keywords.
	info string
	// price is the parsed price; the parsers store 0 when none was found.
	price int
	// link is the URL of the listing.
	link string
	// isExpensive holds the result of isExpensive(price) at crawl time.
	isExpensive bool
	// source names the site the listing came from (e.g. "vatera").
	source string
}
type Products struct { | |
list []item | |
query string | |
price int | |
} | |
func (p Products) filterConsole() (filteredList Products) { | |
for _, prod := range p.list { | |
if prod.isExpensive { | |
filteredList.list = append(filteredList.list, prod) | |
} | |
} | |
return | |
} | |
func (p Products) filter(fn func(p item) bool) (filteredList Products) { | |
for _, prod := range p.list { | |
if fn(prod) { | |
filteredList.list = append(filteredList.list, prod) | |
} | |
} | |
return | |
} | |
func (p Products) highlights(highlights []string) { | |
for _, s := range highlights { | |
filtered := p.filter(findText(s)) | |
c := len(filtered.list) | |
if c == 0 { | |
continue | |
} | |
fmt.Printf("\n%s (%d):\n", s, c) | |
filtered.show() | |
} | |
} | |
func (p Products) show() { | |
for _, prod := range p.list { | |
fmt.Printf("- %s, %d ft %s\n", prod.title, prod.price, prod.link) | |
} | |
} | |
func NewProductList(q string, price int) Products { | |
return Products{ | |
query: q, | |
price: price, | |
list: make([]item, 0), | |
} | |
} | |
func flagship() (string, int, string) { | |
q := flag.String("q", "nintendo switch", "search query") | |
price := flag.Int("price", highPriceLimit, "set the price of the expensive products") | |
top := flag.String("top", "zelda,mario,fallout", "highlighted words, separated with comma") | |
flag.Parse() | |
return *q, *price, *top | |
} | |
func fetchHardverapro(prodList Products) Products { | |
q := strings.Replace(prodList.query, " ", "_", -1) | |
url := fmt.Sprintf(hardverapro, q) | |
tr := &http.Transport{ | |
TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, | |
} | |
client := &http.Client{Transport: tr} | |
request, _ := http.NewRequest("GET", url, nil) | |
request.Header.Set("Accept", "*/*") | |
rss, _ := client.Do(request) | |
body, _ := ioutil.ReadAll(rss.Body) | |
fp := gofeed.NewParser() | |
feed, _ := fp.ParseString(string(body)) | |
fmt.Printf("Hardverapro találatok száma: %d\n", len(feed.Items)) | |
for _, v := range feed.Items { | |
price := parseTeszveszPrice(v.Description) | |
prodList.list = append(prodList.list, item{ | |
title: parseTeszveszTitle(v.Title), | |
info: v.Description, | |
link: v.Link, | |
price: price, | |
isExpensive: isExpensive(price), | |
source: "hardverapro", | |
}) | |
} | |
return prodList | |
} | |
func fetchJofogas(prodList Products) Products { | |
var page = 1 | |
for { | |
q := strings.Replace(prodList.query, " ", "%20", -1) | |
url := fmt.Sprintf(jofogasfeed, q, page) | |
// TODO response header formázása szar | |
doc, err := goquery.NewDocument(url) | |
if err != nil { | |
log.Fatal(err) | |
} | |
items := doc.Find(".reListElement") | |
fmt.Printf("Jofogas találatok száma: %d\n", len(items.Nodes)) | |
if len(items.Nodes) < 1 { | |
break | |
} | |
items.Each(func(i int, s *goquery.Selection) { | |
t := s.Find("a.subject") | |
link, exists := t.Attr("href") | |
if !exists { | |
return | |
} | |
title := t.Text() | |
priceTag := s.Find("div.priceBox").Text() | |
price := parseJofogasPrice(priceTag) | |
var prod = item{ | |
title: title, | |
info: title, | |
link: link, | |
price: price, | |
isExpensive: isExpensive(price), | |
source: "jofogas", | |
} | |
prodList.list = append(prodList.list, prod) | |
}) | |
page++ | |
if page > 5 { | |
break | |
} | |
} | |
return prodList | |
} | |
func fetchVatera(prodList Products) Products { | |
q := strings.Replace(prodList.query, " ", "+", -1) | |
url := fmt.Sprintf(vaterafeed, q) | |
fp := gofeed.NewParser() | |
feed, _ := fp.ParseURL(url) | |
fmt.Printf("Vatera találatok száma: %d\n", len(feed.Items)) | |
for _, v := range feed.Items { | |
price := parseTeszveszPrice(v.Description) | |
prodList.list = append(prodList.list, item{ | |
title: parseTeszveszTitle(v.Title), | |
info: v.Description, | |
link: v.Link, | |
price: price, | |
isExpensive: isExpensive(price), | |
source: "vatera", | |
}) | |
} | |
return prodList | |
} | |
func fetchTeszvesz(prodList Products) Products { | |
q := strings.Replace(prodList.query, " ", "+", -1) | |
url := fmt.Sprintf(teszveszfeed, q) | |
fp := gofeed.NewParser() | |
feed, _ := fp.ParseURL(url) | |
fmt.Printf("Teszvesz találatok száma: %d\n", len(feed.Items)) | |
/* | |
// TODO kiszedtem, mert vatera lefedi | |
for _, v := range feed.Items { | |
price := parseTeszveszPrice(v.Description) | |
prodList.list = append(prodList.list, item{ | |
title: parseTeszveszTitle(v.Title), | |
info: v.Description, | |
link: v.Link, | |
price: price, | |
isExpensive: isExpensive(price), | |
source: "teszvesz", | |
}) | |
} | |
*/ | |
return prodList | |
} | |
func findText(s string) func(item) bool { | |
return func(p item) bool { | |
return exists(strings.ToLower(s), p.info) | |
} | |
} | |
// exists reports whether needle occurs in s, ignoring the case of s.
//
// needle is interpreted as a regular expression and is matched against the
// lower-cased s, so the caller is expected to pass it in lower case. Only
// the first (leftmost) match is considered, and it must be at least two
// characters long; shorter matches, including empty ones, count as
// "not found", as they always did.
//
// needle originates from user input. When it is not a valid regular
// expression (for example "c++" or "(x") it is searched for as plain text
// instead of panicking, which is what regexp.MustCompile did here before.
func exists(needle, s string) bool {
	const minMatchLen = 2

	s = strings.ToLower(s)

	re, err := regexp.Compile(needle)
	if err != nil {
		// Literal fallback, subject to the same minimum length rule.
		return len(needle) >= minMatchLen && strings.Contains(s, needle)
	}
	// FindString returns "" when nothing matches, which fails the length
	// check just like a too-short match does.
	return len(re.FindString(s)) >= minMatchLen
}
// teszveszTitleRe captures everything in front of the last "]]>" on a line.
// It is compiled once instead of on every parseTeszveszTitle call.
var teszveszTitleRe = regexp.MustCompile("(.+)]]>")

// parseTeszveszTitle cleans a feed title: when s contains a "]]>" marker
// preceded by at least one character, the text in front of the marker is
// returned; otherwise s is returned unchanged.
func parseTeszveszTitle(s string) string {
	m := teszveszTitleRe.FindStringSubmatch(s)
	if m == nil {
		return s
	}
	return m[1]
}
// teszveszPriceRe captures the amount between "Jelenlegi ára: " and the
// first following " Ft<br />". The capture is non-greedy on purpose: a
// greedy one runs up to the last " Ft<br />" on the line and swallows any
// further price, which then makes the number conversion fail.
var teszveszPriceRe = regexp.MustCompile("Jelenlegi ára: (.+?) Ft<br />")

// parseTeszveszPrice extracts the current price from a feed description of
// the form "Jelenlegi ára: 12 500 Ft<br />". Spaces inside the amount are
// removed before it is converted. It returns 0 when the description has no
// such price or the amount is not a whole number.
func parseTeszveszPrice(s string) (p int) {
	m := teszveszPriceRe.FindStringSubmatch(s)
	if m == nil {
		return 0
	}
	p, err := strconv.Atoi(strings.Replace(m[1], " ", "", -1))
	if err != nil {
		return 0
	}
	return p
}
// jofogasPriceRe captures the amount between a space and the first
// following " Ft ". The capture is non-greedy on purpose: a greedy one runs
// up to the last " Ft " on the line and swallows any further price, which
// then makes the number conversion fail.
var jofogasPriceRe = regexp.MustCompile(" (.+?) Ft ")

// parseJofogasPrice extracts the price from the text of a price box, e.g.
// " 12 500 Ft ". Spaces inside the amount are removed before it is
// converted. It returns 0 when the text has no such price or the amount is
// not a whole number.
func parseJofogasPrice(s string) (p int) {
	m := jofogasPriceRe.FindStringSubmatch(s)
	if m == nil {
		return 0
	}
	p, err := strconv.Atoi(strings.Replace(m[1], " ", "", -1))
	if err != nil {
		return 0
	}
	return p
}
func isExpensive(price int) bool { | |
return price > highPriceLimit | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment