Last active
November 26, 2017 14:11
-
-
Save hjhee/41d6c5579adeac6aa9bed8e74618f6ed to your computer and use it in GitHub Desktop.
Monitor Discuz! forum posts and send notifications via Pushbullet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/op/go-logging"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)
// Runtime settings of the monitor.
const (
	// filename is the text file listing the URLs to monitor, one per line.
	filename = "source.txt"
	// configFile is the JSON file holding the Pushbullet settings; see PushSetup.
	configFile = "pushbullet.json"
	// period is the fetch interval in seconds (monitor multiplies it by time.Second).
	period = 30
)
var log = logging.MustGetLogger("dolc") | |
var logFormat = logging.MustStringFormatter( | |
`%{color}%{time:15:04:05.000} %{shortfunc} | %{level:.4s} %{id:03x}%{color:reset} %{message}`, | |
) | |
// HTMLType tags an HTMLPage with the kind of content it carries.
//
// NOTE(review): the constant names and their original comments refer to
// Tieba posts, while this program watches a Discuz! forum; they look like
// leftovers from another project. In the visible code only HTMLWebHomepage
// and HTMLLocal are ever assigned (by main) and the tag is never read back.
type HTMLType int

const (
	// HTMLWebHomepage is the first page of a post; main assigns it to every
	// URL whose scheme is not "file".
	HTMLWebHomepage HTMLType = iota
	// HTMLWebPage is a (non-first) page of a post.
	HTMLWebPage
	// HTMLJSON is a comment payload in JSON format.
	HTMLJSON
	// HTMLLocal is a local HTML or JSON file; main assigns it to URLs with
	// the "file" scheme.
	HTMLLocal
)
// HTMLPage is a job for fetcher and parser | |
type HTMLPage struct { | |
// URL of the Page | |
URL *url.URL | |
// Content is the HTML code of the Page | |
Content []byte | |
// Type indicates different types of Tieba data | |
Type HTMLType | |
// Close http.Response when finished parsing | |
// Response *http.Response | |
} | |
// PushSetup is the Pushbullet configuration that main decodes from configFile.
type PushSetup struct {
	// Token is sent as the Access-Token header of every push request.
	Token string `json:"token"`
	// DeviceIden is copied into the device_iden field of every push payload.
	DeviceIden string `json:"device_iden"`
}

// pushSetup holds the configuration loaded by main; monitor reads it when
// building push payloads.
var pushSetup PushSetup
// PushPayload is the JSON body of a request that creates a push.
type PushPayload struct {
	// DeviceIden is the device identifier taken from PushSetup.
	DeviceIden string `json:"device_iden"`
	// PushType is the kind of push; monitor always sends "note".
	PushType string `json:"type"`
	// Title is the headline of the push (the title of the newest post).
	Title string `json:"title"`
	// Body is the text of the push (the link to the newest post).
	Body string `json:"body"`
}

// pushReq is the POST request for the Pushbullet pushes endpoint. main
// builds it once, including the access token and content type headers, and
// monitor uses it when sending pushes.
var pushReq *http.Request
func fetchHTMLFromURL(page *HTMLPage) error { | |
resp, err := http.Get(page.URL.String()) | |
if err != nil { | |
return err | |
} | |
bytes, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
return err | |
} | |
page.Content = bytes | |
// page.Response = resp | |
resp.Body.Close() | |
return nil | |
} | |
func gbk2UTF8(s []byte) ([]byte, error) { | |
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder()) | |
d, e := ioutil.ReadAll(reader) | |
if e != nil { | |
return nil, e | |
} | |
return d, nil | |
} | |
func htmlParse(page *HTMLPage) (string, string, error) { | |
content, err := gbk2UTF8(page.Content) | |
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(content)) | |
if err != nil { | |
return "", "", fmt.Errorf("Error parsing %s: %v", page.URL, err) | |
} | |
posts := doc.Find("a.s.xst") | |
postsFirst := posts.First() | |
body, exist := postsFirst.Attr("href") | |
if !exist { | |
body = postsFirst.Text() | |
} else { | |
body = "https://www.dolc.de/" + body | |
} | |
// posts.Each(func(i int, s *goquery.Selection) { | |
// fmt.Printf("Post%d: %s\n", i, s.Text()) | |
// }) | |
// fmt.Printf("\n") | |
// page.Response.Body.Close() | |
return postsFirst.Text(), body, err | |
} | |
func monitor(done <-chan struct{}, wg *sync.WaitGroup, page *HTMLPage) { | |
defer wg.Done() | |
var title, titleOld, body string | |
for { | |
select { | |
case <-done: | |
return | |
case <-time.After(time.Second * period): | |
titleOld = title | |
fetchHTMLFromURL(page) | |
title, body, _ = htmlParse(page) | |
if title != titleOld && title != "" && titleOld != "" { | |
// if title != titleOld && title != "" { | |
log.Noticef("New Posts: %s\n", title) | |
jVal, _ := json.Marshal(PushPayload{ | |
DeviceIden: pushSetup.DeviceIden, | |
PushType: "note", | |
Title: title, | |
Body: body, | |
}) | |
pushReq.Body = ioutil.NopCloser(bytes.NewBuffer(jVal)) | |
client := &http.Client{} | |
resp, err := client.Do(pushReq) | |
if err != nil { | |
log.Warningf("POST request failed: %s", err) | |
} | |
defer resp.Body.Close() | |
} | |
} | |
} | |
} | |
func main() { | |
logging.SetFormatter(logFormat) | |
log.Infof("monitor started!") | |
file, _ := os.Open(configFile) | |
decoder := json.NewDecoder(file) | |
err := decoder.Decode(&pushSetup) | |
if err != nil { | |
log.Fatal("push sertup failed:", err) | |
} | |
pushReq, err = http.NewRequest("POST", "https://api.pushbullet.com/v2/pushes", nil) | |
pushReq.Header.Set("Access-Token", pushSetup.Token) | |
pushReq.Header.Set("Content-Type", "application/json") | |
if err != nil { | |
log.Fatal("http request build failed:", err) | |
} | |
// closing done to force all goroutines to quit | |
// Go Concurrency Patterns: Pipelines and cancellation | |
// https://blog.golang.org/pipelines | |
done := make(chan struct{}) | |
var wg sync.WaitGroup | |
wg.Add(1) | |
go func() { | |
defer wg.Done() | |
in, err := os.OpenFile(filename, os.O_RDONLY, 0644) | |
if err != nil { | |
log.Fatalf("Error reading url list: %v", err) | |
return | |
} | |
defer in.Close() | |
reader := bufio.NewReader(in) | |
// reading file line by line in go | |
// https://stackoverflow.com/a/41741702/6091246 | |
// case: | |
// If you don't mind that the line could be very long (i.e. use a lot of RAM). It keeps the \n at the end of the string returned. | |
var line string | |
for isEOF := false; !isEOF; { | |
line, err = reader.ReadString('\n') | |
if err != nil { | |
isEOF = true | |
} | |
line = strings.TrimSpace(line) | |
if line == "" { | |
continue | |
} | |
u, err := url.Parse(strings.TrimSpace(line)) | |
if err != nil { | |
log.Warningf("[Fetch] Error parsing %s, skipping\n", line) | |
continue | |
} | |
var pageType HTMLType | |
if u.Scheme == "file" { | |
pageType = HTMLLocal | |
} else { | |
pageType = HTMLWebHomepage | |
} | |
wg.Add(1) | |
go monitor(done, &wg, &HTMLPage{URL: u, Type: pageType}) | |
} | |
}() | |
wg.Wait() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment