Skip to content

Instantly share code, notes, and snippets.

@thesunwave
Created March 7, 2019 08:11
Show Gist options
  • Save thesunwave/816da02a5ed49e2b6316a26dbf125864 to your computer and use it in GitHub Desktop.
Save thesunwave/816da02a5ed49e2b6316a26dbf125864 to your computer and use it in GitHub Desktop.
package main
import (
"encoding/json"
"fmt"
"github.com/gocolly/colly"
"github.com/mb-14/gomarkov"
"io/ioutil"
"strings"
)
// mMTitle holds the text of a single headline collected by fetchTitles.
type mMTitle struct {
// Title is the headline text; main splits it on spaces to train the chain.
Title string
}
// main loads a previously saved Markov chain from model.json or, when that
// fails, trains a new order-1 chain on the titles returned by fetchTitles.
// It then prints a sample transition from the word "Google", saves the chain
// back to model.json, and prints the unique titles among 1000 generated ones.
func main() {
	chain, err := loadModel()
	if err != nil {
		// No usable saved model (e.g. the first run): report why and train a
		// fresh chain of order 1 from freshly scraped titles.
		fmt.Println(err)
		titles := fetchTitles()
		if len(titles) == 0 {
			// An untrained chain has nothing to generate from, and saving it
			// would make later runs load an empty model instead of retraining.
			fmt.Println("no titles fetched; nothing to train the model on")
			return
		}
		chain = gomarkov.NewChain(1)
		for _, story := range titles {
			chain.Add(strings.Split(story.Title, " "))
		}
	}

	// Sample lookups. "Google" may not occur in the training data, in which
	// case the chain reports an error instead of a word or probability.
	if next, err := chain.Generate([]string{"Google"}); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println(next)
	}
	if prob, err := chain.TransitionProbability(" ", []string{"Google"}); err != nil {
		fmt.Println(err)
	} else {
		fmt.Println(prob)
	}

	// The chain is JSON serializable; persist it so the next run can skip
	// scraping. Only write the file when serialization actually succeeded.
	jsonObj, err := json.Marshal(chain)
	if err != nil {
		fmt.Println(err)
	} else if err := ioutil.WriteFile("model.json", jsonObj, 0644); err != nil {
		fmt.Println(err)
	}

	const titleCount = 1000
	newsList := make([]string, 0, titleCount)
	for i := 0; i < titleCount; i++ {
		newsList = append(newsList, generateTitle(chain))
	}
	for _, v := range removeDuplicatesUnordered(newsList) {
		fmt.Println(v)
	}
}
// removeDuplicatesUnordered returns the distinct strings found in elements.
//
// Callers must not rely on any particular ordering of the result (hence the
// name), but this implementation keeps the first occurrence of each value in
// input order so that output is reproducible between runs. The returned
// slice is always non-nil, even for empty input.
func removeDuplicatesUnordered(elements []string) []string {
	seen := make(map[string]struct{}, len(elements))
	result := make([]string, 0, len(elements))
	for _, e := range elements {
		if _, dup := seen[e]; dup {
			continue
		}
		seen[e] = struct{}{}
		result = append(result, e)
	}
	return result
}
// loadModel reads the file at the relative path model.json and decodes its
// JSON contents into a Markov chain.
//
// On success it returns the decoded chain and a nil error. If the file cannot
// be read or its contents cannot be decoded, it returns a nil chain and an
// error that says which step failed and wraps the underlying cause.
func loadModel() (*gomarkov.Chain, error) {
	data, err := ioutil.ReadFile("model.json")
	if err != nil {
		return nil, fmt.Errorf("reading model.json: %w", err)
	}
	var chain gomarkov.Chain
	if err := json.Unmarshal(data, &chain); err != nil {
		// Do not hand back a possibly half-decoded chain together with an error.
		return nil, fmt.Errorf("decoding model.json: %w", err)
	}
	return &chain, nil
}
// generateTitle produces one title by repeatedly asking chain for the next
// token, starting from the start token, until the end token is produced. It
// returns the generated tokens joined by single spaces, with the start and
// end tokens excluded.
//
// The context passed to chain.Generate is the last chain.Order tokens, so the
// function is not tied to order-1 chains. If Generate reports an error, the
// walk stops and whatever was generated so far is returned; ignoring the
// error would keep appending empty tokens and never terminate.
func generateTitle(chain *gomarkov.Chain) string {
	order := chain.Order
	if order < 1 {
		// No valid context window can be built for such a chain.
		return ""
	}

	// Seed the sequence with one start token per position of the context.
	tokens := make([]string, order, order+16)
	for i := range tokens {
		tokens[i] = gomarkov.StartToken
	}

	for {
		next, err := chain.Generate(tokens[len(tokens)-order:])
		if err != nil || next == gomarkov.EndToken {
			break
		}
		tokens = append(tokens, next)
	}
	return strings.Join(tokens[order:], " ")
}
// fetchTitles visits https://meduza.io and collects one mMTitle for every
// element matching the ".Link-root" selector, using the text of its "span"
// children as the title. Elements that yield no text are skipped so that
// empty titles do not end up in the training data.
//
// Each requested URL is printed before it is fetched. A failure to visit the
// page is printed and results in whatever titles were collected so far
// (possibly none) being returned.
func fetchTitles() []mMTitle {
	var titles []mMTitle

	c := colly.NewCollector()
	c.OnHTML(".Link-root", func(e *colly.HTMLElement) {
		title := e.ChildText("span")
		if title == "" {
			return
		}
		titles = append(titles, mMTitle{Title: title})
	})
	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL)
	})

	// NOTE(review): relies on the collector being in its default synchronous
	// mode so that all callbacks have run by the time Visit returns — confirm.
	if err := c.Visit("https://meduza.io"); err != nil {
		fmt.Println(err)
	}
	return titles
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment