Skip to content

Instantly share code, notes, and snippets.

@madindo
Created April 5, 2025 08:55
Show Gist options
  • Save madindo/d12f2051f4360b004e572b07af49b684 to your computer and use it in GitHub Desktop.
Save madindo/d12f2051f4360b004e572b07af49b684 to your computer and use it in GitHub Desktop.
Ringkasan Berita Dari Kompas & Detik menggunakan Command line
package main
import (
"fmt"
"log"
"net/url"
"regexp"
"strings"
"github.com/gocolly/colly/v2"
)
// minTitleLen is the minimum length, in bytes, that anchor text must have to
// be treated as a headline rather than a navigation label.
const minTitleLen = 55

// newsDomains lists the base domains whose links are reported. Subdomains of
// these domains are accepted as well.
var newsDomains = []string{"kompas.com", "detik.com"}

// skippedPathParts lists URL path fragments that mark non-article pages
// (tag, category, index, search, video and similar listings).
var skippedPathParts = []string{
	"/tag/", "/tags/", "/kategori/", "/category/", "/video/", "/foto/",
	"/indeks", "/index", "/search", "/topik/", "/news/", "/live/", "/trending/",
}

// whitespaceRun matches one or more consecutive whitespace characters so they
// can be collapsed into a single space.
var whitespaceRun = regexp.MustCompile(`\s+`)

// isHeadline reports whether title looks like an article headline: it must be
// at least minTitleLen bytes long and must not contain the relative-timestamp
// phrases "menit lalu" or "jam lalu".
func isHeadline(title string) bool {
	if len(title) < minTitleLen {
		return false
	}
	return !strings.Contains(title, "menit lalu") &&
		!strings.Contains(title, "jam lalu")
}

// isNewsHost reports whether hostname is one of newsDomains or a subdomain of
// one. The comparison is case-insensitive and anchored on a label boundary, so
// look-alike hosts such as "kompas.com.evil.example" or "notdetik.com" are
// rejected.
func isNewsHost(hostname string) bool {
	hostname = strings.ToLower(hostname)
	for _, domain := range newsDomains {
		if hostname == domain || strings.HasSuffix(hostname, "."+domain) {
			return true
		}
	}
	return false
}

// isArticlePath reports whether path looks like an article URL: it is not the
// site root, contains none of skippedPathParts, and contains at least one of
// "-", "/read" or "/berita".
func isArticlePath(path string) bool {
	if path == "/" {
		return false
	}
	for _, part := range skippedPathParts {
		if strings.Contains(path, part) {
			return false
		}
	}
	return strings.Contains(path, "-") ||
		strings.Contains(path, "/read") ||
		strings.Contains(path, "/berita")
}

// main visits the Kompas and Detik front pages and prints the title and link
// of every anchor that looks like an article headline.
func main() {
	// Only Kompas and Detik.
	allowedDomains := []string{
		"kompas.com", "www.kompas.com",
		"detik.com", "www.detik.com",
	}
	c := colly.NewCollector(
		colly.AllowedDomains(allowedDomains...),
	)

	c.OnHTML("a", func(e *colly.HTMLElement) {
		// Trim the anchor text and collapse internal whitespace runs.
		title := whitespaceRun.ReplaceAllString(strings.TrimSpace(e.Text), " ")
		if !isHeadline(title) {
			return
		}

		link := e.Attr("href")
		parsedURL, err := url.Parse(link)
		if err != nil {
			return
		}
		// Relative ("/path") and protocol-relative ("//host/path") hrefs are
		// resolved against the page being visited so the printed link is
		// directly usable.
		if parsedURL.Host == "" || !parsedURL.IsAbs() {
			link = e.Request.AbsoluteURL(link)
			if parsedURL, err = url.Parse(link); err != nil {
				return
			}
		}

		// Skip external domains and non-article paths.
		if !isNewsHost(parsedURL.Hostname()) || !isArticlePath(parsedURL.Path) {
			return
		}

		// Print the title in green using ANSI escape codes.
		greenTitle := "\033[32m" + title + "\033[0m"
		fmt.Printf("Title: %s\nLink: %s\n\n", greenTitle, link)
	})

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting:", r.URL.String())
	})

	c.OnError(func(r *colly.Response, err error) {
		log.Println("Error:", r.Request.URL, err)
	})

	startPages := []string{
		"https://www.kompas.com",
		"https://www.detik.com",
	}
	for _, page := range startPages {
		if err := c.Visit(page); err != nil {
			log.Printf("Failed to visit %s: %v\n", page, err)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment