Skip to content

Instantly share code, notes, and snippets.

@umarquez
Created July 14, 2020 07:51
Show Gist options
  • Save umarquez/06e617d06131172b9cffe750762f4499 to your computer and use it in GitHub Desktop.
Save umarquez/06e617d06131172b9cffe750762f4499 to your computer and use it in GitHub Desktop.
#1mC0D3 Extractor de enlaces https://youtu.be/GVd4gY4KQyg
package main
import (
"flag"
"fmt"
"golang.org/x/net/html"
"log"
"net/http"
"net/url"
)
// links stores the extracted links: each key is an href value and the
// associated value counts how many times that href was seen.
var links map[string]int
// Extract walks the HTML tree rooted at node and, for every <a> element,
// increments the counter in the package-level links map for the value of each
// href attribute it carries. Child nodes are visited recursively.
//
// A nil node is ignored. The links map is allocated on first use, so Extract
// can be called safely even if no caller has initialised it yet.
func Extract(node *html.Node) {
	if node == nil {
		return
	}
	if links == nil {
		// Assigning into a nil map panics, so make sure it exists.
		links = make(map[string]int)
	}

	// Only anchor elements contribute links.
	if node.Type == html.ElementNode && node.Data == "a" {
		for _, attr := range node.Attr {
			if attr.Key == "href" {
				links[attr.Val]++
			}
		}
	}

	// Descend into the children.
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		Extract(child)
	}
}
// main reads the target URL from the -url flag, downloads and parses the page,
// collects every link found in it, and prints each distinct link together with
// the number of times it occurred.
func main() {
	// Read the URL from the command line.
	rawURL := flag.String(
		"url",
		"https://golang.org",
		"URL a procesar",
	)
	flag.Parse()

	// Reject values that are not syntactically valid URLs.
	if _, err := url.Parse(*rawURL); err != nil {
		log.Fatal(err)
	}

	// Download and parse the document.
	log.Printf("Procesando URL: %v", *rawURL)
	doc, err := fetchDocument(*rawURL)
	if err != nil {
		log.Fatal(err)
	}

	// Collect the links, starting from an empty map.
	links = make(map[string]int)
	Extract(doc)

	// Print the results.
	for targetURL, counter := range links {
		fmt.Printf("[%v] %v\n", counter, targetURL)
	}
}

// fetchDocument downloads rawURL and parses the response body as HTML.
//
// It returns an error if the request fails, if the server answers with a
// non-2xx status, or if the body cannot be parsed. The response body is closed
// before fetchDocument returns, so callers may exit the process on error
// without skipping the cleanup.
func fetchDocument(rawURL string) (*html.Node, error) {
	resp, err := http.Get(rawURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// http.Get does not treat non-2xx responses as errors; without this check
	// the links of an error page would be reported as if they were the page's.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("fetching %q: unexpected status %s", rawURL, resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing HTML from %q: %w", rawURL, err)
	}
	return doc, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment