Created
July 14, 2020 07:51
-
-
Save umarquez/06e617d06131172b9cffe750762f4499 to your computer and use it in GitHub Desktop.
#1mC0D3 Extractor de enlaces https://youtu.be/GVd4gY4KQyg
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"flag" | |
"fmt" | |
"golang.org/x/net/html" | |
"log" | |
"net/http" | |
"net/url" | |
) | |
// links stores the extracted link targets: each key is an href value found
// on an <a> element and each value is the number of times it was seen.
//
// It is initialised at declaration so that Extract can be called safely even
// before main resets it (writing to a nil map panics).
var links = make(map[string]int)
func Extract(node *html.Node) { | |
// Si se trata de un enlace: | |
if node.Type == html.ElementNode && | |
node.Data == "a" { | |
// iterando sobre los atributos: | |
for _, attr := range node.Attr { | |
switch attr.Key { | |
// Si se trata del destino del enlace | |
case "href": | |
// Aumentamos el contador | |
links[attr.Val]++ | |
} | |
} | |
} | |
// Procesando sub nodos | |
for n := node.FirstChild; n != nil; n = n.NextSibling { | |
Extract(n) | |
} | |
} | |
func main() { | |
// Obteniendo URL por parámetro | |
strUrl := flag.String( | |
"url", | |
"https://golang.org", | |
"URL a procesar", | |
) | |
flag.Parse() | |
// Validando URL | |
_, err := url.Parse(*strUrl) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// Descargando contenido desde la URL | |
log.Printf("Procesando URL: %v", *strUrl) | |
resp, err := http.Get(*strUrl) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer resp.Body.Close() | |
// Interpretando HTML | |
htmlContent, err := html.Parse(resp.Body) | |
// Obteniendo enlaces | |
links = make(map[string]int) // inicializando | |
Extract(htmlContent) | |
// Imprimiendo resultados | |
for targetUrl, counter := range links { | |
fmt.Printf("[%v] %v\n", counter, targetUrl) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment