Skip to content

Instantly share code, notes, and snippets.

@Ppang0405
Created March 14, 2025 19:02
Show Gist options
  • Save Ppang0405/c8bb7104fd6640ab6b6b2ce40d88ccb5 to your computer and use it in GitHub Desktop.
Save Ppang0405/c8bb7104fd6640ab6b6b2ce40d88ccb5 to your computer and use it in GitHub Desktop.
package main
import (
	"encoding/json"
	"io"
	"log"
	"net/http"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/gocolly/colly/v2"
)
// pageInfo collects what was observed while scraping a single page. It is
// serialized to JSON and sent back to the client by handler.
type pageInfo struct {
	// StatusCode is the HTTP status code recorded for the visited page.
	StatusCode int `json:"StatusCode"`
	// Links maps each absolute link URL found on the page to the number of
	// times it occurred.
	Links map[string]int `json:"Links"`
	// Images lists the absolute URLs of the images found on the page, in the
	// order they were encountered.
	Images []string `json:"Images"`
}
// handler scrapes the page named by the "url" query parameter and replies with
// a JSON-encoded pageInfo describing it: the HTTP status code, every link found
// (with occurrence counts) and every image URL found. Each image is also
// downloaded in the background into the "downloaded_images" directory.
//
// Responses:
//   - 400 Bad Request if the "url" parameter is missing or empty.
//   - 500 Internal Server Error if the result cannot be serialized.
//   - 200 OK with a JSON body otherwise, even when the visit itself failed; in
//     that case StatusCode carries whatever the error callback recorded.
//
// NOTE(review): the target URL is supplied by the caller and fetched from this
// server, so exposing this endpoint to untrusted clients allows server-side
// request forgery. Restrict the listener or validate targets before doing so.
func handler(w http.ResponseWriter, r *http.Request) {
	URL := r.URL.Query().Get("url")
	if URL == "" {
		log.Println("missing URL argument")
		// Tell the client what went wrong instead of returning an empty 200.
		http.Error(w, "missing url query parameter", http.StatusBadRequest)
		return
	}
	log.Println("visiting", URL)

	c := colly.NewCollector()
	p := &pageInfo{
		Links: make(map[string]int),
		// Non-nil so that a page without images serializes as [] rather than null.
		Images: []string{},
	}

	// Create images directory if it doesn't exist. Failure is logged but not
	// fatal: the scrape result is still useful, and each download reports its
	// own error when it cannot create its file.
	imagesDir := "downloaded_images"
	if err := os.MkdirAll(imagesDir, 0755); err != nil {
		log.Println("Error creating images directory:", err)
	}

	// count links
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		link := e.Request.AbsoluteURL(e.Attr("href"))
		if link != "" {
			p.Links[link]++
		}
	})

	// find and download images
	c.OnHTML("img[src]", func(e *colly.HTMLElement) {
		imageURL := e.Request.AbsoluteURL(e.Attr("src"))
		if imageURL == "" {
			return
		}
		// Add image URL to our result
		p.Images = append(p.Images, imageURL)
		// Download in the background so the response is not delayed by image
		// transfers; the goroutine is not waited for and reports via the log.
		go downloadImage(imageURL, imagesDir)
	})

	// extract status code
	c.OnResponse(func(r *colly.Response) {
		log.Println("response received", r.StatusCode)
		p.StatusCode = r.StatusCode
	})
	c.OnError(func(r *colly.Response, err error) {
		log.Println("error:", r.StatusCode, err)
		p.StatusCode = r.StatusCode
	})

	// The error from Visit used to be discarded; log it so that failures which
	// never reach the OnError callback are still visible. The (possibly empty)
	// result is returned regardless.
	if err := c.Visit(URL); err != nil {
		log.Println("visit failed:", err)
	}

	// dump results
	b, err := json.Marshal(p)
	if err != nil {
		log.Println("failed to serialize response:", err)
		http.Error(w, "failed to serialize response", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if _, err := w.Write(b); err != nil {
		log.Println("failed to write response:", err)
	}
}
// imageDownloadTimeout bounds one whole image download, including reading the
// response body, so a stalled server cannot pin a download goroutine forever.
const imageDownloadTimeout = 30 * time.Second

// imageClient is shared by all downloads so that connections can be reused.
var imageClient = &http.Client{Timeout: imageDownloadTimeout}

// imageFilename derives a local file name for imageURL.
//
// The name is the last element of the URL's path (query string and fragment
// excluded) with "_<timestamp>" inserted before the extension, timestamp being
// rendered in decimal. When the URL has no usable final path element the name
// is "image_<timestamp>.jpg". The result never contains a path separator.
func imageFilename(imageURL string, timestamp int64) string {
	// strconv renders the digits; string(int64) would instead produce a single
	// rune, which defeats the purpose of the suffix.
	stamp := strconv.FormatInt(timestamp, 10)

	name := ""
	if u, err := url.Parse(imageURL); err == nil {
		name = path.Base(u.Path)
	}
	// filepath.Base additionally strips OS-specific separators that may be
	// embedded in the URL path, keeping the file inside the destination.
	name = filepath.Base(name)

	// Decide on the fallback before the timestamp is appended; afterwards the
	// name can no longer be empty or ".".
	if name == "" || name == "." || name == ".." || name == "/" || name == string(filepath.Separator) {
		return "image_" + stamp + ".jpg"
	}

	ext := filepath.Ext(name)
	return strings.TrimSuffix(name, ext) + "_" + stamp + ext
}

// downloadImage fetches imageURL and stores the body in destinationDir under a
// name derived from the URL plus a nanosecond timestamp (see imageFilename).
//
// It is best-effort and meant to run in its own goroutine: it returns nothing
// and reports every outcome through the standard logger. No file is created
// when the request fails or the server answers with a non-2xx status, and a
// partially written file is removed when saving fails.
func downloadImage(imageURL, destinationDir string) {
	filePath := filepath.Join(destinationDir, imageFilename(imageURL, time.Now().UnixNano()))

	// Download the image
	resp, err := imageClient.Get(imageURL)
	if err != nil {
		log.Printf("Error downloading image %s: %v", imageURL, err)
		return
	}
	defer resp.Body.Close()

	// Do not store error pages as if they were images.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		log.Printf("Error downloading image %s: unexpected status %s", imageURL, resp.Status)
		return
	}

	// Create the file
	file, err := os.Create(filePath)
	if err != nil {
		log.Printf("Error creating file for image %s: %v", imageURL, err)
		return
	}

	// Copy data to file. Close is checked as well because a write error may
	// only surface when the file is closed.
	_, err = io.Copy(file, resp.Body)
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		log.Printf("Error saving image %s: %v", imageURL, err)
		if rmErr := os.Remove(filePath); rmErr != nil {
			log.Printf("Error removing partial file %s: %v", filePath, rmErr)
		}
		return
	}
	log.Printf("Successfully downloaded image: %s to %s", imageURL, filePath)
}
// main registers handler on the default mux for every path and serves HTTP on
// port 7171 on all interfaces. It only returns by terminating the process when
// the server stops with an error.
func main() {
	// example usage: curl -s 'http://127.0.0.1:7171/?url=http://go-colly.org/'
	addr := ":7171"
	http.HandleFunc("/", handler)

	srv := &http.Server{
		Addr: addr,
		// Handler is left nil so the default mux is used, as with
		// http.ListenAndServe.
		//
		// Bound the time a client may take to send its request headers so idle
		// or deliberately slow connections cannot be held open indefinitely.
		// No write timeout is set because a scrape may legitimately take long.
		ReadHeaderTimeout: 10 * time.Second,
	}

	log.Println("listening on", addr)
	log.Fatal(srv.ListenAndServe())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment