Created
March 14, 2025 19:02
-
-
Save Ppang0405/c8bb7104fd6640ab6b6b2ce40d88ccb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"encoding/json"
	"io"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/gocolly/colly/v2"
)
// pageInfo is the result document for one scraped page. It is serialized
// to JSON and sent back to the caller; the explicit tags pin the keys to the
// field names so the wire format does not change if a field is renamed.
type pageInfo struct {
	// StatusCode is the HTTP status code recorded for the visited page.
	StatusCode int `json:"StatusCode"`
	// Links maps each absolute link URL to the number of times it was seen.
	Links map[string]int `json:"Links"`
	// Images holds the absolute URL of every image, in the order encountered.
	Images []string `json:"Images"`
}
func handler(w http.ResponseWriter, r *http.Request) { | |
URL := r.URL.Query().Get("url") | |
if URL == "" { | |
log.Println("missing URL argument") | |
return | |
} | |
log.Println("visiting", URL) | |
c := colly.NewCollector() | |
p := &pageInfo{ | |
Links: make(map[string]int), | |
Images: []string{}, | |
} | |
// Create images directory if it doesn't exist | |
imagesDir := "downloaded_images" | |
if err := os.MkdirAll(imagesDir, 0755); err != nil { | |
log.Println("Error creating images directory:", err) | |
} | |
// count links | |
c.OnHTML("a[href]", func(e *colly.HTMLElement) { | |
link := e.Request.AbsoluteURL(e.Attr("href")) | |
if link != "" { | |
p.Links[link]++ | |
} | |
}) | |
// find and download images | |
c.OnHTML("img[src]", func(e *colly.HTMLElement) { | |
imageURL := e.Request.AbsoluteURL(e.Attr("src")) | |
if imageURL == "" { | |
return | |
} | |
// Add image URL to our result | |
p.Images = append(p.Images, imageURL) | |
// Download the image | |
go downloadImage(imageURL, imagesDir) | |
}) | |
// extract status code | |
c.OnResponse(func(r *colly.Response) { | |
log.Println("response received", r.StatusCode) | |
p.StatusCode = r.StatusCode | |
}) | |
c.OnError(func(r *colly.Response, err error) { | |
log.Println("error:", r.StatusCode, err) | |
p.StatusCode = r.StatusCode | |
}) | |
c.Visit(URL) | |
// dump results | |
b, err := json.Marshal(p) | |
if err != nil { | |
log.Println("failed to serialize response:", err) | |
return | |
} | |
w.Header().Add("Content-Type", "application/json") | |
w.Write(b) | |
} | |
// imageDownloadTimeout bounds a single image download (connect, headers and
// body) so that a stalled host cannot keep a download goroutine alive forever.
const imageDownloadTimeout = 30 * time.Second

// imageClient is shared by all downloads. Its nil Transport means the default
// transport is used, so connections are still pooled and reused.
var imageClient = &http.Client{Timeout: imageDownloadTimeout}

// imageFilename derives a local file name for imageURL. The name is the last
// path segment of the URL (query string and fragment removed) with
// "_<timestamp>" inserted before the extension to avoid collisions. URLs
// without a usable last segment yield "image_<timestamp>.jpg".
func imageFilename(imageURL string, timestamp int64) string {
	name := imageURL
	// Cut the query/fragment first: they may themselves contain slashes and
	// would otherwise end up in the file name or its extension.
	if i := strings.IndexAny(name, "?#"); i >= 0 {
		name = name[:i]
	}
	name = name[strings.LastIndex(name, "/")+1:]
	if name == "" || name == "." || name == ".." {
		name = "image.jpg"
	}

	extension := filepath.Ext(name)
	baseName := strings.TrimSuffix(name, extension)
	if baseName == "" {
		baseName = "image"
	}

	// strconv renders the timestamp as decimal digits; string(int64) would
	// instead produce a single (here invalid) Unicode code point.
	stamped := baseName + "_" + strconv.FormatInt(timestamp, 10) + extension

	// Base strips any remaining OS-specific separators so the result can
	// never escape the destination directory.
	return filepath.Base(stamped)
}

// downloadImage fetches imageURL and stores the body in destinationDir under
// a name built by imageFilename. It is meant to run in its own goroutine:
// every failure is logged and nothing is returned. Responses with a status
// other than 200 OK are not saved, and a partially written file is removed
// when saving fails.
func downloadImage(imageURL, destinationDir string) {
	filePath := filepath.Join(destinationDir, imageFilename(imageURL, time.Now().UnixNano()))

	// Download the image
	resp, err := imageClient.Get(imageURL)
	if err != nil {
		log.Printf("Error downloading image %s: %v", imageURL, err)
		return
	}
	defer resp.Body.Close()

	// Do not store error pages as if they were images.
	if resp.StatusCode != http.StatusOK {
		log.Printf("Error downloading image %s: unexpected status %s", imageURL, resp.Status)
		return
	}

	// Create the file
	file, err := os.Create(filePath)
	if err != nil {
		log.Printf("Error creating file for image %s: %v", imageURL, err)
		return
	}

	// Copy data to file. Close is checked as well because buffered data may
	// only fail to reach the disk at close time.
	_, err = io.Copy(file, resp.Body)
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		log.Printf("Error saving image %s: %v", imageURL, err)
		if rmErr := os.Remove(filePath); rmErr != nil {
			log.Printf("Error removing partial file %s: %v", filePath, rmErr)
		}
		return
	}

	log.Printf("Successfully downloaded image: %s to %s", imageURL, filePath)
}
func main() { | |
// example usage: curl -s 'http://127.0.0.1:7171/?url=http://go-colly.org/' | |
addr := ":7171" | |
http.HandleFunc("/", handler) | |
log.Println("listening on", addr) | |
log.Fatal(http.ListenAndServe(addr, nil)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment