nehayward · April 4, 2019 17:45
diff --git a/imageDownloader.go b/imageDownloader.go
 package main

 import (
 	"bufio"
 	"encoding/csv"
 	"fmt"
 	"io"
 	"log"
 	"os"
 	"strings"

 	"github.com/gocolly/colly"
 )

 // company is one record of the input CSV: name comes from the first column
 // and url from the second.
 type company struct {
 	name, url string
 }

 // main reads the CSV file named by the first command-line argument, builds a
 // company (name, URL) from the first two columns of every record, and runs
 // findImage on a single entry of that list. Any failure while opening or
 // parsing the file, or while creating the output directory, terminates the
 // program via log.Fatal.
 func main() {
 	if len(os.Args) < 2 {
 		fmt.Println("Please enter ./imageDownloader companies.csv")
 		return
 	}

 	companiesCSV, err := os.Open(os.Args[1])
 	if err != nil {
 		log.Fatal(err)
 	}

 	reader := csv.NewReader(bufio.NewReader(companiesCSV))
 	var companies []company
 	for {
 		record, err := reader.Read()
 		if err == io.EOF {
 			break
 		}
 		if err != nil {
 			log.Fatal(err)
 		}
 		// Both columns are indexed below; reject short records instead of
 		// panicking on record[1].
 		if len(record) < 2 {
 			log.Fatalf("record %q: want at least 2 columns (name, url), got %d", record, len(record))
 		}
 		companies = append(companies, company{
 			name: record[0],
 			url:  record[1],
 		})
 	}
 	// The file was only read, so a Close error cannot lose data.
 	_ = companiesCSV.Close()

 	// Only this one entry is crawled for now.
 	// TODO: switch to downloadImage(companies) to process every row.
 	const target = 8
 	if len(companies) <= target {
 		log.Fatalf("need at least %d companies in %s, got %d", target+1, os.Args[1], len(companies))
 	}

 	if !strings.HasPrefix(companies[target].url, "http") {
 		companies[target].url = "http://" + companies[target].url
 	}
 	if err := os.MkdirAll("Companies", os.ModePerm); err != nil {
 		log.Fatal(err)
 	}
 	findImage(companies[target].url)
 }

 // downloadImage runs findImage for every company in companies, creating a
 // "Companies/<n>" directory for each one first, where n is the slice index
 // plus two (presumably the spreadsheet row number of the entry; confirm).
 //
 // URLs without an "http" prefix get "http://" prepended, matching what main
 // does for the single entry it crawls. Entries with an empty URL are skipped.
 //
 // NOTE(review): findImage saves into "Companies/" directly, so the
 // per-company directories created here are not written to yet.
 func downloadImage(companies []company) {
 	for i, c := range companies {
 		dir := fmt.Sprintf("Companies/%d", i+2)
 		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
 			// Nothing is stored in dir yet, so keep crawling.
 			log.Printf("creating %s: %v", dir, err)
 		}

 		target := c.url
 		if target == "" {
 			continue
 		}
 		if !strings.HasPrefix(target, "http") {
 			target = "http://" + target
 		}
 		findImage(target)
 	}
 }

 // findImage fetches url with a new colly collector, requests every <img src>
 // found in the returned HTML, and saves each response whose Content-Type
 // contains "image" as "Companies/<file name>".
 //
 // Request failures are logged by the OnError callback. A failure to write an
 // image to disk terminates the program via log.Fatal.
 func findImage(url string) {
 	c := colly.NewCollector()

 	// Request every image referenced by the page. Anchor links are not
 	// followed.
 	c.OnHTML("img[src]", func(e *colly.HTMLElement) {
 		src := e.Attr("src")
 		fmt.Println(src)

 		// src is often relative ("/img/logo.png"); resolve it against the
 		// page it came from before requesting it.
 		abs := e.Request.AbsoluteURL(src)
 		if abs == "" {
 			return
 		}
 		// Deliberately ignored: fetch failures are expected to reach OnError,
 		// and the remaining errors are presumably routine rejections such as
 		// an image URL that was already requested.
 		_ = c.Visit(abs)
 	})

 	// Set error handler
 	c.OnError(func(r *colly.Response, e error) {
 		log.Println("error:", e, r.Request.URL, string(r.Body))
 	})

 	c.OnRequest(func(r *colly.Request) {
 		fmt.Println("Visiting", r.URL)
 	})

 	c.OnResponse(func(r *colly.Response) {
 		if !strings.Contains(r.Headers.Get("Content-Type"), "image") {
 			// handle further response types...
 			return
 		}
 		if err := r.Save("Companies/" + r.FileName()); err != nil {
 			log.Fatal(err)
 		}
 	})

 	// Errors raised before a request is sent (e.g. an unusable URL) may not
 	// reach OnError, so report whatever Visit returns.
 	if err := c.Visit(url); err != nil {
 		log.Println("visit:", url, err)
 	}
 }

 // ADD HTTP
 // function addhttp(url string) {
 //     if (!preg_match("~^(?:f|ht)tps?://~i", $url)) {
 //         $url = "http://" . $url;
 //     }
 //     return $url;
 // }
	package main

	import (
	"bufio"
	"encoding/csv"
	"fmt"
	"io"
	"log"
	"os"
	"strings"

	"github.com/gocolly/colly"
	)

	// company is one record of the input CSV: name comes from the first column
	// and url from the second.
	type company struct {
		name, url string
	}

	// main reads the CSV file named by the first command-line argument, builds a
	// company (name, URL) from the first two columns of every record, and runs
	// findImage on a single entry of that list. Any failure while opening or
	// parsing the file, or while creating the output directory, terminates the
	// program via log.Fatal.
	func main() {
		if len(os.Args) < 2 {
			fmt.Println("Please enter ./imageDownloader companies.csv")
			return
		}

		companiesCSV, err := os.Open(os.Args[1])
		if err != nil {
			log.Fatal(err)
		}

		reader := csv.NewReader(bufio.NewReader(companiesCSV))
		var companies []company
		for {
			record, err := reader.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				log.Fatal(err)
			}
			// Both columns are indexed below; reject short records instead of
			// panicking on record[1].
			if len(record) < 2 {
				log.Fatalf("record %q: want at least 2 columns (name, url), got %d", record, len(record))
			}
			companies = append(companies, company{
				name: record[0],
				url:  record[1],
			})
		}
		// The file was only read, so a Close error cannot lose data.
		_ = companiesCSV.Close()

		// Only this one entry is crawled for now.
		// TODO: switch to downloadImage(companies) to process every row.
		const target = 8
		if len(companies) <= target {
			log.Fatalf("need at least %d companies in %s, got %d", target+1, os.Args[1], len(companies))
		}

		if !strings.HasPrefix(companies[target].url, "http") {
			companies[target].url = "http://" + companies[target].url
		}
		if err := os.MkdirAll("Companies", os.ModePerm); err != nil {
			log.Fatal(err)
		}
		findImage(companies[target].url)
	}

	// downloadImage runs findImage for every company in companies, creating a
	// "Companies/<n>" directory for each one first, where n is the slice index
	// plus two (presumably the spreadsheet row number of the entry; confirm).
	//
	// URLs without an "http" prefix get "http://" prepended, matching what main
	// does for the single entry it crawls. Entries with an empty URL are skipped.
	//
	// NOTE(review): findImage saves into "Companies/" directly, so the
	// per-company directories created here are not written to yet.
	func downloadImage(companies []company) {
		for i, c := range companies {
			dir := fmt.Sprintf("Companies/%d", i+2)
			if err := os.MkdirAll(dir, os.ModePerm); err != nil {
				// Nothing is stored in dir yet, so keep crawling.
				log.Printf("creating %s: %v", dir, err)
			}

			target := c.url
			if target == "" {
				continue
			}
			if !strings.HasPrefix(target, "http") {
				target = "http://" + target
			}
			findImage(target)
		}
	}

	// findImage fetches url with a new colly collector, requests every <img src>
	// found in the returned HTML, and saves each response whose Content-Type
	// contains "image" as "Companies/<file name>".
	//
	// Request failures are logged by the OnError callback. A failure to write an
	// image to disk terminates the program via log.Fatal.
	func findImage(url string) {
		c := colly.NewCollector()

		// Request every image referenced by the page. Anchor links are not
		// followed.
		c.OnHTML("img[src]", func(e *colly.HTMLElement) {
			src := e.Attr("src")
			fmt.Println(src)

			// src is often relative ("/img/logo.png"); resolve it against the
			// page it came from before requesting it.
			abs := e.Request.AbsoluteURL(src)
			if abs == "" {
				return
			}
			// Deliberately ignored: fetch failures are expected to reach OnError,
			// and the remaining errors are presumably routine rejections such as
			// an image URL that was already requested.
			_ = c.Visit(abs)
		})

		// Set error handler
		c.OnError(func(r *colly.Response, e error) {
			log.Println("error:", e, r.Request.URL, string(r.Body))
		})

		c.OnRequest(func(r *colly.Request) {
			fmt.Println("Visiting", r.URL)
		})

		c.OnResponse(func(r *colly.Response) {
			if !strings.Contains(r.Headers.Get("Content-Type"), "image") {
				// handle further response types...
				return
			}
			if err := r.Save("Companies/" + r.FileName()); err != nil {
				log.Fatal(err)
			}
		})

		// Errors raised before a request is sent (e.g. an unusable URL) may not
		// reach OnError, so report whatever Visit returns.
		if err := c.Visit(url); err != nil {
			log.Println("visit:", url, err)
		}
	}

	// ADD HTTP
	// function addhttp(url string) {
	// if (!preg_match("~^(?:f|ht)tps?://~i", $url)) {
	// $url = "http://" . $url;
	// }
	// return $url;
	// }