@s3rgeym
Last active December 30, 2024 12:31
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"sync"
)

type Config struct {
	InputFile  string
	OutputFile string
	Workers    int
}

func main() {
	config := parseFlags()

	urls, err := readInput(config.InputFile)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error reading input: %v\n", err)
		os.Exit(1)
	}

	// URLs are handed to the workers over a channel so that each URL is
	// processed exactly once, regardless of the number of workers.
	jobs := make(chan string)
	results := make(chan string)
	var wg sync.WaitGroup

	for i := 0; i < config.Workers; i++ {
		wg.Add(1)
		go worker(jobs, results, &wg)
	}

	outputDone := make(chan struct{})
	go func() {
		if err := writeOutput(config.OutputFile, results); err != nil {
			fmt.Fprintf(os.Stderr, "Error writing output: %v\n", err)
			os.Exit(1)
		}
		close(outputDone)
	}()

	for _, url := range urls {
		jobs <- url
	}
	close(jobs)

	wg.Wait()
	close(results)
	<-outputDone
}

func parseFlags() Config {
	inputFile := flag.String("i", "-", "Input file with URLs (default: stdin)")
	outputFile := flag.String("o", "-", "Output file for results (default: stdout)")
	workers := flag.Int("w", 4, "Number of workers")
	flag.Parse()

	return Config{
		InputFile:  *inputFile,
		OutputFile: *outputFile,
		Workers:    *workers,
	}
}

func readInput(filePath string) ([]string, error) {
	file := os.Stdin
	if filePath != "-" {
		var err error
		file, err = os.Open(filePath)
		if err != nil {
			return nil, err
		}
		defer file.Close()
	}
	return readLines(file)
}

func writeOutput(filePath string, results <-chan string) error {
	file := os.Stdout
	if filePath != "-" {
		var err error
		file, err = os.Create(filePath)
		if err != nil {
			return err
		}
		defer file.Close()
	}

	writer := bufio.NewWriter(file)
	defer writer.Flush()

	for result := range results {
		if _, err := fmt.Fprintln(writer, result); err != nil {
			return err
		}
		writer.Flush()
	}
	return nil
}

func readLines(file *os.File) ([]string, error) {
	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, scanner.Err()
}

// worker consumes URLs from the jobs channel until it is closed.
func worker(jobs <-chan string, results chan<- string, wg *sync.WaitGroup) {
	defer wg.Done()
	for url := range jobs {
		results <- processURL(url)
	}
}

func processURL(url string) string {
	return fmt.Sprintf("Processed: %s", url)
}

// Semaphore is a counting semaphore for bounding concurrency:
//
//	sem.Acquire()
//	defer sem.Release()
type Semaphore chan struct{}

func NewSemaphore(n int) Semaphore {
	return make(chan struct{}, n)
}

func (s Semaphore) Acquire() {
	s <- struct{}{}
}

func (s Semaphore) Release() {
	<-s
}
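The Semaphore type above is declared but not used by this first skeleton. A minimal sketch of the intended pattern, reusing NewSemaphore and processURL from the skeleton (processAll is a hypothetical helper, not part of the gist), could look like this:

// Sketch only: one goroutine per URL, with the Semaphore capping how many
// run concurrently. processAll is hypothetical; NewSemaphore and processURL
// are the functions defined above.
func processAll(urls []string, maxConcurrent int) {
	sem := NewSemaphore(maxConcurrent)
	var wg sync.WaitGroup
	for _, url := range urls {
		wg.Add(1)
		sem.Acquire() // blocks once maxConcurrent goroutines are already running
		go func(u string) {
			defer wg.Done()
			defer sem.Release()
			fmt.Println(processURL(u))
		}(url)
	}
	wg.Wait()
}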

s3rgeym commented Dec 30, 2024

package main

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
)

const (
	defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
	defaultTimeout   = 10 * time.Second
)

type Config struct {
	InputFile     string
	OutputFile    string
	MaxConcurrent int
	UserAgent     string
	Timeout       time.Duration
	LogLevel      string
}

type Result struct {
	URL    string `json:"url"`
	Exists bool   `json:"exists"`
}

type Semaphore chan struct{}

func NewSemaphore(n int) Semaphore {
	return make(chan struct{}, n)
}

func (s Semaphore) Acquire() {
	s <- struct{}{}
}

func (s Semaphore) Release() {
	<-s
}

var log = logrus.New()

func main() {
	config := parseFlags()
	setupLogger(config.LogLevel)

	urls, err := readInput(config.InputFile)
	if err != nil {
		log.Errorf("Error reading input: %v", err)
		os.Exit(1)
	}

	results := make(chan Result)
	var wg sync.WaitGroup

	sem := NewSemaphore(config.MaxConcurrent)

	client := &http.Client{
		Timeout: config.Timeout,
	}

	log.Infof("Starting %d workers", config.MaxConcurrent)
	for i := 0; i < config.MaxConcurrent; i++ {
		wg.Add(1)
		go worker(urls, results, sem, client, config.UserAgent, &wg)
	}

	outputDone := make(chan struct{})
	go func() {
		if err := writeOutput(config.OutputFile, results); err != nil {
			log.Errorf("Error writing output: %v", err)
			os.Exit(1)
		}
		close(outputDone)
	}()

	wg.Wait()
	close(results)
	<-outputDone
	log.Info("Processing completed")
}

func parseFlags() Config {
	inputFile := flag.String("i", "-", "Input file with URLs (default: stdin)")
	outputFile := flag.String("o", "-", "Output file for results (default: stdout)")
	maxConcurrent := flag.Int("c", 10, "Maximum concurrent HTTP requests")
	userAgent := flag.String("u", defaultUserAgent, "User-Agent for HTTP requests")
	timeout := flag.Duration("t", defaultTimeout, "Timeout for HTTP requests")
	logLevel := flag.String("l", "info", "Log level (debug, info, warn, error)")
	flag.Parse()

	return Config{
		InputFile:     *inputFile,
		OutputFile:    *outputFile,
		MaxConcurrent: *maxConcurrent,
		UserAgent:     *userAgent,
		Timeout:       *timeout,
		LogLevel:      *logLevel,
	}
}

func setupLogger(logLevel string) {
	log.SetFormatter(&logrus.TextFormatter{
		ForceColors:   true,
		FullTimestamp: true,
	})

	switch logLevel {
	case "debug":
		log.SetLevel(logrus.DebugLevel)
	case "info":
		log.SetLevel(logrus.InfoLevel)
	case "warn":
		log.SetLevel(logrus.WarnLevel)
	case "error":
		log.SetLevel(logrus.ErrorLevel)
	default:
		log.SetLevel(logrus.InfoLevel)
	}
}

func readInput(filePath string) ([]string, error) {
	file := os.Stdin
	if filePath != "-" {
		var err error
		file, err = os.Open(filePath)
		if err != nil {
			return nil, err
		}
		defer file.Close()
	}

	return readLines(file)
}

func writeOutput(filePath string, results <-chan Result) error {
	file := os.Stdout
	if filePath != "-" {
		var err error
		file, err = os.Create(filePath)
		if err != nil {
			return err
		}
		defer file.Close()
	}

	writer := bufio.NewWriter(file)
	defer writer.Flush()

	for result := range results {
		jsonResult, err := json.Marshal(result)
		if err != nil {
			return err
		}
		if _, err := fmt.Fprintln(writer, string(jsonResult)); err != nil {
			return err
		}
		writer.Flush()
	}

	return nil
}

func readLines(file *os.File) ([]string, error) {
	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, scanner.Err()
}

// worker checks a single URL for the /wp-content/ marker and sends the
// result downstream; errors are logged and produce no result entry.
func worker(u string, results chan<- Result, client *http.Client, userAgent string) {
	log.Debugf("Checking URL: %s", u)
	exists, err := checkWpContent(client, u, userAgent)
	if err != nil {
		log.Errorf("Error checking %s: %v", u, err)
		return
	}
	results <- Result{URL: u, Exists: exists}
	log.Infof("Processed URL: %s, Exists: %v", u, exists)
}

func checkWpContent(client *http.Client, url, userAgent string) (bool, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return false, err
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return false, err
	}

	return strings.Contains(string(body), "/wp-content/"), nil
}
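
Since checkWpContent only needs an HTTP endpoint, it can be exercised against a local httptest server. A minimal sketch of such a test, assuming it lives in a _test.go file in the same package (it is not part of the gist), might be:

package main

import (
	"net/http"
	"net/http/httptest"
	"testing"
	"time"
)

// Sketch only: serves a page containing the /wp-content/ marker and checks
// that checkWpContent reports it as present.
func TestCheckWpContent(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`<link rel="stylesheet" href="/wp-content/themes/demo/style.css">`))
	}))
	defer srv.Close()

	client := &http.Client{Timeout: 5 * time.Second}
	exists, err := checkWpContent(client, srv.URL, "test-agent")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !exists {
		t.Fatal("expected /wp-content/ to be detected")
	}
}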
