Last active
November 12, 2022 19:49
-
-
Save markdtw/f3049da3fa68fcc0822ff7284b262000 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Mimic a browser when a user only types in the domain name in the address bar
without the scheme, e.g., https://, by sending requests to https://<domain>,
https://www.<domain>, http://<domain>, and http://www.<domain> to figure out
which one leads to the landing page.

Usage:

	$> go build .
	$> echo facebook.com | domain2URL

Results will be written to ./urlgo.txt when we find a proper landing page, or
./urlerrgo.txt when we fail.

Tested with Go 1.19
Author: markdtw
*/
package main | |
import ( | |
"bufio" | |
"fmt" | |
"log" | |
"net/http" | |
"os" | |
"strings" | |
"sync" | |
"time" | |
) | |
// userAgent is the User-Agent header value sent with every probe request.
// It is also written as the first line of both output files.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
// reqResult is the outcome of probing a single domain, as delivered by
// sendRequest on its result channel.
type reqResult struct {
	// success is true when one of the candidate URLs answered with a
	// status code in the range [200, 400).
	success bool
	// out is the URL that answered when success is true, and the bare
	// input domain otherwise.
	out string
}
func sendRequest(i int, domain string, client *http.Client, outC chan reqResult) { | |
success := false | |
schemes := []string{"https://", "https://www.", "http://", "http://www."} | |
for _, scheme := range schemes { | |
url := scheme + domain | |
req, err := http.NewRequest(http.MethodHead, url, nil) | |
if err != nil { | |
msg := fmt.Sprintf("\t%07d: %s...%s", i, url, err.Error()) | |
log.Println(msg) | |
continue | |
} | |
req.Header.Set("User-Agent", userAgent) | |
req.Header.Set("Host", domain) | |
res, err := client.Do(req) | |
if err != nil { | |
msg := fmt.Sprintf("\t%07d: %s...%s", i, url, err.Error()) | |
log.Println(msg) | |
continue | |
} | |
if res.StatusCode >= 200 && res.StatusCode < 400 { | |
msg := fmt.Sprintf("%07d: %s...GOOD", i, url) | |
log.Println(msg) | |
outC <- reqResult{success: true, out: url} | |
success = true | |
break | |
} else { | |
msg := fmt.Sprintf("\t%07d: %s...%d", i, url, res.StatusCode) | |
log.Println(msg) | |
} | |
} | |
if !success { | |
outC <- reqResult{success: false, out: domain} | |
} | |
} | |
func main() { | |
// to read inputs from stdin | |
scnr := bufio.NewScanner(os.Stdin) | |
// file handling for writing the results | |
fp, err := os.Create("./urlgo.txt") | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer fp.Close() | |
wtr := bufio.NewWriter(fp) | |
defer wtr.Flush() | |
_, err = wtr.WriteString("User-Agent: " + userAgent + "\n\n") | |
if err != nil { | |
log.Fatal(err) | |
} | |
fpe, err := os.Create("./urlerrgo.txt") | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer fpe.Close() | |
wtre := bufio.NewWriter(fpe) | |
defer wtre.Flush() | |
_, err = wtre.WriteString("User-Agent: " + userAgent + "\n\n") | |
if err != nil { | |
log.Fatal(err) | |
} | |
// need this custom client to set the redirect policy and timeout | |
client := &http.Client{ | |
CheckRedirect: func(req *http.Request, via []*http.Request) error { | |
return http.ErrUseLastResponse | |
}, | |
Timeout: 10 * time.Second, | |
} | |
outC := make(chan reqResult) // to receive the results from the goroutines | |
var wg = sync.WaitGroup{} | |
maxGoroutines := 100 // can be tuned | |
sem := make(chan struct{}, maxGoroutines) // to bound the number of goroutines | |
go func() { | |
for result := range outC { | |
if result.success { | |
_, err = wtr.WriteString(result.out + "\n") | |
if err != nil { | |
log.Fatal(err) | |
} | |
} else { | |
_, err = wtre.WriteString(result.out + "\n") | |
if err != nil { | |
log.Fatal(err) | |
} | |
} | |
<-sem // release a slot from the channel | |
wg.Done() | |
} | |
}() | |
i := 0 | |
for { | |
scnr.Scan() | |
line := scnr.Text() | |
if len(line) == 0 { | |
break | |
} | |
domain := strings.TrimSpace(line) | |
sem <- struct{}{} // blocked if the channel is full | |
wg.Add(1) | |
go func(i int) { | |
sendRequest(i, domain, client, outC) | |
}(i) | |
i++ | |
} | |
wg.Wait() | |
close(outC) | |
log.Println("Done!") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment