Last active
July 22, 2022 05:43
-
-
Save yin1999/c868b0b840b5109a0335b54ba4598e96 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"flag" | |
"fmt" | |
"io/fs" | |
"net/http" | |
"os" | |
"path/filepath" | |
"regexp" | |
"strings" | |
"time" | |
) | |
// Package-level configuration and state shared by the walker and the link fixer.
var (
	// redirect maps a lower-cased, locale-less slug to its redirect target;
	// main fills it from loadRedirects.
	redirect map[string]string

	// locale is the target locale; fixUri does not warn about links that
	// start with "/" followed by this value.
	locale = flag.String("locale", "zh-CN", "set locale")

	// toHTTPS is the "-s" switch. In the visible source it is only read by
	// commented-out code in fixUri.
	toHTTPS = flag.Bool("s", false, "convert to https")

	// limit is the number of files that may still be rewritten: writeFile
	// decrements it while it is positive and visit exits once it reaches 0.
	limit = flag.Int("limit", 200, "set limit (-1 for unlimited)")

	// exclude lists directory-name prefixes that visit refuses to descend into.
	exclude = []string{"conflicting", "orphaned"}
)
// localePath accumulates the values of a repeatable command-line flag. Its
// pointer type provides the Set and String methods that flag.Var expects.
type localePath []string

// Set records one more value for the flag. An empty value is rejected with an
// error and leaves the list unchanged.
func (l *localePath) Set(value string) error {
	if value == "" {
		return fmt.Errorf("locale path is empty")
	}
	*l = append(*l, value)
	return nil
}

// String renders the collected values as a bracketed, comma-separated list,
// for example "[a,b]".
func (l localePath) String() string {
	return fmt.Sprintf("[%s]", strings.Join(l, ","))
}
func main() { | |
dir := flag.String("d", ".", "set walk dir") // example `-d files/zh-cn` to run jobs on `zh-CN` | |
var redirectPath localePath | |
flag.Var(&redirectPath, "l", "set locale path") | |
flag.Parse() | |
var err error | |
redirect, err = loadRedirects(redirectPath...) | |
if err != nil { | |
fmt.Printf("[error] load redirects failed, err: %s\n", err.Error()) | |
return | |
} | |
err = filepath.WalkDir(*dir, visit) | |
if err != nil { | |
fmt.Printf("occur an error: %s\n", err.Error()) | |
} | |
} | |
func loadRedirects(locale ...string) (map[string]string, error) { | |
redirect = make(map[string]string) | |
for _, l := range locale { | |
file, err := os.Open(l) | |
if err != nil { | |
return redirect, err | |
} | |
defer file.Close() | |
scanner := bufio.NewScanner(file) | |
for scanner.Scan() { | |
line := scanner.Text() | |
if len(line) != 0 && line[0] == '#' { | |
continue | |
} | |
split := strings.Fields(line) | |
if len(split) != 2 { | |
continue | |
} | |
originSlug, _ := removeLocale(split[0]) | |
originSlug = strings.ToLower(originSlug) | |
if _, ok := redirect[originSlug]; !ok { // priority: higher -> lower | |
redirect[originSlug], _ = removeLocale(split[1]) | |
} | |
} | |
} | |
return redirect, nil | |
} | |
// allLocal is the set of recognised locale path segments, all lower-case.
// Locales that carry a region are listed in both their hyphen and underscore
// spellings.
var allLocal = map[string]struct{}{
	// language-only locales
	"de": {},
	"es": {},
	"fr": {},
	"ja": {},
	"ko": {},
	"pl": {},
	"ru": {},

	// language-region locales
	"en-us": {},
	"en_us": {},
	"pt-br": {},
	"pt_br": {},
	"zh-cn": {},
	"zh_cn": {},
	"zh-tw": {},
	"zh_tw": {},
}

// removeLocale drops one leading "/" from uri and then, when the first path
// segment is a member of allLocal (compared case-insensitively), drops that
// segment together with the "/" that follows it.
//
// It returns the remainder and true when a locale segment was removed.
// Otherwise it returns false along with uri minus its leading slash; note that
// the slash is stripped in that case too. An empty uri is returned as is.
func removeLocale(uri string) (string, bool) {
	if uri == "" {
		return uri, false
	}
	trimmed := strings.TrimPrefix(uri, "/")

	sep := strings.Index(trimmed, "/")
	if sep < 0 {
		// A single segment: there is nothing after a would-be locale.
		return trimmed, false
	}
	if _, known := allLocal[strings.ToLower(trimmed[:sep])]; known {
		return trimmed[sep+1:], true // remove locale
	}
	return trimmed, false
}
func visit(path string, d fs.DirEntry, err error) error { | |
if d.IsDir() { | |
for i := range exclude { | |
if strings.HasPrefix(d.Name(), exclude[i]) { | |
return filepath.SkipDir | |
} | |
} | |
return nil | |
} | |
switch filepath.Ext(path) { | |
case ".html": | |
if *limit == 0 { | |
os.Exit(0) | |
} | |
err = resolve(path) | |
} | |
return err | |
} | |
// Regular expressions for matching anchors.

// htmlAnchor matches an HTML "<a ...href=...>...</a>" element; capture group 1
// is the text in front of the closing "</a>".
var htmlAnchor = regexp.MustCompile(`<a [^>]*href=.*?>(.*?)</a>`)

// mdAnchor matches a Markdown "[text](target)" link; capture group 1 is the
// text and capture group 2 is the target.
var mdAnchor = regexp.MustCompile(`\[([^\]]*)\]\(([^\)]*)\)`)
func resolve(path string) error { | |
data, err := os.ReadFile(path) | |
if err != nil { | |
fmt.Printf("[warn] cannot open file, err: %s\n", err.Error()) | |
} | |
match, index := findAllMacro(data, path) | |
if len(match) == 0 { | |
return nil | |
} | |
return writeFile(path, data, match, index) | |
} | |
// findAllMacro got all the anch with: | |
// match string content | |
// index [2]int{replace_start_index, replace_end_index} | |
func findAllMacro(data []byte, path string) (match [][2]string, index [][2]int) { | |
res := htmlAnchor.FindAllSubmatchIndex(data, -1) | |
if len(res) == 0 { | |
return | |
} | |
match = make([][2]string, 0, len(res)) | |
index = make([][2]int, 0, len(res)) | |
for _, subIndex := range res { | |
if subIndex[0] != 0 && data[subIndex[0]-1] == '!' { // ignore markdown image | |
continue | |
} | |
href, content, err := parseAnchor(string(data[subIndex[0]:subIndex[1]])) | |
if err != nil { | |
fmt.Printf("[warn] parse anchor failed %q, err: %s\n", path, err.Error()) | |
} | |
index = append(index, [...]int{subIndex[0], subIndex[1]}) | |
match = append(match, [2]string{href, content}) | |
} | |
return | |
} | |
// parseAnchor extracts the link target and the inner text from the source of a
// single anchor element such as `<a class="x" href="/y">text</a>`.
//
// Scanning starts right after the first occurrence of "href" in data. The
// first single- or double-quoted value found before the tag's closing '>' is
// returned as href, and the text between that '>' and the next "</a>" is
// returned as content. A '>' inside a quoted value does not end the tag.
//
// An error is returned when data contains no "href", when it contains no
// "</a>", or when the opening tag ends before any quoted value was seen (for
// example an unquoted href).
//
// NOTE: because the first occurrence of "href" is used, an earlier attribute
// whose name or value contains "href" (such as hreflang) is mistaken for it.
func parseAnchor(data string) (href, content string, err error) {
	const attr = "href"
	attrAt := strings.Index(data, attr)
	if attrAt == -1 {
		return "", "", fmt.Errorf("cannot find href")
	}
	data = data[attrAt+len(attr):]
	if !strings.Contains(data, "</a>") {
		return "", "", fmt.Errorf("cannot find </a>")
	}

	var quote rune       // quote character currently open, 0 outside quotes
	start, end := -1, -1 // positions of the first quoted value's delimiters
	for i, r := range data {
		switch r {
		case '"', '\'':
			switch quote {
			case 0:
				quote = r
				if start == -1 {
					start = i
				}
			case r:
				quote = 0
				if end == -1 {
					end = i
				}
			}
		case '>':
			if quote != 0 {
				continue // part of a quoted attribute value
			}
			if end == -1 {
				// The opening tag is over; quotes further on belong to the
				// anchor's content and must not be taken for the href.
				return "", "", fmt.Errorf("cannot find href")
			}
			// Search for the closing tag only after the opening tag, so the
			// content slice can never have its end before its start.
			inner := data[i+1:]
			closeAt := strings.Index(inner, "</a>")
			if closeAt == -1 {
				return "", "", fmt.Errorf("cannot find </a>")
			}
			return data[start+1 : end], inner[:closeAt], nil
		}
	}
	return "", "", fmt.Errorf("cannot find href")
}
// writeFile rplace all anch marco with <a> tag | |
// the param replacer is with content | |
// the param index is with [2]int{replace_start_index, replace_end_index} | |
func writeFile(path string, data []byte, replacer [][2]string, index [][2]int) error { | |
var err error | |
var warn []string | |
buffer := &bytes.Buffer{} | |
start := 0 | |
var formatStr string | |
var w bool | |
formatStr = "<a href=\"%s\">%s</a>" | |
for i := range replacer { | |
replacer[i][0], w = fixUri(replacer[i][0]) | |
if w { | |
warn = append(warn, replacer[i][0]) | |
} | |
} | |
// for _, warnUri := range warn { | |
// fmt.Printf("[warn] uri may wrong in file:'%s' : %s\n", path, warnUri) | |
// } | |
for i, r := range index { | |
if r[0] == 0 && r[1] == 0 { | |
continue | |
} | |
_, err = buffer.Write(data[start:r[0]]) | |
if err != nil { | |
return err | |
} | |
fmt.Fprintf(buffer, formatStr, replacer[i][0], replacer[i][1]) | |
start = r[1] | |
} | |
_, err = buffer.Write(data[start:]) // write the reset of data | |
if err != nil { | |
return err | |
} | |
newData := buffer.Bytes() | |
if bytes.Equal(data, newData) { | |
return nil | |
} | |
file, err := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, 0644) | |
if err != nil { | |
return err | |
} | |
if *limit > 0 { | |
*limit-- | |
} | |
defer file.Close() | |
_, err = file.Write(newData) | |
return err | |
} | |
// init gives http.DefaultClient a five-second timeout.
func init() {
	http.DefaultClient.Timeout = 5 * time.Second
}

// domainCache is a set of domain names. In the visible source it is only
// referenced by commented-out code in fixUri.
var domainCache = map[string]struct{}{}
// edge case: | |
// - /en/CSS/Understanding_z-index/Stacking_without_z-index | |
func fixUri(uri string) (newUri string, warn bool) { | |
// if tmpUri, err := url.QueryUnescape(uri); err == nil { // uri decode | |
// uri = tmpUri | |
// } | |
if strings.HasPrefix(uri, "https://developer.mozilla.org/") { | |
uri = uri[len("https://developer.mozilla.org"):] // remove prefix | |
} else if strings.HasPrefix(uri, "http://developer.mozilla.org/") { | |
uri = uri[len("http://developer.mozilla.org"):] // remove prefix | |
} | |
// slug, hasLocal := removeLocale(uri) | |
// if hasLocal { | |
// split := strings.Split(slug, "#") | |
// if newSlug, ok := redirect[strings.ToLower(split[0])]; ok { | |
// split[0] = newSlug | |
// slug = strings.Join(split, "#") | |
// } | |
// if strings.HasPrefix(slug, "https://") { | |
// uri = slug | |
// } else { | |
// uri = fmt.Sprintf("/%s/%s", *locale, slug) | |
// } | |
// } else if strings.HasPrefix(uri, "/docs/") { | |
// uri = fmt.Sprintf("/%s%s", *locale, uri) | |
// } | |
// if *toHTTPS && strings.HasPrefix(uri, "http://") { | |
// domain := strings.Split(uri, "/")[2] | |
// if _, ok := domainCache[domain]; !ok { | |
// resp, err := http.Head(strings.Replace(uri, "http://", "https://", 1)) | |
// if err == nil { | |
// domainCache[domain] = struct{}{} | |
// resp.Body.Close() | |
// } | |
// } | |
// if _, ok := domainCache[domain]; ok { | |
// uri = strings.Replace(uri, "http://", "https://", 1) | |
// } | |
// } | |
// if strings.HasPrefix(uri, "//") { | |
// uri = strings.Replace(uri, "//", "https://", 1) | |
// } | |
return uri, !strings.HasPrefix(uri, "http") && | |
!strings.HasPrefix(uri, "#") && | |
!strings.HasPrefix(uri, "/"+*locale) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment