Skip to content

Instantly share code, notes, and snippets.

@isocroft
Forked from nl5887/decode.go
Created March 9, 2026 01:21
Show Gist options
  • Select an option

  • Save isocroft/0ed7efcc6d3e8cfdea60b14cbded644d to your computer and use it in GitHub Desktop.

Select an option

Save isocroft/0ed7efcc6d3e8cfdea60b14cbded644d to your computer and use it in GitHub Desktop.
Golang implementation for RFC 1342: Non-ASCII Mail Headers
package main
import (
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"mime/quotedprintable"
"regexp"
"strings"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
)
// decoder returns a decoder that converts text in the named charset to UTF-8.
//
// The charset name is compared case-insensitively. Only "UTF-8" and
// "ISO-8859-1" are recognised; any other name yields an error that quotes the
// rejected name.
func decoder(charset string) (*encoding.Decoder, error) {
	// The parameter is deliberately not called "encoding": that name would
	// shadow the imported encoding package inside the body.
	switch {
	case strings.EqualFold(charset, "UTF-8"):
		return unicode.UTF8.NewDecoder(), nil
	case strings.EqualFold(charset, "ISO-8859-1"):
		return charmap.ISO8859_1.NewDecoder(), nil
	default:
		return nil, fmt.Errorf("unknown encoding %q", charset)
	}
}
// encodedWordPattern matches a single encoded-word of the form
// =?charset?encoding?text?= where encoding is B or Q in either case.
// Negated character classes keep one match from spanning several words.
var encodedWordPattern = regexp.MustCompile(`=\?([^?\s]+)\?([bBqQ])\?([^?\s]*)\?=`)

// decodeHeader replaces every encoded-word in str with its decoded UTF-8 text.
//
// Decoding is best-effort: an encoded-word that cannot be decoded (unknown
// charset, malformed base64 or quoted-printable data) is left in the result
// exactly as it appeared, and the first such failure is returned as the error
// alongside the otherwise decoded string. A string without encoded-words is
// returned unchanged with a nil error.
func decodeHeader(str string) (string, error) {
	var firstErr error

	decoded := encodedWordPattern.ReplaceAllStringFunc(str, func(word string) string {
		// word is already a full match, so re-matching only recovers the groups.
		m := encodedWordPattern.FindStringSubmatch(word)

		text, err := decodeEncodedWord(m[1], m[2], m[3])
		if err != nil {
			if firstErr == nil {
				firstErr = fmt.Errorf("decoding encoded-word %q: %w", word, err)
			}
			return word
		}
		return text
	})

	return decoded, firstErr
}

// decodeEncodedWord decodes the text part of one encoded-word using the given
// transfer encoding ("B" or "Q", any case) and charset.
func decodeEncodedWord(charset, transfer, text string) (string, error) {
	d, err := decoder(charset)
	if err != nil {
		return "", err
	}

	var r io.Reader
	if strings.ToUpper(transfer) == "Q" {
		// In the "Q" encoding "_" stands for a space. It is rewritten as "=20"
		// rather than " " because the quoted-printable reader drops trailing
		// whitespace, which would lose a space at the end of the word.
		r = quotedprintable.NewReader(
			strings.NewReader(strings.Replace(text, "_", "=20", -1)),
		)
	} else {
		r = base64.NewDecoder(base64.StdEncoding, strings.NewReader(text))
	}

	val, err := ioutil.ReadAll(d.Reader(r))
	if err != nil {
		return "", err
	}
	return string(val), nil
}
// main decodes two sample headers and prints each result followed by the
// error value returned for it.
func main() {
	samples := []string{
		"=?UTF-8?Q?=F3=BE=AC=8D_?= ... Laten we ontmoeten! Ik woon in de buurt ..",
		"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>",
	}

	for _, header := range samples {
		decoded, err := decodeHeader(header)
		fmt.Println(decoded, err)
	}
}
@isocroft

isocroft commented May 24, 2026

Copy link
Copy Markdown
Author
package gitsifter

/*
	LIMITATIONS / CONCERNS:

	1. Aliasing:
		Handled using alias extraction maps.

	2. Dynamic Calls:
		Static analysis cannot fully resolve:

			PHP:
				$func()

			JS:
				eval()

			Python:
				getattr()

		These require runtime instrumentation.

	3. Performance:
		Files are parsed concurrently per commit
		using goroutines and worker pools.
*/

import (
	"bytes"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	"path/filepath"
	"regexp"
	"runtime"
	"strings"
	"sync"

	"github.com/go-git/go-billy/v5"
	"github.com/go-git/go-git/v5"
	"github.com/go-git/go-git/v5/plumbing/object"

	sitter "github.com/smacker/go-tree-sitter"
	"github.com/smacker/go-tree-sitter/javascript"
	"github.com/smacker/go-tree-sitter/php"
	"github.com/smacker/go-tree-sitter/python"
	"github.com/smacker/go-tree-sitter/golang"
)

/*
	To determine if an interface changed,
	you cannot just check whether the function
	was edited.

	You must:

	1. Extract parameter list using Tree-sitter
	2. Normalize whitespace/comments
	3. Hash parameter/type/order definition
	4. Compare against previous commit
*/

/*
	InterfaceChange groups a function name, a commit hash and a change count.

	NOTE(review): nothing visible in this file constructs or reads this
	type, so the field meanings below are inferred from their names only —
	confirm against the callers.
*/
type InterfaceChange struct {
	FunctionName string // presumably the tracked function's name — TODO confirm
	CommitHash   string // presumably the commit the change was observed in — TODO confirm
	ChangeCount  int    // presumably the number of signature changes — TODO confirm
}

/*
	FunctionSignature pairs a function's name with a hash of its
	normalized interface. Values are produced by parseFunctionsFromFile.
*/
type FunctionSignature struct {
	// Name is the function name after alias resolution.
	Name      string
	// Signature is the hex digest returned by hashSignature for
	// "name|normalized parameters|result type".
	Signature string
}

/*
	TrackChanges scans git history and tracks
	interface changes across commits.
*/
func TrackChanges(
	repoPath string,
) (map[string]int, error) {

	repo, err := git.PlainOpen(repoPath)
	if err != nil {
		return nil, err
	}

	iter, err := repo.Log(&git.LogOptions{})
	if err != nil {
		return nil, err
	}

       /* @INFO:
		Map:
			functionID -> total count for signature hash mismatch across commits
	*/
	stats := make(map[string]int)

	/* @INFO:
		Map:
			functionID -> previous signature hash
	*/
	previousSignatures := make(map[string]string)

	/* @HINT:
		Protect shared maps during concurrent parsing
	*/
	var mu sync.Mutex

	err = iter.ForEach(func(
		c *object.Commit,
	) error {

		/*
			@HINT:
			Get file tree at this commit
		*/
		tree, err := c.Tree()
		if err != nil {
			return err
		}

		/* @HINT:
			Collect candidate source files
		*/
		sourceFiles := make([]*object.File, 0)

		err = tree.Files().ForEach(func(
			f *object.File,
		) error {

			ext := filepath.Ext(f.Name)

			switch ext {
			case ".js", ".py", ".php", ".go":
				sourceFiles = append(
					sourceFiles,
					f,
				)
			}

			return nil
		})

		if err != nil {
			return err
		}

		/*
			@HINT:
			
			Parse files concurrently for performance.

			Big O(C x F) runtime complexity mitigation:
				- Parallelize within commit
		*/

		workerCount := runtime.NumCPU()

		fileChan := make(chan *object.File)
		errorChan := make(chan error, 1)

		var wg sync.WaitGroup

		for i := 0; i < workerCount; i++ {

			wg.Add(1)

			go func() {
				defer wg.Done()

				for file := range fileChan {

					functions, err := parseFunctionsFromFile(
						file,
					)

					if err != nil {
						errorChan <- err
						return
					}

					mu.Lock()

					/*
						@HINT:
						Compare function signatures
						against previous commit
					*/
					for _, fn := range functions {

						oldSignature,
							exists := previousSignatures[fn.Name]

						/*
							@HINT:
							
							If changed:
								increment count for signature hash mismatch across commits
						*/
						if exists &&
							oldSignature != fn.Signature {

							stats[fn.Name]++
						}

						previousSignatures[fn.Name] =
							fn.Signature
					}

					mu.Unlock()
				}
			}()
		}

		go func() {
			defer close(fileChan)

			for _, file := range sourceFiles {
				fileChan <- file
			}
		}()

		wg.Wait()

		select {
		case err := <-errorChan:
			return err
		default:
		}

		return nil
	})

	return stats, err
}

/*
	parseFunctionsFromFile reads one file from a commit tree, parses it
	with the Tree-sitter grammar chosen by the file extension and returns
	one FunctionSignature per function node found by findFunctionNodes.

	The signature of a function is
		hashSignature(name + "|" + normalized parameters + "|" + result type)
	where the name has been passed through resolveAliasedName.

	Returns an error when the extension has no grammar, the file cannot
	be read, or parsing fails.
*/
func parseFunctionsFromFile(
	file *object.File,
) ([]FunctionSignature, error) {

	extension := filepath.Ext(file.Name)

	/* @HINT:
		Resolve the grammar before reading, so an unsupported file
		costs no I/O.
	*/
	language, err := getLanguage(extension)
	if err != nil {
		return nil, err
	}

	reader, err := file.Reader()
	if err != nil {
		return nil, err
	}

	defer reader.Close()

	/* @HINT:
		The parser needs the whole file in memory. ReadFrom reads
		until EOF and reports only real read errors, so there is no
		need to inspect the error text.
	*/
	var sourceCode bytes.Buffer

	if _, err := sourceCode.ReadFrom(reader); err != nil {
		return nil, fmt.Errorf("reading %s: %w", file.Name, err)
	}

	source := sourceCode.Bytes()

	parser := sitter.NewParser()
	parser.SetLanguage(language)

	/* @HINT:
		No context and no previous tree are passed.
		NOTE(review): with a nil context the parse presumably cannot
		be cancelled — confirm against go-tree-sitter.
	*/
	tree, err := parser.ParseCtx(
		nil,
		nil,
		source,
	)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %w", file.Name, err)
	}

	var functionNodes []*sitter.Node

	findFunctionNodes(
		tree.RootNode(),
		&functionNodes,
	)

	/*
		Alias resolution map:

		Python:
			import long_name as ln

		JS:
			import * as api from "./x";

		PHP:
			use Vendor\Service as Svc;

		Go:
			import snp "github.com/corvis/snipper"
	*/
	aliases := extractAliases(
		source,
		extension,
	)

	results := make([]FunctionSignature, 0, len(functionNodes))

	for _, fnNode := range functionNodes {

		functionName := resolveAliasedName(
			extractFunctionName(fnNode, source),
			aliases,
		)

		/* @HINT:
			Normalize interface:
				remove comments/whitespace
		*/
		normalizedParameters := normalizeInterface(
			extractParameters(fnNode, source),
		)

		resultType := extractResultType(fnNode, source)

		/* @HINT:
			Hash function name, normalized params and result type
		*/
		signatureHash := hashSignature(
			functionName + "|" + normalizedParameters + "|" + resultType,
		)

		results = append(
			results,
			FunctionSignature{
				Name:      functionName,
				Signature: signatureHash,
			},
		)
	}

	return results, nil
}

/*
	getLanguage resolves Tree-sitter language.
*/
func getLanguage(
	extension string,
) (*sitter.Language, error) {

	switch extension {

	case ".js":
		return javascript.GetLanguage(), nil

	case ".py":
		return python.GetLanguage(), nil

	case ".php":
		return php.GetLanguage(), nil
		
       case ".go:
              return go.GetLanguage(), nil
              
	default:
		return nil,
			fmt.Errorf(
				"unsupported extension: %s",
				extension,
			)
	}
}

/*
	findFunctionNodes walks the syntax tree below node depth-first and
	appends every function or method definition node to *results, parents
	before their children. A nil node is ignored.

	Collected node types:
		function_definition
		function_declaration
		method_definition
		method_declaration   (methods in the Go and PHP grammars)
*/
func findFunctionNodes(
	node *sitter.Node,
	results *[]*sitter.Node,
) {

	if node == nil {
		return
	}

	switch node.Type() {

	case "function_definition",
		"function_declaration",
		"method_definition",
		"method_declaration":

		*results = append(*results, node)
	}

	/* @HINT:
		Keep descending after a match so nested functions are
		collected as well.
	*/
	for i := 0; i < int(node.ChildCount()); i++ {
		findFunctionNodes(node.Child(i), results)
	}
}

/*
	extractFunctionName extracts function identifier.
*/
func extractFunctionName(
	node *sitter.Node,
	source []byte,
) string {

	for i := 0; i < int(node.NamedChildCount()); i++ {

		child := node.NamedChild(i)

		if child == nil {
			continue
		}

		switch child.Type() {

		case "identifier",
			"name":

			return string(
				source[
					child.StartByte():
						child.EndByte()
				],
			)
		}
	}

	return "anonymous"
}

/*
	extractParameters extracts raw parameter text.
*/
func extractParameters(
	node *sitter.Node,
	source []byte,
) string {

	for i := 0; i < int(node.NamedChildCount()); i++ {

		child := node.NamedChild(i)

		if child == nil {
			continue
		}

		switch child.Type() {

		case "formal_parameters",
			"parameters":

			return string(
				source[
					child.StartByte():
						child.EndByte()
				],
			)
		}
	}

	return ""
}

/*
	Patterns used by normalizeInterface, compiled once at package
	initialisation instead of on every call.
*/
var (
	// blockCommentPattern matches a /* ... */ comment, across lines.
	blockCommentPattern = regexp.MustCompile(`/\*[\s\S]*?\*/`)

	// lineCommentPattern matches from "//" or "#" to the end of the line.
	lineCommentPattern = regexp.MustCompile(`//.*|#.*`)

	// whitespacePattern matches any run of whitespace.
	whitespacePattern = regexp.MustCompile(`\s+`)
)

/*
	normalizeInterface strips comments and whitespace from a raw
	parameter list so that formatting-only edits produce the same text.

	Steps, in order:
		1. remove block comments
		2. remove line comments ("//" and "#") up to the end of the line
		3. remove every run of whitespace

	Whitespace is removed entirely, not collapsed to one space. A "//" or
	"#" inside a string literal (for example a default value) is treated
	as a comment start as well.
*/
func normalizeInterface(
	parameters string,
) string {

	parameters = blockCommentPattern.ReplaceAllString(parameters, "")
	parameters = lineCommentPattern.ReplaceAllString(parameters, "")
	parameters = whitespacePattern.ReplaceAllString(parameters, "")

	return strings.TrimSpace(parameters)
}

/*
	hashSignature returns the SHA-1 digest of the normalized interface
	text as a lowercase hexadecimal string.
*/
func hashSignature(
	normalized string,
) string {

	digest := sha1.New()

	/* @HINT:
		Writing to a hash never returns an error.
	*/
	digest.Write([]byte(normalized))

	return hex.EncodeToString(digest.Sum(nil))
}

/*
	Import-alias patterns used by extractAliases, compiled once at
	package initialisation instead of on every call (and, for the grouped
	Go pattern, instead of once per import line).
*/
var (
	// import numpy as np
	pythonImportAliasPattern = regexp.MustCompile(
		`import\s+([a-zA-Z0-9_.]+)\s+as\s+([a-zA-Z0-9_]+)`,
	)

	// from package import module as alias
	pythonFromImportPattern = regexp.MustCompile(
		`from\s+([a-zA-Z0-9_.]+)\s+import\s+([a-zA-Z0-9_]+)\s+as\s+([a-zA-Z0-9_]+)`,
	)

	// import * as api from "./api"
	jsNamespaceImportPattern = regexp.MustCompile(
		`import\s+\*\s+as\s+([a-zA-Z0-9_]+)\s+from\s+['"]([^'"]+)['"]`,
	)

	// import { something as alias } from ...
	jsNamedImportPattern = regexp.MustCompile(
		`import\s+\{([^}]+)\}\s+from`,
	)

	// use Vendor\Service as Svc;
	phpUseAliasPattern = regexp.MustCompile(
		`use\s+([a-zA-Z0-9_\\]+)\s+as\s+([a-zA-Z0-9_]+)`,
	)

	// import svc "project/service"
	goSingleImportPattern = regexp.MustCompile(
		`import\s+([a-zA-Z0-9_]+)\s+"([^"]+)"`,
	)

	// import ( ... )
	goImportGroupPattern = regexp.MustCompile(
		`import\s*\(([\s\S]*?)\)`,
	)

	// db "project/database"   (one trimmed line inside an import group)
	goGroupedImportPattern = regexp.MustCompile(
		`^([a-zA-Z0-9_\.]+)\s+"([^"]+)"`,
	)
)

/*
	extractAliases scans source text for import aliases and returns a map
	of alias -> original name. The patterns applied depend on extension;
	an unknown extension yields an empty, non-nil map.

	Examples:

	Python (".py"):
		import numpy as np              -> np  : numpy
		from os import path as p        -> p   : os.path

	JavaScript (".js"):
		import * as api from "./api"    -> api : ./api
		import { a as b } from "./x"    -> b   : a

	PHP (".php"):
		use Vendor\Service as Svc;      -> Svc : Vendor\Service

	Go (".go"):
		import svc "project/service"    -> svc : project/service

	Go grouped:
		import (
			db "project/database"       -> db    : project/database
			httpx "project/http"        -> httpx : project/http
		)

	Matching is textual, so import-like text inside comments or string
	literals is picked up too.
*/
func extractAliases(
	source []byte,
	extension string,
) map[string]string {

	results := make(map[string]string)

	content := string(source)

	switch extension {

	case ".py":
		collectPythonAliases(content, results)

	case ".js":
		collectJavaScriptAliases(content, results)

	case ".php":
		collectPHPAliases(content, results)

	case ".go":
		collectGoAliases(content, results)
	}

	return results
}

/*
	collectPythonAliases records "import x as y" and
	"from p import x as y" aliases.
*/
func collectPythonAliases(
	content string,
	results map[string]string,
) {

	for _, match := range pythonImportAliasPattern.FindAllStringSubmatch(content, -1) {
		results[match[2]] = match[1]
	}

	/* @HINT:
		Must run second: the plain pattern above also matches the
		"import x as y" tail of a from-import, and this pass
		overwrites that entry with the fully qualified name.
	*/
	for _, match := range pythonFromImportPattern.FindAllStringSubmatch(content, -1) {
		results[match[3]] = match[1] + "." + match[2]
	}
}

/*
	collectJavaScriptAliases records namespace imports and
	"{ x as y }" named imports.
*/
func collectJavaScriptAliases(
	content string,
	results map[string]string,
) {

	for _, match := range jsNamespaceImportPattern.FindAllStringSubmatch(content, -1) {
		results[match[1]] = strings.TrimSpace(match[2])
	}

	for _, match := range jsNamedImportPattern.FindAllStringSubmatch(content, -1) {

		for _, item := range strings.Split(match[1], ",") {

			/* @HINT:
				Items without " as " split into one part and are
				skipped; so is anything with more than one " as ".
			*/
			parts := strings.Split(strings.TrimSpace(item), " as ")
			if len(parts) != 2 {
				continue
			}

			original := strings.TrimSpace(parts[0])
			alias := strings.TrimSpace(parts[1])

			results[alias] = original
		}
	}
}

/*
	collectPHPAliases records "use X as Y" aliases.
*/
func collectPHPAliases(
	content string,
	results map[string]string,
) {

	for _, match := range phpUseAliasPattern.FindAllStringSubmatch(content, -1) {
		results[match[2]] = match[1]
	}
}

/*
	collectGoAliases records aliased imports from single-line and
	grouped import declarations. Blank ("_") and dot (".") imports are
	skipped because they do not introduce a usable alias.
*/
func collectGoAliases(
	content string,
	results map[string]string,
) {

	record := func(alias, path string) {
		if alias == "_" || alias == "." {
			return
		}
		results[alias] = strings.TrimSpace(path)
	}

	for _, match := range goSingleImportPattern.FindAllStringSubmatch(content, -1) {
		record(match[1], match[2])
	}

	for _, group := range goImportGroupPattern.FindAllStringSubmatch(content, -1) {

		for _, line := range strings.Split(group[1], "\n") {

			/* @HINT:
				Lines without an alias start with a quote and do
				not match; blank lines do not match either.
			*/
			parts := goGroupedImportPattern.FindStringSubmatch(
				strings.TrimSpace(line),
			)
			if parts == nil {
				continue
			}

			record(parts[1], parts[2])
		}
	}
}


/*
	extractResultType extracts the return/result type
	from a function definition node for multiple languages.

	Supported:
		- Go
		- JavaScript / TypeScript
		- Python
		- PHP

	Returns:
		- explicit type if available
		- "void" if no return type exists
		- "unknown" if dynamically inferred
*/
func extractResultType(
	node *sitter.Node,
	source []byte,
) string {

	if node == nil {
		return "unknown"
	}

	/*
		Language-specific Tree-sitter nodes:

		Go:
			result
			parameter_list

		TypeScript:
			type_annotation

		Python:
			type

		PHP:
			primitive_type
			union_type
			named_type
	*/

	for i := 0; i < int(node.NamedChildCount()); i++ {

		child := node.NamedChild(i)

		if child == nil {
			continue
		}

		switch child.Type() {

		/*
			========================
			Go
			========================

			func Add(a int, b int) int
			func GetUser() (*User, error)

			Tree-sitter:
				result
		*/
		case "result":

			resultText := strings.TrimSpace(
				string(
					source[
						child.StartByte():
							child.EndByte()
					],
				),
			)

			if resultText == "" {
				return "void"
			}

			return resultText

		/*
			========================
			TypeScript / JavaScript
			========================

			function x(): string

			Tree-sitter:
				type_annotation
		*/
		case "type_annotation":

			typeText := strings.TrimSpace(
				string(
					source[
						child.StartByte():
							child.EndByte()
					],
				),
			)

			typeText =
				strings.TrimPrefix(
					typeText,
					":",
				)

			typeText =
				strings.TrimSpace(typeText)

			if typeText == "" {
				return "unknown"
			}

			return typeText

		/*
			========================
			Python
			========================

			def add(a: int) -> str:

			Tree-sitter:
				type
		*/
		case "type":

			typeText := strings.TrimSpace(
				string(
					source[
						child.StartByte():
							child.EndByte()
					],
				),
			)

			if typeText == "" {
				return "unknown"
			}

			return typeText

		/*
			========================
			PHP
			========================

			function x(): string
			function x(): User|nil

			Tree-sitter:
				primitive_type
				union_type
				named_type
		*/
		case "primitive_type",
			"named_type",
			"union_type":

			typeText := strings.TrimSpace(
				string(
					source[
						child.StartByte():
							child.EndByte()
					],
				),
			)

			if typeText == "" {
				return "unknown"
			}

			return typeText
		}
	}

	/*
		Fallback heuristics for dynamic languages
		where explicit types may not exist.
	*/

	functionBody := strings.TrimSpace(
		string(
			source[
				node.StartByte():
					node.EndByte()
			],
		),
	)

	/*
		Python / JS / PHP heuristic:
			check if function has return statement
	*/
	if strings.Contains(
		functionBody,
		"return ",
	) {

		/*
			Return exists but type is dynamic
		*/
		return "dynamic"
	}

	return "void"
}


/*
	resolveAliasedName rewrites a name of the form "alias.rest" to
	"original.rest" when alias is a key of aliases. A name that starts
	with no known alias followed by a dot is returned unchanged.
*/
func resolveAliasedName(
	functionName string,
	aliases map[string]string,
) string {

	for alias, original := range aliases {

		if !strings.HasPrefix(functionName, alias+".") {
			continue
		}

		/* @HINT:
			The alias is a prefix here, so swapping it means
			keeping everything after its length.
		*/
		return original + functionName[len(alias):]
	}

	return functionName
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment