Created
April 22, 2024 19:24
-
-
Save yunginnanet/98cdc52223a40a99ebe766e532e9f601 to your computer and use it in GitHub Desktop.
Used to extract corrupt or broken base64-encoded attachments from .eml message files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"encoding/base64" | |
"encoding/hex" | |
"io" | |
"os" | |
"path/filepath" | |
"strconv" | |
"strings" | |
"sync" | |
) | |
// seen is the set of output file names that have already been handed out.
// Access it only while holding seenMu.
var seen = make(map[string]struct{})

// seenMu guards seen; extract may run in several goroutines at once.
var seenMu sync.RWMutex

// concurrent selects whether main starts every extract call in its own
// goroutine. It defaults to true and is cleared by the --slow argument.
var concurrent = true
func main() { | |
_ = os.MkdirAll("output", 0755) | |
wg := &sync.WaitGroup{} | |
for _, arg := range os.Args[1:] { | |
if arg == "--slow" { | |
concurrent = false | |
continue | |
} | |
f := open(arg) | |
if concurrent { | |
wg.Add(1) | |
go extract(f, wg) | |
} else { | |
extract(f, wg) | |
} | |
} | |
if concurrent { | |
wg.Wait() | |
} | |
} | |
func extract(f io.ReadCloser, wg *sync.WaitGroup) { | |
defer func() { | |
_ = f.Close() | |
if concurrent { | |
wg.Done() | |
} | |
}() | |
xerox := bufio.NewScanner(f) | |
boundaries := map[string]struct{}{} | |
msgID := "" | |
contentType := "" | |
count := 0 | |
inFile := false | |
totalFound := 0 | |
fileBuf := bytes.Buffer{} | |
dumpFile := func() { | |
countStr := "" | |
if count > 1 { | |
countStr = "-" + strconv.Itoa(count) | |
} | |
fname := msgID + countStr + "." + contentType | |
fname = strings.TrimSuffix(fname, "\"") | |
dupStr := "" | |
dupInt := 0 | |
for { | |
if dupInt > 0 { | |
dupStr = strconv.Itoa(dupInt) | |
if !strings.Contains(fname, ".") { | |
fname += "-" + dupStr | |
} else { | |
spl := strings.Split(fname, ".") | |
fname = spl[0] + "-" + dupStr + spl[1] | |
} | |
} | |
seenMu.RLock() | |
if _, ok := seen[fname]; !ok { | |
seenMu.RUnlock() | |
seenMu.Lock() | |
seen[fname] = struct{}{} | |
seenMu.Unlock() | |
break | |
} | |
seenMu.RUnlock() | |
dupInt++ | |
} | |
datStr := string(fileBuf.Bytes()) | |
dat, err := base64.StdEncoding.DecodeString(datStr) | |
if err != nil { | |
println("\tbase64: " + err.Error()) | |
if corruption, ok := err.(base64.CorruptInputError); ok { | |
println("\tdumping remainder after corruption detected...") | |
_, _ = os.Stderr.WriteString(hex.Dump(fileBuf.Bytes()[int64(corruption):])) | |
} | |
} | |
totalFound += len(fileBuf.Bytes()) | |
if err := os.WriteFile(filepath.Join("output", fname), dat, 0666); err != nil { | |
panic(err.Error()) | |
} | |
println("\twrote " + fname) | |
fileBuf.Reset() | |
} | |
totalLen := 0 | |
for xerox.Scan() { | |
totalLen += len(xerox.Bytes()) | |
switch { | |
case inFile: | |
blankLine := strings.TrimSpace(xerox.Text()) == "" | |
_, boundaryFound := boundaries[strings.ReplaceAll(xerox.Text(), "-", "")] | |
if !boundaryFound { | |
for b := range boundaries { | |
if strings.Contains(xerox.Text(), b) { | |
boundaryFound = true | |
} | |
} | |
} | |
if blankLine || boundaryFound || strings.Contains(xerox.Text(), "--") { | |
inFile = false | |
count++ | |
dumpFile() | |
} else { | |
_, _ = fileBuf.WriteString(xerox.Text()) | |
} | |
case strings.Contains(xerox.Text(), "Content-Type"): | |
txt := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(xerox.Text())), "content-type: ") | |
txt = strings.TrimSpace(strings.TrimSuffix(txt, ";")) | |
if !strings.Contains(txt, "text/plain") && strings.Contains(txt, "/") { | |
contentType = strings.Split(txt, "/")[1] | |
} | |
case strings.Contains(strings.ToLower(xerox.Text()), "message-id"): | |
txt := strings.TrimPrefix(strings.TrimSpace(xerox.Text()), "Message-Id: <") | |
txt = strings.TrimPrefix(txt, "Message-ID: <") | |
txt = strings.ReplaceAll(txt, "\"", "") | |
txt = strings.ReplaceAll(txt, "=", "") | |
txt = strings.ReplaceAll(txt, ";", "") | |
switch { | |
case strings.Contains(txt, "@"): | |
msgID = strings.Split(txt, "@")[0] | |
default: | |
msgID = strings.Split(txt, ">")[0] | |
} | |
case strings.Contains(strings.ToLower(xerox.Text()), "boundary="): | |
bnd := strings.ReplaceAll(strings.Split(xerox.Text(), "boundary=")[1], "\"", "") | |
bnd = strings.TrimSpace(bnd) | |
if len(bnd) > 0 { | |
boundaries[bnd] = struct{}{} | |
} | |
case strings.Contains(strings.ToLower(xerox.Text()), ": base64"): | |
inFile = true | |
for { | |
if !xerox.Scan() || xerox.Err() != nil { | |
if xerox.Err() == nil { | |
panic("eof while looking for newline after attachment start!") | |
} | |
println("\t" + xerox.Err().Error() + "while waiting for newline after attachment start") | |
break | |
} | |
if txt := strings.TrimSpace(xerox.Text()); txt != "" { | |
println("\twarn: missing newline after boundary?") | |
println("\tinstead of newline: " + txt) | |
// _, _ = fileBuf.WriteString(txt) | |
} else { | |
break | |
} | |
} | |
} | |
} | |
if inFile || fileBuf.Len() != 0 { | |
println("\terr: ohp! still in file but never got boundary or blank line") | |
count++ | |
dumpFile() | |
} | |
if totalLen-totalFound > 20000 { | |
println(msgID + " lots of leftover!!") | |
} | |
} | |
// open announces the path it is about to read and returns the opened file.
// If the file cannot be opened, the error is printed and the process exits
// with status 1, so callers never receive a nil reader.
func open(fs string) io.ReadCloser {
	println("open: " + fs)
	handle, err := os.Open(fs)
	if err == nil {
		return handle
	}
	println(err.Error())
	os.Exit(1)
	return nil // not reached; os.Exit does not return
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment