|
import fs from "node:fs"; |
|
import path from "node:path"; |
|
import { getOutputFilePath, digestSha256, toDisplay } from "./util"; |
|
import { EmlFileMaxSize, FromPattern, getMboxFile, getOutputDir } from "./config"; |
|
import { existsFile, writeFile } from "./fs"; |
|
import { parseEmlData, toJsonText } from "./email"; |
|
|
|
/**
 * Persists a single raw message unless an identical one was already stored.
 *
 * The SHA-256 digest of the raw bytes addresses a marker file at
 * `<baseDirPath>/exists/<first two digest characters>/<remaining characters>`.
 * When that marker exists nothing is done. Otherwise the message is parsed,
 * the raw bytes are written below `mail/` with an `.eml` suffix, the JSON
 * rendering is written below `json/` with a `.json` suffix, and finally the
 * marker is written with empty content.
 *
 * @param eml - Raw bytes of one message.
 * @param baseDirPath - Directory under which `exists/`, `mail/` and `json/` are resolved.
 */
const onDetectionEml = async (eml: Buffer, baseDirPath: string): Promise<void> => {
  const digest = digestSha256(eml);
  const markerPath = path.resolve(baseDirPath, "exists", [digest.slice(0, 2), digest.slice(2)].join("/"));

  const alreadyStored = await existsFile(markerPath);
  if (!alreadyStored) {
    const mail = await parseEmlData(eml);

    // Builds "<baseDirPath>/<kind>/<output path of the mail><extension>".
    const outputPath = (kind: string, extension: string): string =>
      `${path.resolve(baseDirPath, kind, `${getOutputFilePath(mail)}`)}${extension}`;

    await writeFile(outputPath("mail", ".eml"), eml);
    await writeFile(outputPath("json", ".json"), toJsonText(mail));

    // The marker goes last so that it is only present once both outputs were written.
    await writeFile(markerPath, "");
  }
};
|
|
|
/**
 * Splits the configured mbox file into individual messages and passes each one
 * to `onDetectionEml`, printing progress every 1000 messages and a summary at
 * the end.
 *
 * The file is streamed chunk by chunk. Unconsumed bytes are accumulated in a
 * buffer that is searched for `FromPattern`; everything before a match is one
 * message. Messages are processed one at a time and awaited, so the stream is
 * only read as fast as messages can be stored, failures reject the returned
 * promise, and the summary is printed after all output has been written.
 *
 * @returns A promise that settles once every message has been processed.
 * @throws Error when a single message is larger than `EmlFileMaxSize` bytes.
 *         Stream read errors and `onDetectionEml` failures propagate as rejections.
 */
const main = async (): Promise<void> => {
  const mboxFilePath = getMboxFile();
  const outputDir = getOutputDir();
  const stream = fs.createReadStream(mboxFilePath, { start: 0 });

  let readBytes = 0;
  let emlCount = 0;
  let minEmlSize = Number.POSITIVE_INFINITY;
  // Sizes are never negative, so 0 is the correct identity for a running maximum
  // (Number.MIN_VALUE is the smallest positive double, not the most negative number).
  let maxEmlSize = 0;
  let buf: Buffer = Buffer.alloc(0);

  /** Rejects oversized messages. */
  const assertEmlSize = (size: number): void => {
    if (size > EmlFileMaxSize) throw new Error(`Eml size too large: >= ${size} Bytes`);
  };

  /** Folds one message size into the running min/max statistics. */
  const recordEmlSize = (size: number): void => {
    minEmlSize = Math.min(minEmlSize, size);
    maxEmlSize = Math.max(maxEmlSize, size);
  };

  // Async iteration pauses the stream while a message is being stored and turns
  // stream errors into rejections; leaving the loop early destroys the stream.
  for await (const chunk of stream as AsyncIterable<Buffer | string>) {
    // No encoding is set on the stream, so chunks are expected to be Buffers;
    // strings are converted so that all sizes are measured in bytes.
    const data = typeof chunk === "string" ? Buffer.from(chunk) : chunk;
    readBytes += data.length;
    buf = Buffer.concat([buf, data]);

    let fromPosition = buf.indexOf(FromPattern);
    while (fromPosition !== -1) {
      assertEmlSize(fromPosition);

      if (++emlCount % 1000 === 0) {
        // `readBytes` already contains everything still held in `buf`, so the
        // file offset of the match is the start of `buf` plus the match position.
        const offset = readBytes - buf.length + fromPosition;
        const bytes = toDisplay(offset, 15);
        const files = toDisplay(emlCount, 8);
        const average = toDisplay(Math.round(offset / emlCount), 11);
        console.log(`Read: ${files} files, ${bytes} bytes, ${average} bytes/file (average)`);
      }

      recordEmlSize(fromPosition);
      await onDetectionEml(buf.subarray(0, fromPosition), outputDir);

      // Resume one byte after the start of the match so the same match is not
      // found again. NOTE(review): presumably FromPattern begins with a line
      // break so that the next message starts at its "From " line — confirm in ./config.
      buf = buf.subarray(fromPosition + 1);
      fromPosition = buf.indexOf(FromPattern);
    }

    // What is left contains no separator, so it is all part of one message.
    assertEmlSize(buf.length);
  }

  // Everything after the last separator is the final message; it is counted in
  // the total, so it also takes part in the min/max statistics.
  recordEmlSize(buf.length);
  await onDetectionEml(buf, outputDir);
  console.log(`END: Total ${toDisplay(++emlCount)} files, Total ${toDisplay(readBytes)} bytes, Min: ${toDisplay(minEmlSize)} bytes, Max: ${toDisplay(maxEmlSize)} bytes`);
};
|
|
|
// Script entry point: run the conversion and make sure a rejected promise is
// reported and reflected in the process exit code instead of being left unhandled.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});