Created
April 28, 2026 19:02
-
-
Save bozdoganCihangir/29142081dc8839915699f833a729e3d9 to your computer and use it in GitHub Desktop.
otscry.cpp — Heuristic Provenance & Ownership Inference for macOS Dotfiles
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // scan-home-dotfiles.cpp | |
| // Smart macOS dotfile auditor — no hardcoded tech list. | |
| // Infers ownership/category from filesystem state, Spotlight metadata, | |
| // extended attributes, package-manager presence, and content fingerprints. | |
| // | |
| // Build: | |
| // clang++ -std=c++17 -O2 -o scan-home-dotfiles scan-home-dotfiles.cpp | |
| // Run: | |
| // ./scan-home-dotfiles # default table | |
| // ./scan-home-dotfiles --stale-days 180 | |
| // ./scan-home-dotfiles --sort size|age|name | |
| // ./scan-home-dotfiles --format table|json|csv | |
| // ./scan-home-dotfiles --check-open # use lsof to detect live files (slow) | |
| // ./scan-home-dotfiles --no-size # skip recursive du | |
| // ./scan-home-dotfiles --max-depth N # content fingerprint depth (default 2) | |
| #include <algorithm> | |
| #include <array> | |
| #include <chrono> | |
| #include <cstdio> | |
| #include <cstdlib> | |
| #include <cstring> | |
| #include <ctime> | |
| #include <filesystem> | |
| #include <fstream> | |
| #include <iomanip> | |
| #include <iostream> | |
| #include <map> | |
| #include <optional> | |
| #include <sstream> | |
| #include <string> | |
| #include <sys/stat.h> | |
| #include <sys/xattr.h> | |
| #include <unistd.h> | |
| #include <unordered_map> | |
| #include <unordered_set> | |
| #include <vector> | |
| namespace fs = std::filesystem; | |
| using std::string; | |
| using std::vector; | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // CLI | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Command-line options. Each initializer is the value used when the
/// corresponding flag is not given on the command line.
struct Opts {
    int    stale_days{90};       // --stale-days N
    string sort_by{"name"};      // --sort: name | size | age
    string format{"table"};      // --format: table | json | csv
    bool   compute_size{true};   // cleared by --no-size
    bool   check_open{false};    // set by --check-open
    int    max_depth{2};         // --max-depth N
    bool   color{true};          // cleared by --no-color
};
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Shell helper — popen and capture stdout (trimmed) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Runs `cmd` through popen() and returns everything it wrote to stdout,
/// with trailing '\n' / '\r' characters removed.
/// Returns an empty string when the pipe cannot be opened.
static string sh(const string& cmd) {
    string captured;
    FILE* pipe = popen(cmd.c_str(), "r");
    if (pipe == nullptr) return captured;

    std::array<char, 4096> chunk{};
    while (fgets(chunk.data(), chunk.size(), pipe) != nullptr) {
        captured += chunk.data();
    }
    pclose(pipe);

    // Drop the trailing run of line terminators (or everything, if that is all there is).
    const auto last_kept = captured.find_last_not_of("\r\n");
    captured.erase(last_kept == string::npos ? 0 : last_kept + 1);
    return captured;
}
/// True when stat(2) succeeds on `p` (symlinks are followed by stat).
static bool path_exists(const string& p) {
    struct stat info{};
    const int rc = ::stat(p.c_str(), &info);
    return rc == 0;
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Extended-attribute reader (macOS) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static string xattr_read(const fs::path& p, const char* name) { | |
| ssize_t sz = getxattr(p.c_str(), name, nullptr, 0, 0, XATTR_NOFOLLOW); | |
| if (sz <= 0) return {}; | |
| string buf(static_cast<size_t>(sz), '\0'); | |
| sz = getxattr(p.c_str(), name, buf.data(), buf.size(), 0, XATTR_NOFOLLOW); | |
| if (sz <= 0) return {}; | |
| buf.resize(static_cast<size_t>(sz)); | |
| return buf; | |
| } | |
| static bool has_quarantine(const fs::path& p) { | |
| return getxattr(p.c_str(), "com.apple.quarantine", nullptr, 0, 0, XATTR_NOFOLLOW) > 0; | |
| } | |
| // kMDItemWhereFroms is a binary plist; extract printable URLs heuristically | |
| static vector<string> where_froms(const fs::path& p) { | |
| string raw = xattr_read(p, "com.apple.metadata:kMDItemWhereFroms"); | |
| vector<string> urls; | |
| if (raw.empty()) return urls; | |
| string cur; | |
| for (char c : raw) { | |
| if (std::isprint(static_cast<unsigned char>(c)) && c != '\t') cur.push_back(c); | |
| else { | |
| if (cur.size() >= 8 && (cur.rfind("http", 0) == 0 || cur.rfind("file:", 0) == 0)) | |
| urls.push_back(cur); | |
| cur.clear(); | |
| } | |
| } | |
| if (cur.size() >= 8 && cur.rfind("http", 0) == 0) urls.push_back(cur); | |
| return urls; | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Spotlight metadata (mdls) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Subset of Spotlight attributes read from `mdls` output for one path.
struct MdMeta {
    string content_type{};  // kMDItemContentType, e.g. public.folder
    string kind{};          // kMDItemKind (human-readable)
    string creator_code{};  // kMDItemFSCreatorCode (legacy 4-char creator)
    string creator_app{};   // not populated by mdls_meta
};
| static MdMeta mdls_meta(const fs::path& p) { | |
| MdMeta m; | |
| string out = sh("mdls -name kMDItemContentType -name kMDItemKind -name kMDItemFSCreatorCode " | |
| "\"" + string(p) + "\" 2>/dev/null"); | |
| auto field = [&](const string& key) -> string { | |
| auto pos = out.find(key); | |
| if (pos == string::npos) return {}; | |
| auto eq = out.find('=', pos); | |
| if (eq == string::npos) return {}; | |
| auto nl = out.find('\n', eq); | |
| string v = out.substr(eq + 1, nl - eq - 1); | |
| // trim quotes & spaces | |
| while (!v.empty() && (v.front() == ' ' || v.front() == '"')) v.erase(v.begin()); | |
| while (!v.empty() && (v.back() == ' ' || v.back() == '"')) v.pop_back(); | |
| if (v == "(null)") v.clear(); | |
| return v; | |
| }; | |
| m.content_type = field("kMDItemContentType"); | |
| m.kind = field("kMDItemKind"); | |
| m.creator_code = field("kMDItemFSCreatorCode"); | |
| return m; | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // PATH / Applications / Homebrew probes — auto-discovery, no hardcoded list | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Splits `s` on `sep` and returns the non-empty pieces in order.
/// Empty segments (leading, trailing, or doubled separators) are dropped.
static vector<string> split_path(const string& s, char sep = ':') {
    vector<string> parts;
    size_t begin = 0;
    while (begin <= s.size()) {
        size_t end = s.find(sep, begin);
        if (end == string::npos) end = s.size();
        if (end > begin) parts.emplace_back(s, begin, end - begin);
        begin = end + 1;
    }
    return parts;
}
| static std::optional<string> find_in_path(const string& name) { | |
| const char* env = std::getenv("PATH"); | |
| if (!env) return std::nullopt; | |
| for (const auto& dir : split_path(env)) { | |
| fs::path bin = fs::path(dir) / name; | |
| if (path_exists(bin) && access(bin.c_str(), X_OK) == 0) return bin.string(); | |
| } | |
| return std::nullopt; | |
| } | |
/// Searches /Applications and /System/Applications for a "*.app" bundle whose
/// name (without the ".app" suffix) contains `stem`, compared case-insensitively.
/// Returns the bundle path of the first match, or nullopt. Directories that are
/// missing or unreadable are skipped rather than raising an exception.
static std::optional<string> find_app_bundle(const string& stem) {
    // An empty needle is a substring of every name and would match the first bundle.
    if (stem.empty()) return std::nullopt;

    const auto to_lower = [](string s) {
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return s;
    };
    const string needle = to_lower(stem);
    const string suffix = ".app";

    for (const char* root : { "/Applications", "/System/Applications" }) {
        // error_code overloads: a missing or unreadable root must not throw and
        // abort the whole scan.
        std::error_code ec;
        fs::directory_iterator it(root, fs::directory_options::skip_permission_denied, ec);
        if (ec) continue;

        for (const fs::directory_iterator end; it != end; it.increment(ec)) {
            if (ec) break;
            const string fn = it->path().filename().string();
            if (fn.size() <= suffix.size()) continue;
            if (fn.compare(fn.size() - suffix.size(), suffix.size(), suffix) != 0) continue;

            const string base = to_lower(fn.substr(0, fn.size() - suffix.size()));
            // Substring match; an exact match is just the special case pos == 0
            // with equal lengths.
            if (base.find(needle) != string::npos) return it->path().string();
        }
    }
    return std::nullopt;
}
/// Checks the Homebrew Cellar/Caskroom directories (Apple Silicon and Intel
/// prefixes) for an entry named `stem`. Returns the first existing path, or
/// nullopt. Filesystem errors count as "not found" instead of throwing.
static std::optional<string> find_brew_pkg(const string& stem) {
    // "", "." and ".." would resolve to the Cellar itself or its parent and
    // report a package that does not exist.
    if (stem.empty() || stem == "." || stem == "..") return std::nullopt;

    for (const char* root : { "/opt/homebrew/Cellar", "/usr/local/Cellar",
                              "/opt/homebrew/Caskroom", "/usr/local/Caskroom" }) {
        std::error_code ec;
        const fs::path candidate = fs::path(root) / stem;
        // If the root is missing the candidate is too, so one check is enough.
        if (fs::exists(candidate, ec)) return candidate.string();
    }
    return std::nullopt;
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Directory content fingerprint — what kind of dir is this? | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Counters describing the contents of a directory tree; filled in by walk_dir.
struct DirSig {
    uint64_t total_bytes = 0;   // sum of file sizes (only when sizing is requested)
    uint64_t file_count = 0;    // non-directory entries visited
    uint64_t dir_count = 0;     // directory entries visited
    uint64_t cache_hits = 0;    // directories whose name contains "cache" (case-insensitive)
    uint64_t lock_hits = 0;     // *.lock, *.lck
    uint64_t db_hits = 0;       // *.sqlite, *.sqlite3, *.db, *.ldb
    uint64_t config_hits = 0;   // config.{json,yaml,yml,toml,ini} and settings.json
    uint64_t log_hits = 0;      // *.log
    string dominant_ext;        // most common extension (ties: lexicographically smallest)
    int max_depth = 0;          // deepest level seen; `root` is level `depth`, its children depth + 1
};

/// Walks `root` and accumulates counters into `s`.
///
/// @param root      directory to scan
/// @param s         signature to update (counters are added to, not reset)
/// @param depth     level number assigned to `root` itself
/// @param limit     entries whose iterator depth is >= limit are not descended into
/// @param size_too  when true, file sizes are summed into s.total_bytes
///
/// Unreadable directories are skipped. The walk stops once more than 50000
/// files have been counted.
static void walk_dir(const fs::path& root, DirSig& s, int depth, int limit, bool size_too) {
    std::error_code ec;
    s.max_depth = std::max(s.max_depth, depth);
    std::unordered_map<string, uint64_t> ext_count;

    const fs::recursive_directory_iterator end;
    for (fs::recursive_directory_iterator it(
             root, fs::directory_options::skip_permission_denied, ec);
         it != end; it.increment(ec)) {
        if (ec) { ec.clear(); continue; }
        if (it.depth() >= limit) it.disable_recursion_pending();

        // it.depth() is 0 for direct children of root, hence the + 1.
        s.max_depth = std::max(s.max_depth, depth + it.depth() + 1);

        const auto& p = it->path();
        string fn_lower = p.filename().string();
        std::transform(fn_lower.begin(), fn_lower.end(), fn_lower.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        std::error_code type_ec;  // kept separate so a failed type query cannot leak into the loop's ec
        if (it->is_directory(type_ec)) {
            s.dir_count++;
            if (fn_lower.find("cache") != string::npos) s.cache_hits++;
            continue;
        }

        s.file_count++;
        if (size_too) {
            std::error_code se;
            const auto sz = it->file_size(se);
            if (!se) s.total_bytes += sz;  // on error file_size returns a sentinel, not a size
        }

        // Extension tally
        const string ext = p.extension().string();
        if (!ext.empty()) ext_count[ext]++;
        if (ext == ".lock" || ext == ".lck") s.lock_hits++;
        else if (ext == ".sqlite" || ext == ".db" || ext == ".ldb" || ext == ".sqlite3") s.db_hits++;
        else if (ext == ".log") s.log_hits++;

        if (fn_lower == "config.json" || fn_lower == "config.yaml" ||
            fn_lower == "config.yml"  || fn_lower == "config.toml" ||
            fn_lower == "config.ini"  || fn_lower == "settings.json")
            s.config_hits++;

        // Hard cap to keep huge dirs fast
        if (s.file_count > 50000) break;
    }

    // Pick the most frequent extension. Ties are broken by name so the result
    // does not depend on hash-map iteration order.
    uint64_t best = 0;
    for (const auto& [ext, count] : ext_count) {
        if (count > best || (count == best && ext < s.dominant_ext)) {
            best = count;
            s.dominant_ext = ext;
        }
    }
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Live-process check via lsof (optional, slow) | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| static bool is_open_now(const fs::path& p) { | |
| string out = sh("lsof +D \"" + string(p) + "\" 2>/dev/null | head -1"); | |
| return !out.empty(); | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Inference engine | |
| // ───────────────────────────────────────────────────────────────────────────── | |
/// Result of classifying one dotfile entry.
struct Inference {
    // One of: system | shell | tool | app | brew | downloaded
    //       | likely-cache | likely-config | likely-state | orphan | unknown
    string category{};
    string owner_hint{};         // path of the binary/app/package believed to own the entry, if any
    double confidence{0.0};      // 0–1
    vector<string> reasons{};    // human-readable signals, in the order they were found
};
/// Entry names that infer() classifies as "system" on an exact match.
static const std::unordered_set<string> APPLE_EXACT = {
    ".CFUserTextEncoding",
    ".DS_Store",
    ".Trash",
    ".Spotlight-V100",
    ".fseventsd",
    ".DocumentRevisions-V100",
    ".TemporaryItems",
    ".PKInstallSandboxManager",
    ".PKInstallSandboxManager-SystemSoftware",
    ".com.apple.timemachine.donotpresent",
    ".apdisk",
    ".file",
    ".vol",
};
| // Pattern detectors only — no app-specific names | |
/// True when `name`, ignoring one leading '.', ends with one of the suffixes
/// "_history", "_sessions", "_session", "rc", "profile" or "env".
static bool looks_shell_state(const string& name) {
    static const char* const kSuffixes[] = { "_history", "_sessions", "_session",
                                             "rc", "profile", "env" };

    // Length of the name once a single leading dot is discounted.
    const size_t skip = (!name.empty() && name.front() == '.') ? 1 : 0;
    const size_t usable = name.size() - skip;

    for (const char* suffix : kSuffixes) {
        const size_t len = std::strlen(suffix);
        if (usable < len) continue;
        if (name.compare(name.size() - len, len, suffix) == 0) return true;
    }
    return false;
}
| static Inference infer(const fs::path& entry, const Opts& opts) { | |
| Inference out; | |
| string fn = entry.filename().string(); | |
| string stem = (fn.size() > 1 && fn.front() == '.') ? fn.substr(1) : fn; | |
| // 1. Apple-shipped exact match | |
| if (APPLE_EXACT.count(fn)) { | |
| out.category = "system"; | |
| out.confidence = 1.0; | |
| out.reasons.push_back("matches Apple-shipped name"); | |
| return out; | |
| } | |
| // 2. Shell-state pattern | |
| if (looks_shell_state(fn)) { | |
| out.category = "shell"; | |
| out.confidence = 0.9; | |
| out.reasons.push_back("name matches shell-state pattern"); | |
| out.owner_hint = "shell runtime"; | |
| } | |
| // 3. Provenance — was this downloaded? | |
| auto urls = where_froms(entry); | |
| bool quarantined = has_quarantine(entry); | |
| if (!urls.empty() || quarantined) { | |
| if (out.category.empty()) out.category = "downloaded"; | |
| out.confidence = std::max(out.confidence, 0.7); | |
| if (!urls.empty()) out.reasons.push_back("WhereFroms: " + urls.front()); | |
| if (quarantined) out.reasons.push_back("quarantine xattr present"); | |
| } | |
| // 4. Tool/app discovery — strip leading dot, look in PATH / Applications / Homebrew | |
| if (auto bin = find_in_path(stem)) { | |
| if (out.category.empty() || out.category == "downloaded") out.category = "tool"; | |
| out.owner_hint = *bin; | |
| out.confidence = std::max(out.confidence, 0.85); | |
| out.reasons.push_back("binary in PATH: " + *bin); | |
| } else if (auto app = find_app_bundle(stem)) { | |
| if (out.category.empty() || out.category == "downloaded") out.category = "app"; | |
| out.owner_hint = *app; | |
| out.confidence = std::max(out.confidence, 0.8); | |
| out.reasons.push_back("app bundle: " + *app); | |
| } else if (auto pkg = find_brew_pkg(stem)) { | |
| if (out.category.empty() || out.category == "downloaded") out.category = "brew"; | |
| out.owner_hint = *pkg; | |
| out.confidence = std::max(out.confidence, 0.85); | |
| out.reasons.push_back("homebrew package: " + *pkg); | |
| } else if (out.category.empty() && stem.size() > 1) { | |
| // Name looks like a tool (lowercase letters) but no binary anywhere | |
| bool tool_like = std::all_of(stem.begin(), stem.end(), | |
| [](unsigned char c) { return std::isalnum(c) || c == '-' || c == '_' || c == '.'; }); | |
| if (tool_like) { | |
| out.category = "orphan"; | |
| out.confidence = 0.5; | |
| out.reasons.push_back("name resembles a tool but no binary/app/pkg found"); | |
| } | |
| } | |
| // 5. Spotlight content type (for macOS-managed types we missed) | |
| auto md = mdls_meta(entry); | |
| if (!md.content_type.empty() && md.content_type.rfind("com.apple.", 0) == 0 | |
| && out.category.empty()) { | |
| out.category = "system"; | |
| out.confidence = std::max(out.confidence, 0.75); | |
| out.reasons.push_back("Spotlight content-type: " + md.content_type); | |
| } | |
| // 6. Directory content fingerprint refines category | |
| if (fs::is_directory(entry)) { | |
| DirSig sig; | |
| walk_dir(entry, sig, 0, opts.max_depth, opts.compute_size); | |
| if (sig.cache_hits > 0 || (sig.dominant_ext == ".cache")) { | |
| if (out.category.empty() || out.category == "unknown") out.category = "likely-cache"; | |
| out.reasons.push_back("contains cache dirs/files"); | |
| out.confidence = std::max(out.confidence, 0.7); | |
| } | |
| if (sig.config_hits > 0) { | |
| if (out.category.empty() || out.category == "unknown") out.category = "likely-config"; | |
| out.reasons.push_back("contains config.{json,yaml,toml,...}"); | |
| out.confidence = std::max(out.confidence, 0.7); | |
| } | |
| if (sig.db_hits > 0) { | |
| if (out.category.empty() || out.category == "unknown") out.category = "likely-state"; | |
| out.reasons.push_back("contains sqlite/db files (" + | |
| std::to_string(sig.db_hits) + ")"); | |
| out.confidence = std::max(out.confidence, 0.7); | |
| } | |
| if (sig.lock_hits > 0) | |
| out.reasons.push_back("has lock files (" + std::to_string(sig.lock_hits) + ")"); | |
| if (sig.log_hits > 0) | |
| out.reasons.push_back("has log files (" + std::to_string(sig.log_hits) + ")"); | |
| if (!sig.dominant_ext.empty()) | |
| out.reasons.push_back("dominant ext: " + sig.dominant_ext); | |
| out.reasons.push_back(std::to_string(sig.file_count) + " files / " | |
| + std::to_string(sig.dir_count) + " dirs / depth " | |
| + std::to_string(sig.max_depth)); | |
| } | |
| // 7. Open-file probe (optional — slow) | |
| if (opts.check_open && is_open_now(entry)) { | |
| out.reasons.push_back("currently open by a live process"); | |
| out.confidence = std::max(out.confidence, 0.95); | |
| } | |
| if (out.category.empty()) { | |
| out.category = "unknown"; | |
| out.confidence = 0.2; | |
| out.reasons.push_back("no signals matched"); | |
| } | |
| return out; | |
| } | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Entry record + utilities | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| struct Row { | |
| string name; | |
| string type; // dir | file | link | other | |
| uint64_t bytes = 0; | |
| int64_t age_days = 0; | |
| time_t atime = 0; | |
| time_t mtime = 0; | |
| bool stale = false; | |
| Inference inf; | |
| }; | |
/// Formats a byte count with one decimal and a binary-unit suffix
/// (G, M, K), or as a plain integer followed by "B" below 1 KiB.
static string human_size(uint64_t b) {
    struct Unit { unsigned shift; char suffix; };
    static const Unit kUnits[] = { {30, 'G'}, {20, 'M'}, {10, 'K'} };

    char text[32];
    for (const Unit& unit : kUnits) {
        const uint64_t scale = 1ULL << unit.shift;
        if (b < scale) continue;
        std::snprintf(text, sizeof text, "%.1f%c", b / static_cast<double>(scale), unit.suffix);
        return text;
    }
    std::snprintf(text, sizeof text, "%lluB", static_cast<unsigned long long>(b));
    return text;
}
/// Formats `t` as YYYY-MM-DD in the local time zone.
static string fmt_date(time_t t) {
    std::tm local{};
    localtime_r(&t, &local);

    char text[16];
    std::strftime(text, sizeof text, "%Y-%m-%d", &local);
    return string(text);
}
/// Returns the size in bytes of `p`: the file size for a regular file,
/// otherwise the sum of all regular files found by walking `p` recursively.
/// Returns 0 when `compute` is false or when `p` cannot be read. Files whose
/// size cannot be queried are left out of the total.
static uint64_t entry_size(const fs::path& p, bool compute) {
    if (!compute) return 0;

    std::error_code ec;
    if (fs::is_regular_file(p, ec)) {
        const auto bytes = fs::file_size(p, ec);
        return ec ? 0 : bytes;
    }

    uint64_t total = 0;
    const fs::recursive_directory_iterator end;
    for (fs::recursive_directory_iterator it(
             p, fs::directory_options::skip_permission_denied, ec);
         it != end; it.increment(ec)) {
        if (ec) { ec.clear(); continue; }

        std::error_code se;
        if (!it->is_regular_file(se) || se) continue;
        // On failure file_size() returns uintmax_t(-1); adding that would
        // corrupt the running total, so only add on success.
        const auto bytes = it->file_size(se);
        if (!se) total += bytes;
    }
    return total;
}
| // ───────────────────────────────────────────────────────────────────────────── | |
| // Main | |
| // ───────────────────────────────────────────────────────────────────────────── | |
| int main(int argc, char** argv) { | |
| Opts o; | |
| for (int i = 1; i < argc; ++i) { | |
| string a = argv[i]; | |
| auto next = [&](const char* msg) -> string { | |
| if (i + 1 >= argc) { std::cerr << "missing value for " << msg << "\n"; std::exit(2); } | |
| return argv[++i]; | |
| }; | |
| if (a == "--stale-days") o.stale_days = std::stoi(next("--stale-days")); | |
| else if (a == "--sort") o.sort_by = next("--sort"); | |
| else if (a == "--format") o.format = next("--format"); | |
| else if (a == "--max-depth") o.max_depth = std::stoi(next("--max-depth")); | |
| else if (a == "--no-size") o.compute_size = false; | |
| else if (a == "--no-color") o.color = false; | |
| else if (a == "--check-open") o.check_open = true; | |
| else if (a == "-h" || a == "--help") { | |
| std::cout << | |
| "scan-home-dotfiles — smart macOS dotfile audit\n" | |
| " --stale-days N mark stale older than N days (default 90)\n" | |
| " --sort name|size|age\n" | |
| " --format table|json|csv\n" | |
| " --max-depth N content fingerprint depth (default 2)\n" | |
| " --no-size skip recursive sizing\n" | |
| " --no-color\n" | |
| " --check-open lsof probe per entry (slow but accurate)\n"; | |
| return 0; | |
| } | |
| else { std::cerr << "unknown arg: " << a << "\n"; return 2; } | |
| } | |
| if (!isatty(STDOUT_FILENO) || o.format != "table") o.color = false; | |
| const char* home = std::getenv("HOME"); | |
| if (!home) { std::cerr << "HOME not set\n"; return 1; } | |
| fs::path H(home); | |
| auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); | |
| vector<Row> rows; | |
| std::error_code ec; | |
| for (auto& e : fs::directory_iterator(H, fs::directory_options::skip_permission_denied, ec)) { | |
| string fn = e.path().filename().string(); | |
| if (fn.empty() || fn[0] != '.') continue; | |
| if (fn == "." || fn == "..") continue; | |
| Row r; | |
| r.name = fn; | |
| std::error_code te; | |
| if (fs::is_symlink(e.path(), te)) r.type = "link"; | |
| else if (fs::is_directory(e.path(), te)) r.type = "dir"; | |
| else if (fs::is_regular_file(e.path(), te)) r.type = "file"; | |
| else r.type = "other"; | |
| struct stat st{}; | |
| if (lstat(e.path().c_str(), &st) == 0) { | |
| r.atime = st.st_atime; | |
| r.mtime = st.st_mtime; | |
| } | |
| r.age_days = (now - r.atime) / 86400; | |
| r.stale = r.age_days >= o.stale_days; | |
| r.bytes = entry_size(e.path(), o.compute_size); | |
| r.inf = infer(e.path(), o); | |
| rows.push_back(std::move(r)); | |
| } | |
| // Sort | |
| if (o.sort_by == "size") | |
| std::sort(rows.begin(), rows.end(), [](auto& a, auto& b) { return a.bytes > b.bytes; }); | |
| else if (o.sort_by == "age") | |
| std::sort(rows.begin(), rows.end(), [](auto& a, auto& b) { return a.age_days > b.age_days; }); | |
| else | |
| std::sort(rows.begin(), rows.end(), | |
| [](auto& a, auto& b) { return a.name < b.name; }); | |
| // Color helpers | |
| auto C = [&](const char* code) -> string { return o.color ? code : ""; }; | |
| const string DIM = C("\033[2m"), RED = C("\033[31m"), YEL = C("\033[33m"), | |
| GRN = C("\033[32m"), CYN = C("\033[36m"), BLU = C("\033[34m"), | |
| BLD = C("\033[1m"), RST = C("\033[0m"); | |
| auto cat_color = [&](const string& c) { | |
| if (c == "system" || c == "shell") return DIM; | |
| if (c == "tool" || c == "app" || c == "brew") return GRN; | |
| if (c == "likely-cache" || c == "likely-state") return BLU; | |
| if (c == "likely-config" || c == "downloaded") return CYN; | |
| if (c == "orphan") return RED; | |
| return YEL; | |
| }; | |
| // Output | |
| if (o.format == "csv") { | |
| std::cout << "name,type,category,confidence,owner,size_bytes,size_human,age_days," | |
| "last_access,last_modified,stale,reasons\n"; | |
| for (auto& r : rows) { | |
| string reasons; | |
| for (size_t i = 0; i < r.inf.reasons.size(); ++i) { | |
| if (i) reasons += " | "; | |
| reasons += r.inf.reasons[i]; | |
| } | |
| std::cout << r.name << "," << r.type << "," << r.inf.category << "," | |
| << std::fixed << std::setprecision(2) << r.inf.confidence << "," | |
| << "\"" << r.inf.owner_hint << "\"," | |
| << r.bytes << "," << human_size(r.bytes) << "," << r.age_days << "," | |
| << fmt_date(r.atime) << "," << fmt_date(r.mtime) << "," | |
| << (r.stale ? "true" : "false") << ",\"" << reasons << "\"\n"; | |
| } | |
| return 0; | |
| } | |
| if (o.format == "json") { | |
| std::cout << "[\n"; | |
| for (size_t i = 0; i < rows.size(); ++i) { | |
| auto& r = rows[i]; | |
| std::cout << " {\"name\":\"" << r.name << "\"," | |
| << "\"type\":\"" << r.type << "\"," | |
| << "\"category\":\"" << r.inf.category << "\"," | |
| << "\"confidence\":" << std::fixed << std::setprecision(2) << r.inf.confidence << "," | |
| << "\"owner\":\"" << r.inf.owner_hint << "\"," | |
| << "\"size_bytes\":" << r.bytes << "," | |
| << "\"age_days\":" << r.age_days << "," | |
| << "\"stale\":" << (r.stale ? "true" : "false") << "," | |
| << "\"reasons\":["; | |
| for (size_t j = 0; j < r.inf.reasons.size(); ++j) { | |
| if (j) std::cout << ","; | |
| std::cout << "\"" << r.inf.reasons[j] << "\""; | |
| } | |
| std::cout << "]}" << (i + 1 < rows.size() ? "," : "") << "\n"; | |
| } | |
| std::cout << "]\n"; | |
| return 0; | |
| } | |
| // Table | |
| std::cout << BLD << "Smart dotfile audit — " << H.string() << RST << "\n" | |
| << DIM << "Stale: " << o.stale_days << "d | Sort: " << o.sort_by | |
| << " | Depth: " << o.max_depth | |
| << (o.check_open ? " | lsof: on" : "") << RST << "\n\n"; | |
| std::cout << BLD | |
| << std::left | |
| << std::setw(36) << "NAME" | |
| << std::setw(14) << "CATEGORY" | |
| << std::setw(6) << "CONF" | |
| << std::setw(6) << "TYPE" | |
| << std::setw(9) << "SIZE" | |
| << std::setw(7) << "AGE" | |
| << std::setw(12) << "LAST-ACC" | |
| << "OWNER / TOP REASON" << RST << "\n"; | |
| std::cout << string(120, '-') << "\n"; | |
| uint64_t total_bytes = 0; | |
| std::map<string, std::pair<uint64_t, uint64_t>> by_cat; // cat -> (count, bytes) | |
| for (auto& r : rows) { | |
| total_bytes += r.bytes; | |
| auto& [cnt, byt] = by_cat[r.inf.category]; | |
| cnt++; byt += r.bytes; | |
| char conf[8]; | |
| std::snprintf(conf, sizeof conf, "%.2f", r.inf.confidence); | |
| string flag; | |
| if (r.stale) flag += YEL + string("STALE ") + RST; | |
| if (r.inf.category == "orphan") flag += RED + string("ORPHAN ") + RST; | |
| if (r.inf.category == "unknown") flag += CYN + string("?") + RST; | |
| string owner = r.inf.owner_hint; | |
| if (owner.empty() && !r.inf.reasons.empty()) owner = r.inf.reasons.front(); | |
| if (owner.size() > 60) owner = owner.substr(0, 57) + "..."; | |
| std::cout << std::left << std::setw(36) << r.name | |
| << cat_color(r.inf.category) << std::setw(14) << r.inf.category << RST | |
| << std::setw(6) << conf | |
| << std::setw(6) << r.type | |
| << std::setw(9) << human_size(r.bytes) | |
| << std::setw(6) << (std::to_string(r.age_days) + "d") << " " | |
| << std::setw(12) << fmt_date(r.atime) | |
| << owner << " " << flag << "\n"; | |
| } | |
| std::cout << "\n" << BLD << "Summary" << RST << "\n" | |
| << " Entries: " << rows.size() | |
| << " Total size: " << human_size(total_bytes) << "\n"; | |
| for (auto& [c, p] : by_cat) | |
| std::cout << " " << cat_color(c) << std::left << std::setw(14) << c | |
| << RST << p.first << " entries " << human_size(p.second) << "\n"; | |
| std::cout << "\n" << BLD << "Cleanup candidates" << RST | |
| << DIM << " (stale + orphan/unknown, by size desc)" << RST << "\n"; | |
| auto cand = rows; | |
| std::sort(cand.begin(), cand.end(), | |
| [](auto& a, auto& b) { return a.bytes > b.bytes; }); | |
| for (auto& r : cand) { | |
| if (r.stale && (r.inf.category == "orphan" || r.inf.category == "unknown")) { | |
| std::cout << " " << std::left << std::setw(36) << r.name | |
| << std::setw(9) << human_size(r.bytes) | |
| << std::setw(7) << (std::to_string(r.age_days) + "d") | |
| << DIM << r.inf.category; | |
| if (!r.inf.reasons.empty()) std::cout << " — " << r.inf.reasons.front(); | |
| std::cout << RST << "\n"; | |
| } | |
| } | |
| std::cout << "\n" << DIM | |
| << "Categories: system / shell = OS-shipped; tool / app / brew = " | |
| << "auto-discovered owner; likely-cache / likely-config / likely-state = " | |
| << "inferred from contents; downloaded = has provenance metadata; " | |
| << "orphan = name looks like a tool but nothing found; unknown = no signals." | |
| << RST << "\n"; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment