Last active
August 13, 2024 22:56
-
-
Save MaskRay/9574d2e4676a6eb5cbd76bf71fbe98c9 to your computer and use it in GitHub Desktop.
cminify: shorten function/variable/type names in a C source file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build script for cminify, a Clang-based tool that shortens identifiers in a
# C source file. Requires an installed Clang (and the LLVM it was built with).
cmake_minimum_required(VERSION 3.14)
project(cminify LANGUAGES C CXX)

add_executable(cminify "")

# Default to an optimized build when the user did not pick a build type.
# Multi-config generators choose the configuration at build time, so they are
# left alone.
set(DEFAULT_CMAKE_BUILD_TYPE Release)
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "CMAKE_BUILD_TYPE not specified, using ${DEFAULT_CMAKE_BUILD_TYPE}")
  set(CMAKE_BUILD_TYPE ${DEFAULT_CMAKE_BUILD_TYPE})
endif()

set_property(TARGET cminify PROPERTY CXX_STANDARD 17)
set_property(TARGET cminify PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET cminify PROPERTY CXX_EXTENSIONS OFF)

find_package(Clang REQUIRED)

# Link against the monolithic clang-cpp library when Clang was built that way,
# otherwise against the individual component libraries.
if(CLANG_LINK_CLANG_DYLIB)
  target_link_libraries(cminify PRIVATE clang-cpp)
else()
  target_link_libraries(cminify PRIVATE
    clangIndex
    clangFormat
    clangTooling
    clangToolingInclusions
    clangToolingCore
    clangFrontend
    clangParse
    clangSerialization
    clangSema
    clangAST
    clangLex
    clangDriver
    clangBasic
  )
endif()

# Same choice for LLVM: the single shared library or the needed components.
if(LLVM_LINK_LLVM_DYLIB)
  target_link_libraries(cminify PRIVATE LLVM)
else()
  target_link_libraries(cminify PRIVATE LLVMOption LLVMSupport)
  if(LLVM_VERSION_MAJOR GREATER_EQUAL 16)
    target_link_libraries(cminify PRIVATE LLVMTargetParser)
  endif()
endif()

if(NOT LLVM_ENABLE_RTTI)
  # releases.llvm.org libraries are compiled with -fno-rtti
  # The mismatch between lib{clang,LLVM}* and cminify can make libstdc++ std::make_shared return nullptr
  # _Sp_counted_ptr_inplace::_M_get_deleter
  if(MSVC)
    target_compile_options(cminify PRIVATE /GR-)
  else()
    target_compile_options(cminify PRIVATE -fno-rtti)
  endif()
endif()

target_sources(cminify PRIVATE main.cc)

foreach(include_dir ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS})
  get_filename_component(include_dir_realpath ${include_dir} REALPATH)
  # Don't add as SYSTEM if they are in CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES.
  # It would reorder the system search paths and cause issues with libstdc++'s
  # use of #include_next. See https://github.com/MaskRay/ccls/pull/417
  if(NOT "${include_dir_realpath}" IN_LIST CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES)
    target_include_directories(cminify SYSTEM PRIVATE ${include_dir})
  endif()
endforeach()

install(TARGETS cminify RUNTIME DESTINATION bin)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * MiniASTConsumer collects identifiers in `used` and rename candidates (in the main file) in `d2name`. | |
| * MiniASTConsumer iterates over `d2name` and assigns new names. | |
| * Renamer creates clang::tooling::Replacement instances. | |
| * HandleTranslationUnit calls clang::tooling::applyAllReplacements. | |
| */ | |
| #include <clang/AST/ASTConsumer.h> | |
| #include <clang/AST/Decl.h> | |
| #include <clang/AST/RecursiveASTVisitor.h> | |
| #include <clang/Basic/FileManager.h> | |
| #include <clang/Basic/LangOptions.h> | |
| #include <clang/Basic/SourceManager.h> | |
| #include <clang/Basic/TargetInfo.h> | |
| #include <clang/Driver/Action.h> | |
| #include <clang/Driver/Compilation.h> | |
| #include <clang/Driver/Driver.h> | |
| #include <clang/Driver/Tool.h> | |
| #include <clang/Format/Format.h> | |
| #include <clang/Frontend/CompilerInstance.h> | |
| #include <clang/Frontend/FrontendAction.h> | |
| #include <clang/Lex/Lexer.h> | |
| #include <clang/Lex/PreprocessorOptions.h> | |
| #include <clang/Tooling/Core/Replacement.h> | |
| #include <llvm/ADT/CachedHashString.h> | |
| #include <llvm/ADT/DenseSet.h> | |
| #include <llvm/ADT/MapVector.h> | |
| #include <llvm/ADT/STLExtras.h> | |
| #include <llvm/Support/Host.h> | |
| #include <llvm/Support/Path.h> | |
| #include <llvm/Support/raw_ostream.h> | |
| #include <memory> | |
| #include <vector> | |
| #include <assert.h> | |
| #include <err.h> | |
| #include <unistd.h> | |
| using namespace clang; | |
| using namespace llvm; | |
| namespace { | |
| std::unique_ptr<CompilerInvocation> buildCompilerInvocation(ArrayRef<const char *> args) { | |
| IntrusiveRefCntPtr<DiagnosticsEngine> diags( | |
| CompilerInstance::createDiagnostics(new DiagnosticOptions, new IgnoringDiagConsumer, true)); | |
| driver::Driver d(args[0], llvm::sys::getDefaultTargetTriple(), *diags, "cminify", llvm::vfs::getRealFileSystem()); | |
| d.setCheckInputsExist(false); | |
| std::unique_ptr<driver::Compilation> comp(d.BuildCompilation(args)); | |
| if (!comp) | |
| return nullptr; | |
| const driver::JobList &jobs = comp->getJobs(); | |
| if (jobs.size() != 1 || !isa<driver::Command>(*jobs.begin())) | |
| return nullptr; | |
| const driver::Command &cmd = cast<driver::Command>(*jobs.begin()); | |
| if (StringRef(cmd.getCreator().getName()) != "clang") | |
| return nullptr; | |
| const llvm::opt::ArgStringList &cc_args = cmd.getArguments(); | |
| auto ci = std::make_unique<CompilerInvocation>(); | |
| if (!CompilerInvocation::CreateFromArgs(*ci, cc_args, *diags)) | |
| return nullptr; | |
| ci->getDiagnosticOpts().IgnoreWarnings = true; | |
| ci->getFrontendOpts().DisableFree = false; | |
| return ci; | |
| } | |
| SmallVector<StringRef, 0> ignores; | |
| MapVector<Decl *, std::string> d2name; | |
| DenseSet<CachedHashStringRef> used; | |
| std::string newCode; | |
| struct Collector : RecursiveASTVisitor<Collector> { | |
| SourceManager &sm; | |
| Collector(ASTContext &ctx) : sm(ctx.getSourceManager()) {} | |
| bool VisitFunctionDecl(FunctionDecl *fd) { | |
| if (fd->isOverloadedOperator() || !fd->getIdentifier()) | |
| return true; | |
| used.insert(CachedHashStringRef(fd->getName())); | |
| if (!fd->isDefined()) | |
| return true; | |
| std::string name = fd->getNameAsString(); | |
| if (sm.isWrittenInMainFile(fd->getLocation())) { | |
| if (!is_contained(ignores, name)) | |
| d2name[fd->getCanonicalDecl()] = "_f"; | |
| for (ParmVarDecl *param : fd->parameters()) | |
| VisitVarDecl(param); | |
| } | |
| return true; | |
| } | |
| bool VisitVarDecl(VarDecl *vd) { | |
| if (!vd->getIdentifier()) | |
| return true; | |
| used.insert(CachedHashStringRef(vd->getName())); | |
| auto kind = vd->isThisDeclarationADefinition(); | |
| if (kind != VarDecl::Definition || !sm.isWrittenInMainFile(vd->getLocation())) | |
| return true; | |
| d2name[vd->getCanonicalDecl()] = "_v"; | |
| return true; | |
| } | |
| bool VisitTagDecl(TagDecl *td) { | |
| used.insert(CachedHashStringRef(td->getName())); | |
| if (!td->isThisDeclarationADefinition() || !sm.isWrittenInMainFile(td->getLocation())) | |
| return true; | |
| d2name[td->getCanonicalDecl()] = "_t"; | |
| return true; | |
| } | |
| bool VisitTypedefNameDecl(TypedefNameDecl *d) { | |
| if (d->isTransparentTag() || !sm.isWrittenInMainFile(d->getLocation())) | |
| return true; | |
| d2name[d->getCanonicalDecl()] = "_t"; | |
| return true; | |
| } | |
| }; | |
| struct Renamer : RecursiveASTVisitor<Renamer> { | |
| SourceManager &sm; | |
| tooling::Replacements &reps; | |
| Renamer(ASTContext &ctx, tooling::Replacements &reps) : sm(ctx.getSourceManager()), reps(reps) {} | |
| void replace(CharSourceRange csr, StringRef newText) { cantFail(reps.add(tooling::Replacement(sm, csr, newText))); } | |
| bool VisitFunctionDecl(FunctionDecl *fd) { | |
| auto *canon = fd->getCanonicalDecl(); | |
| auto it = d2name.find(canon); | |
| if (it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(fd->getLocation()), it->second); | |
| return true; | |
| } | |
| bool VisitVarDecl(VarDecl *vd) { | |
| auto *canon = vd->getCanonicalDecl(); | |
| auto it = d2name.find(canon); | |
| if (it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(vd->getLocation()), it->second); | |
| return true; | |
| } | |
| bool VisitDeclRefExpr(DeclRefExpr *dre) { | |
| Decl *d = dre->getDecl(); | |
| if (!(isa<FunctionDecl>(d) || isa<VarDecl>(d))) | |
| return true; | |
| auto it = d2name.find(d->getCanonicalDecl()); | |
| if (it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(SourceRange(dre->getBeginLoc(), dre->getEndLoc())), it->second); | |
| return true; | |
| } | |
| bool VisitTagDecl(TagDecl *d) { | |
| auto *canon = d->getCanonicalDecl(); | |
| if (d->getTypedefNameForAnonDecl()) | |
| return true; | |
| if (auto it = d2name.find(canon); it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(d->getLocation()), it->second); | |
| return true; | |
| } | |
| bool VisitTagTypeLoc(TagTypeLoc tl) { | |
| TagDecl *td = tl.getDecl()->getCanonicalDecl(); | |
| if (td->getTypedefNameForAnonDecl()) | |
| return true; | |
| if (auto it = d2name.find(td); it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(tl.getNameLoc()), it->second); | |
| return true; | |
| } | |
| bool VisitTypedefNameDecl(TypedefNameDecl *d) { | |
| if (auto it = d2name.find(d->getCanonicalDecl()); it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(d->getLocation()), it->second); | |
| return true; | |
| } | |
| bool VisitTypedefTypeLoc(TypedefTypeLoc tl) { | |
| TypedefNameDecl *td = tl.getTypedefNameDecl(); | |
| if (auto it = d2name.find(td); it != d2name.end()) | |
| replace(CharSourceRange::getTokenRange(tl.getNameLoc()), it->second); | |
| return true; | |
| } | |
| }; | |
| struct MiniASTConsumer : ASTConsumer { | |
| ASTContext *ctx; | |
| int n_fn = 0, n_var = 0, n_type = 0; | |
| void Initialize(ASTContext &ctx) override { this->ctx = &ctx; } | |
| static std::string getName(StringRef prefix, int &id) { | |
| static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; | |
| std::string newName; | |
| for (;;) { | |
| newName = std::string(1, prefix[id % prefix.size()]); | |
| if (int i = id / prefix.size()) | |
| while (newName += digits[i % 62], i /= 62); | |
| id++; | |
| if (!used.contains(CachedHashStringRef(newName))) break; | |
| } | |
| return newName; | |
| } | |
| bool HandleTopLevelDecl(DeclGroupRef dgr) override { | |
| for (auto s : {"j0", "j1", "jn", "j0f", "j1f", "jnf", "j0l", "j1l", "jnl"}) | |
| used.insert(CachedHashStringRef(s)); | |
| for (auto s : {"y0", "y1", "yn", "y0f", "y1f", "ynf", "y0l", "y1l", "ynl"}) | |
| used.insert(CachedHashStringRef(s)); | |
| Collector c(*ctx); | |
| for (Decl *d : dgr) | |
| c.TraverseDecl(d); | |
| for (auto &[d, name] : d2name) { | |
| if (name == "_f") | |
| name = getName("abcdefghijklm", n_fn); | |
| else if (name == "_v") { | |
| int old_n_var = n_var; | |
| auto newName = getName("nopqrstuvwxyz", n_var); | |
| if (newName.size() < static_cast<VarDecl *>(d)->getName().size()) | |
| name = newName; | |
| else { | |
| name = static_cast<VarDecl *>(d)->getName(); | |
| n_var = old_n_var; | |
| } | |
| } else if (name == "_t") | |
| name = getName("ABCDEFGHIJKLMNOPQRSTUVWXYZ", n_type); | |
| } | |
| return true; | |
| } | |
| void HandleTranslationUnit(ASTContext &ctx) override { | |
| tooling::Replacements reps; | |
| Renamer c(ctx, reps); | |
| c.TraverseDecl(ctx.getTranslationUnitDecl()); | |
| auto &sm = ctx.getSourceManager(); | |
| StringRef code = sm.getBufferData(sm.getMainFileID()); | |
| auto res = tooling::applyAllReplacements(code, reps); | |
| if (!res) | |
| errx(2, "failed to apply replacements: %s", toString(res.takeError()).c_str()); | |
| newCode = *res; | |
| } | |
| }; | |
| struct MiniAction : ASTFrontendAction { | |
| std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &ci, | |
| StringRef inFile) override { | |
| return std::make_unique<MiniASTConsumer>(); | |
| } | |
| }; | |
| void reformat() { | |
| auto buf = MemoryBuffer::getMemBuffer(newCode, "", true); | |
| format::FormatStyle style = cantFail(format::getStyle("LLVM", "-", "LLVM", newCode, nullptr)); | |
| style.ColumnLimit = 9999; | |
| style.IndentWidth = 0; | |
| style.ContinuationIndentWidth = 0; | |
| style.SpaceBeforeAssignmentOperators = false; | |
| style.SpaceBeforeParens = format::FormatStyle::SBPO_Never; | |
| style.AlignEscapedNewlines = format::FormatStyle::ENAS_DontAlign; | |
| format::FormattingAttemptStatus status; | |
| std::vector<tooling::Range> ranges{{0, unsigned(newCode.size())}}; | |
| tooling::Replacements reps = format::reformat(style, newCode, ranges, "-", &status); | |
| auto res = tooling::applyAllReplacements(newCode, reps); | |
| if (!res) | |
| errx(2, "failed to apply replacements: %s", toString(res.takeError()).c_str()); | |
| newCode = *res; | |
| } | |
| } | |
| int main(int argc, char *argv[]) { | |
| std::vector<const char *> args{argv[0], "-fsyntax-only"}; | |
| bool inplace = false; | |
| const char *outfile = "/dev/stdout"; | |
| const char usage[] = R"(Usage: %s [-i] [-f fun]... a.c | |
| Options: | |
| -i edit a.c in place\n)"; | |
| for (int i = 1; i < argc; i++) { | |
| StringRef opt(argv[i]); | |
| if (opt[0] != '-') | |
| args.push_back(argv[i]); | |
| else if (opt == "-h") { | |
| fputs(usage, stdout); | |
| return 0; | |
| } else if (opt == "-i") | |
| inplace = true; | |
| else if (opt == "-f" && i + 1 < argc) | |
| ignores.push_back(argv[++i]); | |
| else if (opt == "-o" && i + 1 < argc) | |
| outfile = argv[++i]; | |
| else { | |
| fputs(usage, stderr); | |
| return 1; | |
| } | |
| } | |
| ignores.push_back("main"); | |
| auto ci = buildCompilerInvocation(args); | |
| if (!ci) | |
| errx(1, "failed to build CompilerInvocation"); | |
| auto inst = std::make_unique<CompilerInstance>(std::make_shared<PCHContainerOperations>()); | |
| IgnoringDiagConsumer dc; | |
| inst->setInvocation(std::move(ci)); | |
| inst->createDiagnostics(&dc, false); | |
| inst->getDiagnostics().setIgnoreAllWarnings(true); | |
| inst->setTarget(TargetInfo::CreateTargetInfo(inst->getDiagnostics(), inst->getInvocation().TargetOpts)); | |
| if (!inst->hasTarget()) | |
| errx(1, "hasTarget returns false"); | |
| inst->createFileManager(llvm::vfs::getRealFileSystem()); | |
| inst->setSourceManager(new SourceManager(inst->getDiagnostics(), inst->getFileManager(), true)); | |
| MiniAction action; | |
| if (!action.BeginSourceFile(*inst, inst->getFrontendOpts().Inputs[0])) | |
| errx(2, "failed to parse"); | |
| if (Error e = action.Execute()) | |
| errx(2, "failed to execute"); | |
| action.EndSourceFile(); | |
| reformat(); | |
| std::error_code ec; | |
| raw_fd_ostream(inplace ? inst->getFrontendOpts().Inputs[0].getFile() : outfile, ec, sys::fs::OF_None) << newCode; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Q: Why is `memcpy(d, s, (size_t)n);` not traversed?