Clang+LLVM 为 JIT 编译的函数提供了错误的函数指针

clang+llvm provides bad function pointers for JIT-compiled functions

本文关键字:功能 不良 指针 Clang JIT 编译 LLVM      更新时间:2023-10-16

我正在尝试使用 clang+llvm 3.6 对若干 C 函数进行 JIT 编译(每个 C 函数最终都可能非常大)。

不幸的是,LLVM 提供的函数指针会导致程序发生段错误(segfault)。

到目前为止,我有以下代码:

#include <iostream>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Basic/SourceManager.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/FrontendDiagnostic.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Frontend/Utils.h>
#include <clang/Parse/ParseAST.h>
#include <clang/Lex/Preprocessor.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/ManagedStatic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Linker/Linker.h>
/**
 * Compiles every C snippet in func2Source with clang into LLVM IR, links the
 * resulting modules into one module, hands that module to an MCJIT execution
 * engine, and then calls each compiled function, printing its return value.
 *
 * argc/argv are accepted but not used. Every failure path prints a message to
 * std::cerr and terminates the process with exit(1).
 */
int main(int argc, char *argv[]) {
    using namespace llvm;
    using namespace clang;
    // Synthetic clang command line: treat the (remapped) file "string-input" as C.
    static const char* clangArgv [] = {"program", "-x", "c", "string-input"};
    static const int clangArgc = sizeof (clangArgv) / sizeof (clangArgv[0]);
    // C functions to be compiled (they could eventually be extremely large)
    std::map<std::string, std::string> func2Source;
    func2Source["getOne"] = "int getOne() {return 1;}";
    func2Source["getTwo"] = "int getTwo() {return 2;}";
    llvm::InitializeAllTargets();
    llvm::InitializeAllAsmPrinters();
    std::unique_ptr<llvm::Linker> linker;
    std::unique_ptr<llvm::LLVMContext> context(new llvm::LLVMContext());
    std::unique_ptr<llvm::Module> module;
    /**
    * add each C function to the same module
    */
    for (const auto& p : func2Source) {
        const std::string& source = p.second;
        IntrusiveRefCntPtr<DiagnosticOptions> diagOpts = new DiagnosticOptions();
        TextDiagnosticPrinter *diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); // will be owned by diags
        IntrusiveRefCntPtr<DiagnosticIDs> diagID(new DiagnosticIDs());
        IntrusiveRefCntPtr<DiagnosticsEngine> diags(new DiagnosticsEngine(diagID, &*diagOpts, diagClient));
        ArrayRef<const char *> args(clangArgv + 1, // skip program name
                                    clangArgc - 1);
        std::unique_ptr<CompilerInvocation> invocation(createInvocationFromCommandLine(args, diags));
        if (invocation.get() == nullptr) {
            std::cerr << "Failed to create compiler invocation" << std::endl;
            exit(1);
        }
        CompilerInvocation::setLangDefaults(*invocation->getLangOpts(), IK_C,
                                            LangStandard::lang_unspecified);
        invocation->getFrontendOpts().DisableFree = false; // make sure we free memory (by default it does not)
        // Create a compiler instance to handle the actual work.
        CompilerInstance compiler;
        compiler.setInvocation(invocation.release());
        // Create the compilers actual diagnostics engine.
        compiler.createDiagnostics(); //compiler.createDiagnostics(argc, const_cast<char**> (argv));
        if (!compiler.hasDiagnostics()) {
            std::cerr << "No diagnostics" << std::endl;
            exit(1);
        }
        // Create memory buffer with source text
        std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(source, "SIMPLE_BUFFER");
        if (buffer.get() == nullptr) {
            std::cerr << "Failed to create memory buffer" << std::endl;
            exit(1);
        }
        // Remap auxiliary name "string-input" to memory buffer
        PreprocessorOptions& po = compiler.getInvocation().getPreprocessorOpts();
        po.addRemappedFile("string-input", buffer.release());
        // Create and execute the frontend to generate an LLVM bitcode module.
        clang::EmitLLVMOnlyAction action(context.get());
        if (!compiler.ExecuteAction(action)) {
            std::cerr << "Failed to emit LLVM bitcode" << std::endl;
            exit(1);
        }
        std::unique_ptr<llvm::Module> module1 = action.takeModule();
        if (module1.get() == nullptr) {
            std::cerr << "No module" << std::endl;
            exit(1);
        }
        // The first module becomes the link destination; later ones are linked into it.
        if (linker.get() == nullptr) {
            module.reset(module1.release());
            linker.reset(new llvm::Linker(module.get()));
        } else {
            if (linker->linkInModule(module1.release())) {
                std::cerr << "LLVM failed to link module" << std::endl;
                exit(1);
            }
        }
    }
    llvm::InitializeNativeTarget();

    // Keep a non-owning pointer: ownership of the module moves into the engine below.
    llvm::Module* m = module.get();
    std::string errStr;
    std::unique_ptr<llvm::ExecutionEngine> executionEngine(EngineBuilder(std::move(module))
                                                        .setErrorStr(&errStr)
                                                        .setEngineKind(EngineKind::JIT)
                                                        .setMCJITMemoryManager(std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()))
                                                        .setVerifyModules(true)
                                                        .create());
    if (!executionEngine.get()) {
        std::cerr << "Could not create ExecutionEngine: " + errStr << std::endl;
        exit(1);
    }
    executionEngine->finalizeObject();
    /**
    * Lets try to use each function
    */
    for (const auto& p : func2Source) {
        const std::string& funcName = p.first;
        llvm::Function* func = m->getFunction(funcName);
        if (func == nullptr) {
            std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }
        // Validate the generated code, checking for consistency.
        llvm::raw_os_ostream os(std::cerr);
        bool failed = llvm::verifyFunction(*func, &os);
        if (failed) {
            std::cerr << "Failed to verify function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }
#if 1
        func->dump(); // Dump the function for exposition purposes.
        // JIT the function, returning a function pointer.
        void *fPtr = executionEngine->getPointerToFunction(func);
        // Cast it to the right type (takes no arguments, returns an int) so we
        // can call it as a native function.
        // fPtr already IS the entry address of the compiled code: convert the
        // pointer value itself. Dereferencing it (as in `*(int **) fPtr`) would
        // read the first bytes of machine code and jump to that garbage address.
        int (*funcPtr)() = reinterpret_cast<int (*)()>(fPtr);
        int v = (*funcPtr)();
        std::cout << "return: " << v << std::endl;
#else // Alternative: look the function up by name instead of by llvm::Function*.
        // JIT the function, returning its address as an integer.
        uint64_t fPtr = executionEngine->getFunctionAddress(funcName);
        if (fPtr == 0) {
            std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
            exit(1);
        }
        // The integer holds the entry address; convert it, do not dereference it.
        int (*funcPtr)() = reinterpret_cast<int (*)()>(fPtr);
        int v = (*funcPtr)();
        std::cout << "return: " << v << std::endl;
#endif
    }
}

有人能帮我解决这个问题吗?

(我是在 Linux Ubuntu 15.04 上运行的)

下面这条赋值语句错得离谱:

*(int **) (&funcPtr) = *(int **) fPtr;

它先以 int* 类型写入,紧接着在下一行又把它当作函数指针使用,这不仅违反了严格别名(strict aliasing)规则,而且数据指针通常不够大,无法容纳完整的代码指针。

安全的做法是以下两者之一:

memcpy(&funcPtr, &fPtr, sizeof funcPtr);

或者

funcPtr = reinterpret_cast<decltype(funcPtr)>(fPtr);