使用 libclang 从内存中的 C 代码生成汇编代码

Generate assembly from C code in memory using libclang

本文关键字:代码生成 程序集 libclang 内存 使用      更新时间:2023-10-16

我需要实现一个库,该库使用 LLVM/Clang 作为后端将 C 代码编译为 eBPF 字节码。代码将从内存中读取,我也需要在内存中获取生成的汇编代码。

到目前为止,我已经能够使用以下代码编译为 LLVM IR:

#include <string>
#include <vector>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/TargetInfo.h>
#include <llvm/Support/TargetSelect.h>
using namespace std;
using namespace clang;
using namespace llvm;
// Tries to compile a source string held in memory for the "bpf" target and to
// emit assembly through EmitAssemblyAction.
//
// NOTE(review): the only LLVM initializer invoked in this function is
// InitializeAllTargets(); neither InitializeAllTargetMCs() nor
// InitializeAllAsmPrinters() is called before the action is executed.
int main() {
    // Virtual file name and the source text that is remapped onto it below.
    constexpr auto testCodeFileName = "test.cpp";
    constexpr auto testCode = "int test() { return 2+2; }";
    // Prepare compilation arguments
    // The virtual file name is the only argument handed to CreateFromArgs.
    vector<const char *> args;
    args.push_back(testCodeFileName);
    // Prepare DiagnosticEngine
    // DiagOpts lives on the stack; its address is given to both the printer and
    // the engine. Diagnostics are printed to errs().
    DiagnosticOptions DiagOpts;
    TextDiagnosticPrinter *textDiagPrinter =
            new clang::TextDiagnosticPrinter(errs(),
                                         &DiagOpts);
    // pDiagIDs is default-constructed and never assigned before it is passed on.
    // NOTE(review): a default-constructed IntrusiveRefCntPtr is presumably null - confirm.
    IntrusiveRefCntPtr<clang::DiagnosticIDs> pDiagIDs;
    // Allocated with new and never deleted in this function.
    DiagnosticsEngine *pDiagnosticsEngine =
            new DiagnosticsEngine(pDiagIDs,
                                         &DiagOpts,
                                         textDiagPrinter);
    // Initialize CompilerInvocation
    // The arguments are passed as a [begin, end) pointer range; whatever the
    // call returns is discarded.
    CompilerInvocation *CI = new CompilerInvocation();
    CompilerInvocation::CreateFromArgs(*CI, &args[0], &args[0] +     args.size(), *pDiagnosticsEngine);
    // Map code filename to a memoryBuffer
    // The source text is copied into a MemoryBuffer and the raw pointer is
    // registered as the contents of testCodeFileName. `buffer` keeps ownership
    // until the release() call at the end of main.
    StringRef testCodeData(testCode);
    unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(testCodeData);
    CI->getPreprocessorOpts().addRemappedFile(testCodeFileName, buffer.get());

    // Create and initialize CompilerInstance
    CompilerInstance Clang;
    Clang.setInvocation(CI);
    Clang.createDiagnostics();
    // Set target (I guess I can initialize only the BPF target, but I don't know how)
    InitializeAllTargets();
    // A TargetInfo for the triple "bpf" is created by hand, using the
    // diagnostics engine built above, and installed on the CompilerInstance.
    const std::shared_ptr<clang::TargetOptions> targetOptions = std::make_shared<clang::TargetOptions>();
    targetOptions->Triple = string("bpf");
    TargetInfo *pTargetInfo = TargetInfo::CreateTargetInfo(*pDiagnosticsEngine,targetOptions);
    Clang.setTarget(pTargetInfo);
    // Create and execute action
    // The action is heap-allocated and never deleted here; the result of
    // ExecuteAction is not inspected.
    // CodeGenAction *compilerAction = new EmitLLVMOnlyAction();
    CodeGenAction *compilerAction = new EmitAssemblyAction();
    Clang.ExecuteAction(*compilerAction);
    // release() gives up ownership without deleting the buffer.
    // NOTE(review): presumably the remapped-file machinery frees it - confirm.
    buffer.release();
}

为了编译,我使用以下CMakeLists.txt:

cmake_minimum_required(VERSION 3.3.2)

# Select the compiler BEFORE project(): project() is the command that detects
# and caches the C++ compiler, so assigning CMAKE_CXX_COMPILER after it is too
# late to influence the detection.
set(CMAKE_CXX_COMPILER "clang++")
project(clang_backend CXX)

# Ask the installed LLVM for the compile flags and the LLVM libraries to link.
execute_process(COMMAND llvm-config --cxxflags OUTPUT_VARIABLE LLVM_CONFIG OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND llvm-config --libs OUTPUT_VARIABLE LLVM_LIBS OUTPUT_STRIP_TRAILING_WHITESPACE)
# NOTE(review): depending on how LLVM was installed, the output of
# `llvm-config --ldflags` and `llvm-config --system-libs` may be needed as well - confirm.

# The flags reported by llvm-config replace CMAKE_CXX_FLAGS entirely.
set(CMAKE_CXX_FLAGS "${LLVM_CONFIG}")

# Clang libraries the program links against, in the order they are given to the linker.
set(CLANG_LIBS clang clangFrontend clangDriver clangSerialization clangParse
    clangCodeGen  clangSema clangAnalysis clangEdit clangAST clangLex
    clangBasic )

add_executable(clang_backend main.cpp)
# Clang libraries first, then the LLVM libraries reported by llvm-config.
target_link_libraries(clang_backend ${CLANG_LIBS})
target_link_libraries(clang_backend ${LLVM_LIBS})

如果我理解正确,只要将编译器操作更改为 EmitAssemblyAction(),我应该就能生成汇编代码;但我可能遗漏了某些初始化,因为程序在 llvm::TargetPassConfig::addPassesToHandleExceptions (this=this@entry=0x6d8d30) 中发生了段错误,位置是 /tmp/llvm-3.7.1.src/lib/CodeGen/Passes.cpp:419

此行的代码为:

switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {

有没有人有一个例子或知道我错过了什么?

如果您在启用断言(assertions)的情况下编译 LLVM,错误信息会清晰得多,它实际上会直接告诉您需要做什么:

x: .../src/llvm/lib/CodeGen/LLVMTargetMachine.cpp:63: 
void llvm::LLVMTargetMachine::initAsmInfo(): 
Assertion `TmpAsmInfo && "MCAsmInfo not initialized. " 
"Make sure you include the correct TargetSelect.h" 
"and that InitializeAllTargetMCs() is being invoked!"' failed.

(我添加了一些换行符,因为它打印为一行长行)。

main开头添加所需的InitializeAllTargetMCs()后,我得到了另一个错误。查看编译器的对象文件生成,我"猜测"这是另一个InitializeAll*调用的问题。经过一点测试,事实证明您还需要InitializeAllAsmPrinters(); - 考虑到您想要生成汇编代码,这是有道理的。

我不完全确定如何"查看"代码的结果,但是将这两个添加到main的开头会使它运行完成而不是断言,退出错误或崩溃 - 这通常是朝着正确方向迈出的好一步。

所以这就是main在"我的"代码中的样子:

int main() {
    constexpr auto testCodeFileName = "test.cpp";
    constexpr auto testCode = "int test() { return 2+2; }";
    InitializeAllTargetMCs();
    InitializeAllAsmPrinters();
    // Prepare compilation arguments
    vector<const char *> args;
    args.push_back(testCodeFileName);
    // Prepare DiagnosticEngine 
    DiagnosticOptions DiagOpts;
    TextDiagnosticPrinter *textDiagPrinter =
            new clang::TextDiagnosticPrinter(errs(),
                                         &DiagOpts);
    IntrusiveRefCntPtr<clang::DiagnosticIDs> pDiagIDs;
    DiagnosticsEngine *pDiagnosticsEngine =
            new DiagnosticsEngine(pDiagIDs,
                                         &DiagOpts,
                                         textDiagPrinter);
    // Initialize CompilerInvocation
    CompilerInvocation *CI = new CompilerInvocation();
    CompilerInvocation::CreateFromArgs(*CI, &args[0], &args[0] +     args.size(), *pDiagnosticsEngine);
    // Map code filename to a memoryBuffer
    StringRef testCodeData(testCode);
    unique_ptr<MemoryBuffer> buffer = MemoryBuffer::getMemBufferCopy(testCodeData);
    CI->getPreprocessorOpts().addRemappedFile(testCodeFileName, buffer.get());

    // Create and initialize CompilerInstance
    CompilerInstance Clang;
    Clang.setInvocation(CI);
    Clang.createDiagnostics();
    // Set target (I guess I can initialize only the BPF target, but I don't know how)
    InitializeAllTargets();
    const std::shared_ptr<clang::TargetOptions> targetOptions = std::make_shared<clang::TargetOptions>();
    targetOptions->Triple = string("bpf");
    TargetInfo *pTargetInfo = TargetInfo::CreateTargetInfo(*pDiagnosticsEngine,targetOptions);
    Clang.setTarget(pTargetInfo);
    // Create and execute action
    // CodeGenAction *compilerAction = new EmitLLVMOnlyAction();
    CodeGenAction *compilerAction = new EmitAssemblyAction();
    Clang.ExecuteAction(*compilerAction);
    buffer.release();
}

我强烈建议:如果你想基于 Clang 和 LLVM 进行开发,请构建一个 Clang 和 LLVM 的调试版本 - 这有助于追查问题的"原因",并能更早、更明显地暴露问题。在运行 cmake 时加上 -DCMAKE_BUILD_TYPE=Debug 即可得到这种构建。

我让LLVM和Clang构建的完整脚本:

# Configure a debug build of LLVM/Clang, using clang as the host compiler.
export CC=clang
export CXX=clang++
# Each option sits on its own continued line so that none can be split by a
# hard line wrap. Only the X86 backend is built here.
# NOTE(review): add BPF to LLVM_TARGETS_TO_BUILD if BPF code generation is needed - confirm.
cmake -DCMAKE_BUILD_TYPE=Debug \
      -DCMAKE_INSTALL_PREFIX=/usr/local/llvm-debug \
      -DLLVM_TARGETS_TO_BUILD=X86 \
      ../llvm

[我使用的是 3.8 的后期预发布版本来测试这一点,但我非常怀疑它在这方面与 3.7.1 有很大不同]

如果有人遇到类似的问题,我已经能够从内存编译/到内存,通过 stdin 发送代码并从 stdout 获取输出。

我不知道是否有其他方法可以实现这一点,也许可以使用 clang::Driver;但是阅读 Clang/LLVM 源代码后,我发现要得到目标文件需要执行的操作是 EmitObjAction(),而且如果输入不是从 stdin 接收的,此操作似乎总是会生成一个 .o 文件。

因此,我在执行操作之前替换了管道的 stdin/stdout,这样我就可以避免生成文件。

#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <cstdio>
#include <unistd.h>
#include <fcntl.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/TargetInfo.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/IR/Module.h>
using namespace std;
using namespace clang;
using namespace llvm;
int main(int argc, char *argv[])
{ 
    // code to compile for the eBPF virtual machine
    constexpr auto testCode = "int main() { return get_nbs(); }";
    // Send code through a pipe to stdin
    int codeInPipe[2];
    pipe2(codeInPipe, O_NONBLOCK);
    write(codeInPipe[1], (void *) testCode, strlen(testCode));
    close(codeInPipe[1]); // We need to close the pipe to send an EOF
    dup2(codeInPipe[0], STDIN_FILENO);
    // Prepare reception of code through stdout
    int codeOutPipe[2];
    pipe(codeOutPipe);
    dup2(codeOutPipe[1], STDOUT_FILENO);
    // Initialize various LLVM/Clang components
    InitializeAllTargetMCs();   
    InitializeAllAsmPrinters();
    InitializeAllTargets();
    // Prepare compilation arguments
    vector<const char *> args;
    args.push_back("--target=bpf"); // Target is bpf assembly
    args.push_back("-xc"); // Code is in c language
    args.push_back("-"); // Read code from stdin
    CompilerInvocation *CI = createInvocationFromCommandLine(makeArrayRef(args) , NULL);
    // Create CompilerInstance
    CompilerInstance Clang;
    Clang.setInvocation(CI);
    // Initialize CompilerInstace
    Clang.createDiagnostics();
    // Create and execute action
    CodeGenAction *compilerAction; 
    compilerAction = new EmitObjAction();
    Clang.ExecuteAction(*compilerAction);
    // Get compiled object (be carefull with buffer size)
    close(codeInPipe[0]);
    char objBuffer[2048];
    read(codeOutPipe[0], objBuffer, 2048); 
    return 0;
}