如何将CVPixelBufferRef转换为张量

How to transform CVPixelBufferRef to a Tensor

本文关键字：张量转换 CVPixelBufferRef 更新时间：2023-10-16

Tensorflow提供了一个教程，其中输出是一个(1,244,244,3)张量;但是我需要输出为(100,100,3)。我确信问题是将相机图像转换为张量，因为当我加载文件时，我的模型运行得很好(yay)。我修改了代码，看起来像这样，请帮我找到错误(我不c++太多)

tensorflow::Tensor BufferToTensor(CVPixelBufferRef pixelBuffer) {
assert(pixelBuffer != NULL);
OSType sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
int doReverseChannels;
if (kCVPixelFormatType_32ARGB == sourcePixelFormat) {
    doReverseChannels = 1;
} else if (kCVPixelFormatType_32BGRA == sourcePixelFormat) { //look out what bgra is
    doReverseChannels = 0;
} else {
    assert(false);  // Unknown source format
}
const int sourceRowBytes = (int)CVPixelBufferGetBytesPerRow(pixelBuffer);
const int image_width = (int)CVPixelBufferGetWidth(pixelBuffer);
const int fullHeight = (int)CVPixelBufferGetHeight(pixelBuffer);
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
unsigned char *sourceBaseAddr =
(unsigned char *)(CVPixelBufferGetBaseAddress(pixelBuffer));
int image_height;
unsigned char *sourceStartAddr;
if (fullHeight <= image_width) {
    image_height = fullHeight;
    sourceStartAddr = sourceBaseAddr;
} else {
    image_height = image_width;
    const int marginY = ((fullHeight - image_width) / 2);
    sourceStartAddr = (sourceBaseAddr + (marginY * sourceRowBytes));
}
const int image_channels = 3;
const int wanted_width = 100;
const int wanted_height = 100;
const int wanted_channels = 3;
assert(image_channels >= wanted_channels);
tensorflow::Tensor image_tensor(
                                tensorflow::DT_FLOAT,
                                tensorflow::TensorShape(
                                                        { wanted_height, wanted_width, wanted_channels}));
auto image_tensor_mapped = image_tensor.tensor<float, 3>();
tensorflow::uint8 *in = sourceStartAddr;
float *out = image_tensor_mapped.data();
for (int y = 0; y < wanted_height; ++y) {
    float *out_row = out + (y * wanted_width * wanted_channels);
    for (int x = 0; x < wanted_width; ++x) {
        const int in_x = (y * image_width) / wanted_width;
        const int in_y = (x * image_height) / wanted_height;
        tensorflow::uint8 *in_pixel =
        in + (in_y * image_width * image_channels) + (in_x * image_channels);
        float *out_pixel = out_row + (x * wanted_channels);
        for (int c = 0; c < wanted_channels; ++c) {
            out_pixel[c] = in_pixel[c];
        }
    }
}
return image_tensor;
}

我必须通过简单地改变最后一个循环来解决这个问题:

out_pixel[0] = in_pixel[2];
out_pixel[1] = in_pixel[1];
out_pixel[2] = in_pixel[0];