Decoding pcm_s16le with FFMPEG?

本文关键字：FFMPEG with s16le pcm Decoding 更新时间：2023-10-16

我在使用 FFmpeg 解码 WAV 文件时遇到了问题。我刚接触 FFmpeg，还不太熟悉。

在我的应用程序中，我需要读入音频文件，并得到一个样本数组以便后续处理。我用 FFmpeg 写了一个函数，它接收文件路径、开始输出样本的时间位置，以及要解码的片段长度（以秒为单位）。

我的声望（reputation）不够，无法直接贴图，所以只好创建了一个 Google Drive 目录，你可以在那里看到与问题相关的图片和我用到的文件。

网址:https://goo.gl/8KnjAj

当我尝试解码文件harp.wav一切都运行良好，我可以绘制样本图像plot-harp.png

该文件是WAV文件，编码为:pcm_u8, 11025 Hz, 1通道，u8, 88 kb/s

当我试图解码文件 demo-unprocessed.wav 时出现了问题：它输出一系列毫无意义的样本，绘制出来如图 graph1-demo.jpg 所示。

该文件是WAV文件，编码为:pcm_s16le, 44100 Hz, 1通道，s16, 705 kb/s

我不知道我的代码问题出在哪里；我已经检查过 FFmpeg 解码之前和之后的代码，它们都工作正常。

下面是dataReader.cpp的代码:

/* Start by including the necessary */
#include "dataReader.h"
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#ifdef __cplusplus
extern "C" {
#endif
    #include <libavcodec/avcodec.h>
    #include <libavformat/avformat.h>
    #include <libavutil/avutil.h>
    #include <libavutil/samplefmt.h>
#ifdef __cplusplus
}
#endif
using namespace std;
/* Default constructor: builds an empty chunk (no buffer, zero size, zero bitrate). */
audioChunk::audioChunk()
    : data(NULL),
      size(0),
      bitrate(0)
{
}
/* Returns the chunk length in whole seconds.
 *
 * size counts bytes (readData stores the number of bytes it copied) while
 * bitrate is expressed in bits per second, so the byte count is converted
 * to bits before dividing.
 * Returns 0 when the bitrate is not positive (e.g. a default-constructed
 * or failed chunk) instead of dividing by zero. */
int audioChunk::getTimeLenght(){
    if(bitrate<=0) return 0;
    /* 64-bit intermediate so that size*8 cannot wrap around. */
    const long long totalBits=static_cast<long long>(size)*8;
    return static_cast<int>(totalBits/bitrate);
}
/* Default constructor: builds an empty normalized chunk (no buffer, zero size, zero bitrate). */
audioChunk_dNorm::audioChunk_dNorm()
    : data(NULL),
      size(0),
      bitrate(0)
{
}
/* Returns the chunk length in whole seconds.
 *
 * size is copied from the source audioChunk by fillAudioChunk (one element
 * per source byte) and bitrate is in bits per second, so the element count
 * is converted to bits before dividing.
 * Returns 0 when the bitrate is not positive (e.g. a default-constructed
 * chunk) instead of dividing by zero. */
int audioChunk_dNorm::getTimeLenght(){
    if(bitrate<=0) return 0;
    /* 64-bit intermediate so that size*8 cannot wrap around. */
    const long long totalBits=static_cast<long long>(size)*8;
    return static_cast<int>(totalBits/bitrate);
}
/* Normalizes the bytes of an audioChunk into doubles and stores them here.
 *
 * @ cnk = source chunk; its size and bitrate are copied, and every byte
 *         x of cnk->data is mapped to 2*(x-(max-min)/2)/(max-min), where
 *         min and max are the smallest and largest bytes in the chunk.
 *
 * An empty source (NULL pointer, NULL data or zero size) leaves this object
 * empty instead of dereferencing a missing buffer. A constant source
 * (max == min) yields all zeros instead of dividing by zero.
 * A newly allocated double[size] buffer is stored in data; any buffer held
 * previously is not released here.
 * The bitrate is printed to stdout at the end. */
void audioChunk_dNorm::fillAudioChunk(audioChunk* cnk){
    if(cnk==NULL || cnk->data==NULL || cnk->size==0){
        /* Nothing to normalize: readData returns such a chunk on failure. */
        data=NULL;
        size=0;
        bitrate=(cnk!=NULL) ? cnk->bitrate : 0;
        cout<<"bitrate "<<bitrate<<endl;
        return;
    }
    size=cnk->size;
    bitrate=cnk->bitrate;
    /* Find the extreme byte values of the chunk. */
    double min=cnk->data[0];
    double max=cnk->data[0];
    for(unsigned int i=1;i<size;i++){
        const double sample=cnk->data[i];
        if(sample>max) max=sample;
        else if(sample<min) min=sample;
    }
    const double range=max-min;
    data=new double[size];
    for(unsigned int i=0;i<size;i++){
        /* The previous code tested the still-uninitialized data[i] against
         * 255 before writing it; that read was undefined behaviour and has
         * been removed, so every sample is normalized. */
        if(range>0) data[i]=2*((cnk->data[i])-range/2)/range;
        else data[i]=0;
    }
    cout<<"bitrate "<<bitrate<<endl;
}

audioChunk readData(const char* path_name, const double start_time, const double lenght){
    /* inizialize audioChunk */
    audioChunk output;
    /* Check input times */
    if((start_time<0)||(lenght<0)) {
        cout<<"Input times should be positive";
        return output;
    }
    /* Start FFmpeg */
    av_register_all();
    /* Initialize the frame to read the data and verify memory allocation */
    AVFrame* frame = av_frame_alloc();
    if (!frame)
    {
        cout << "Error allocating the frame" << endl;
        return output;
    }
    /* Initialization of the Context, to open the file */
    AVFormatContext* formatContext = NULL;
    /* Opening the file, and check if it has opened */
    if (avformat_open_input(&formatContext, path_name, NULL, NULL) != 0)
    {
        av_frame_free(&frame);
        cout << "Error opening the file" << endl;
        return output;
    }
    /* Find the stream info, if not found, exit */
    if (avformat_find_stream_info(formatContext, NULL) < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Error finding the stream info" << endl;
        return output;
    }
    /* Check inputs to verify time input */
    if(start_time>(formatContext->duration/1000000)){
        cout<< "Error, start_time is over file duration"<<endl;
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        return output;
    }
    /* Chunk = number of samples to output */
    long long int chunk = ((formatContext->bit_rate)*lenght/8);
    /* Start = address of sample where start to read */
    long long int start = ((formatContext->bit_rate)*start_time/8);
    /* Tot_sampl = number of the samples in the file */
    long long int tot_sampl = (formatContext->bit_rate)*(formatContext->duration)/8000000;
    /* Set the lenght of chunk to avoid segfault and to read all the file */
    if (start+chunk>tot_sampl) {chunk = tot_sampl-start;}
    if (lenght==0) {start = 0; chunk = tot_sampl;}
    /* initialize the array to output */
    output.data = new unsigned char[chunk];
    output.bitrate = formatContext->bit_rate;
    output.size=chunk;
    av_dump_format(formatContext,0,NULL,0);
    cout<<chunk<<" n of sample to read"<<endl;
    cout<<start<<" start"<<endl;
    cout<<output.bitrate<<" bitrate"<<endl;
    cout<<tot_sampl<<" total sample"<<endl;

    /* Find the audio Stream, if no audio stream are found, clean and exit */
    AVCodec* cdc = NULL;
    int streamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &cdc, 0);
    if (streamIndex < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Could not find any audio stream in the file" << endl;
        return output;
    }
    /* Open the audio stream to read data  in audioStream */
    AVStream* audioStream = formatContext->streams[streamIndex];
    /* Initialize the codec context */
    AVCodecContext* codecContext = audioStream->codec;
    codecContext->codec = cdc;
    /* Open the codec, and verify if it has opened */
    if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Couldn't open the context with the decoder" << endl;
        return output;
    }
    /* Initialize buffer to store compressed packets */
    AVPacket readingPacket;
    av_init_packet(&readingPacket);

    int j=0;
    int count = 0; 
    while(av_read_frame(formatContext, &readingPacket)==0){
        if((count+readingPacket.size)>start){
            if(readingPacket.stream_index == audioStream->index){
                AVPacket decodingPacket = readingPacket;
                // Audio packets can have multiple audio frames in a single packet
                while (decodingPacket.size > 0){
                    // Try to decode the packet into a frame
                    // Some frames rely on multiple packets, so we have to make sure the frame is finished before
                    // we can use it
                    int gotFrame = 0;
                    int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, &decodingPacket);
                    count += result;
                    if (result >= 0 && gotFrame)
                    {
                        decodingPacket.size -= result;
                        decodingPacket.data += result;
                        int a;
                        for(int i=0;i<result-1;i++){
                            *(output.data+j)=frame->data[0][i];
                            j++;
                            if(j>=chunk) break;
                        }
                        // We now have a fully decoded audio frame
                    }
                    else
                    {
                        decodingPacket.size = 0;
                        decodingPacket.data = NULL;
                    }
                    if(j>=chunk) break;
                }
            }              
        }else count+=readingPacket.size;
        // To prevent memory leak, must free packet.
        av_free_packet(&readingPacket);
        if(j>=chunk) break;
    }
    // Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
    // is set, there can be buffered up frames that need to be flushed, so we'll do that
    if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
    {
        av_init_packet(&readingPacket);
        // Decode all the remaining frames in the buffer, until the end is reached
        int gotFrame = 0;
        int a;
        int result=avcodec_decode_audio4(codecContext, frame, &gotFrame, &readingPacket);
        while (result >= 0 && gotFrame)
        {
            // We now have a fully decoded audio frame
            for(int i=0;i<result-1;i++){
                *(output.data+j)=frame->data[0][i];
                j++;
                if(j>=chunk) break;
            }
            if(j>=chunk) break;
        }
    }
    // Clean up!
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);
    cout<<"Ended Reading, "<<j<<" samples read"<<endl;
    output.size=j;
    return output;
}

这是dataReader.h

/* 
 * File:   dataReader.h
 * Author: davide
 *
 * Created on 27 luglio 2015, 11.11
 */
#ifndef DATAREADER_H
#define DATAREADER_H
/* function that reads a file and outputs an array of samples
 * @ path_name = the path of the file to read
 * @ start_time = the position where to start the data reading, 0 = start
 *                the time is in seconds, it can hold to 10e-6 seconds
 * @ lenght = the lenght of the frame to extract the data, 
 *            0 = read all the file (do not use with big files)
 *            if lenght > of file duration, it reads through the end of file.
 *            the time is in seconds, it can hold to 10e-6 seconds  
 */
#include <stdint.h>
/* Buffer of raw audio bytes plus the bitrate of the stream it came from.
 * The class declares no destructor, so the data buffer is not released
 * automatically. */
class audioChunk{
public:
    /* Creates an empty chunk. */
    audioChunk();
    /* Returns the chunk length in seconds. */
    int getTimeLenght();

    /* Pointer to the first byte of the buffer. */
    uint8_t *data;
    /* Number of bytes stored in data. */
    unsigned int size;
    /* Bitrate associated with the data. */
    int bitrate;
};
/* Buffer of audio samples stored as doubles, filled from an audioChunk.
 * The class declares no destructor, so the data buffer is not released
 * automatically. */
class audioChunk_dNorm{
public:
    /* Creates an empty normalized chunk. */
    audioChunk_dNorm();
    /* Fills this object with the normalized content of cnk. */
    void fillAudioChunk(audioChunk* cnk);
    /* Returns the chunk length in seconds. */
    int getTimeLenght();

    /* Pointer to the first normalized sample. */
    double* data;
    /* Number of elements stored in data. */
    unsigned int size;
    /* Bitrate copied from the source chunk. */
    int bitrate;
};
audioChunk readData(const char* path_name, const double start_time, const double lenght);
#endif  /* DATAREADER_H */

最后是应用程序的main.cpp。

/* 
 * File:   main.cpp
 * Author: davide
 *
 * Created on 28 luglio 2015, 17.04
 */
#include <cstdlib>
#include "dataReader.h"
#include "transforms.h"
#include "tognuplot.h"
#include <fstream>
#include <iostream>
using namespace std;
/*
 * 
 */
int main(int argc, char** argv) {
    audioChunk *chunk1=new audioChunk;
    audioChunk_dNorm *normChunk1=new audioChunk_dNorm;
    *chunk1=readData("./audio/demo-unprocessed.wav",0,1);
    normChunk1->fillAudioChunk(chunk1);
    ofstream file1;
    file1.open("./file/2wave.txt", std::ofstream::trunc);
    if(file1.is_open()) {
        for(int i=0;i<chunk1->size;i++) {
            int a=chunk1->data[i];
            file1<<i<<" "<<a<<endl;
        }
    }
    else cout<<"Error opening file";
    file1.close();
    return 0;
}

我不明白为什么输出是这样的。是否可能是解码器无法把样本（pcm_s16le，16 位）转换到 FFmpeg 的 AVFrame.data 中，因为它把样本存储为 uint8_t？如果是这样，有没有办法让 FFmpeg 处理样本宽度超过 8 位的音频文件？

文件graph1-demo_good.jpg是样本应该是怎样的，用我所做的一个工作的LIBSNDFILE应用程序提取。

EDIT：看来程序无法把解码后的数据转换为目标格式（我设置为 unsigned char[]）：解码后的数据是以成对的小端序字节（16 位小端序）存放在 uint8_t（unsigned char）中的。所以进入 audioChunk.data 的数据是正确的，但我不应把它当作单个 unsigned char 来读，而应当作成对的小端序字节来读。

我查看了chunk1->data使用gdb指向的内存。(x /256xh 0x18dddf0，以十六进制形式转储前256个半字)。它看起来像有符号的16位值，因为它开始时有很多0, 0xFFFF和0x0001。

所以您的代码需要请求 FFmpeg 转换为特定的样本格式。抱歉，我不知道怎样做最好。