如何使用媒体基础生成"moov before mdat" MP4视频文件
How to generate "moov before mdat" MP4 video files with Media Foundation
我发现Microsoft Media Foundation生成的MPEG4文件中MDAT原子在MOOV原子之前。流媒体需要MOOV位于MDAT之前。我假设我的问题的解决方案是在创建sink时使用MF_MPEG4SINK_MOOV_BEFORE_MDAT属性,但我似乎无法让它产生效果。我的代码基本上与http://blogs.msdn.com/b/eternalcoding/archive/2013/03/06/developing-a-winrt-component-to-create-a-video-file-using-media-foundation.aspx相同。在该示例中,我在设置MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS之前,将该属性设置为UINT32值TRUE。
如果您不能让编码器在文件开头输出moov原子,您总是可以在编码完成之后再修正该文件。下面这两个实用工具都可以在Windows上运行,并能完成任务。(尽管名称中带有'qt',它们处理.mp4文件也没有问题。)
https://github.com/danielgtaylor/qtfaststart
http://ffmpeg.zeranoe.com/blog/?p=59
虽然距离原帖已经过去很长时间了,但我也遇到了使用Media Foundation编码时MOOV位于MDAT之后的问题。微软关于这个主题的文档很少。该设置需要在创建对象时应用于MP4视频接收器(sink)或Sink Writer。
我设法启用了我想要的功能,但录制中的视频是空帧,即使文件大小相同,mp4box信息声明支持渐进式下载,视频长度等。音频很好。GOP也没有出现在信息中,所以仍然存在配置错误。
然后我又读了一些关于H264和MP4结构的文章,
对我来说关键是MPEG容器需要是一个碎片化的MP4容器,所以简单地将容器类型设置为FMPEG4而不是MPEG4就可以了。下面是SinkWriter的初始化,它可以很好地使这个特性工作。
这是我对SinkWriter的完整初始化。
ComPtr<ID3D11Device> device;
ComPtr<ID3D11Device> dx3Device; // multithread configuration.
ComPtr<ID3D10Multithread> dx3MultiThread;
ComPtr<IMFDXGIDeviceManager> manager;
unsigned videoQuality = 50;
unsigned videoBitrate = FPS * width * height; // DEFAULT_BITRATE;
videoBitrate = DEFAULT_BITRATE;
// Audio Input
const UINT SamplesPerSecond = BaseSampleRate;
const UINT AverageBytesPerSecond = SamplesPerSecond / sizeof(FLOAT);
const UINT ChannelCount = 2; // Converted
const UINT BitsPerSample = 16; // Converted
MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET);
_Clock = new Clock();
// Create a random access stream in memory
// CHK(MFCreateMFByteStreamOnStreamEx((IUnknown*)videoStream, &m_SpByteStream));
// Create a temporary working MP4.
IMFByteStreamEx::CreateInstance((IUnknown*)videoStream, IID_IMFByteStream, &m_SpByteStream);
// Create the Sink Writer
ComPtr<IMFAttributes> spAttr;
ComPtr<IMFMediaType> audioOutputType;
ComPtr<IMFMediaType> spVideoTypeIn;
ComPtr<IMFMediaType> spVideoTypeOut;
CHK(MFCreateAttributes(&spAttr, 10));
CHK(spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE));
CHK(spAttr->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE));
CHK(spAttr->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, TRUE));
CHK(spAttr->SetUINT32(MF_LOW_LATENCY, TRUE));
CHK(spAttr->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_FMPEG4));
CHK(spAttr->SetUINT32(MF_MPEG4SINK_MOOV_BEFORE_MDAT, TRUE))
// Setup the output video media type
HRESULT hr = 0;
D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
CHK(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT | D3D11_CREATE_DEVICE_BGRA_SUPPORT,
levels, ARRAYSIZE(levels), D3D11_SDK_VERSION, &device, nullptr, nullptr));
UINT token;
CHK(MFCreateDXGIDeviceManager(&token, &manager));
HANDLE deviceHandle;
CHK(manager->ResetDevice(reinterpret_cast<IUnknown*>(device.Get()), token));
if (SUCCEEDED(manager->OpenDeviceHandle(&deviceHandle))) {
// https://learn.microsoft.com/en-au/windows/desktop/medfound/supporting-direct3d-11-video-decoding-in-media-foundation
// make sure we are using the same device
hr = manager->GetVideoService(deviceHandle, IID_PPV_ARGS(&dx3Device));
hr = dx3Device->QueryInterface(IID_PPV_ARGS(&dx3MultiThread));
dx3MultiThread->SetMultithreadProtected(TRUE);
}
CHK(spAttr->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, manager.Get()));
CHK(MFCreateSinkWriterFromURL(L".mp4v", m_SpByteStream.Get(), spAttr.Get(), &m_SpSinkWriter));
//// Video In Format
CHK(MFCreateMediaType(&spVideoTypeIn));
CHK(spVideoTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
CHK(spVideoTypeIn->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32));
CHK(spVideoTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
CHK(MFSetAttributeSize(spVideoTypeIn.Get(), MF_MT_FRAME_SIZE, m_Width, m_Height));
CHK(MFSetAttributeRatio(spVideoTypeIn.Get(), MF_MT_FRAME_RATE, m_FramesPerSecond, 1));
CHK(MFSetAttributeRatio(spVideoTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
CHK(spVideoTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spVideoTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
// Video Out format
CHK(MFCreateMediaType(&spVideoTypeOut));
CHK(spVideoTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
CHK(spVideoTypeOut->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264));
CHK(spVideoTypeOut->SetUINT32(MF_MT_COMPRESSED, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, FALSE));
CHK(spVideoTypeOut->SetUINT32(MF_MT_AVG_BITRATE, videoBitrate ));
CHK(spVideoTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
CHK(spVideoTypeOut->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High));
CHK(MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_SIZE, m_Width, m_Height));
CHK(MFSetAttributeRatio(spVideoTypeOut.Get(), MF_MT_FRAME_RATE, m_FramesPerSecond , 1));
CHK(MFSetAttributeRatio(spVideoTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
spVideoTypeOut->SetUINT32(MF_MT_SAMPLE_SIZE, 1);
MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_RATE_RANGE_MAX, m_FramesPerSecond, 1);
MFSetAttributeSize(spVideoTypeOut.Get(), MF_MT_FRAME_RATE_RANGE_MIN, m_FramesPerSecond / 2, 1);
// Audio In Format
ComPtr<IMFMediaType> spAudioTypeIn;
CHK(MFCreateMediaType(&spAudioTypeIn));
CHK(spAudioTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
CHK(spAudioTypeIn->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, BitsPerSample));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, BaseSampleRate));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, ChannelCount));
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, AverageBytesPerSec)); // 32bit converted to 16
CHK(spAudioTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 4));
CHK(spAudioTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(spAudioTypeIn->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
CHK(MFCreateMediaType(&audioOutputType));
CHK(audioOutputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
CHK(audioOutputType->SetUINT32(MF_MT_AVG_BITRATE, 16000));
CHK(audioOutputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, SamplesPerSecond));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, BitsPerSample / ((BitsPerSample > 16) ? 2 : 1)));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, ChannelCount));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 12000)); // AverageBytesPerSecond));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 1));
CHK(audioOutputType->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 0x29));
CHK(audioOutputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, 1));
CHK(audioOutputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, FALSE));
CHK(audioOutputType->SetUINT32(MF_MT_FIXED_SIZE_SAMPLES, TRUE));
// Add Video out stream
ComPtr<IMFAttributes> encoderAttributes;
if (TRUE) { // Experimental
CHK(MFCreateAttributes(&encoderAttributes, 12));
if (TRUE) {
unsigned force_keyframe_every_nframes = 11;
unsigned force_bframe_every_nframes = 2;
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncMPVGOPSize, force_keyframe_every_nframes));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncMPVDefaultBPictureCount, force_bframe_every_nframes));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncNumWorkerThreads, 6));
}
if (TRUE) {
// constant quality for screen captures
CHK(encoderAttributes->SetUINT32(CODECAPI_AVLowLatencyMode, TRUE));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonRealTime, 1));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncAdaptiveMode, eAVEncAdaptiveMode_Resolution));
CHK(encoderAttributes->SetGUID(CODECAPI_AVEncCodecType, CODECAPI_GUID_AVEncH264Video));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonMultipassMode, 2));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonRateControlMode, eAVEncCommonRateControlMode_PeakConstrainedVBR));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonMeanBitRate, DEFAULT_BITRATE));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncCommonStreamEndHandling, eAVEncCommonStreamEndHandling_EnsureComplete));
CHK(encoderAttributes->SetUINT32(CODECAPI_AVEncVideoContentType, eAVEncVideoContentType_FixedCameraAngle));
}
}
CHK(m_SpSinkWriter->AddStream(spVideoTypeOut.Get(), &m_VideoStreamIndex));
CHK(m_SpSinkWriter->SetInputMediaType(m_VideoStreamIndex, spVideoTypeIn.Get(), encoderAttributes.Get()));
CHK(m_SpSinkWriter->AddStream(audioOutputType.Get(), &m_AudioStreamIndex));
CHK(m_SpSinkWriter->SetInputMediaType(m_AudioStreamIndex, spAudioTypeIn.Get(), nullptr));
_Clock->Start();
m_ClockStart = clock();
CHK(m_SpSinkWriter->BeginWriting());
你读过 http://msdn.microsoft.com/en-us/library/windows/desktop/hh870256%28v=vs.85%29.aspx 中的备注(Remarks)部分了吗?
"为了使mpeg4接收器使用此属性,传入的字节流不能是慢速查找或远程"
请检查您的IMFByteStream的功能?
MFBYTESTREAM_IS_REMOTE和MFBYTESTREAM_HAS_SLOW_SEEK应该被清除。
如果你的IMFByteStream不符合条件,那么首先创建一个文件MDAT->MOOV,然后重新创建一个新文件MOOV->MDAT。
- Arduino无法编译名为SP:"Expected ')' before '(' token"的变量
- 为什么我的功能在使用 goto 时会给我带来"expected primary-expression before '}' token"?
- 如何修复此错误"expected primary expression before 'int'"?
- 为什么在main()之前断言会导致语法错误"error: expected ')' before numeric constant"?
- 使用结构和数组时的"expected primary expression before '.' token"
- 如何解决"Expected expression before " = " token"?
- 错误:创建模板类时"expected unqualified-id before '{' token"
- AWS Lambda C++ 开发工具包"Process exited before completing request"
- 为什么包含标头会导致"use of auto before deduction of auto"错误?
- 为什么我总是"error: expected unqualified-id before 'case'"?
- 使用自定义iocontext时,找不到FFMPEG库M4A MOOV原子
- 外部"C"导致错误"expected '(' before string constant"
- 如何解决"expected constructor, destructor, or type conversion before ‘(’ token"错误?
- 这个编译器错误是什么意思 - "qualified-id in declaration before ‘=’ token" C++?
- 即使我没有 #define 任何东西或错过分号,如何修复"expected unqualfied-id before string constant"?
- "[Error] expected unqualified-Id before 'if'"这个代码块是什么意思
- memory_order_seq_cst操作的"happens before"关系和"precedes in a single total order"关系有什么区别?
- 获取链表上的"expected ‘)’ before ‘&’ token"到 BST 构造函数签名
- 推导模板类重载方法的地址会导致"error: expected primary-expression before ‘decltype’"
- 如何使用媒体基础生成"moov before mdat" MP4视频文件