Handling MF_E_TRANSFORM_STREAM_CHANGE from video decoder MFT
I am trying to decode even just a single H264 frame with the H264 Decoder MFT, but I've been having problems with `ProcessOutput()`. I've eliminated as many of the bad `HRESULT`s as I can, but I'm currently stuck on dealing with `MF_E_TRANSFORM_STREAM_CHANGE`. This occurs after I set `pSample` to my allocated `output_sample` and call `ProcessOutput()`, since this decoder requires you to allocate your own sample. I tried resetting the output type using `SetOutputType()` to what I had in my `configure_decoder()` function, but alas I get a bad `HRESULT`. I'm not sure what to do next.
// libs
#pragma comment(lib, "D3D11.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "evr.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "Winmm.lib")
// std
#include <iostream>
#include <string>
#include <fstream>
// Windows
#include <windows.h>
#include <atlbase.h>
// DirectX
#include <d3d11.h>
// Media Foundation
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
// Others
#include <chrono>
#include <thread>
#include <direct.h> // for mkdir()
#include <Codecapi.h> // for CODECAPI_AVDecVideoAcceleration_H264
#include <comdef.h>
// Custom
// Constants
// Frame dimensions, in pixels, written into MF_MT_FRAME_SIZE for the decoder
// and color-converter media types configured below.
constexpr UINT decode_width = 1920;
constexpr UINT decode_height = 1080;
// Initializes COM (apartment-threaded) on the calling thread and then starts
// Media Foundation.
//
// Returns the failing HRESULT of CoInitializeEx if COM could not be
// initialized; otherwise returns the result of MFStartup.
HRESULT init_mf()
{
    const HRESULT com_result = CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
    if (FAILED(com_result))
        return com_result;

    const HRESULT mf_result = MFStartup(MF_VERSION);
    if (SUCCEEDED(mf_result))
        std::cout << "- Initialized Media Foundation" << std::endl;

    return mf_result;
}
// Creates a hardware Direct3D 11 device with video support.
//
// out_device  receives the created device.
// in_context  receives the device's immediate context (despite the "in_" name,
//             it is written by this function).
//
// Returns the HRESULT of D3D11CreateDevice.
HRESULT init_dxgi(CComPtr<ID3D11Device>& out_device, CComPtr<ID3D11DeviceContext>& in_context)
{
    // Only video support is requested; D3D11_CREATE_DEVICE_DEBUG is not set.
    const UINT creation_flags = D3D11_CREATE_DEVICE_VIDEO_SUPPORT;

    const HRESULT result = D3D11CreateDevice(
        nullptr,                    // default adapter
        D3D_DRIVER_TYPE_HARDWARE,
        nullptr,                    // no software rasterizer module
        creation_flags,
        nullptr, 0,                 // no explicit feature-level list
        D3D11_SDK_VERSION,
        &out_device,
        nullptr,                    // selected feature level is not needed
        &in_context);

    if (SUCCEEDED(result))
        std::cout << "- Initialized DXGI" << std::endl;

    return result;
}
// Enumerates synchronous H.264 -> NV12 video decoder MFTs, activates the first
// one returned and fetches its attribute store.
//
// out_transform   receives the activated decoder transform.
// out_activate    receives the activation object of the chosen decoder.
// out_attributes  receives the transform's attribute store.
//
// Returns MF_E_NOT_FOUND when the enumeration succeeds but yields no decoder;
// otherwise the first failing HRESULT, or the result of GetAttributes.
HRESULT get_decoder(CComPtr<IMFTransform>& out_transform, CComPtr<IMFActivate>& out_activate,
    CComPtr<IMFAttributes>& out_attributes)
{
    HRESULT hr = S_OK;

    // Input & output types the decoder has to support
    const MFT_REGISTER_TYPE_INFO in_info = { MFMediaType_Video, MFVideoFormat_H264 };
    const MFT_REGISTER_TYPE_INFO out_info = { MFMediaType_Video, MFVideoFormat_NV12 };

    // The array itself is freed by CComHeapPtr; the interface pointers stored
    // in it each carry a reference that has to be released or handed over.
    CComHeapPtr<IMFActivate*> activate_raw;
    uint32_t activateCount = 0;
    if (FAILED(hr = MFTEnum2(MFT_CATEGORY_VIDEO_DECODER, MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_SORTANDFILTER, &in_info, &out_info,
        nullptr, &activate_raw, &activateCount)))
        return hr;

    // Nothing matched: indexing element 0 would read an invalid pointer.
    if (activateCount == 0)
        return MF_E_NOT_FOUND;

    // Choose the first returned decoder. Attach() adopts the reference already
    // held by the array instead of adding a second one that nobody releases.
    out_activate.Attach(activate_raw[0]);

    // Drop the references to the decoders that are not used.
    for (uint32_t i = 1; i < activateCount; ++i)
        activate_raw[i]->Release();

    // Activate
    if (FAILED(hr = out_activate->ActivateObject(IID_PPV_ARGS(&out_transform))))
        return hr;

    // Get attributes
    if (FAILED(hr = out_transform->GetAttributes(&out_attributes)))
        return hr;

    std::cout << "- get_decoder() Found " << activateCount << " decoders" << std::endl;
    return hr;
}
// Sets the decoder's input type (H.264) and output type (NV12).
//
// in_transform        decoder transform to configure.
// in_device_manager   currently unused: the MFT_MESSAGE_SET_D3D_MANAGER call
//                     that would hand it to the decoder is not made here.
// in_input_stream_id  stream the input type is set on.
// output_stream_id    stream the output type is set on.
//
// Returns the first failing HRESULT, or the result of SetOutputType.
HRESULT configure_decoder(const CComPtr<IMFTransform>& in_transform, CComPtr<IMFDXGIDeviceManager>& in_device_manager,
    const DWORD in_input_stream_id, const DWORD output_stream_id
)
{
    HRESULT result = S_OK;

    // --- Input side: start from the decoder's first available input type ---
    CComPtr<IMFMediaType> h264_type;
    result = in_transform->GetInputAvailableType(in_input_stream_id, 0, &h264_type);
    if (FAILED(result))
        return result;

    result = h264_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (FAILED(result))
        return result;

    result = h264_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
    if (FAILED(result))
        return result;

    result = h264_type->SetUINT32(MF_MT_AVG_BITRATE, 30000000);
    if (FAILED(result))
        return result;

    result = MFSetAttributeSize(h264_type, MF_MT_FRAME_SIZE, decode_width, decode_height);
    if (FAILED(result))
        return result;

    result = MFSetAttributeRatio(h264_type, MF_MT_FRAME_RATE, 60, 1);
    if (FAILED(result))
        return result;

    result = h264_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlaceMode::MFVideoInterlace_Progressive);
    if (FAILED(result))
        return result;

    result = in_transform->SetInputType(in_input_stream_id, h264_type, 0);
    if (FAILED(result))
        return result;

    // --- Output side: a type built from scratch (no interlace mode is set) ---
    CComPtr<IMFMediaType> nv12_type;
    result = MFCreateMediaType(&nv12_type);
    if (FAILED(result))
        return result;

    result = nv12_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    if (FAILED(result))
        return result;

    result = nv12_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12);
    if (FAILED(result))
        return result;

    result = MFSetAttributeSize(nv12_type, MF_MT_FRAME_SIZE, decode_width, decode_height);
    if (FAILED(result))
        return result;

    result = MFSetAttributeRatio(nv12_type, MF_MT_FRAME_RATE, 60, 1);
    if (FAILED(result))
        return result;

    result = MFSetAttributeRatio(nv12_type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    if (FAILED(result))
        return result;

    result = in_transform->SetOutputType(output_stream_id, nv12_type, 0);
    if (FAILED(result))
        return result;

    std::cout << "- Set decoder configuration" << std::endl;

    // GetInputStatus() is intentionally not queried here; the original author
    // noted that the AMD decoder crashes on that call.
    return result;
}
// Configures the color-conversion transform to take NV12 frames in and produce
// ARGB32 frames out, both at decode_width x decode_height, on stream 0.
//
// Returns the first failing HRESULT, or the result of SetOutputType.
HRESULT configure_color_conversion(IMFTransform* in_color_transform)
{
    // Builds a video media type with the given subtype and the fixed frame size.
    const auto build_video_type = [](const GUID& subtype, CComPtr<IMFMediaType>& out_type) -> HRESULT
    {
        HRESULT status = MFCreateMediaType(&out_type);
        if (SUCCEEDED(status))
            status = out_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
        if (SUCCEEDED(status))
            status = out_type->SetGUID(MF_MT_SUBTYPE, subtype);
        if (SUCCEEDED(status))
            status = MFSetAttributeSize(out_type, MF_MT_FRAME_SIZE, decode_width, decode_height);
        return status;
    };

    // The input type is built and applied before the output type is created.
    CComPtr<IMFMediaType> nv12_type;
    HRESULT status = build_video_type(MFVideoFormat_NV12, nv12_type);
    if (FAILED(status))
        return status;
    status = in_color_transform->SetInputType(0, nv12_type, 0);
    if (FAILED(status))
        return status;

    CComPtr<IMFMediaType> argb_type;
    status = build_video_type(MFVideoFormat_ARGB32, argb_type);
    if (FAILED(status))
        return status;

    return in_color_transform->SetOutputType(0, argb_type, 0);
}
// Runs one texture through the color-conversion transform.
//
// in_transform  configured color-conversion transform (stream 0 is used).
// in_texture    source texture; it is copied before being wrapped in a sample.
// p_sample_out  receives the sample produced by ProcessOutput. The caller
//               owns the returned reference. Set to nullptr on failure.
//
// Returns E_POINTER for null arguments, otherwise the first failing HRESULT,
// or the result of ProcessOutput.
HRESULT color_convert(IMFTransform* in_transform, ID3D11Texture2D* in_texture, IMFSample** p_sample_out)
{
    if (in_transform == nullptr || in_texture == nullptr || p_sample_out == nullptr)
        return E_POINTER;
    *p_sample_out = nullptr;

    HRESULT hr = S_OK;

    // Copy texture, since the one that desktop duplication generates can die at random
    CD3D11_TEXTURE2D_DESC desc;
    in_texture->GetDesc(&desc);
    CComPtr<ID3D11Device> device;
    in_texture->GetDevice(&device);
    const CD3D11_TEXTURE2D_DESC copy_desc(desc.Format, desc.Width, desc.Height, 1, 1, D3D11_BIND_SHADER_RESOURCE |
        D3D11_BIND_RENDER_TARGET);
    CComPtr<ID3D11Texture2D> copy_texture;
    // A failed allocation must not fall through to CopyResource with a null target.
    if (FAILED(hr = device->CreateTexture2D(&copy_desc, nullptr, &copy_texture)))
        return hr;
    CComPtr<ID3D11DeviceContext> device_context;
    device->GetImmediateContext(&device_context);
    device_context->CopyResource(copy_texture, in_texture);
    in_texture = copy_texture;

    // Create buffer
    CComPtr<IMFMediaBuffer> input_buffer;
    if (FAILED(hr = MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), in_texture, 0, false, &input_buffer)))
        return hr;

    // Create sample
    CComPtr<IMFSample> input_sample;
    if (FAILED(hr = MFCreateSample(&input_sample)))
        return hr;
    if (FAILED(hr = input_sample->AddBuffer(input_buffer)))
        return hr;

    // Set input sample times
    if (FAILED(hr = input_sample->SetSampleTime(100)))
        return hr;
    if (FAILED(hr = input_sample->SetSampleDuration(1000)))
        return hr;

    // Process input
    if (FAILED(hr = in_transform->ProcessInput(0, input_sample, 0)))
        return hr;

    // Process output
    MFT_OUTPUT_STREAM_INFO mft_stream_info = {};
    if (FAILED(hr = in_transform->GetOutputStreamInfo(0, &mft_stream_info)))
        return hr;
    // pSample is left null below, so the transform has to allocate the output sample.
    ATLASSERT(mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES);

    DWORD status = 0;
    MFT_OUTPUT_DATA_BUFFER output_buffer = {};
    hr = in_transform->ProcessOutput(0, 1, &output_buffer, &status);

    // Any event collection handed back belongs to the caller and must be released.
    if (output_buffer.pEvents != nullptr)
    {
        output_buffer.pEvents->Release();
        output_buffer.pEvents = nullptr;
    }
    if (FAILED(hr))
        return hr;

    *p_sample_out = output_buffer.pSample;
    return hr;
}
int main()
{
HRESULT hr;
if (FAILED(hr = init_mf()))
return hr;
// Initialize DXGI
CComPtr<ID3D11Device> device;
CComPtr<ID3D11DeviceContext> context;
if (FAILED(hr = init_dxgi(device, context)))
return hr;
// Create device manager
CComPtr<IMFDXGIDeviceManager> device_manager;
UINT resetToken;
if (FAILED(hr = MFCreateDXGIDeviceManager(&resetToken, &device_manager)))
return hr;
// https://docs.microsoft.com/en-us/windows/win32/api/dxva2api/nf-dxva2api-idirect3ddevicemanager9-resetdevice
// When you first create the Direct3D device manager, call this method with a pointer to the Direct3D device.
if (FAILED(hr = device_manager->ResetDevice(device, resetToken)))
return hr;
// Get decoder
CComPtr<IMFTransform> decoder_transform;
CComPtr<IMFActivate> decoder_activate;
CComPtr<IMFAttributes> decoder_attributes;
CComQIPtr<IMFMediaEventGenerator> decoder_event_generator;
if (FAILED(hr = get_decoder(decoder_transform, decoder_activate, decoder_attributes)))
return hr;
// Get the name of the decoder
CComHeapPtr<wchar_t> friendly_name;
uint32_t friendly_name_length;
if (FAILED(hr = decoder_activate->GetAllocatedString(MFT_FRIENDLY_NAME_Attribute, &friendly_name, &friendly_name_length)))
return hr;
std::wcout << "- Selected decoder: " << static_cast<wchar_t const*>(friendly_name) << std::endl;
// Enable hardware acceleration
if (FAILED(hr = decoder_attributes->SetUINT32(CODECAPI_AVDecVideoAcceleration_H264, true)))
return hr;
// Enable low-latency mode - otherwise the decoder will require many input frames before it's able produce any output
if (FAILED(hr = decoder_attributes->SetUINT32(CODECAPI_AVLowLatencyMode, true)))
return hr;
// Get decoder stream IDs
DWORD input_stream_id, output_stream_id;
hr = decoder_transform->GetStreamIDs(1, &input_stream_id, 1, &output_stream_id);
if (hr == E_NOTIMPL) // Doesn't mean failed, see remarks
{ // https://docs.microsoft.com/en-us/windows/win32/api/mftransform/nf-mftransform-imftransform-getstreamids
input_stream_id = 0;
output_stream_id = 0;
hr = S_OK;
}
if (FAILED(hr))
return hr;
// Init decoder-related objects/variables
if (FAILED(hr = configure_decoder(decoder_transform, device_manager, input_stream_id, output_stream_id)))
return hr;
// Apparently you can do this, idek man
decoder_event_generator = decoder_transform;
// Init color conversion-related objects/variables
IMFTransform* color_transform;
if (FAILED(hr = CoCreateInstance(CLSID_VideoProcessorMFT, nullptr, CLSCTX_INPROC_SERVER,
IID_IMFTransform, (void**)&color_transform)))
return hr;
if (FAILED(hr = color_transform->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, reinterpret_cast<ULONG_PTR>(device_manager.p))))
return hr;
if (FAILED(hr = configure_color_conversion(color_transform)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL)))
return hr;
if (FAILED(hr = decoder_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL)))
return hr;
int frameIndex = 1;
do
{
// Read frame data from .h264 file
std::ifstream fin("..\\CaptureAndEncode\\Encoded Frames\\frame" + std::to_string(frameIndex) + ".h264", std::ios::binary |
std::ios::in);
if (!fin)
throw std::runtime_error("Invalid file path specified");
// Get file length
fin.seekg(0, std::ios::end);
size_t length = fin.tellg();
fin.seekg(0, std::ios::beg);
// Variables
CComPtr<IMFMediaBuffer> input_buffer;
CComPtr<IMFSample> input_sample;
DWORD cbMaxLength, cbCurrentLength;
BYTE* pBuffer;
// Create memory buffer and make the underlying array readable/writeable by using Lock()
if (FAILED(hr = MFCreateMemoryBuffer(length, &input_buffer)))
return hr;
if (FAILED(hr = input_buffer->Lock(&pBuffer, &cbMaxLength, &cbCurrentLength)))
return hr;
// Copy frame data from file to array;
fin.read((char*)pBuffer, length);
fin.close();
// Unlock it again (no longer readable/writeable)
if (FAILED(hr = input_buffer->Unlock()))
return hr;
if (FAILED(hr = input_buffer->SetCurrentLength(length)))
return hr;
// Create sample and add the buffer to it
if (FAILED(hr = MFCreateSample(&input_sample)))
return hr;
if (FAILED(hr = input_sample->AddBuffer(input_buffer)))
return hr;
// Process input
hr = decoder_transform->ProcessInput(0, input_sample, 0);
if (SUCCEEDED(hr))
std::cout << "decoder_transform::ProcessInput() - SUCCESS" << std::endl;
else if (hr == MF_E_NOTACCEPTING)
std::cout << "decoder_transform::ProcessInput() - MF_E_NOTACCEPTING" << std::endl;
else
std::cout << "decoder_transform::ProcessInput() - ERROR" << std::endl;
// Output H264 -> NV12
DWORD status;
MFT_OUTPUT_DATA_BUFFER output_buffer;
output_buffer.pSample = nullptr;
output_buffer.pEvents = nullptr;
output_buffer.dwStreamID = 0;
output_buffer.dwStatus = 0;
MFT_OUTPUT_STREAM_INFO mft_stream_info;
ZeroMemory(&mft_stream_info, sizeof(MFT_OUTPUT_STREAM_INFO));
if (FAILED(hr = decoder_transform->GetOutputStreamInfo(0, &mft_stream_info)))
return hr;
if ((mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) == 0 &&
(mft_stream_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES) == 0)
{
std::cout << "This decoder requires that we allocate the output sample ourselves" << std::endl;
CComPtr<IMFSample> output_sample;
CComPtr<IMFMediaBuffer> output_media_buffer;
if (FAILED(hr = MFCreateSample(&output_sample)))
return hr;
if (FAILED(hr = MFCreateMemoryBuffer(mft_stream_info.cbSize, &output_media_buffer)))
return hr;
if (FAILED(hr = output_sample->AddBuffer(output_media_buffer)))
return hr;
output_buffer.pSample = output_sample.Detach();
}
hr = decoder_transform->ProcessOutput(0, 1, &output_buffer, &status);
if (SUCCEEDED(hr))
{
std::cout << "decoder_transform::ProcessOutput() - SUCCESS" << std::endl;
// Test output to file
IMFMediaBuffer* buffer;
if (FAILED(hr = output_buffer.pSample->ConvertToContiguousBuffer(&buffer)))
return hr;
unsigned char* data;
DWORD length;
if (FAILED(hr = buffer->GetCurrentLength(&length)))
return hr;
if (FAILED(hr = buffer->Lock(&data, nullptr, &length)))
return hr;
std::ofstream fout;
fout.open("raw.nv12", std::ios::binary | std::ios::out);
fout.write((char*)data, length);
fout.close();
if (FAILED(hr = buffer->Unlock()))
return hr;
// End test output
}
else if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
std::cout << "decoder_transform::ProcessOutput() - MF_E_TRANSFORM_NEED_MORE_INPUT" << std::endl;
else if (hr == E_INVALIDARG)
std::cout << "decoder_transform::ProcessOutput() - E_INVALIDARG" << std::endl;
else if (hr == MF_E_TRANSFORM_STREAM_CHANGE)
{
std::cout << "decoder_transform::ProcessOutput() - MF_E_TRANSFORM_STREAM_CHANGE" << std::endl;
CComPtr<IMFMediaType> output_type;
if (FAILED(hr = decoder_transform->GetOutputAvailableType(0, 0, &output_type)))
return hr;
// Create output type
/*if (FAILED(hr = MFCreateMediaType(&output_type)))
return hr;*/
// Reconfigure output settings
if (FAILED(hr = output_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)))
return hr;
if (FAILED(hr = output_type->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_NV12)))
return hr;
if (FAILED(hr = MFSetAttributeRatio(output_type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1)))
return hr;
// Set output type
if (FAILED(hr = decoder_transform->SetOutputType(output_stream_id, output_type, 0)))
return hr;
}
frameIndex++;
input_sample.Release();
} while (frameIndex <= 60);
return hr;
}
You just need to follow the steps described in "Handling Stream Changes":
1. The client calls `IMFTransform::GetOutputAvailableType`. This method returns an updated set of output types.
2. The client calls `SetOutputType` to set a new output type.
3. The client resumes calling `ProcessInput`/`ProcessOutput`.
In the question body above you are trying to do step 3 without doing step 2. Most likely your media type differs somewhat from the one the MFT offers, so the MFT rejects it and blocks further processing until this is resolved.