dxva output sync

🧩 Syntax:
// INF_PATCH[experimental]
#if 1
/**
 * Flush the immediate context behind a D3D11 video context and block until
 * the GPU reports that the commands queued so far have been processed.
 *
 * The wait is implemented with a D3D11_QUERY_EVENT query: the query is ended
 * after the flush and GetData() is polled until it stops returning S_FALSE
 * (result not yet available). The calling thread yields between polls.
 *
 * If the device context, the device or the query cannot be obtained the
 * function degrades gracefully: it flushes when it can and returns without
 * waiting.
 *
 * @param video_context video context whose underlying device context is
 *                      flushed; NULL is ignored
 */
static void Flush(ID3D11VideoContext *video_context)
{
    const D3D11_QUERY_DESC query_desc = {
        .Query     = D3D11_QUERY_EVENT,
        .MiscFlags = 0,
    };
    ID3D11DeviceContext *ctx_d3d11 = NULL;
    ID3D11Device        *device    = NULL;
    ID3D11Query         *query     = NULL;
    /* D3D11_QUERY_EVENT reports its result as a BOOL, so the buffer handed
     * to GetData() must be exactly that type and size. */
    BOOL                 done      = FALSE;
    HRESULT              hr;

    if (!video_context)
        return;

    hr = ID3D11VideoContext_QueryInterface(video_context, &IID_ID3D11DeviceContext,
                                           (void **)&ctx_d3d11);
    if (FAILED(hr) || !ctx_d3d11)
        return;

    ID3D11DeviceContext_Flush(ctx_d3d11);

    /* The query is created per call so that it always belongs to the device
     * owning this context and no COM reference outlives the call. */
    ID3D11DeviceContext_GetDevice(ctx_d3d11, &device);
    if (!device)
        goto end;

    hr = ID3D11Device_CreateQuery(device, &query_desc, &query);
    if (FAILED(hr) || !query)
        goto end;

    ID3D11DeviceContext_End(ctx_d3d11, (ID3D11Asynchronous *)query);

    /* Any result other than S_FALSE (success or an error such as a removed
     * device) ends the wait. */
    while (ID3D11DeviceContext_GetData(ctx_d3d11, (ID3D11Asynchronous *)query,
                                       &done, sizeof(done), 0) == S_FALSE)
        SwitchToThread();

end:
    /* QueryInterface(), GetDevice() and CreateQuery() each hand out a
     * reference that the caller has to drop. */
    if (query)
        ID3D11Query_Release(query);
    if (device)
        ID3D11Device_Release(device);
    ID3D11DeviceContext_Release(ctx_d3d11);
}
#endif // INF_PATCH[experimental]


#if CONFIG_D3D11VA
/* INF_PATCH[experimental] */

/* Upper bound, in milliseconds, for the fence wait below, so that a fence
 * which never completes cannot hang the decoding thread forever. */
enum { DXVA_D3D11_SYNC_TIMEOUT_MS = 5000 };

/**
 * Block until the GPU has finished the work submitted on the D3D11 video
 * context of this decoder.
 *
 * A fence is signalled on the immediate context, the context is flushed via
 * Flush(), and the function then waits for the fence completion event.
 * When the fence interfaces (ID3D11DeviceContext4 / ID3D11Device5) or any
 * of the required objects are unavailable, only Flush() is performed.
 *
 * All objects are created and destroyed per call, so nothing is shared
 * between decoder instances or tied to a particular device.
 *
 * TODO: Wait in the DX12 engine prior to usage of the associated surface.
 *       That will need one fence/event per reference frame surface, and an
 *       event handle that is valid in the parent process (probably created
 *       in the parent and passed down here).
 *
 * NOTE(review): this runs after ff_dxva2_unlock(), as the original patch
 * did; confirm the immediate context may be used outside the lock.
 *
 * @param avctx codec context, used for logging
 * @param ctx   DXVA context; must be the D3D11 variant
 */
static void dxva_d3d11_sync_decode_output(AVCodecContext *avctx, AVDXVAContext *ctx)
{
    ID3D11VideoContext   *video_context = D3D11VA_CONTEXT(ctx)->video_context;
    ID3D11DeviceContext4 *ctx4          = NULL;
    ID3D11Device         *device        = NULL;
    ID3D11Device5        *device5       = NULL;
    ID3D11Fence          *fence         = NULL;
    HANDLE                event         = NULL;
    const UINT64          fence_value   = 1; /* fence starts at 0 */
    int                   armed         = 0;
    HRESULT               hr;

    if (!video_context)
        return;

    hr = ID3D11VideoContext_QueryInterface(video_context, &IID_ID3D11DeviceContext4,
                                           (void **)&ctx4);
    if (FAILED(hr) || !ctx4)
        goto flush;

    ID3D11DeviceContext4_GetDevice(ctx4, &device);
    if (!device)
        goto flush;

    hr = ID3D11Device_QueryInterface(device, &IID_ID3D11Device5, (void **)&device5);
    if (FAILED(hr) || !device5)
        goto flush;

    hr = ID3D11Device5_CreateFence(device5, 0, D3D11_FENCE_FLAG_NONE,
                                   &IID_ID3D11Fence, (void **)&fence);
    if (FAILED(hr) || !fence)
        goto flush;

    /* Auto-reset, initially non-signalled, process-local event. */
    event = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!event)
        goto flush;

    hr = ID3D11Fence_SetEventOnCompletion(fence, fence_value, event);
    if (FAILED(hr))
        goto flush;

    hr    = ID3D11DeviceContext4_Signal(ctx4, fence, fence_value);
    armed = SUCCEEDED(hr);

flush:
    Flush(video_context);

    if (armed) {
        /* First pass testing: wait immediately after issuing. Only wait when
         * the fence was really armed, otherwise the event never fires. */
        if (WaitForSingleObjectEx(event, DXVA_D3D11_SYNC_TIMEOUT_MS, FALSE) != WAIT_OBJECT_0)
            av_log(avctx, AV_LOG_WARNING,
                   "Timed out waiting for the D3D11 decode fence\n");
    } else {
        av_log(avctx, AV_LOG_DEBUG,
               "D3D11 fence sync unavailable, flushed only\n");
    }

    if (event)
        CloseHandle(event);
    if (fence)
        ID3D11Fence_Release(fence);
    if (device5)
        ID3D11Device5_Release(device5);
    if (device)
        ID3D11Device_Release(device);
    if (ctx4)
        ID3D11DeviceContext4_Release(ctx4);
}
#endif /* CONFIG_D3D11VA */

/**
 * Submit one frame to the hardware decoder.
 *
 * Begins the frame on the decoder, commits the picture parameter buffer,
 * the optional inverse quantization matrix buffer and the bitstream / slice
 * control buffers, executes them and ends the frame. The D3D11VA or DXVA2
 * API is selected at run time per call site.
 *
 * @param avctx        codec context
 * @param frame        frame whose surface is decoded into
 * @param pp           picture parameters
 * @param pp_size      size of pp in bytes
 * @param qm           inverse quantization matrices, unused if qm_size is 0
 * @param qm_size      size of qm in bytes, 0 to skip that buffer
 * @param commit_bs_si callback that fills the bitstream and the slice
 *                     control buffer descriptors; must return 0 on success
 * @return 0 on success, a non-zero error value otherwise
 */
int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
                              const void *pp, unsigned pp_size,
                              const void *qm, unsigned qm_size,
                              int (*commit_bs_si)(AVCodecContext *,
                                                  DECODER_BUFFER_DESC *bs,
                                                  DECODER_BUFFER_DESC *slice))
{
    AVDXVAContext *ctx = DXVA_CONTEXT(avctx);
    unsigned               buffer_count = 0;
#if CONFIG_D3D11VA
    D3D11_VIDEO_DECODER_BUFFER_DESC buffer11[4];
#endif
#if CONFIG_DXVA2
    DXVA2_DecodeBufferDesc          buffer2[4];
#endif
    DECODER_BUFFER_DESC             *buffer = NULL, *buffer_slice = NULL;
    int result, runs = 0;
    /* Start out failed so that hr is never read uninitialized when neither
     * backend below matches the context. */
    HRESULT hr = E_FAIL;
    unsigned type;
    FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);

    /* Attach the decoder reference to the frame, if there is one. */
    if (sctx->decoder_ref) {
        result = frame_add_buf(frame, sctx->decoder_ref);
        if (result < 0)
            return result;
    }

    /* Begin the frame; while the decoder answers E_PENDING, drop the lock,
     * sleep 2 ms and retry, giving up after 50 retries. */
    do {
        ff_dxva2_lock(avctx);
#if CONFIG_D3D11VA
        if (ff_dxva2_is_d3d11(avctx))
            hr = ID3D11VideoContext_DecoderBeginFrame(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder,
                                                      get_surface(avctx, frame),
                                                      0, NULL);
#endif
#if CONFIG_DXVA2
        if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
            hr = IDirectXVideoDecoder_BeginFrame(DXVA2_CONTEXT(ctx)->decoder,
                                                 get_surface(avctx, frame),
                                                 NULL);
#endif
        if (hr != E_PENDING || ++runs > 50)
            break;
        ff_dxva2_unlock(avctx);
        av_usleep(2000);
    } while(1);

    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%x\n", (unsigned)hr);
        ff_dxva2_unlock(avctx);
        return -1;
    }

    /* Picture parameters. */
#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx)) {
        buffer = &buffer11[buffer_count];
        type = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
    }
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
        buffer = &buffer2[buffer_count];
        type = DXVA2_PictureParametersBufferType;
    }
#endif
    result = ff_dxva2_commit_buffer(avctx, ctx, buffer,
                                    type,
                                    pp, pp_size, 0);
    if (result) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to add picture parameter buffer\n");
        goto end;
    }
    buffer_count++;

    /* Inverse quantization matrices, only when the caller supplies them. */
    if (qm_size > 0) {
#if CONFIG_D3D11VA
        if (ff_dxva2_is_d3d11(avctx)) {
            buffer = &buffer11[buffer_count];
            type = D3D11_VIDEO_DECODER_BUFFER_INVERSE_QUANTIZATION_MATRIX;
        }
#endif
#if CONFIG_DXVA2
        if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
            buffer = &buffer2[buffer_count];
            type = DXVA2_InverseQuantizationMatrixBufferType;
        }
#endif
        result = ff_dxva2_commit_buffer(avctx, ctx, buffer,
                                        type,
                                        qm, qm_size, 0);
        if (result) {
            av_log(avctx, AV_LOG_ERROR,
                   "Failed to add inverse quantization matrix buffer\n");
            goto end;
        }
        buffer_count++;
    }

    /* Bitstream and slice control take the next two descriptor slots and
     * are filled by the codec specific callback. */
#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx)) {
        buffer       = &buffer11[buffer_count + 0];
        buffer_slice = &buffer11[buffer_count + 1];
    }
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
        buffer       = &buffer2[buffer_count + 0];
        buffer_slice = &buffer2[buffer_count + 1];
    }
#endif

    result = commit_bs_si(avctx,
                          buffer,
                          buffer_slice);
    if (result) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to add bitstream or slice control buffer\n");
        goto end;
    }
    buffer_count += 2;

    /* TODO Film Grain when possible */

    assert(buffer_count == 1 + (qm_size > 0) + 2);

#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx))
        hr = ID3D11VideoContext_SubmitDecoderBuffers(D3D11VA_CONTEXT(ctx)->video_context,
                                                     D3D11VA_CONTEXT(ctx)->decoder,
                                                     buffer_count, buffer11);
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
        DXVA2_DecodeExecuteParams exec = {
            .NumCompBuffers     = buffer_count,
            .pCompressedBuffers = buffer2,
            .pExtensionData     = NULL,
        };
        hr = IDirectXVideoDecoder_Execute(DXVA2_CONTEXT(ctx)->decoder, &exec);
    }
#endif
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "Failed to execute: 0x%x\n", (unsigned)hr);
        result = -1;
    }

end:
    /* The frame was begun successfully, so it is always ended and the lock
     * always dropped, whether or not the submission above worked. */
#if CONFIG_D3D11VA
    if (ff_dxva2_is_d3d11(avctx))
        hr = ID3D11VideoContext_DecoderEndFrame(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder);
#endif
#if CONFIG_DXVA2
    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
        hr = IDirectXVideoDecoder_EndFrame(DXVA2_CONTEXT(ctx)->decoder, NULL);
#endif
    ff_dxva2_unlock(avctx);
    if (FAILED(hr)) {
        av_log(avctx, AV_LOG_ERROR, "Failed to end frame: 0x%x\n", (unsigned)hr);
        result = -1;
    }

    // INF_PATCH[experimental]
#if CONFIG_D3D11VA
    /* The GPU sync uses D3D11 interfaces, so it must not touch the context
     * when the frame was decoded through DXVA2. */
    if (ff_dxva2_is_d3d11(avctx))
        dxva_d3d11_sync_decode_output(avctx, ctx);
#endif

    return result;
}