avcodec/nvenc: fix hw accelerated transcode with bframes
hw accelerated transcode (h264_cuvid -> h264_nvenc with -hwaccel cuvid) was
broken after the filtergraph initialization was changed to initialize decoder
first followed by encoder (commit af1761f7b5b1b72197dc40934953b775c2d951cc).
While initializing the encoder with bframes, local buffers are allocated
internally in the encoder, which fails since no cuda context is available.
Pushing the correct cuda context before encoder initialization fixes the issue.
Also add push/pop of the cuda ctx around create/destroy/map/unmap of resources
and around destroying the encoder session.
Signed-off-by: Timo Rothenpieler <[email protected]>
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 160e642..f79b9a5 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -392,9 +392,21 @@
return 0;
fail3:
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context_internal = NULL;
@@ -1008,6 +1020,8 @@
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
+ CUresult cu_res;
+ CUcontext dummy;
int res = 0;
int dw, dh;
@@ -1098,7 +1112,20 @@
if (res)
return res;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
}
@@ -1201,6 +1228,9 @@
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i, res;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
@@ -1222,9 +1252,28 @@
if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0)
+ {
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
return res;
+ }
+ }
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
}
return 0;
@@ -1268,8 +1317,16 @@
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
/* the encoder has to be flushed before it can be closed */
if (ctx->nvencoder) {
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
@@ -1311,6 +1368,12 @@
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (ctx->cu_context_internal)
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context = ctx->cu_context_internal = NULL;
@@ -1842,8 +1905,20 @@
if (output_ready(avctx, !frame)) {
av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
res = process_output_surface(avctx, pkt, tmpoutsurf);
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (res)
return res;