avcodec/nvenc: surface allocation reduction

This patch aims to reduce the number of input/output surfaces NVENC
allocates per session. The previous default allocated 32 surfaces (unless
a user-specified parameter or lookahead was involved). Allocating a large
number of surfaces consumes extra video memory, especially for
higher-resolution encoding. The patch changes the surface calculation for
the default, B-frame and lookahead scenarios respectively.
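
As a worked illustration of the new formula in nvenc_recalc_surfaces()
(the parameter values here are examples, not taken from the patch):

    frameIntervalP = 1, no lookahead:       FFMAX(4, 1 * 2 * 2)      =  4 surfaces
    frameIntervalP = 4, no lookahead:       FFMAX(4, 4 * 2 * 2)      = 16 surfaces
    frameIntervalP = 4, rc_lookahead = 8:   FFMAX(16, 8 + 4 + 1 + 4) = 17 surfaces

versus the previous default of 32.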

The other change involves surface selection. Previously, if a session
allocated x surfaces, only x-1 of them were ever used (due to the
combination of output delay and lock-toggle logic). To avoid leaving
surfaces unused, surface rotation now goes through a predefined FIFO.
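
A minimal sketch of the new rotation, using the same av_fifo calls as the
diff below (error handling omitted, variable names shortened):

    /* at init, every allocated surface is queued as unused */
    av_fifo_generic_write(ctx->unused_surface_queue, &surf, sizeof(surf), NULL);

    /* get_free_frame(): pop a surface, or return NULL if all are in flight */
    if (av_fifo_size(ctx->unused_surface_queue) > 0)
        av_fifo_generic_read(ctx->unused_surface_queue, &surf, sizeof(surf), NULL);

    /* once the corresponding packet has been retrieved, requeue the surface */
    av_fifo_generic_write(ctx->unused_surface_queue, &surf, sizeof(surf), NULL);

so all x allocated surfaces can be in flight at the same time.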

Signed-off-by: Timo Rothenpieler <[email protected]>
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index cf05455..00766c2 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -644,16 +644,34 @@
 static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
-    int nb_surfaces = 0;
+    // default minimum of 4 surfaces
+    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
+    // another multiply by 2 to avoid blocking next PBB group
+    int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
 
+    // lookahead enabled
     if (ctx->rc_lookahead > 0) {
-        nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
-        if (ctx->nb_surfaces < nb_surfaces) {
+        // +1 is to account for lkd_bound calculation later
+        // +4 is to allow sufficient pipelining with lookahead
+        nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
+        if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
+        {
             av_log(avctx, AV_LOG_WARNING,
                    "Defined rc_lookahead requires more surfaces, "
                    "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
-            ctx->nb_surfaces = nb_surfaces;
         }
+        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
+    } else {
+        if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
+        {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Defined b-frame requires more surfaces, "
+                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
+            ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
+        }
+        else if (ctx->nb_surfaces <= 0)
+            ctx->nb_surfaces = nb_surfaces;
+        // otherwise use user specified value
     }
 
     ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
@@ -1086,6 +1104,7 @@
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+    NvencSurface* tmp_surface = &ctx->surfaces[idx];
 
     NVENCSTATUS nv_status;
     NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@
         ctx->surfaces[idx].height = allocSurf.height;
     }
 
-    ctx->surfaces[idx].lockCount = 0;
-
     /* 1MB is large enough to hold most output frames.
      * NVENC increases this automaticaly if it is not enough. */
     allocOut.size = 1024 * 1024;
@@ -1141,6 +1158,8 @@
     ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
     ctx->surfaces[idx].size = allocOut.size;
 
+    av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
+
     return 0;
 }
 
@@ -1156,6 +1175,11 @@
     ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
     if (!ctx->timestamp_list)
         return AVERROR(ENOMEM);
+
+    ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
+    if (!ctx->unused_surface_queue)
+        return AVERROR(ENOMEM);
+
     ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
     if (!ctx->output_surface_queue)
         return AVERROR(ENOMEM);
@@ -1222,6 +1246,7 @@
     av_fifo_freep(&ctx->timestamp_list);
     av_fifo_freep(&ctx->output_surface_ready_queue);
     av_fifo_freep(&ctx->output_surface_queue);
+    av_fifo_freep(&ctx->unused_surface_queue);
 
     if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
         for (i = 0; i < ctx->nb_surfaces; ++i) {
@@ -1305,16 +1330,14 @@
 
 static NvencSurface *get_free_frame(NvencContext *ctx)
 {
-    int i;
+    NvencSurface *tmp_surf;
 
-    for (i = 0; i < ctx->nb_surfaces; i++) {
-        if (!ctx->surfaces[i].lockCount) {
-            ctx->surfaces[i].lockCount = 1;
-            return &ctx->surfaces[i];
-        }
-    }
+    if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
+        // queue empty
+        return NULL;
 
-    return NULL;
+    av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
+    return tmp_surf;
 }
 
 static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
@@ -1712,7 +1735,6 @@
         }
 
         if (res) {
-            inSurf->lockCount = 0;
             return res;
         }
 
@@ -1790,8 +1812,7 @@
         if (res)
             return res;
 
-        av_assert0(tmpoutsurf->lockCount);
-        tmpoutsurf->lockCount--;
+        av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
 
         *got_packet = 1;
     } else {