blob: 9aa80f8aff1829aa1b3b2997ef035c635acb4225 [file] [log] [blame]
/*
 * H.264 hardware encoding using nvidia nvenc
 * Copyright (c) 2014 Timo Rothenpieler <[email protected]>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
Timo Rothenpieler1efdb0a2014-12-25 13:55:3122#if defined(_WIN32)
Timo Rothenpieler2a428db2014-11-29 23:04:3723#include <windows.h>
24#else
25#include <dlfcn.h>
26#endif
27
Timo Rothenpieler2a428db2014-11-29 23:04:3728#include <nvEncodeAPI.h>
29
Andrey Turkincfb49fc2016-05-20 16:13:2030#include "libavutil/fifo.h"
Timo Rothenpieler2a428db2014-11-29 23:04:3731#include "libavutil/internal.h"
32#include "libavutil/imgutils.h"
33#include "libavutil/avassert.h"
34#include "libavutil/opt.h"
35#include "libavutil/mem.h"
Andrey Turkina8cf25d2016-05-20 22:08:0636#include "libavutil/hwcontext.h"
Timo Rothenpieler2a428db2014-11-29 23:04:3737#include "avcodec.h"
38#include "internal.h"
39#include "thread.h"
40
Andrey Turkina8cf25d2016-05-20 22:08:0641
#if CONFIG_CUDA
#include <cuda.h>
#include "libavutil/hwcontext_cuda.h"
#else

/* Calling convention used for the CUDA driver entry points. */
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

/*
 * Stand-in declarations for the few CUDA driver API types this file needs
 * when it is built without the CUDA headers (CONFIG_CUDA disabled).
 */
typedef enum cudaError_enum {
    CUDA_SUCCESS = 0
} CUresult;
typedef int   CUdevice;
typedef void *CUcontext;
typedef void *CUdeviceptr;
#endif
60
/*
 * Platform wrappers around the dynamic loader:
 *   LOAD_FUNC(l, s)   - look up symbol s in library handle l
 *   DL_CLOSE_FUNC(l)  - release library handle l
 */
#if !defined(_WIN32)
#define LOAD_FUNC(l, s) dlsym(l, s)
#define DL_CLOSE_FUNC(l) dlclose(l)
#else
#define LOAD_FUNC(l, s) GetProcAddress(l, s)
#define DL_CLOSE_FUNC(l) FreeLibrary(l)
#endif
68
Timo Rothenpieler2a428db2014-11-29 23:04:3769typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
70typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
71typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
72typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
73typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
74typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
75typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
76typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
77
78typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
79
Andrey Turkina8cf25d2016-05-20 22:08:0680#define MAX_REGISTERED_FRAMES 64
Andrey Turkine1691c42016-05-20 15:37:0081typedef struct NvencSurface
Timo Rothenpieler2a428db2014-11-29 23:04:3782{
83 NV_ENC_INPUT_PTR input_surface;
Andrey Turkina8cf25d2016-05-20 22:08:0684 AVFrame *in_ref;
85 NV_ENC_MAP_INPUT_RESOURCE in_map;
86 int reg_idx;
Timo Rothenpieler2a428db2014-11-29 23:04:3787 int width;
88 int height;
89
90 int lockCount;
91
92 NV_ENC_BUFFER_FORMAT format;
Timo Rothenpieler2a428db2014-11-29 23:04:3793
Timo Rothenpieler2a428db2014-11-29 23:04:3794 NV_ENC_OUTPUT_PTR output_surface;
95 int size;
Andrey Turkine1691c42016-05-20 15:37:0096} NvencSurface;
Timo Rothenpieler2a428db2014-11-29 23:04:3797
98typedef struct NvencData
99{
100 union {
101 int64_t timestamp;
Andrey Turkine1691c42016-05-20 15:37:00102 NvencSurface *surface;
Timo Rothenpieler550e8722015-07-14 20:58:54103 } u;
Timo Rothenpieler2a428db2014-11-29 23:04:37104} NvencData;
105
Timo Rothenpieler2a428db2014-11-29 23:04:37106typedef struct NvencDynLoadFunctions
107{
108 PCUINIT cu_init;
109 PCUDEVICEGETCOUNT cu_device_get_count;
110 PCUDEVICEGET cu_device_get;
111 PCUDEVICEGETNAME cu_device_get_name;
112 PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
113 PCUCTXCREATE cu_ctx_create;
114 PCUCTXPOPCURRENT cu_ctx_pop_current;
115 PCUCTXDESTROY cu_ctx_destroy;
116
117 NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
118 int nvenc_device_count;
119 CUdevice nvenc_devices[16];
120
Andrey Turkina8cf25d2016-05-20 22:08:06121#if !CONFIG_CUDA
Timo Rothenpieler1efdb0a2014-12-25 13:55:31122#if defined(_WIN32)
Timo Rothenpieler2a428db2014-11-29 23:04:37123 HMODULE cuda_lib;
Timo Rothenpieler2a428db2014-11-29 23:04:37124#else
125 void* cuda_lib;
Andrey Turkina8cf25d2016-05-20 22:08:06126#endif
127#endif
128#if defined(_WIN32)
129 HMODULE nvenc_lib;
130#else
Timo Rothenpieler2a428db2014-11-29 23:04:37131 void* nvenc_lib;
132#endif
133} NvencDynLoadFunctions;
134
/**
 * Maps an option string to its numeric value; tables of these are
 * terminated by an entry whose str is NULL.
 */
typedef struct NvencValuePair {
    const char *str;
    uint32_t    num;
} NvencValuePair;
140
Timo Rothenpieler2a428db2014-11-29 23:04:37141typedef struct NvencContext
142{
143 AVClass *avclass;
144
145 NvencDynLoadFunctions nvenc_dload_funcs;
146
147 NV_ENC_INITIALIZE_PARAMS init_encode_params;
148 NV_ENC_CONFIG encode_config;
149 CUcontext cu_context;
Andrey Turkina8cf25d2016-05-20 22:08:06150 CUcontext cu_context_internal;
Timo Rothenpieler2a428db2014-11-29 23:04:37151
152 int max_surface_count;
Andrey Turkine1691c42016-05-20 15:37:00153 NvencSurface *surfaces;
Timo Rothenpieler2a428db2014-11-29 23:04:37154
Andrey Turkincfb49fc2016-05-20 16:13:20155 AVFifoBuffer *output_surface_queue;
156 AVFifoBuffer *output_surface_ready_queue;
157 AVFifoBuffer *timestamp_list;
Timo Rothenpieler2a428db2014-11-29 23:04:37158 int64_t last_dts;
159
Andrey Turkina8cf25d2016-05-20 22:08:06160 struct {
161 CUdeviceptr ptr;
162 NV_ENC_REGISTERED_PTR regptr;
163 int mapped;
164 } registered_frames[MAX_REGISTERED_FRAMES];
165 int nb_registered_frames;
166
167 /* the actual data pixel format, different from
168 * AVCodecContext.pix_fmt when using hwaccel frames on input */
169 enum AVPixelFormat data_pix_fmt;
170
Timo Rothenpieler2a428db2014-11-29 23:04:37171 void *nvencoder;
172
173 char *preset;
Timo Rothenpieler764f87b2015-04-01 22:04:07174 char *profile;
Timo Rothenpieler7b0689c2015-04-04 11:34:14175 char *level;
176 char *tier;
Timo Rothenpieler2a428db2014-11-29 23:04:37177 int cbr;
178 int twopass;
Timo Rothenpieler2a428db2014-11-29 23:04:37179 int gpu;
Timo Rothenpieler9f4bff82015-07-25 21:20:28180 int buffer_delay;
Timo Rothenpieler2a428db2014-11-29 23:04:37181} NvencContext;
182
Timo Rothenpieler7b0689c2015-04-04 11:34:14183static const NvencValuePair nvenc_h264_level_pairs[] = {
184 { "auto", NV_ENC_LEVEL_AUTOSELECT },
185 { "1" , NV_ENC_LEVEL_H264_1 },
186 { "1.0" , NV_ENC_LEVEL_H264_1 },
187 { "1b" , NV_ENC_LEVEL_H264_1b },
188 { "1.0b", NV_ENC_LEVEL_H264_1b },
189 { "1.1" , NV_ENC_LEVEL_H264_11 },
190 { "1.2" , NV_ENC_LEVEL_H264_12 },
191 { "1.3" , NV_ENC_LEVEL_H264_13 },
192 { "2" , NV_ENC_LEVEL_H264_2 },
193 { "2.0" , NV_ENC_LEVEL_H264_2 },
194 { "2.1" , NV_ENC_LEVEL_H264_21 },
195 { "2.2" , NV_ENC_LEVEL_H264_22 },
196 { "3" , NV_ENC_LEVEL_H264_3 },
197 { "3.0" , NV_ENC_LEVEL_H264_3 },
198 { "3.1" , NV_ENC_LEVEL_H264_31 },
199 { "3.2" , NV_ENC_LEVEL_H264_32 },
200 { "4" , NV_ENC_LEVEL_H264_4 },
201 { "4.0" , NV_ENC_LEVEL_H264_4 },
202 { "4.1" , NV_ENC_LEVEL_H264_41 },
203 { "4.2" , NV_ENC_LEVEL_H264_42 },
204 { "5" , NV_ENC_LEVEL_H264_5 },
205 { "5.0" , NV_ENC_LEVEL_H264_5 },
206 { "5.1" , NV_ENC_LEVEL_H264_51 },
207 { NULL }
208};
209
Philip Langdalee79c40f2015-06-06 18:00:45210static const NvencValuePair nvenc_hevc_level_pairs[] = {
Timo Rothenpieler7b0689c2015-04-04 11:34:14211 { "auto", NV_ENC_LEVEL_AUTOSELECT },
212 { "1" , NV_ENC_LEVEL_HEVC_1 },
213 { "1.0" , NV_ENC_LEVEL_HEVC_1 },
214 { "2" , NV_ENC_LEVEL_HEVC_2 },
215 { "2.0" , NV_ENC_LEVEL_HEVC_2 },
216 { "2.1" , NV_ENC_LEVEL_HEVC_21 },
217 { "3" , NV_ENC_LEVEL_HEVC_3 },
218 { "3.0" , NV_ENC_LEVEL_HEVC_3 },
219 { "3.1" , NV_ENC_LEVEL_HEVC_31 },
220 { "4" , NV_ENC_LEVEL_HEVC_4 },
221 { "4.0" , NV_ENC_LEVEL_HEVC_4 },
222 { "4.1" , NV_ENC_LEVEL_HEVC_41 },
223 { "5" , NV_ENC_LEVEL_HEVC_5 },
224 { "5.0" , NV_ENC_LEVEL_HEVC_5 },
225 { "5.1" , NV_ENC_LEVEL_HEVC_51 },
226 { "5.2" , NV_ENC_LEVEL_HEVC_52 },
227 { "6" , NV_ENC_LEVEL_HEVC_6 },
228 { "6.0" , NV_ENC_LEVEL_HEVC_6 },
229 { "6.1" , NV_ENC_LEVEL_HEVC_61 },
230 { "6.2" , NV_ENC_LEVEL_HEVC_62 },
231 { NULL }
232};
233
Andrey Turkine1691c42016-05-20 15:37:00234static const struct {
235 NVENCSTATUS nverr;
236 int averr;
237 const char *desc;
238} nvenc_errors[] = {
239 { NV_ENC_SUCCESS, 0, "success" },
240 { NV_ENC_ERR_NO_ENCODE_DEVICE, AVERROR(ENOENT), "no encode device" },
241 { NV_ENC_ERR_UNSUPPORTED_DEVICE, AVERROR(ENOSYS), "unsupported device" },
242 { NV_ENC_ERR_INVALID_ENCODERDEVICE, AVERROR(EINVAL), "invalid encoder device" },
243 { NV_ENC_ERR_INVALID_DEVICE, AVERROR(EINVAL), "invalid device" },
244 { NV_ENC_ERR_DEVICE_NOT_EXIST, AVERROR(EIO), "device does not exist" },
245 { NV_ENC_ERR_INVALID_PTR, AVERROR(EFAULT), "invalid ptr" },
246 { NV_ENC_ERR_INVALID_EVENT, AVERROR(EINVAL), "invalid event" },
247 { NV_ENC_ERR_INVALID_PARAM, AVERROR(EINVAL), "invalid param" },
248 { NV_ENC_ERR_INVALID_CALL, AVERROR(EINVAL), "invalid call" },
249 { NV_ENC_ERR_OUT_OF_MEMORY, AVERROR(ENOMEM), "out of memory" },
250 { NV_ENC_ERR_ENCODER_NOT_INITIALIZED, AVERROR(EINVAL), "encoder not initialized" },
251 { NV_ENC_ERR_UNSUPPORTED_PARAM, AVERROR(ENOSYS), "unsupported param" },
252 { NV_ENC_ERR_LOCK_BUSY, AVERROR(EAGAIN), "lock busy" },
253 { NV_ENC_ERR_NOT_ENOUGH_BUFFER, AVERROR(ENOBUFS), "not enough buffer" },
254 { NV_ENC_ERR_INVALID_VERSION, AVERROR(EINVAL), "invalid version" },
255 { NV_ENC_ERR_MAP_FAILED, AVERROR(EIO), "map failed" },
256 { NV_ENC_ERR_NEED_MORE_INPUT, AVERROR(EAGAIN), "need more input" },
257 { NV_ENC_ERR_ENCODER_BUSY, AVERROR(EAGAIN), "encoder busy" },
258 { NV_ENC_ERR_EVENT_NOT_REGISTERD, AVERROR(EBADF), "event not registered" },
259 { NV_ENC_ERR_GENERIC, AVERROR_UNKNOWN, "generic error" },
260 { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY, AVERROR(EINVAL), "incompatible client key" },
261 { NV_ENC_ERR_UNIMPLEMENTED, AVERROR(ENOSYS), "unimplemented" },
262 { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO), "resource register failed" },
263 { NV_ENC_ERR_RESOURCE_NOT_REGISTERED, AVERROR(EBADF), "resource not registered" },
264 { NV_ENC_ERR_RESOURCE_NOT_MAPPED, AVERROR(EBADF), "resource not mapped" },
265};
266
267static int nvenc_map_error(NVENCSTATUS err, const char **desc)
268{
269 int i;
270 for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
271 if (nvenc_errors[i].nverr == err) {
272 if (desc)
273 *desc = nvenc_errors[i].desc;
274 return nvenc_errors[i].averr;
275 }
276 }
277 if (desc)
278 *desc = "unknown error";
279 return AVERROR_UNKNOWN;
280}
281
282static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
283 const char *error_string)
284{
285 const char *desc;
286 int ret;
287 ret = nvenc_map_error(err, &desc);
288 av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, desc, err);
289 return ret;
290}
291
Timo Rothenpieler7b0689c2015-04-04 11:34:14292static int input_string_to_uint32(AVCodecContext *avctx, const NvencValuePair *pair, const char *input, uint32_t *output)
293{
294 for (; pair->str; ++pair) {
295 if (!strcmp(input, pair->str)) {
296 *output = pair->num;
297 return 0;
298 }
299 }
300
301 return AVERROR(EINVAL);
302}
303
Andrey Turkincfb49fc2016-05-20 16:13:20304static void timestamp_queue_enqueue(AVFifoBuffer* queue, int64_t timestamp)
Timo Rothenpieler2a428db2014-11-29 23:04:37305{
Andrey Turkincfb49fc2016-05-20 16:13:20306 av_fifo_generic_write(queue, &timestamp, sizeof(timestamp), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:37307}
308
Andrey Turkincfb49fc2016-05-20 16:13:20309static int64_t timestamp_queue_dequeue(AVFifoBuffer* queue)
Timo Rothenpieler2a428db2014-11-29 23:04:37310{
Andrey Turkincfb49fc2016-05-20 16:13:20311 int64_t timestamp = AV_NOPTS_VALUE;
312 if (av_fifo_size(queue) > 0)
313 av_fifo_generic_read(queue, &timestamp, sizeof(timestamp), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:37314
Andrey Turkincfb49fc2016-05-20 16:13:20315 return timestamp;
Timo Rothenpieler2a428db2014-11-29 23:04:37316}
317
318#define CHECK_LOAD_FUNC(t, f, s) \
319do { \
320 (f) = (t)LOAD_FUNC(dl_fn->cuda_lib, s); \
321 if (!(f)) { \
322 av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
323 goto error; \
324 } \
325} while (0)
326
327static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
328{
329 NvencContext *ctx = avctx->priv_data;
330 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
331
Andrey Turkina8cf25d2016-05-20 22:08:06332#if CONFIG_CUDA
333 dl_fn->cu_init = cuInit;
334 dl_fn->cu_device_get_count = cuDeviceGetCount;
335 dl_fn->cu_device_get = cuDeviceGet;
336 dl_fn->cu_device_get_name = cuDeviceGetName;
337 dl_fn->cu_device_compute_capability = cuDeviceComputeCapability;
338 dl_fn->cu_ctx_create = cuCtxCreate_v2;
339 dl_fn->cu_ctx_pop_current = cuCtxPopCurrent_v2;
340 dl_fn->cu_ctx_destroy = cuCtxDestroy_v2;
341
342 return 1;
343#else
Timo Rothenpieler2a428db2014-11-29 23:04:37344 if (dl_fn->cuda_lib)
345 return 1;
346
347#if defined(_WIN32)
348 dl_fn->cuda_lib = LoadLibrary(TEXT("nvcuda.dll"));
Timo Rothenpieler2a428db2014-11-29 23:04:37349#else
350 dl_fn->cuda_lib = dlopen("libcuda.so", RTLD_LAZY);
351#endif
352
353 if (!dl_fn->cuda_lib) {
354 av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
355 goto error;
356 }
357
358 CHECK_LOAD_FUNC(PCUINIT, dl_fn->cu_init, "cuInit");
359 CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, dl_fn->cu_device_get_count, "cuDeviceGetCount");
360 CHECK_LOAD_FUNC(PCUDEVICEGET, dl_fn->cu_device_get, "cuDeviceGet");
361 CHECK_LOAD_FUNC(PCUDEVICEGETNAME, dl_fn->cu_device_get_name, "cuDeviceGetName");
362 CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, dl_fn->cu_device_compute_capability, "cuDeviceComputeCapability");
363 CHECK_LOAD_FUNC(PCUCTXCREATE, dl_fn->cu_ctx_create, "cuCtxCreate_v2");
364 CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, dl_fn->cu_ctx_pop_current, "cuCtxPopCurrent_v2");
365 CHECK_LOAD_FUNC(PCUCTXDESTROY, dl_fn->cu_ctx_destroy, "cuCtxDestroy_v2");
366
367 return 1;
368
369error:
370
371 if (dl_fn->cuda_lib)
372 DL_CLOSE_FUNC(dl_fn->cuda_lib);
373
374 dl_fn->cuda_lib = NULL;
375
376 return 0;
Andrey Turkina8cf25d2016-05-20 22:08:06377#endif
Timo Rothenpieler2a428db2014-11-29 23:04:37378}
379
380static av_cold int check_cuda_errors(AVCodecContext *avctx, CUresult err, const char *func)
381{
382 if (err != CUDA_SUCCESS) {
383 av_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, err);
384 return 0;
385 }
386 return 1;
387}
388#define check_cuda_errors(f) if (!check_cuda_errors(avctx, f, #f)) goto error
389
390static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
391{
392 int device_count = 0;
393 CUdevice cu_device = 0;
394 char gpu_name[128];
395 int smminor = 0, smmajor = 0;
Philip Langdale21175d82015-03-24 04:34:59396 int i, smver, target_smver;
Timo Rothenpieler2a428db2014-11-29 23:04:37397
398 NvencContext *ctx = avctx->priv_data;
399 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
400
Philip Langdale21175d82015-03-24 04:34:59401 switch (avctx->codec->id) {
402 case AV_CODEC_ID_H264:
Andrey Turkina8cf25d2016-05-20 22:08:06403 target_smver = ctx->data_pix_fmt == AV_PIX_FMT_YUV444P ? 0x52 : 0x30;
Philip Langdale21175d82015-03-24 04:34:59404 break;
405 case AV_CODEC_ID_H265:
406 target_smver = 0x52;
407 break;
408 default:
Agatha Hu49046582015-09-11 09:07:10409 av_log(avctx, AV_LOG_FATAL, "Unknown codec name\n");
Philip Langdale21175d82015-03-24 04:34:59410 goto error;
411 }
412
Timo Rothenpieler2a428db2014-11-29 23:04:37413 if (!nvenc_dyload_cuda(avctx))
414 return 0;
415
416 if (dl_fn->nvenc_device_count > 0)
417 return 1;
418
419 check_cuda_errors(dl_fn->cu_init(0));
420
421 check_cuda_errors(dl_fn->cu_device_get_count(&device_count));
422
423 if (!device_count) {
424 av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
425 goto error;
426 }
427
428 av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_count);
429
430 dl_fn->nvenc_device_count = 0;
431
432 for (i = 0; i < device_count; ++i) {
433 check_cuda_errors(dl_fn->cu_device_get(&cu_device, i));
434 check_cuda_errors(dl_fn->cu_device_get_name(gpu_name, sizeof(gpu_name), cu_device));
435 check_cuda_errors(dl_fn->cu_device_compute_capability(&smmajor, &smminor, cu_device));
436
437 smver = (smmajor << 4) | smminor;
438
Philip Langdale21175d82015-03-24 04:34:59439 av_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= target_smver) ? "Available" : "Not Available");
Timo Rothenpieler2a428db2014-11-29 23:04:37440
Philip Langdale21175d82015-03-24 04:34:59441 if (smver >= target_smver)
Timo Rothenpieler2a428db2014-11-29 23:04:37442 dl_fn->nvenc_devices[dl_fn->nvenc_device_count++] = cu_device;
443 }
444
445 if (!dl_fn->nvenc_device_count) {
446 av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
447 goto error;
448 }
449
450 return 1;
451
452error:
453
454 dl_fn->nvenc_device_count = 0;
455
456 return 0;
457}
458
459static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
460{
461 PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
462 NVENCSTATUS nvstatus;
463
464 NvencContext *ctx = avctx->priv_data;
465 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
466
467 if (!nvenc_check_cuda(avctx))
468 return 0;
469
470 if (dl_fn->nvenc_lib)
471 return 1;
472
473#if defined(_WIN32)
474 if (sizeof(void*) == 8) {
475 dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
476 } else {
477 dl_fn->nvenc_lib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
478 }
Timo Rothenpieler2a428db2014-11-29 23:04:37479#else
480 dl_fn->nvenc_lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
481#endif
482
483 if (!dl_fn->nvenc_lib) {
484 av_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
485 goto error;
486 }
487
488 nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(dl_fn->nvenc_lib, "NvEncodeAPICreateInstance");
489
490 if (!nvEncodeAPICreateInstance) {
491 av_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
492 goto error;
493 }
494
495 dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
496
497 nvstatus = nvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
498
499 if (nvstatus != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:00500 nvenc_print_error(avctx, nvstatus, "Failed to create nvenc instance");
Timo Rothenpieler2a428db2014-11-29 23:04:37501 goto error;
502 }
503
504 av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
505
506 return 1;
507
508error:
509 if (dl_fn->nvenc_lib)
510 DL_CLOSE_FUNC(dl_fn->nvenc_lib);
511
512 dl_fn->nvenc_lib = NULL;
513
514 return 0;
515}
516
517static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
518{
519 NvencContext *ctx = avctx->priv_data;
520 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
521
522 DL_CLOSE_FUNC(dl_fn->nvenc_lib);
523 dl_fn->nvenc_lib = NULL;
524
525 dl_fn->nvenc_device_count = 0;
526
Andrey Turkina8cf25d2016-05-20 22:08:06527#if !CONFIG_CUDA
Timo Rothenpieler2a428db2014-11-29 23:04:37528 DL_CLOSE_FUNC(dl_fn->cuda_lib);
529 dl_fn->cuda_lib = NULL;
Andrey Turkina8cf25d2016-05-20 22:08:06530#endif
Timo Rothenpieler2a428db2014-11-29 23:04:37531
532 dl_fn->cu_init = NULL;
533 dl_fn->cu_device_get_count = NULL;
534 dl_fn->cu_device_get = NULL;
535 dl_fn->cu_device_get_name = NULL;
536 dl_fn->cu_device_compute_capability = NULL;
537 dl_fn->cu_ctx_create = NULL;
538 dl_fn->cu_ctx_pop_current = NULL;
539 dl_fn->cu_ctx_destroy = NULL;
540
541 av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
542}
543
Andrey Turkin82d705e2016-05-20 14:49:24544static av_cold int nvenc_setup_device(AVCodecContext *avctx)
Timo Rothenpieler2a428db2014-11-29 23:04:37545{
Timo Rothenpieler2a428db2014-11-29 23:04:37546 NvencContext *ctx = avctx->priv_data;
547 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
Timo Rothenpieler2a428db2014-11-29 23:04:37548
Andrey Turkin82d705e2016-05-20 14:49:24549 CUresult cu_res;
550 CUcontext cu_context_curr;
Timo Rothenpielerbc3f7672015-01-16 00:02:40551
Andrey Turkina8cf25d2016-05-20 22:08:06552 ctx->data_pix_fmt = avctx->pix_fmt;
553
554#if CONFIG_CUDA
555 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
556 AVHWFramesContext *frames_ctx;
557 AVCUDADeviceContext *device_hwctx;
558
559 if (!avctx->hw_frames_ctx) {
560 av_log(avctx, AV_LOG_ERROR, "hw_frames_ctx must be set when using GPU frames as input\n");
561 return AVERROR(EINVAL);
562 }
563
564 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
565 device_hwctx = frames_ctx->device_ctx->hwctx;
566 ctx->cu_context = device_hwctx->cuda_ctx;
567 ctx->data_pix_fmt = frames_ctx->sw_format;
568 return 0;
569 }
570#endif
571
Timo Rothenpieler2a428db2014-11-29 23:04:37572 if (ctx->gpu >= dl_fn->nvenc_device_count) {
573 av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->gpu, dl_fn->nvenc_device_count);
Andrey Turkin82d705e2016-05-20 14:49:24574 return AVERROR(EINVAL);
Timo Rothenpieler2a428db2014-11-29 23:04:37575 }
576
577 ctx->cu_context = NULL;
Andrey Turkina8cf25d2016-05-20 22:08:06578 cu_res = dl_fn->cu_ctx_create(&ctx->cu_context_internal, 4, dl_fn->nvenc_devices[ctx->gpu]); // CU_CTX_SCHED_BLOCKING_SYNC=4, avoid CPU spins
Timo Rothenpieler2a428db2014-11-29 23:04:37579
580 if (cu_res != CUDA_SUCCESS) {
581 av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
Andrey Turkin82d705e2016-05-20 14:49:24582 return AVERROR_EXTERNAL;
Timo Rothenpieler2a428db2014-11-29 23:04:37583 }
584
585 cu_res = dl_fn->cu_ctx_pop_current(&cu_context_curr);
586
587 if (cu_res != CUDA_SUCCESS) {
588 av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
Andrey Turkin82d705e2016-05-20 14:49:24589 return AVERROR_EXTERNAL;
Timo Rothenpieler2a428db2014-11-29 23:04:37590 }
591
Andrey Turkina8cf25d2016-05-20 22:08:06592 ctx->cu_context = ctx->cu_context_internal;
593
Andrey Turkin82d705e2016-05-20 14:49:24594 return 0;
595}
596
597static av_cold int nvenc_open_session(AVCodecContext *avctx)
598{
599 NvencContext *ctx = avctx->priv_data;
600 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
601 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
602
603 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
604 NVENCSTATUS nv_status;
605
606 encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
607 encode_session_params.apiVersion = NVENCAPI_VERSION;
Timo Rothenpieler2a428db2014-11-29 23:04:37608 encode_session_params.device = ctx->cu_context;
609 encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
610
611 nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->nvencoder);
612 if (nv_status != NV_ENC_SUCCESS) {
613 ctx->nvencoder = NULL;
Andrey Turkine1691c42016-05-20 15:37:00614 return nvenc_print_error(avctx, nv_status, "OpenEncodeSessionEx failed");
Timo Rothenpieler2a428db2014-11-29 23:04:37615 }
616
Andrey Turkin82d705e2016-05-20 14:49:24617 return 0;
618}
619
620static av_cold void set_constqp(AVCodecContext *avctx)
621{
622 NvencContext *ctx = avctx->priv_data;
623
624 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
625 ctx->encode_config.rcParams.constQP.qpInterB = avctx->global_quality;
626 ctx->encode_config.rcParams.constQP.qpInterP = avctx->global_quality;
627 ctx->encode_config.rcParams.constQP.qpIntra = avctx->global_quality;
628}
629
630static av_cold void set_vbr(AVCodecContext *avctx)
631{
632 NvencContext *ctx = avctx->priv_data;
633
634 ctx->encode_config.rcParams.enableMinQP = 1;
635 ctx->encode_config.rcParams.enableMaxQP = 1;
636
637 ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
638 ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
639 ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
640
641 ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
642 ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
643 ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
644}
645
646static av_cold void set_lossless(AVCodecContext *avctx)
647{
648 NvencContext *ctx = avctx->priv_data;
649
650 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
651 ctx->encode_config.rcParams.constQP.qpInterB = 0;
652 ctx->encode_config.rcParams.constQP.qpInterP = 0;
653 ctx->encode_config.rcParams.constQP.qpIntra = 0;
654}
655
656static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx, int lossless)
657{
658 NvencContext *ctx = avctx->priv_data;
659
660 int qp_inter_p;
661
662 if (avctx->bit_rate > 0) {
663 ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
664 } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
665 ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
666 }
667
668 if (avctx->rc_max_rate > 0)
669 ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
670
671 if (lossless) {
672 if (avctx->codec->id == AV_CODEC_ID_H264)
673 ctx->encode_config.encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag = 1;
674
675 set_lossless(avctx);
676
677 avctx->qmin = -1;
678 avctx->qmax = -1;
679 } else if (ctx->cbr) {
680 if (!ctx->twopass) {
681 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
682 } else {
683 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
684
685 if (avctx->codec->id == AV_CODEC_ID_H264) {
686 ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
687 ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
688 }
689 }
690
691 if (avctx->codec->id == AV_CODEC_ID_H264) {
692 ctx->encode_config.encodeCodecConfig.h264Config.outputBufferingPeriodSEI = 1;
693 ctx->encode_config.encodeCodecConfig.h264Config.outputPictureTimingSEI = 1;
694 } else if (avctx->codec->id == AV_CODEC_ID_H265) {
695 ctx->encode_config.encodeCodecConfig.hevcConfig.outputBufferingPeriodSEI = 1;
696 ctx->encode_config.encodeCodecConfig.hevcConfig.outputPictureTimingSEI = 1;
697 }
698 } else if (avctx->global_quality > 0) {
699 set_constqp(avctx);
700
701 avctx->qmin = -1;
702 avctx->qmax = -1;
703 } else {
704 if (avctx->qmin >= 0 && avctx->qmax >= 0) {
705 set_vbr(avctx);
706
707 qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
708
709 if (ctx->twopass) {
710 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
711 if (avctx->codec->id == AV_CODEC_ID_H264) {
712 ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
713 ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
714 }
715 } else {
716 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR_MINQP;
717 }
718 } else {
719 qp_inter_p = 26; // default to 26
720
721 if (ctx->twopass) {
722 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
723 } else {
724 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
725 }
726 }
727
728 ctx->encode_config.rcParams.enableInitialRCQP = 1;
729 ctx->encode_config.rcParams.initialRCQP.qpInterP = qp_inter_p;
730
731 if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
732 ctx->encode_config.rcParams.initialRCQP.qpIntra = av_clip(
733 qp_inter_p * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
734 ctx->encode_config.rcParams.initialRCQP.qpInterB = av_clip(
735 qp_inter_p * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
736 } else {
737 ctx->encode_config.rcParams.initialRCQP.qpIntra = qp_inter_p;
738 ctx->encode_config.rcParams.initialRCQP.qpInterB = qp_inter_p;
739 }
740 }
741
742 if (avctx->rc_buffer_size > 0) {
743 ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
744 } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
745 ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
746 }
747}
748
749static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx, int lossless)
750{
751 NvencContext *ctx = avctx->priv_data;
752 int res;
753
754 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
755 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
756 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
757 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
Andrey Turkina8cf25d2016-05-20 22:08:06758 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
Andrey Turkin82d705e2016-05-20 14:49:24759
760 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag =
761 (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
762
763 ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag =
764 (ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag
765 || ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFormat != 5
766 || ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag != 0);
767
768 ctx->encode_config.encodeCodecConfig.h264Config.sliceMode = 3;
769 ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData = 1;
770
771 ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
772 ctx->encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
773
774 ctx->encode_config.encodeCodecConfig.h264Config.outputAUD = 1;
775
776 if (!ctx->profile && !lossless) {
777 switch (avctx->profile) {
778 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
779 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
780 break;
781 case FF_PROFILE_H264_BASELINE:
782 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
783 break;
784 case FF_PROFILE_H264_MAIN:
785 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
786 break;
787 case FF_PROFILE_H264_HIGH:
788 case FF_PROFILE_UNKNOWN:
789 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
790 break;
791 default:
792 av_log(avctx, AV_LOG_WARNING, "Unsupported profile requested, falling back to high\n");
793 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
794 break;
795 }
796 } else if (!lossless) {
797 if (!strcmp(ctx->profile, "high")) {
798 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
799 avctx->profile = FF_PROFILE_H264_HIGH;
800 } else if (!strcmp(ctx->profile, "main")) {
801 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
802 avctx->profile = FF_PROFILE_H264_MAIN;
803 } else if (!strcmp(ctx->profile, "baseline")) {
804 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
805 avctx->profile = FF_PROFILE_H264_BASELINE;
806 } else if (!strcmp(ctx->profile, "high444p")) {
807 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
808 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
809 } else {
810 av_log(avctx, AV_LOG_FATAL, "Profile \"%s\" is unknown! Supported profiles: high, main, baseline\n", ctx->profile);
811 return AVERROR(EINVAL);
812 }
813 }
814
815 // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
Andrey Turkina8cf25d2016-05-20 22:08:06816 if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
Andrey Turkin82d705e2016-05-20 14:49:24817 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
818 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
819 }
820
821 ctx->encode_config.encodeCodecConfig.h264Config.chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
822
823 if (ctx->level) {
824 res = input_string_to_uint32(avctx, nvenc_h264_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.h264Config.level);
825
826 if (res) {
827 av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 1b, 1.1, 1.2, 1.3, 2, 2.1, 2.2, 3, 3.1, 3.2, 4, 4.1, 4.2, 5, 5.1\n", ctx->level);
828 return res;
829 }
830 } else {
831 ctx->encode_config.encodeCodecConfig.h264Config.level = NV_ENC_LEVEL_AUTOSELECT;
832 }
833
834 return 0;
835}
836
837static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
838{
839 NvencContext *ctx = avctx->priv_data;
840 int res;
841
842 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix = avctx->colorspace;
843 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries = avctx->color_primaries;
844 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics = avctx->color_trc;
845 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
Andrey Turkina8cf25d2016-05-20 22:08:06846 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
Andrey Turkin82d705e2016-05-20 14:49:24847
848 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag =
849 (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);
850
851 ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag =
852 (ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag
853 || ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat != 5
854 || ctx->encode_config.encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag != 0);
855
856 ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode = 3;
857 ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData = 1;
858
859 ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
860 ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
861
862 ctx->encode_config.encodeCodecConfig.hevcConfig.outputAUD = 1;
863
864 /* No other profile is supported in the current SDK version 5 */
865 ctx->encode_config.profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
866 avctx->profile = FF_PROFILE_HEVC_MAIN;
867
868 if (ctx->level) {
869 res = input_string_to_uint32(avctx, nvenc_hevc_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.hevcConfig.level);
870
871 if (res) {
872 av_log(avctx, AV_LOG_FATAL, "Level \"%s\" is unknown! Supported levels: auto, 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2\n", ctx->level);
873 return res;
874 }
875 } else {
876 ctx->encode_config.encodeCodecConfig.hevcConfig.level = NV_ENC_LEVEL_AUTOSELECT;
877 }
878
879 if (ctx->tier) {
880 if (!strcmp(ctx->tier, "main")) {
881 ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_MAIN;
882 } else if (!strcmp(ctx->tier, "high")) {
883 ctx->encode_config.encodeCodecConfig.hevcConfig.tier = NV_ENC_TIER_HEVC_HIGH;
884 } else {
885 av_log(avctx, AV_LOG_FATAL, "Tier \"%s\" is unknown! Supported tiers: main, high\n", ctx->tier);
886 return AVERROR(EINVAL);
887 }
888 }
889
890 return 0;
891}
892
893static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx, int lossless)
894{
895 switch (avctx->codec->id) {
896 case AV_CODEC_ID_H264:
897 return nvenc_setup_h264_config(avctx, lossless);
898 case AV_CODEC_ID_H265:
899 return nvenc_setup_hevc_config(avctx);
900 /* Earlier switch/case will return if unknown codec is passed. */
901 }
902
903 return 0;
904}
905
906static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
907{
908 NvencContext *ctx = avctx->priv_data;
909 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
910 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
911
912 NV_ENC_PRESET_CONFIG preset_config = { 0 };
913 GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
914 GUID codec;
915 NVENCSTATUS nv_status = NV_ENC_SUCCESS;
916 AVCPBProperties *cpb_props;
917 int num_mbs;
918 int isLL = 0;
919 int lossless = 0;
920 int res = 0;
921 int dw, dh;
922
923 ctx->last_dts = AV_NOPTS_VALUE;
924
925 ctx->encode_config.version = NV_ENC_CONFIG_VER;
926 ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
927 preset_config.version = NV_ENC_PRESET_CONFIG_VER;
928 preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
929
Timo Rothenpieler2a428db2014-11-29 23:04:37930 if (ctx->preset) {
Agatha Hu49046582015-09-11 09:07:10931 if (!strcmp(ctx->preset, "slow")) {
932 encoder_preset = NV_ENC_PRESET_HQ_GUID;
933 ctx->twopass = 1;
934 } else if (!strcmp(ctx->preset, "medium")) {
935 encoder_preset = NV_ENC_PRESET_HQ_GUID;
936 ctx->twopass = 0;
937 } else if (!strcmp(ctx->preset, "fast")) {
Timo Rothenpieler2a428db2014-11-29 23:04:37938 encoder_preset = NV_ENC_PRESET_HP_GUID;
Agatha Hu49046582015-09-11 09:07:10939 ctx->twopass = 0;
Timo Rothenpieler2a428db2014-11-29 23:04:37940 } else if (!strcmp(ctx->preset, "hq")) {
941 encoder_preset = NV_ENC_PRESET_HQ_GUID;
Agatha Hu49046582015-09-11 09:07:10942 } else if (!strcmp(ctx->preset, "hp")) {
943 encoder_preset = NV_ENC_PRESET_HP_GUID;
Timo Rothenpieler2a428db2014-11-29 23:04:37944 } else if (!strcmp(ctx->preset, "bd")) {
945 encoder_preset = NV_ENC_PRESET_BD_GUID;
946 } else if (!strcmp(ctx->preset, "ll")) {
947 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
948 isLL = 1;
949 } else if (!strcmp(ctx->preset, "llhp")) {
950 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
951 isLL = 1;
952 } else if (!strcmp(ctx->preset, "llhq")) {
953 encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
954 isLL = 1;
Philip Langdale671bdd42015-07-02 04:09:57955 } else if (!strcmp(ctx->preset, "lossless")) {
956 encoder_preset = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
957 lossless = 1;
958 } else if (!strcmp(ctx->preset, "losslesshp")) {
959 encoder_preset = NV_ENC_PRESET_LOSSLESS_HP_GUID;
960 lossless = 1;
Timo Rothenpieler2a428db2014-11-29 23:04:37961 } else if (!strcmp(ctx->preset, "default")) {
962 encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
963 } else {
Timo Rothenpielerf2bdf9d2016-03-08 10:18:16964 av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: slow, medium, fast, hp, hq, bd, ll, llhp, llhq, lossless, losslesshp, default\n", ctx->preset);
Andrey Turkin82d705e2016-05-20 14:49:24965 return AVERROR(EINVAL);
Timo Rothenpieler2a428db2014-11-29 23:04:37966 }
967 }
968
Agatha Hu49046582015-09-11 09:07:10969 if (ctx->twopass < 0) {
970 ctx->twopass = isLL;
971 }
972
Philip Langdale21175d82015-03-24 04:34:59973 switch (avctx->codec->id) {
974 case AV_CODEC_ID_H264:
975 codec = NV_ENC_CODEC_H264_GUID;
976 break;
977 case AV_CODEC_ID_H265:
978 codec = NV_ENC_CODEC_HEVC_GUID;
979 break;
980 default:
Agatha Hu49046582015-09-11 09:07:10981 av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
Andrey Turkin82d705e2016-05-20 14:49:24982 return AVERROR(EINVAL);
Philip Langdale21175d82015-03-24 04:34:59983 }
984
985 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, codec, encoder_preset, &preset_config);
Timo Rothenpieler2a428db2014-11-29 23:04:37986 if (nv_status != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:00987 return nvenc_print_error(avctx, nv_status, "GetEncodePresetConfig failed");
Timo Rothenpieler2a428db2014-11-29 23:04:37988 }
989
Philip Langdale21175d82015-03-24 04:34:59990 ctx->init_encode_params.encodeGUID = codec;
Timo Rothenpieler2a428db2014-11-29 23:04:37991 ctx->init_encode_params.encodeHeight = avctx->height;
992 ctx->init_encode_params.encodeWidth = avctx->width;
Timo Rothenpielerfb34c582015-01-26 12:28:22993
994 if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den &&
995 (avctx->sample_aspect_ratio.num != 1 || avctx->sample_aspect_ratio.num != 1)) {
996 av_reduce(&dw, &dh,
997 avctx->width * avctx->sample_aspect_ratio.num,
998 avctx->height * avctx->sample_aspect_ratio.den,
999 1024 * 1024);
1000 ctx->init_encode_params.darHeight = dh;
1001 ctx->init_encode_params.darWidth = dw;
1002 } else {
1003 ctx->init_encode_params.darHeight = avctx->height;
1004 ctx->init_encode_params.darWidth = avctx->width;
1005 }
1006
Philip Langdaled20df262015-01-28 17:05:531007 // De-compensate for hardware, dubiously, trying to compensate for
1008 // playback at 704 pixel width.
1009 if (avctx->width == 720 &&
1010 (avctx->height == 480 || avctx->height == 576)) {
1011 av_reduce(&dw, &dh,
1012 ctx->init_encode_params.darWidth * 44,
1013 ctx->init_encode_params.darHeight * 45,
Philip Langdale7ae805d2015-05-27 01:35:151014 1024 * 1024);
Philip Langdaled20df262015-01-28 17:05:531015 ctx->init_encode_params.darHeight = dh;
1016 ctx->init_encode_params.darWidth = dw;
1017 }
1018
Timo Rothenpieler2a428db2014-11-29 23:04:371019 ctx->init_encode_params.frameRateNum = avctx->time_base.den;
1020 ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
1021
1022 num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
1023 ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;
1024
Timo Rothenpieler9f4bff82015-07-25 21:20:281025 if (ctx->buffer_delay >= ctx->max_surface_count)
1026 ctx->buffer_delay = ctx->max_surface_count - 1;
1027
Timo Rothenpieler2a428db2014-11-29 23:04:371028 ctx->init_encode_params.enableEncodeAsync = 0;
1029 ctx->init_encode_params.enablePTD = 1;
1030
1031 ctx->init_encode_params.presetGUID = encoder_preset;
1032
1033 ctx->init_encode_params.encodeConfig = &ctx->encode_config;
1034 memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
1035 ctx->encode_config.version = NV_ENC_CONFIG_VER;
1036
Philip Langdaleff0c5592015-01-24 20:52:581037 if (avctx->refs >= 0) {
1038 /* 0 means "let the hardware decide" */
Philip Langdale21175d82015-03-24 04:34:591039 switch (avctx->codec->id) {
1040 case AV_CODEC_ID_H264:
1041 ctx->encode_config.encodeCodecConfig.h264Config.maxNumRefFrames = avctx->refs;
1042 break;
1043 case AV_CODEC_ID_H265:
1044 ctx->encode_config.encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB = avctx->refs;
1045 break;
1046 /* Earlier switch/case will return if unknown codec is passed. */
1047 }
Philip Langdaleff0c5592015-01-24 20:52:581048 }
1049
Timo Rothenpieler914fd422015-01-26 12:28:211050 if (avctx->gop_size > 0) {
1051 if (avctx->max_b_frames >= 0) {
1052 /* 0 is intra-only, 1 is I/P only, 2 is one B Frame, 3 two B frames, and so on. */
1053 ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
1054 }
1055
Timo Rothenpieler2a428db2014-11-29 23:04:371056 ctx->encode_config.gopLength = avctx->gop_size;
Philip Langdale21175d82015-03-24 04:34:591057 switch (avctx->codec->id) {
1058 case AV_CODEC_ID_H264:
1059 ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
Philip Langdale21175d82015-03-24 04:34:591060 break;
1061 case AV_CODEC_ID_H265:
1062 ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = avctx->gop_size;
1063 break;
1064 /* Earlier switch/case will return if unknown codec is passed. */
1065 }
Timo Rothenpieler914fd422015-01-26 12:28:211066 } else if (avctx->gop_size == 0) {
1067 ctx->encode_config.frameIntervalP = 0;
1068 ctx->encode_config.gopLength = 1;
Philip Langdale21175d82015-03-24 04:34:591069 switch (avctx->codec->id) {
1070 case AV_CODEC_ID_H264:
1071 ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = 1;
1072 break;
1073 case AV_CODEC_ID_H265:
1074 ctx->encode_config.encodeCodecConfig.hevcConfig.idrPeriod = 1;
1075 break;
1076 /* Earlier switch/case will return if unknown codec is passed. */
1077 }
Timo Rothenpieler2a428db2014-11-29 23:04:371078 }
1079
Timo Rothenpieler914fd422015-01-26 12:28:211080 /* when there're b frames, set dts offset */
1081 if (ctx->encode_config.frameIntervalP >= 2)
1082 ctx->last_dts = -2;
1083
Andrey Turkin82d705e2016-05-20 14:49:241084 nvenc_setup_rate_control(avctx, lossless);
Timo Rothenpieler2a428db2014-11-29 23:04:371085
Vittorio Giovara7c6eb0a2015-06-29 19:59:371086 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
Timo Rothenpieler2a428db2014-11-29 23:04:371087 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
1088 } else {
1089 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
1090 }
1091
Andrey Turkin82d705e2016-05-20 14:49:241092 res = nvenc_setup_codec_config(avctx, lossless);
1093 if (res)
1094 return res;
Timo Rothenpieler2a428db2014-11-29 23:04:371095
1096 nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
1097 if (nv_status != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:001098 return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
Timo Rothenpieler2a428db2014-11-29 23:04:371099 }
1100
1101 if (ctx->encode_config.frameIntervalP > 1)
1102 avctx->has_b_frames = 2;
1103
1104 if (ctx->encode_config.rcParams.averageBitRate > 0)
1105 avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
1106
Anton Khirnov1520c6f2015-10-03 13:19:101107 cpb_props = ff_add_cpb_side_data(avctx);
1108 if (!cpb_props)
1109 return AVERROR(ENOMEM);
Hendrik Leppkes5fc17ed2015-12-17 12:41:291110 cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
Anton Khirnov1520c6f2015-10-03 13:19:101111 cpb_props->avg_bitrate = avctx->bit_rate;
Hendrik Leppkes5fc17ed2015-12-17 12:41:291112 cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
Anton Khirnov1520c6f2015-10-03 13:19:101113
Timo Rothenpieler2a428db2014-11-29 23:04:371114 return 0;
Andrey Turkin82d705e2016-05-20 14:49:241115}
1116
1117static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
1118{
1119 NvencContext *ctx = avctx->priv_data;
1120 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1121 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1122
1123 NVENCSTATUS nv_status;
Andrey Turkin82d705e2016-05-20 14:49:241124 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
Andrey Turkin82d705e2016-05-20 14:49:241125 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
1126
Andrey Turkina8cf25d2016-05-20 22:08:061127 switch (ctx->data_pix_fmt) {
Andrey Turkin82d705e2016-05-20 14:49:241128 case AV_PIX_FMT_YUV420P:
Andrey Turkina8cf25d2016-05-20 22:08:061129 ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
Andrey Turkin82d705e2016-05-20 14:49:241130 break;
1131
1132 case AV_PIX_FMT_NV12:
Andrey Turkina8cf25d2016-05-20 22:08:061133 ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
Andrey Turkin82d705e2016-05-20 14:49:241134 break;
1135
1136 case AV_PIX_FMT_YUV444P:
Andrey Turkina8cf25d2016-05-20 22:08:061137 ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
Andrey Turkin82d705e2016-05-20 14:49:241138 break;
1139
1140 default:
1141 av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
1142 return AVERROR(EINVAL);
1143 }
1144
Andrey Turkina8cf25d2016-05-20 22:08:061145 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1146 ctx->surfaces[idx].in_ref = av_frame_alloc();
1147 if (!ctx->surfaces[idx].in_ref)
1148 return AVERROR(ENOMEM);
1149 } else {
1150 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
1151 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
1152 allocSurf.width = (avctx->width + 31) & ~31;
1153 allocSurf.height = (avctx->height + 31) & ~31;
1154 allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
1155 allocSurf.bufferFmt = ctx->surfaces[idx].format;
1156
1157 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
1158 if (nv_status != NV_ENC_SUCCESS) {
1159 return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
1160 }
1161
1162 ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
1163 ctx->surfaces[idx].width = allocSurf.width;
1164 ctx->surfaces[idx].height = allocSurf.height;
Andrey Turkin82d705e2016-05-20 14:49:241165 }
1166
Andrey Turkine1691c42016-05-20 15:37:001167 ctx->surfaces[idx].lockCount = 0;
Andrey Turkin82d705e2016-05-20 14:49:241168
1169 /* 1MB is large enough to hold most output frames. NVENC increases this automaticaly if it's not enough. */
1170 allocOut.size = 1024 * 1024;
1171
1172 allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
1173
1174 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
1175 if (nv_status != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:001176 int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
Andrey Turkina8cf25d2016-05-20 22:08:061177 if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
1178 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
1179 av_frame_free(&ctx->surfaces[idx].in_ref);
Andrey Turkine1691c42016-05-20 15:37:001180 return err;
Andrey Turkin82d705e2016-05-20 14:49:241181 }
1182
Andrey Turkine1691c42016-05-20 15:37:001183 ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
1184 ctx->surfaces[idx].size = allocOut.size;
Andrey Turkin82d705e2016-05-20 14:49:241185
1186 return 0;
1187}
1188
1189static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx, int* surfaceCount)
1190{
1191 int res;
1192 NvencContext *ctx = avctx->priv_data;
1193
Andrey Turkine1691c42016-05-20 15:37:001194 ctx->surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->surfaces));
Andrey Turkin82d705e2016-05-20 14:49:241195
Andrey Turkine1691c42016-05-20 15:37:001196 if (!ctx->surfaces) {
Andrey Turkin82d705e2016-05-20 14:49:241197 return AVERROR(ENOMEM);
1198 }
1199
Andrey Turkincfb49fc2016-05-20 16:13:201200 ctx->timestamp_list = av_fifo_alloc(ctx->max_surface_count * sizeof(int64_t));
1201 if (!ctx->timestamp_list)
1202 return AVERROR(ENOMEM);
1203 ctx->output_surface_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
1204 if (!ctx->output_surface_queue)
1205 return AVERROR(ENOMEM);
1206 ctx->output_surface_ready_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*));
1207 if (!ctx->output_surface_ready_queue)
1208 return AVERROR(ENOMEM);
1209
Andrey Turkin82d705e2016-05-20 14:49:241210 for (*surfaceCount = 0; *surfaceCount < ctx->max_surface_count; ++*surfaceCount) {
1211 res = nvenc_alloc_surface(avctx, *surfaceCount);
1212 if (res)
1213 return res;
1214 }
1215
1216 return 0;
1217}
1218
1219static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
1220{
1221 NvencContext *ctx = avctx->priv_data;
1222 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1223 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1224
1225 NVENCSTATUS nv_status;
1226 uint32_t outSize = 0;
1227 char tmpHeader[256];
1228 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
1229 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1230
1231 payload.spsppsBuffer = tmpHeader;
1232 payload.inBufferSize = sizeof(tmpHeader);
1233 payload.outSPSPPSPayloadSize = &outSize;
1234
1235 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
1236 if (nv_status != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:001237 return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
Andrey Turkin82d705e2016-05-20 14:49:241238 }
1239
1240 avctx->extradata_size = outSize;
1241 avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
1242
1243 if (!avctx->extradata) {
1244 return AVERROR(ENOMEM);
1245 }
1246
1247 memcpy(avctx->extradata, tmpHeader, outSize);
1248
1249 return 0;
1250}
1251
1252static av_cold int nvenc_encode_init(AVCodecContext *avctx)
1253{
1254 NvencContext *ctx = avctx->priv_data;
1255 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1256 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1257
1258 int res;
1259 int i;
1260 int surfaceCount = 0;
1261
1262 if (!nvenc_dyload_nvenc(avctx))
1263 return AVERROR_EXTERNAL;
1264
1265 res = nvenc_setup_device(avctx);
1266 if (res)
1267 goto error;
1268
1269 res = nvenc_open_session(avctx);
1270 if (res)
1271 goto error;
1272
1273 res = nvenc_setup_encoder(avctx);
1274 if (res)
1275 goto error;
1276
1277 res = nvenc_setup_surfaces(avctx, &surfaceCount);
1278 if (res)
1279 goto error;
1280
1281 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1282 res = nvenc_setup_extradata(avctx);
1283 if (res)
1284 goto error;
1285 }
1286
1287 return 0;
Timo Rothenpieler2a428db2014-11-29 23:04:371288
1289error:
Andrey Turkincfb49fc2016-05-20 16:13:201290 av_fifo_freep(&ctx->timestamp_list);
1291 av_fifo_freep(&ctx->output_surface_ready_queue);
1292 av_fifo_freep(&ctx->output_surface_queue);
Timo Rothenpieler2a428db2014-11-29 23:04:371293
1294 for (i = 0; i < surfaceCount; ++i) {
Andrey Turkina8cf25d2016-05-20 22:08:061295 if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
1296 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
1297 av_frame_free(&ctx->surfaces[i].in_ref);
Andrey Turkine1691c42016-05-20 15:37:001298 p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
Timo Rothenpieler2a428db2014-11-29 23:04:371299 }
Andrey Turkincfb49fc2016-05-20 16:13:201300 av_freep(&ctx->surfaces);
Timo Rothenpieler2a428db2014-11-29 23:04:371301
1302 if (ctx->nvencoder)
1303 p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
Andrey Turkina8cf25d2016-05-20 22:08:061304 ctx->nvencoder = NULL;
Timo Rothenpieler2a428db2014-11-29 23:04:371305
Andrey Turkina8cf25d2016-05-20 22:08:061306 if (ctx->cu_context_internal)
1307 dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
1308 ctx->cu_context = ctx->cu_context_internal = NULL;
Timo Rothenpieler2a428db2014-11-29 23:04:371309
Timo Rothenpieler2a428db2014-11-29 23:04:371310 nvenc_unload_nvenc(avctx);
1311
Timo Rothenpieler2a428db2014-11-29 23:04:371312 return res;
1313}
1314
1315static av_cold int nvenc_encode_close(AVCodecContext *avctx)
1316{
1317 NvencContext *ctx = avctx->priv_data;
1318 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1319 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1320 int i;
1321
Andrey Turkincfb49fc2016-05-20 16:13:201322 av_fifo_freep(&ctx->timestamp_list);
1323 av_fifo_freep(&ctx->output_surface_ready_queue);
1324 av_fifo_freep(&ctx->output_surface_queue);
Timo Rothenpieler2a428db2014-11-29 23:04:371325
Andrey Turkina8cf25d2016-05-20 22:08:061326 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1327 for (i = 0; i < ctx->max_surface_count; ++i) {
1328 if (ctx->surfaces[i].input_surface) {
1329 p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource);
1330 }
1331 }
1332 for (i = 0; i < ctx->nb_registered_frames; i++) {
1333 if (ctx->registered_frames[i].regptr)
1334 p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
1335 }
1336 ctx->nb_registered_frames = 0;
1337 }
1338
Timo Rothenpieler2a428db2014-11-29 23:04:371339 for (i = 0; i < ctx->max_surface_count; ++i) {
Andrey Turkina8cf25d2016-05-20 22:08:061340 if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
1341 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
1342 av_frame_free(&ctx->surfaces[i].in_ref);
Andrey Turkine1691c42016-05-20 15:37:001343 p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
Timo Rothenpieler2a428db2014-11-29 23:04:371344 }
Andrey Turkincfb49fc2016-05-20 16:13:201345 av_freep(&ctx->surfaces);
Timo Rothenpieler2a428db2014-11-29 23:04:371346 ctx->max_surface_count = 0;
1347
1348 p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
1349 ctx->nvencoder = NULL;
1350
Andrey Turkina8cf25d2016-05-20 22:08:061351 if (ctx->cu_context_internal)
1352 dl_fn->cu_ctx_destroy(ctx->cu_context_internal);
1353 ctx->cu_context = ctx->cu_context_internal = NULL;
Timo Rothenpieler2a428db2014-11-29 23:04:371354
1355 nvenc_unload_nvenc(avctx);
1356
Timo Rothenpieler2a428db2014-11-29 23:04:371357 return 0;
1358}
1359
Andrey Turkine1691c42016-05-20 15:37:001360static NvencSurface *get_free_frame(NvencContext *ctx)
Andrey Turkin82d705e2016-05-20 14:49:241361{
1362 int i;
1363
1364 for (i = 0; i < ctx->max_surface_count; ++i) {
Andrey Turkine1691c42016-05-20 15:37:001365 if (!ctx->surfaces[i].lockCount) {
1366 ctx->surfaces[i].lockCount = 1;
1367 return &ctx->surfaces[i];
Andrey Turkin82d705e2016-05-20 14:49:241368 }
1369 }
1370
1371 return NULL;
1372}
1373
Andrey Turkine1691c42016-05-20 15:37:001374static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
Andrey Turkin82d705e2016-05-20 14:49:241375 NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
1376{
1377 uint8_t *buf = lockBufferParams->bufferDataPtr;
1378 int off = inSurf->height * lockBufferParams->pitch;
1379
Andrey Turkina8cf25d2016-05-20 22:08:061380 if (frame->format == AV_PIX_FMT_YUV420P) {
Andrey Turkin82d705e2016-05-20 14:49:241381 av_image_copy_plane(buf, lockBufferParams->pitch,
1382 frame->data[0], frame->linesize[0],
1383 avctx->width, avctx->height);
1384
1385 buf += off;
1386
1387 av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
1388 frame->data[2], frame->linesize[2],
1389 avctx->width >> 1, avctx->height >> 1);
1390
1391 buf += off >> 2;
1392
1393 av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
1394 frame->data[1], frame->linesize[1],
1395 avctx->width >> 1, avctx->height >> 1);
Andrey Turkina8cf25d2016-05-20 22:08:061396 } else if (frame->format == AV_PIX_FMT_NV12) {
Andrey Turkin82d705e2016-05-20 14:49:241397 av_image_copy_plane(buf, lockBufferParams->pitch,
1398 frame->data[0], frame->linesize[0],
1399 avctx->width, avctx->height);
1400
1401 buf += off;
1402
1403 av_image_copy_plane(buf, lockBufferParams->pitch,
1404 frame->data[1], frame->linesize[1],
1405 avctx->width, avctx->height >> 1);
Andrey Turkina8cf25d2016-05-20 22:08:061406 } else if (frame->format == AV_PIX_FMT_YUV444P) {
Andrey Turkin82d705e2016-05-20 14:49:241407 av_image_copy_plane(buf, lockBufferParams->pitch,
1408 frame->data[0], frame->linesize[0],
1409 avctx->width, avctx->height);
1410
1411 buf += off;
1412
1413 av_image_copy_plane(buf, lockBufferParams->pitch,
1414 frame->data[1], frame->linesize[1],
1415 avctx->width, avctx->height);
1416
1417 buf += off;
1418
1419 av_image_copy_plane(buf, lockBufferParams->pitch,
1420 frame->data[2], frame->linesize[2],
1421 avctx->width, avctx->height);
1422 } else {
1423 av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
1424 return AVERROR(EINVAL);
1425 }
1426
1427 return 0;
1428}
1429
Andrey Turkina8cf25d2016-05-20 22:08:061430static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
1431{
1432 NvencContext *ctx = avctx->priv_data;
1433 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1434 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1435
1436 int i;
1437
1438 if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
1439 for (i = 0; i < ctx->nb_registered_frames; i++) {
1440 if (!ctx->registered_frames[i].mapped) {
1441 if (ctx->registered_frames[i].regptr) {
1442 p_nvenc->nvEncUnregisterResource(ctx->nvencoder,
1443 ctx->registered_frames[i].regptr);
1444 ctx->registered_frames[i].regptr = NULL;
1445 }
1446 return i;
1447 }
1448 }
1449 } else {
1450 return ctx->nb_registered_frames++;
1451 }
1452
1453 av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
1454 return AVERROR(ENOMEM);
1455}
1456
1457static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
1458{
1459 NvencContext *ctx = avctx->priv_data;
1460 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1461 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1462
1463 AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1464 NV_ENC_REGISTER_RESOURCE reg;
1465 int i, idx, ret;
1466
1467 for (i = 0; i < ctx->nb_registered_frames; i++) {
1468 if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
1469 return i;
1470 }
1471
1472 idx = nvenc_find_free_reg_resource(avctx);
1473 if (idx < 0)
1474 return idx;
1475
1476 reg.version = NV_ENC_REGISTER_RESOURCE_VER;
1477 reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
1478 reg.width = frames_ctx->width;
1479 reg.height = frames_ctx->height;
1480 reg.bufferFormat = ctx->surfaces[0].format;
1481 reg.pitch = frame->linesize[0];
1482 reg.resourceToRegister = frame->data[0];
1483
1484 ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
1485 if (ret != NV_ENC_SUCCESS) {
1486 nvenc_print_error(avctx, ret, "Error registering an input resource");
1487 return AVERROR_UNKNOWN;
1488 }
1489
1490 ctx->registered_frames[idx].ptr = (CUdeviceptr)frame->data[0];
1491 ctx->registered_frames[idx].regptr = reg.registeredResource;
1492 return idx;
1493}
1494
Andrey Turkin82d705e2016-05-20 14:49:241495static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
Andrey Turkine1691c42016-05-20 15:37:001496 NvencSurface *nvenc_frame)
Andrey Turkin82d705e2016-05-20 14:49:241497{
1498 NvencContext *ctx = avctx->priv_data;
1499 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1500 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1501
1502 int res;
1503 NVENCSTATUS nv_status;
Andrey Turkin82d705e2016-05-20 14:49:241504
Andrey Turkina8cf25d2016-05-20 22:08:061505 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1506 int reg_idx = nvenc_register_frame(avctx, frame);
1507 if (reg_idx < 0) {
1508 av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
1509 return reg_idx;
1510 }
Andrey Turkin82d705e2016-05-20 14:49:241511
Andrey Turkina8cf25d2016-05-20 22:08:061512 res = av_frame_ref(nvenc_frame->in_ref, frame);
1513 if (res < 0)
1514 return res;
1515
1516 nvenc_frame->in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
1517 nvenc_frame->in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
1518 nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &nvenc_frame->in_map);
1519 if (nv_status != NV_ENC_SUCCESS) {
1520 av_frame_unref(nvenc_frame->in_ref);
1521 return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
1522 }
1523
1524 ctx->registered_frames[reg_idx].mapped = 1;
1525 nvenc_frame->reg_idx = reg_idx;
1526 nvenc_frame->input_surface = nvenc_frame->in_map.mappedResource;
1527 return 0;
1528 } else {
1529 NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1530
1531 lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1532 lockBufferParams.inputBuffer = nvenc_frame->input_surface;
1533
1534 nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
1535 if (nv_status != NV_ENC_SUCCESS) {
1536 return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
1537 }
1538
1539 res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);
1540
1541 nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
1542 if (nv_status != NV_ENC_SUCCESS) {
1543 return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
1544 }
1545
1546 return res;
Andrey Turkin82d705e2016-05-20 14:49:241547 }
Andrey Turkin82d705e2016-05-20 14:49:241548}
1549
1550static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
1551 NV_ENC_PIC_PARAMS *params)
1552{
1553 NvencContext *ctx = avctx->priv_data;
1554
1555 switch (avctx->codec->id) {
1556 case AV_CODEC_ID_H264:
1557 params->codecPicParams.h264PicParams.sliceMode = ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
1558 params->codecPicParams.h264PicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1559 break;
1560 case AV_CODEC_ID_H265:
1561 params->codecPicParams.hevcPicParams.sliceMode = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
1562 params->codecPicParams.hevcPicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
1563 break;
1564 }
1565}
1566
Andrey Turkine1691c42016-05-20 15:37:001567static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
Timo Rothenpieler2a428db2014-11-29 23:04:371568{
1569 NvencContext *ctx = avctx->priv_data;
1570 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1571 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1572
Philip Langdale21175d82015-03-24 04:34:591573 uint32_t slice_mode_data;
1574 uint32_t *slice_offsets;
Timo Rothenpieler2a428db2014-11-29 23:04:371575 NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
1576 NVENCSTATUS nv_status;
1577 int res = 0;
1578
Lucas Cooperfd554702016-03-07 23:47:561579 enum AVPictureType pict_type;
1580
Philip Langdale21175d82015-03-24 04:34:591581 switch (avctx->codec->id) {
1582 case AV_CODEC_ID_H264:
1583 slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1584 break;
1585 case AV_CODEC_ID_H265:
1586 slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
1587 break;
1588 default:
Agatha Hu49046582015-09-11 09:07:101589 av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
Philip Langdale21175d82015-03-24 04:34:591590 res = AVERROR(EINVAL);
1591 goto error;
1592 }
1593 slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
1594
Timo Rothenpieler2a428db2014-11-29 23:04:371595 if (!slice_offsets)
1596 return AVERROR(ENOMEM);
1597
1598 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
1599
1600 lock_params.doNotWait = 0;
1601 lock_params.outputBitstream = tmpoutsurf->output_surface;
1602 lock_params.sliceOffsets = slice_offsets;
1603
1604 nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
1605 if (nv_status != NV_ENC_SUCCESS) {
Andrey Turkine1691c42016-05-20 15:37:001606 res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
Timo Rothenpieler2a428db2014-11-29 23:04:371607 goto error;
1608 }
1609
Agatha Hu49046582015-09-11 09:07:101610 if (res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes,0)) {
Timo Rothenpieler2a428db2014-11-29 23:04:371611 p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
1612 goto error;
1613 }
1614
1615 memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
1616
1617 nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
1618 if (nv_status != NV_ENC_SUCCESS)
Andrey Turkine1691c42016-05-20 15:37:001619 nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
Timo Rothenpieler2a428db2014-11-29 23:04:371620
Andrey Turkina8cf25d2016-05-20 22:08:061621
1622 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1623 p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, tmpoutsurf->in_map.mappedResource);
1624 av_frame_unref(tmpoutsurf->in_ref);
1625 ctx->registered_frames[tmpoutsurf->reg_idx].mapped = 0;
1626
1627 tmpoutsurf->input_surface = NULL;
1628 }
1629
Timo Rothenpieler2a428db2014-11-29 23:04:371630 switch (lock_params.pictureType) {
1631 case NV_ENC_PIC_TYPE_IDR:
1632 pkt->flags |= AV_PKT_FLAG_KEY;
Timo Rothenpieler2a428db2014-11-29 23:04:371633 case NV_ENC_PIC_TYPE_I:
Lucas Cooperfd554702016-03-07 23:47:561634 pict_type = AV_PICTURE_TYPE_I;
Timo Rothenpieler2a428db2014-11-29 23:04:371635 break;
1636 case NV_ENC_PIC_TYPE_P:
Lucas Cooperfd554702016-03-07 23:47:561637 pict_type = AV_PICTURE_TYPE_P;
Timo Rothenpieler2a428db2014-11-29 23:04:371638 break;
1639 case NV_ENC_PIC_TYPE_B:
Lucas Cooperfd554702016-03-07 23:47:561640 pict_type = AV_PICTURE_TYPE_B;
Timo Rothenpieler2a428db2014-11-29 23:04:371641 break;
1642 case NV_ENC_PIC_TYPE_BI:
Lucas Cooperfd554702016-03-07 23:47:561643 pict_type = AV_PICTURE_TYPE_BI;
Timo Rothenpieler2a428db2014-11-29 23:04:371644 break;
1645 default:
1646 av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
1647 av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
1648 res = AVERROR_EXTERNAL;
1649 goto error;
Lucas Cooperfd554702016-03-07 23:47:561650 }
1651
1652#if FF_API_CODED_FRAME
1653FF_DISABLE_DEPRECATION_WARNINGS
1654 avctx->coded_frame->pict_type = pict_type;
Vittorio Giovara40cf1bb2015-07-15 17:41:221655FF_ENABLE_DEPRECATION_WARNINGS
1656#endif
Lucas Cooperfd554702016-03-07 23:47:561657
1658 ff_side_data_set_encoder_stats(pkt,
1659 (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
Timo Rothenpieler2a428db2014-11-29 23:04:371660
1661 pkt->pts = lock_params.outputTimeStamp;
Andrey Turkincfb49fc2016-05-20 16:13:201662 pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);
Timo Rothenpieler2a428db2014-11-29 23:04:371663
Timo Rothenpieler914fd422015-01-26 12:28:211664 /* when there're b frame(s), set dts offset */
agathah72c61c22015-01-07 09:19:321665 if (ctx->encode_config.frameIntervalP >= 2)
1666 pkt->dts -= 1;
1667
Timo Rothenpieler2a428db2014-11-29 23:04:371668 if (pkt->dts > pkt->pts)
1669 pkt->dts = pkt->pts;
1670
1671 if (ctx->last_dts != AV_NOPTS_VALUE && pkt->dts <= ctx->last_dts)
1672 pkt->dts = ctx->last_dts + 1;
1673
1674 ctx->last_dts = pkt->dts;
1675
1676 av_free(slice_offsets);
1677
1678 return 0;
1679
1680error:
1681
1682 av_free(slice_offsets);
Andrey Turkincfb49fc2016-05-20 16:13:201683 timestamp_queue_dequeue(ctx->timestamp_list);
Timo Rothenpieler2a428db2014-11-29 23:04:371684
1685 return res;
1686}
1687
Andrey Turkincfb49fc2016-05-20 16:13:201688static int output_ready(NvencContext *ctx, int flush)
1689{
1690 int nb_ready, nb_pending;
1691
1692 nb_ready = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*);
1693 nb_pending = av_fifo_size(ctx->output_surface_queue) / sizeof(NvencSurface*);
1694 return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->buffer_delay);
1695}
1696
Timo Rothenpieler2a428db2014-11-29 23:04:371697static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1698 const AVFrame *frame, int *got_packet)
1699{
1700 NVENCSTATUS nv_status;
Andrey Turkine1691c42016-05-20 15:37:001701 NvencSurface *tmpoutsurf, *inSurf;
1702 int res;
Timo Rothenpieler2a428db2014-11-29 23:04:371703
1704 NvencContext *ctx = avctx->priv_data;
1705 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1706 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1707
1708 NV_ENC_PIC_PARAMS pic_params = { 0 };
1709 pic_params.version = NV_ENC_PIC_PARAMS_VER;
1710
1711 if (frame) {
Andrey Turkin82d705e2016-05-20 14:49:241712 inSurf = get_free_frame(ctx);
Timo Rothenpieler2a428db2014-11-29 23:04:371713 av_assert0(inSurf);
1714
Andrey Turkin82d705e2016-05-20 14:49:241715 res = nvenc_upload_frame(avctx, frame, inSurf);
1716 if (res) {
1717 inSurf->lockCount = 0;
1718 return res;
Timo Rothenpieler2a428db2014-11-29 23:04:371719 }
1720
Timo Rothenpieler2a428db2014-11-29 23:04:371721 pic_params.inputBuffer = inSurf->input_surface;
1722 pic_params.bufferFmt = inSurf->format;
1723 pic_params.inputWidth = avctx->width;
1724 pic_params.inputHeight = avctx->height;
Andrey Turkine1691c42016-05-20 15:37:001725 pic_params.outputBitstream = inSurf->output_surface;
Timo Rothenpieler2a428db2014-11-29 23:04:371726 pic_params.completionEvent = 0;
1727
Michael Niedermayer94d68a42015-07-27 19:14:311728 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
Timo Rothenpieler2a428db2014-11-29 23:04:371729 if (frame->top_field_first) {
1730 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1731 } else {
1732 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1733 }
1734 } else {
1735 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1736 }
1737
1738 pic_params.encodePicFlags = 0;
1739 pic_params.inputTimeStamp = frame->pts;
1740 pic_params.inputDuration = 0;
Andrey Turkin82d705e2016-05-20 14:49:241741
1742 nvenc_codec_specific_pic_params(avctx, &pic_params);
Timo Rothenpielerbc3f7672015-01-16 00:02:401743
Andrey Turkincfb49fc2016-05-20 16:13:201744 timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
Timo Rothenpieler2a428db2014-11-29 23:04:371745 } else {
1746 pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1747 }
1748
1749 nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
1750
Andrey Turkincfb49fc2016-05-20 16:13:201751 if (frame && nv_status == NV_ENC_ERR_NEED_MORE_INPUT)
1752 av_fifo_generic_write(ctx->output_surface_queue, &inSurf, sizeof(inSurf), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:371753
1754 if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
Andrey Turkine1691c42016-05-20 15:37:001755 return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
Timo Rothenpieler2a428db2014-11-29 23:04:371756 }
1757
1758 if (nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
Andrey Turkincfb49fc2016-05-20 16:13:201759 while (av_fifo_size(ctx->output_surface_queue) > 0) {
1760 av_fifo_generic_read(ctx->output_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
1761 av_fifo_generic_write(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:371762 }
1763
Andrey Turkincfb49fc2016-05-20 16:13:201764 if (frame)
1765 av_fifo_generic_write(ctx->output_surface_ready_queue, &inSurf, sizeof(inSurf), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:371766 }
1767
Andrey Turkincfb49fc2016-05-20 16:13:201768 if (output_ready(ctx, !frame)) {
1769 av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
Timo Rothenpieler2a428db2014-11-29 23:04:371770
Timo Rothenpieler15cd2f82015-07-25 21:26:421771 res = process_output_surface(avctx, pkt, tmpoutsurf);
Timo Rothenpieler2a428db2014-11-29 23:04:371772
1773 if (res)
1774 return res;
1775
Andrey Turkine1691c42016-05-20 15:37:001776 av_assert0(tmpoutsurf->lockCount);
1777 tmpoutsurf->lockCount--;
Timo Rothenpieler2a428db2014-11-29 23:04:371778
1779 *got_packet = 1;
1780 } else {
1781 *got_packet = 0;
1782 }
1783
1784 return 0;
1785}
1786
Michael Niedermayer29ef54a2015-05-02 13:00:351787static const enum AVPixelFormat pix_fmts_nvenc[] = {
Philip Langdale01fac842015-06-07 03:28:221788 AV_PIX_FMT_YUV420P,
Timo Rothenpieler2a428db2014-11-29 23:04:371789 AV_PIX_FMT_NV12,
Philip Langdale671bdd42015-07-02 04:09:571790 AV_PIX_FMT_YUV444P,
Andrey Turkina8cf25d2016-05-20 22:08:061791#if CONFIG_CUDA
1792 AV_PIX_FMT_CUDA,
1793#endif
Timo Rothenpieler2a428db2014-11-29 23:04:371794 AV_PIX_FMT_NONE
1795};
1796
1797#define OFFSET(x) offsetof(NvencContext, x)
1798#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1799static const AVOption options[] = {
Timo Rothenpielerc4312b12016-04-27 20:22:291800 { "preset", "Set the encoding preset (one of slow = hq 2pass, medium = hq, fast = hp, hq, hp, bd, ll, llhq, llhp, lossless, losslesshp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE },
Agatha Huddbad152015-11-09 12:05:021801 { "profile", "Set the encoding profile (high, main, baseline or high444p)", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
1802 { "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { .str = "auto" }, 0, 0, VE },
1803 { "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
Agatha Hu49046582015-09-11 09:07:101804 { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
1805 { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
Timo Rothenpieler2a428db2014-11-29 23:04:371806 { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
Timo Rothenpieler9f4bff82015-07-25 21:20:281807 { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
Timo Rothenpieler2a428db2014-11-29 23:04:371808 { NULL }
1809};
1810
Timo Rothenpieler2a428db2014-11-29 23:04:371811static const AVCodecDefault nvenc_defaults[] = {
Agatha Huddbad152015-11-09 12:05:021812 { "b", "2M" },
Timo Rothenpieler2a428db2014-11-29 23:04:371813 { "qmin", "-1" },
1814 { "qmax", "-1" },
1815 { "qdiff", "-1" },
1816 { "qblur", "-1" },
1817 { "qcomp", "-1" },
Agatha Huddbad152015-11-09 12:05:021818 { "g", "250" },
1819 { "bf", "0" },
Timo Rothenpieler2a428db2014-11-29 23:04:371820 { NULL },
1821};
1822
#if CONFIG_NVENC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc". */
static const AVClass nvenc_class = {
    .class_name = "nvenc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* H.264 encoder registered under the name "nvenc". */
AVCodec ff_nvenc_encoder = {
    .name           = "nvenc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
1846
/* Add an alias for nvenc_h264 */
#if CONFIG_NVENC_H264_ENCODER
/* AVClass exposing the shared option table under the name "nvenc_h264". */
static const AVClass nvenc_h264_class = {
    .class_name = "nvenc_h264",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* H.264 encoder registered under the name "nvenc_h264"; it uses the same
 * callbacks, defaults and pixel formats as ff_nvenc_encoder. */
AVCodec ff_nvenc_h264_encoder = {
    .name           = "nvenc_h264",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_h264_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif
1871
#if CONFIG_NVENC_HEVC_ENCODER
/* AVClass exposing the shared option table under the name "nvenc_hevc". */
static const AVClass nvenc_hevc_class = {
    .class_name = "nvenc_hevc",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};

/* HEVC encoder registered under the name "nvenc_hevc"; it shares the
 * callbacks, defaults and pixel formats of the H.264 encoders. */
AVCodec ff_nvenc_hevc_encoder = {
    .name           = "nvenc_hevc",
    .long_name      = NULL_IF_CONFIG_SMALL("NVIDIA NVENC hevc encoder"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H265,
    .capabilities   = AV_CODEC_CAP_DELAY,
    .pix_fmts       = pix_fmts_nvenc,
    .priv_data_size = sizeof(NvencContext),
    .priv_class     = &nvenc_hevc_class,
    .defaults       = nvenc_defaults,
    .init           = nvenc_encode_init,
    .encode2        = nvenc_encode_frame,
    .close          = nvenc_encode_close,
};
#endif