/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18
19#define COBJMACROS
20#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
21#undef _WIN32_WINNT
22#define _WIN32_WINNT 0x0602
23#endif
24
James Almer827d6fe2020-06-09 21:31:3225#include "encode.h"
wm4050b72a2017-04-04 05:45:4126#include "mf_utils.h"
27#include "libavutil/imgutils.h"
Andreas Rheinhardt790f7932024-03-25 00:30:3728#include "libavutil/mem.h"
wm4050b72a2017-04-04 05:45:4129#include "libavutil/opt.h"
30#include "libavutil/time.h"
Andreas Rheinhardta688f3c2022-03-16 17:18:2831#include "codec_internal.h"
Martin Storsjö869f6552020-05-25 10:18:0332#include "internal.h"
Martin Storsjö9fba0b82022-05-25 21:31:0533#include "compat/w32dlfcn.h"
wm4050b72a2017-04-04 05:45:4134
35typedef struct MFContext {
36 AVClass *av_class;
Trystan Mata1cb601a2022-05-25 10:54:0137 HMODULE library;
38 MFFunctions functions;
James Almer827d6fe2020-06-09 21:31:3239 AVFrame *frame;
wm4050b72a2017-04-04 05:45:4140 int is_video, is_audio;
41 GUID main_subtype;
42 IMFTransform *mft;
43 IMFMediaEventGenerator *async_events;
44 DWORD in_stream_id, out_stream_id;
45 MFT_INPUT_STREAM_INFO in_info;
46 MFT_OUTPUT_STREAM_INFO out_info;
47 int out_stream_provides_samples;
48 int draining, draining_done;
49 int sample_sent;
50 int async_need_input, async_have_output, async_marker;
51 int64_t reorder_delay;
52 ICodecAPI *codec_api;
53 // set by AVOption
54 int opt_enc_rc;
55 int opt_enc_quality;
56 int opt_enc_scenario;
57 int opt_enc_hw;
58} MFContext;
59
60static int mf_choose_output_type(AVCodecContext *avctx);
61static int mf_setup_context(AVCodecContext *avctx);
62
63#define MF_TIMEBASE (AVRational){1, 10000000}
64// Sentinel value only used by us.
65#define MF_INVALID_TIME AV_NOPTS_VALUE
66
67static int mf_wait_events(AVCodecContext *avctx)
68{
69 MFContext *c = avctx->priv_data;
70
71 if (!c->async_events)
72 return 0;
73
74 while (!(c->async_need_input || c->async_have_output || c->draining_done || c->async_marker)) {
75 IMFMediaEvent *ev = NULL;
76 MediaEventType ev_id = 0;
77 HRESULT hr = IMFMediaEventGenerator_GetEvent(c->async_events, 0, &ev);
78 if (FAILED(hr)) {
79 av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
80 ff_hr_str(hr));
81 return AVERROR_EXTERNAL;
82 }
83 IMFMediaEvent_GetType(ev, &ev_id);
84 switch (ev_id) {
85 case ff_METransformNeedInput:
86 if (!c->draining)
87 c->async_need_input = 1;
88 break;
89 case ff_METransformHaveOutput:
90 c->async_have_output = 1;
91 break;
92 case ff_METransformDrainComplete:
93 c->draining_done = 1;
94 break;
95 case ff_METransformMarker:
96 c->async_marker = 1;
97 break;
98 default: ;
99 }
100 IMFMediaEvent_Release(ev);
101 }
102
103 return 0;
104}
105
106static AVRational mf_get_tb(AVCodecContext *avctx)
107{
wm4050b72a2017-04-04 05:45:41108 if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
109 return avctx->time_base;
110 return MF_TIMEBASE;
111}
112
113static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
114{
115 if (av_pts == AV_NOPTS_VALUE)
116 return MF_INVALID_TIME;
117 return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
118}
119
120static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
121{
122 LONGLONG stime = mf_to_mf_time(avctx, av_pts);
123 if (stime != MF_INVALID_TIME)
124 IMFSample_SetSampleTime(sample, stime);
125}
126
127static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
128{
129 return av_rescale_q(stime, MF_TIMEBASE, mf_get_tb(avctx));
130}
131
132static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
133{
134 LONGLONG pts;
135 HRESULT hr = IMFSample_GetSampleTime(sample, &pts);
136 if (FAILED(hr))
137 return AV_NOPTS_VALUE;
138 return mf_from_mf_time(avctx, pts);
139}
140
141static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
142{
143 MFContext *c = avctx->priv_data;
144 HRESULT hr;
145 UINT32 sz;
146
147 if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
148 hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
149 if (!FAILED(hr) && sz > 0) {
150 avctx->extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
151 if (!avctx->extradata)
152 return AVERROR(ENOMEM);
153 avctx->extradata_size = sz;
154 hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, avctx->extradata, sz, NULL);
155 if (FAILED(hr))
156 return AVERROR_EXTERNAL;
157
158 if (avctx->codec_id == AV_CODEC_ID_AAC && avctx->extradata_size >= 12) {
159 // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
160 avctx->extradata_size = avctx->extradata_size - 12;
161 memmove(avctx->extradata, avctx->extradata + 12, avctx->extradata_size);
162 }
163 }
164 }
165
166 // I don't know where it's documented that we need this. It happens with the
167 // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
168 // (Certainly any lossy codec will have frames much smaller than 1 second.)
169 if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
170 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
171 if (!FAILED(hr)) {
172 av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
173 "assuming %d bytes instead.\n", (int)sz);
174 c->out_info.cbSize = sz;
175 }
176 }
177
178 return 0;
179}
180
181static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
182{
183 HRESULT hr;
184 UINT32 sz;
185
186 hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &sz);
187 if (!FAILED(hr) && sz > 0) {
188 uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
189 if (!extradata)
190 return AVERROR(ENOMEM);
191 hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, extradata, sz, NULL);
192 if (FAILED(hr)) {
193 av_free(extradata);
194 return AVERROR_EXTERNAL;
195 }
196 av_freep(&avctx->extradata);
197 avctx->extradata = extradata;
198 avctx->extradata_size = sz;
199 }
200
201 return 0;
202}
203
204static int mf_output_type_get(AVCodecContext *avctx)
205{
206 MFContext *c = avctx->priv_data;
207 HRESULT hr;
208 IMFMediaType *type;
209 int ret;
210
211 hr = IMFTransform_GetOutputCurrentType(c->mft, c->out_stream_id, &type);
212 if (FAILED(hr)) {
213 av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
214 return AVERROR_EXTERNAL;
215 }
216
217 av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
218 ff_media_type_dump(avctx, type);
219
220 ret = 0;
221 if (c->is_video) {
222 ret = mf_encv_output_type_get(avctx, type);
223 } else if (c->is_audio) {
224 ret = mf_enca_output_type_get(avctx, type);
225 }
226
227 if (ret < 0)
228 av_log(avctx, AV_LOG_ERROR, "output type not supported\n");
229
230 IMFMediaType_Release(type);
231 return ret;
232}
233
234static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
235{
236 MFContext *c = avctx->priv_data;
237 HRESULT hr;
238 int ret;
239 DWORD len;
240 IMFMediaBuffer *buffer;
241 BYTE *data;
242 UINT64 t;
243 UINT32 t32;
244
245 hr = IMFSample_GetTotalLength(sample, &len);
246 if (FAILED(hr))
247 return AVERROR_EXTERNAL;
248
James Almerb86af932021-03-13 00:07:42249 if ((ret = ff_get_encode_buffer(avctx, avpkt, len, 0)) < 0)
wm4050b72a2017-04-04 05:45:41250 return ret;
251
Michael Niedermayer86cd7c62024-05-26 23:52:18252 hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
wm4050b72a2017-04-04 05:45:41253 if (FAILED(hr))
254 return AVERROR_EXTERNAL;
255
256 hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
257 if (FAILED(hr)) {
258 IMFMediaBuffer_Release(buffer);
259 return AVERROR_EXTERNAL;
260 }
261
262 memcpy(avpkt->data, data, len);
263
264 IMFMediaBuffer_Unlock(buffer);
265 IMFMediaBuffer_Release(buffer);
266
267 avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);
268
269 hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
270 if (c->is_audio || (!FAILED(hr) && t32 != 0))
271 avpkt->flags |= AV_PKT_FLAG_KEY;
272
273 hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
274 if (!FAILED(hr)) {
275 avpkt->dts = mf_from_mf_time(avctx, t);
276 // At least on Qualcomm's HEVC encoder on SD 835, the output dts
277 // starts from the input pts of the first frame, while the output pts
278 // is shifted forward. Therefore, shift the output values back so that
279 // the output pts matches the input.
280 if (c->reorder_delay == AV_NOPTS_VALUE)
281 c->reorder_delay = avpkt->pts - avpkt->dts;
282 avpkt->dts -= c->reorder_delay;
283 avpkt->pts -= c->reorder_delay;
284 }
285
286 return 0;
287}
288
289static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
290{
291 MFContext *c = avctx->priv_data;
292 size_t len;
293 size_t bps;
294 IMFSample *sample;
295
James Almer6d8b2582021-08-25 13:37:54296 bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->ch_layout.nb_channels;
wm4050b72a2017-04-04 05:45:41297 len = frame->nb_samples * bps;
298
Trystan Mata1cb601a2022-05-25 10:54:01299 sample = ff_create_memory_sample(&c->functions, frame->data[0], len,
300 c->in_info.cbAlignment);
wm4050b72a2017-04-04 05:45:41301 if (sample)
302 IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
303 return sample;
304}
305
306static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
307{
308 MFContext *c = avctx->priv_data;
309 IMFSample *sample;
310 IMFMediaBuffer *buffer;
311 BYTE *data;
312 HRESULT hr;
313 int ret;
314 int size;
315
316 size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
317 if (size < 0)
318 return NULL;
319
Trystan Mata1cb601a2022-05-25 10:54:01320 sample = ff_create_memory_sample(&c->functions, NULL, size,
321 c->in_info.cbAlignment);
wm4050b72a2017-04-04 05:45:41322 if (!sample)
323 return NULL;
324
325 hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
326 if (FAILED(hr)) {
327 IMFSample_Release(sample);
328 return NULL;
329 }
330
331 hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
332 if (FAILED(hr)) {
333 IMFMediaBuffer_Release(buffer);
334 IMFSample_Release(sample);
335 return NULL;
336 }
337
338 ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
339 avctx->pix_fmt, avctx->width, avctx->height, 1);
340 IMFMediaBuffer_SetCurrentLength(buffer, size);
341 IMFMediaBuffer_Unlock(buffer);
342 IMFMediaBuffer_Release(buffer);
343 if (ret < 0) {
344 IMFSample_Release(sample);
345 return NULL;
346 }
347
Anton Khirnovac2cda42022-07-11 08:20:12348 IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->duration));
wm4050b72a2017-04-04 05:45:41349
350 return sample;
351}
352
353static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
354{
355 MFContext *c = avctx->priv_data;
356 IMFSample *sample;
357
358 if (c->is_audio) {
359 sample = mf_a_avframe_to_sample(avctx, frame);
360 } else {
361 sample = mf_v_avframe_to_sample(avctx, frame);
362 }
363
364 if (sample)
365 mf_sample_set_pts(avctx, sample, frame->pts);
366
367 return sample;
368}
369
370static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
371{
372 MFContext *c = avctx->priv_data;
373 HRESULT hr;
374 int ret;
375
376 if (sample) {
377 if (c->async_events) {
378 if ((ret = mf_wait_events(avctx)) < 0)
379 return ret;
380 if (!c->async_need_input)
381 return AVERROR(EAGAIN);
382 }
383 if (!c->sample_sent)
384 IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
385 c->sample_sent = 1;
386 hr = IMFTransform_ProcessInput(c->mft, c->in_stream_id, sample, 0);
387 if (hr == MF_E_NOTACCEPTING) {
388 return AVERROR(EAGAIN);
389 } else if (FAILED(hr)) {
390 av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
391 return AVERROR_EXTERNAL;
392 }
393 c->async_need_input = 0;
394 } else if (!c->draining) {
395 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
396 if (FAILED(hr))
397 av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
398 // Some MFTs (AC3) will send a frame after each drain command (???), so
399 // this is required to make draining actually terminate.
400 c->draining = 1;
401 c->async_need_input = 0;
402 } else {
403 return AVERROR_EOF;
404 }
405 return 0;
406}
407
wm4050b72a2017-04-04 05:45:41408static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
409{
410 MFContext *c = avctx->priv_data;
411 HRESULT hr;
412 DWORD st;
413 MFT_OUTPUT_DATA_BUFFER out_buffers;
414 IMFSample *sample;
415 int ret = 0;
416
417 while (1) {
418 *out_sample = NULL;
419 sample = NULL;
420
421 if (c->async_events) {
422 if ((ret = mf_wait_events(avctx)) < 0)
423 return ret;
424 if (!c->async_have_output || c->draining_done) {
425 ret = 0;
426 break;
427 }
428 }
429
430 if (!c->out_stream_provides_samples) {
Trystan Mata1cb601a2022-05-25 10:54:01431 sample = ff_create_memory_sample(&c->functions, NULL,
432 c->out_info.cbSize,
433 c->out_info.cbAlignment);
wm4050b72a2017-04-04 05:45:41434 if (!sample)
435 return AVERROR(ENOMEM);
436 }
437
438 out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
439 .dwStreamID = c->out_stream_id,
440 .pSample = sample,
441 };
442
443 st = 0;
444 hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);
445
446 if (out_buffers.pEvents)
447 IMFCollection_Release(out_buffers.pEvents);
448
449 if (!FAILED(hr)) {
450 *out_sample = out_buffers.pSample;
451 ret = 0;
452 break;
453 }
454
455 if (out_buffers.pSample)
456 IMFSample_Release(out_buffers.pSample);
457
458 if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
459 if (c->draining)
460 c->draining_done = 1;
461 ret = 0;
462 } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
463 av_log(avctx, AV_LOG_WARNING, "stream format change\n");
464 ret = mf_choose_output_type(avctx);
465 if (ret == 0) // we don't expect renegotiating the input type
466 ret = AVERROR_EXTERNAL;
467 if (ret > 0) {
468 ret = mf_setup_context(avctx);
469 if (ret >= 0) {
470 c->async_have_output = 0;
471 continue;
472 }
473 }
474 } else {
475 av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
476 ret = AVERROR_EXTERNAL;
477 }
478
479 break;
480 }
481
482 c->async_have_output = 0;
483
484 if (ret >= 0 && !*out_sample)
485 ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
486
487 return ret;
488}
489
490static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
491{
James Almer827d6fe2020-06-09 21:31:32492 MFContext *c = avctx->priv_data;
493 IMFSample *sample = NULL;
wm4050b72a2017-04-04 05:45:41494 int ret;
495
James Almer827d6fe2020-06-09 21:31:32496 if (!c->frame->buf[0]) {
497 ret = ff_encode_get_frame(avctx, c->frame);
498 if (ret < 0 && ret != AVERROR_EOF)
499 return ret;
500 }
501
502 if (c->frame->buf[0]) {
503 sample = mf_avframe_to_sample(avctx, c->frame);
504 if (!sample) {
505 av_frame_unref(c->frame);
506 return AVERROR(ENOMEM);
507 }
508 if (c->is_video && c->codec_api) {
509 if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
510 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
511 }
512 }
513
514 ret = mf_send_sample(avctx, sample);
515 if (sample)
516 IMFSample_Release(sample);
517 if (ret != AVERROR(EAGAIN))
518 av_frame_unref(c->frame);
519 if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
520 return ret;
521
wm4050b72a2017-04-04 05:45:41522 ret = mf_receive_sample(avctx, &sample);
523 if (ret < 0)
524 return ret;
525
526 ret = mf_sample_to_avpacket(avctx, sample, avpkt);
527 IMFSample_Release(sample);
528
529 return ret;
530}
531
532// Most encoders seem to enumerate supported audio formats on the output types,
533// at least as far as channel configuration and sample rate is concerned. Pick
534// the one which seems to match best.
535static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
536{
537 MFContext *c = avctx->priv_data;
538 HRESULT hr;
539 UINT32 t;
540 GUID tg;
541 int64_t score = 0;
542
543 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
544 if (!FAILED(hr) && t == avctx->sample_rate)
545 score |= 1LL << 32;
546
547 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
James Almer6d8b2582021-08-25 13:37:54548 if (!FAILED(hr) && t == avctx->ch_layout.nb_channels)
wm4050b72a2017-04-04 05:45:41549 score |= 2LL << 32;
550
551 hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
552 if (!FAILED(hr)) {
553 if (IsEqualGUID(&c->main_subtype, &tg))
554 score |= 4LL << 32;
555 }
556
557 // Select the bitrate (lowest priority).
558 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
559 if (!FAILED(hr)) {
560 int diff = (int)t - avctx->bit_rate / 8;
561 if (diff >= 0) {
562 score |= (1LL << 31) - diff; // prefer lower bitrate
563 } else {
564 score |= (1LL << 30) + diff; // prefer higher bitrate
565 }
566 }
567
568 hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
569 if (!FAILED(hr) && t != 0)
570 return -1;
571
572 return score;
573}
574
575static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
576{
577 // (some decoders allow adjusting this freely, but it can also cause failure
578 // to set the output type - so it's commented for being too fragile)
579 //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
580 //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
581
582 return 0;
583}
584
585static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
586{
587 HRESULT hr;
588 UINT32 t;
589 int64_t score = 0;
590
591 enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
592 if (sformat == AV_SAMPLE_FMT_NONE)
593 return -1; // can not use
594
595 if (sformat == avctx->sample_fmt)
596 score |= 1;
597
598 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
599 if (!FAILED(hr) && t == avctx->sample_rate)
600 score |= 2;
601
602 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
James Almer6d8b2582021-08-25 13:37:54603 if (!FAILED(hr) && t == avctx->ch_layout.nb_channels)
wm4050b72a2017-04-04 05:45:41604 score |= 4;
605
606 return score;
607}
608
609static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
610{
611 HRESULT hr;
612 UINT32 t;
613
614 enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
615 if (sformat != avctx->sample_fmt) {
616 av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
617 return AVERROR(EINVAL);
618 }
619
620 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
621 if (FAILED(hr) || t != avctx->sample_rate) {
622 av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
623 return AVERROR(EINVAL);
624 }
625
626 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
James Almer6d8b2582021-08-25 13:37:54627 if (FAILED(hr) || t != avctx->ch_layout.nb_channels) {
wm4050b72a2017-04-04 05:45:41628 av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
629 return AVERROR(EINVAL);
630 }
631
632 return 0;
633}
634
635static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
636{
637 MFContext *c = avctx->priv_data;
638 GUID tg;
639 HRESULT hr;
640 int score = -1;
641
642 hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
643 if (!FAILED(hr)) {
644 if (IsEqualGUID(&c->main_subtype, &tg))
645 score = 1;
646 }
647
648 return score;
649}
650
651static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
652{
653 MFContext *c = avctx->priv_data;
Martin Storsjöc116c122020-05-20 20:08:17654 AVRational framerate;
wm4050b72a2017-04-04 05:45:41655
656 ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
657 IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
658
Martin Storsjöc116c122020-05-20 20:08:17659 if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
660 framerate = avctx->framerate;
661 } else {
662 framerate = av_inv_q(avctx->time_base);
Martin Storsjöc116c122020-05-20 20:08:17663 }
664
665 ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, framerate.num, framerate.den);
wm4050b72a2017-04-04 05:45:41666
667 // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
668 if (avctx->codec_id == AV_CODEC_ID_H264) {
Martin Storsjö6c33a232020-05-25 10:26:04669 UINT32 profile = ff_eAVEncH264VProfile_Base;
wm4050b72a2017-04-04 05:45:41670 switch (avctx->profile) {
Andreas Rheinhardt8238bc02023-09-02 12:57:41671 case AV_PROFILE_H264_MAIN:
Martin Storsjö6c33a232020-05-25 10:26:04672 profile = ff_eAVEncH264VProfile_Main;
wm4050b72a2017-04-04 05:45:41673 break;
Andreas Rheinhardt8238bc02023-09-02 12:57:41674 case AV_PROFILE_H264_HIGH:
Martin Storsjö6c33a232020-05-25 10:26:04675 profile = ff_eAVEncH264VProfile_High;
wm4050b72a2017-04-04 05:45:41676 break;
677 }
678 IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
679 }
680
681 IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
682
683 // Note that some of the ICodecAPI options must be set before SetOutputType.
684 if (c->codec_api) {
685 if (avctx->bit_rate)
686 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));
687
688 if (c->opt_enc_rc >= 0)
689 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));
690
691 if (c->opt_enc_quality >= 0)
692 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));
693
Mark Samuelsonc52869f2024-03-28 12:34:30694 if (avctx->rc_max_rate > 0)
695 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMaxBitRate, FF_VAL_VT_UI4(avctx->rc_max_rate));
696
697 if (avctx->gop_size > 0)
698 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVGOPSize, FF_VAL_VT_UI4(avctx->gop_size));
699
700 if(avctx->rc_buffer_size > 0)
701 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonBufferSize, FF_VAL_VT_UI4(avctx->rc_buffer_size));
702
703 if(avctx->compression_level >= 0)
704 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQualityVsSpeed, FF_VAL_VT_UI4(avctx->compression_level));
705
706 if(avctx->global_quality > 0)
707 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoEncodeQP, FF_VAL_VT_UI4(avctx->global_quality ));
708
wm4050b72a2017-04-04 05:45:41709 // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
710 // defaults this to 1, and that setting is buggy with many of the
711 // rate control modes. (0 or 2 b-frames works fine with most rate
712 // control modes, but 2 seems buggy with the u_vbr mode.) Setting
713 // "scenario" to "camera_record" sets it in CFR mode (where the default
714 // is VFR), which makes the encoder avoid dropping frames.
715 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
Martin Storsjöfea5f5b2020-05-20 21:11:47716 avctx->has_b_frames = avctx->max_b_frames > 0;
wm4050b72a2017-04-04 05:45:41717
718 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));
719
720 if (c->opt_enc_scenario >= 0)
721 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
722 }
723
724 return 0;
725}
726
727static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
728{
729 enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
730 if (pix_fmt != avctx->pix_fmt)
731 return -1; // can not use
732
733 return 0;
734}
735
736static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
737{
738 enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
739 if (pix_fmt != avctx->pix_fmt) {
740 av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
741 return AVERROR(EINVAL);
742 }
743
744 //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
745
746 return 0;
747}
748
749static int mf_choose_output_type(AVCodecContext *avctx)
750{
751 MFContext *c = avctx->priv_data;
752 HRESULT hr;
753 int ret;
754 IMFMediaType *out_type = NULL;
755 int64_t out_type_score = -1;
756 int out_type_index = -1;
757 int n;
758
759 av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
760 for (n = 0; ; n++) {
761 IMFMediaType *type;
762 int64_t score = -1;
763
764 hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
765 if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
766 break;
767 if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
768 av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
769 ret = 0;
770 goto done;
771 }
772 if (FAILED(hr)) {
773 av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
774 ret = AVERROR_EXTERNAL;
775 goto done;
776 }
777
778 av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
779 ff_media_type_dump(avctx, type);
780
781 if (c->is_video) {
782 score = mf_encv_output_score(avctx, type);
783 } else if (c->is_audio) {
784 score = mf_enca_output_score(avctx, type);
785 }
786
787 if (score > out_type_score) {
788 if (out_type)
789 IMFMediaType_Release(out_type);
790 out_type = type;
791 out_type_score = score;
792 out_type_index = n;
793 IMFMediaType_AddRef(out_type);
794 }
795
796 IMFMediaType_Release(type);
797 }
798
799 if (out_type) {
800 av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
801 } else {
Trystan Mata1cb601a2022-05-25 10:54:01802 hr = c->functions.MFCreateMediaType(&out_type);
wm4050b72a2017-04-04 05:45:41803 if (FAILED(hr)) {
804 ret = AVERROR(ENOMEM);
805 goto done;
806 }
807 }
808
809 ret = 0;
810 if (c->is_video) {
811 ret = mf_encv_output_adjust(avctx, out_type);
812 } else if (c->is_audio) {
813 ret = mf_enca_output_adjust(avctx, out_type);
814 }
815
816 if (ret >= 0) {
817 av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
818 ff_media_type_dump(avctx, out_type);
819
820 hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
821 if (!FAILED(hr)) {
822 ret = 1;
823 } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
824 av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
825 ret = 0;
826 } else {
827 av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
828 ret = AVERROR_EXTERNAL;
829 }
830 }
831
832done:
833 if (out_type)
834 IMFMediaType_Release(out_type);
835 return ret;
836}
837
838static int mf_choose_input_type(AVCodecContext *avctx)
839{
840 MFContext *c = avctx->priv_data;
841 HRESULT hr;
842 int ret;
843 IMFMediaType *in_type = NULL;
844 int64_t in_type_score = -1;
845 int in_type_index = -1;
846 int n;
847
848 av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
849 for (n = 0; ; n++) {
850 IMFMediaType *type = NULL;
851 int64_t score = -1;
852
853 hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
854 if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
855 break;
856 if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
857 av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
858 ret = 0;
859 goto done;
860 }
861 if (FAILED(hr)) {
862 av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
863 ret = AVERROR_EXTERNAL;
864 goto done;
865 }
866
867 av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
868 ff_media_type_dump(avctx, type);
869
870 if (c->is_video) {
871 score = mf_encv_input_score(avctx, type);
872 } else if (c->is_audio) {
873 score = mf_enca_input_score(avctx, type);
874 }
875
876 if (score > in_type_score) {
877 if (in_type)
878 IMFMediaType_Release(in_type);
879 in_type = type;
880 in_type_score = score;
881 in_type_index = n;
882 IMFMediaType_AddRef(in_type);
883 }
884
885 IMFMediaType_Release(type);
886 }
887
888 if (in_type) {
889 av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
890 } else {
891 // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
892 av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
893 ret = 0;
894 goto done;
895 }
896
897 ret = 0;
898 if (c->is_video) {
899 ret = mf_encv_input_adjust(avctx, in_type);
900 } else if (c->is_audio) {
901 ret = mf_enca_input_adjust(avctx, in_type);
902 }
903
904 if (ret >= 0) {
905 av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
906 ff_media_type_dump(avctx, in_type);
907
908 hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
909 if (!FAILED(hr)) {
910 ret = 1;
911 } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
912 av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
913 ret = 0;
914 } else {
915 av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
916 ret = AVERROR_EXTERNAL;
917 }
918 }
919
920done:
921 if (in_type)
922 IMFMediaType_Release(in_type);
923 return ret;
924}
925
926static int mf_negotiate_types(AVCodecContext *avctx)
927{
928 // This follows steps 1-5 on:
929 // https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
930 // If every MFT implementer does this correctly, this loop should at worst
931 // be repeated once.
932 int need_input = 1, need_output = 1;
933 int n;
934 for (n = 0; n < 2 && (need_input || need_output); n++) {
935 int ret;
936 ret = mf_choose_input_type(avctx);
937 if (ret < 0)
938 return ret;
939 need_input = ret < 1;
940 ret = mf_choose_output_type(avctx);
941 if (ret < 0)
942 return ret;
943 need_output = ret < 1;
944 }
945 if (need_input || need_output) {
946 av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
947 need_input, need_output);
948 return AVERROR_EXTERNAL;
949 }
950 return 0;
951}
952
953static int mf_setup_context(AVCodecContext *avctx)
954{
955 MFContext *c = avctx->priv_data;
956 HRESULT hr;
957 int ret;
958
959 hr = IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info);
960 if (FAILED(hr))
961 return AVERROR_EXTERNAL;
962 av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
963 (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);
964
965 hr = IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info);
966 if (FAILED(hr))
967 return AVERROR_EXTERNAL;
968 c->out_stream_provides_samples =
969 (c->out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
970 (c->out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
971 av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
972 (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
973 c->out_stream_provides_samples ? " (provides samples)" : "");
974
975 if ((ret = mf_output_type_get(avctx)) < 0)
976 return ret;
977
978 return 0;
979}
980
981static int mf_unlock_async(AVCodecContext *avctx)
982{
983 MFContext *c = avctx->priv_data;
984 HRESULT hr;
985 IMFAttributes *attrs;
986 UINT32 v;
987 int res = AVERROR_EXTERNAL;
988
989 // For hw encoding we unfortunately need to use async mode, otherwise
990 // play it safe and avoid it.
991 if (!(c->is_video && c->opt_enc_hw))
992 return 0;
993
994 hr = IMFTransform_GetAttributes(c->mft, &attrs);
995 if (FAILED(hr)) {
996 av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
997 goto err;
998 }
999
1000 hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
1001 if (FAILED(hr)) {
1002 av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
1003 goto err;
1004 }
1005
1006 if (!v) {
1007 av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
1008 goto err;
1009 }
1010
1011 hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
1012 if (FAILED(hr)) {
1013 av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
1014 goto err;
1015 }
1016
1017 hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
1018 if (FAILED(hr)) {
1019 av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
1020 goto err;
1021 }
1022
1023 res = 0;
1024
1025err:
1026 IMFAttributes_Release(attrs);
1027 return res;
1028}
1029
Trystan Mata1cb601a2022-05-25 10:54:011030static int mf_create(void *log, MFFunctions *f, IMFTransform **mft,
1031 const AVCodec *codec, int use_hw)
wm4050b72a2017-04-04 05:45:411032{
1033 int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
1034 const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
1035 MFT_REGISTER_TYPE_INFO reg = {0};
1036 GUID category;
1037 int ret;
1038
1039 *mft = NULL;
1040
1041 if (!subtype)
1042 return AVERROR(ENOSYS);
1043
1044 reg.guidSubtype = *subtype;
1045
1046 if (is_audio) {
1047 reg.guidMajorType = MFMediaType_Audio;
1048 category = MFT_CATEGORY_AUDIO_ENCODER;
1049 } else {
1050 reg.guidMajorType = MFMediaType_Video;
1051 category = MFT_CATEGORY_VIDEO_ENCODER;
1052 }
1053
Trystan Mata1cb601a2022-05-25 10:54:011054 if ((ret = ff_instantiate_mf(log, f, category, NULL, &reg, use_hw, mft)) < 0)
wm4050b72a2017-04-04 05:45:411055 return ret;
1056
1057 return 0;
1058}
1059
Trystan Mata1cb601a2022-05-25 10:54:011060static int mf_init_encoder(AVCodecContext *avctx)
wm4050b72a2017-04-04 05:45:411061{
1062 MFContext *c = avctx->priv_data;
1063 HRESULT hr;
1064 int ret;
1065 const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
1066 int use_hw = 0;
1067
James Almer827d6fe2020-06-09 21:31:321068 c->frame = av_frame_alloc();
1069 if (!c->frame)
1070 return AVERROR(ENOMEM);
1071
wm4050b72a2017-04-04 05:45:411072 c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
1073 c->is_video = !c->is_audio;
1074 c->reorder_delay = AV_NOPTS_VALUE;
1075
1076 if (c->is_video && c->opt_enc_hw)
1077 use_hw = 1;
1078
1079 if (!subtype)
1080 return AVERROR(ENOSYS);
1081
1082 c->main_subtype = *subtype;
1083
Trystan Mata1cb601a2022-05-25 10:54:011084 if ((ret = mf_create(avctx, &c->functions, &c->mft, avctx->codec, use_hw)) < 0)
wm4050b72a2017-04-04 05:45:411085 return ret;
1086
1087 if ((ret = mf_unlock_async(avctx)) < 0)
1088 return ret;
1089
1090 hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
1091 if (!FAILED(hr))
1092 av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");
1093
1094
1095 hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
1096 if (hr == E_NOTIMPL) {
1097 c->in_stream_id = c->out_stream_id = 0;
1098 } else if (FAILED(hr)) {
1099 av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
1100 return AVERROR_EXTERNAL;
1101 }
1102
1103 if ((ret = mf_negotiate_types(avctx)) < 0)
1104 return ret;
1105
1106 if ((ret = mf_setup_context(avctx)) < 0)
1107 return ret;
1108
1109 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
1110 if (FAILED(hr)) {
1111 av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
1112 return AVERROR_EXTERNAL;
1113 }
1114
1115 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
1116 if (FAILED(hr)) {
1117 av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
1118 return AVERROR_EXTERNAL;
1119 }
1120
1121 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
1122 c->is_video && !avctx->extradata) {
1123 int sleep = 10000, total = 0;
1124 av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
1125 while (total < 70*1000) {
1126 // The Qualcomm H264 encoder on SD835 doesn't provide extradata
1127 // immediately, but it becomes available soon after init (without
1128 // any waitable event). In practice, it's available after less
1129 // than 10 ms, but wait for up to 70 ms before giving up.
1130 // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
1131 // of the QSV H264 encoder at least) don't provide extradata this
1132 // way at all, not even after encoding a frame - it's only
1133 // available prepended to frames.
1134 av_usleep(sleep);
1135 total += sleep;
1136 mf_output_type_get(avctx);
1137 if (avctx->extradata)
1138 break;
1139 sleep *= 2;
1140 }
1141 av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
1142 avctx->extradata ? "Got" : "Didn't get", total / 1000);
1143 }
1144
1145 return 0;
1146}
1147
// LOAD_MF_FUNCTION(context, func_name) stores the address of func_name in
// context->functions.func_name. If the address is NULL it logs an error
// and makes the *enclosing function* return AVERROR_UNKNOWN.
// The expansion is wrapped in do { } while (0) so that it behaves as a
// single statement; invoke it with a trailing semicolon.
#if !HAVE_UWP
#define LOAD_MF_FUNCTION(context, func_name) \
    do { \
        (context)->functions.func_name = \
            (void *)dlsym((context)->library, #func_name); \
        if (!(context)->functions.func_name) { \
            av_log((context), AV_LOG_ERROR, \
                   "DLL mfplat.dll failed to find function " \
                   #func_name "\n"); \
            return AVERROR_UNKNOWN; \
        } \
    } while (0)
#else
// In UWP (which lacks LoadLibrary), just link directly against
// the functions - this requires building with new/complete enough
// import libraries.
#define LOAD_MF_FUNCTION(context, func_name) \
    do { \
        (context)->functions.func_name = func_name; \
        if (!(context)->functions.func_name) { \
            av_log((context), AV_LOG_ERROR, \
                   "Failed to find function " #func_name "\n"); \
            return AVERROR_UNKNOWN; \
        } \
    } while (0)
#endif
1168
1169// Windows N editions does not provide MediaFoundation by default.
1170// So to avoid DLL loading error, MediaFoundation is dynamically loaded except
1171// on UWP build since LoadLibrary is not available on it.
1172static int mf_load_library(AVCodecContext *avctx)
1173{
1174 MFContext *c = avctx->priv_data;
1175
1176#if !HAVE_UWP
Martin Storsjö9fba0b82022-05-25 21:31:051177 c->library = dlopen("mfplat.dll", 0);
Trystan Mata1cb601a2022-05-25 10:54:011178
1179 if (!c->library) {
1180 av_log(c, AV_LOG_ERROR, "DLL mfplat.dll failed to open\n");
1181 return AVERROR_UNKNOWN;
1182 }
1183#endif
1184
1185 LOAD_MF_FUNCTION(c, MFStartup);
1186 LOAD_MF_FUNCTION(c, MFShutdown);
1187 LOAD_MF_FUNCTION(c, MFCreateAlignedMemoryBuffer);
1188 LOAD_MF_FUNCTION(c, MFCreateSample);
1189 LOAD_MF_FUNCTION(c, MFCreateMediaType);
1190 // MFTEnumEx is missing in Windows Vista's mfplat.dll.
1191 LOAD_MF_FUNCTION(c, MFTEnumEx);
1192
1193 return 0;
1194}
1195
wm4050b72a2017-04-04 05:45:411196static int mf_close(AVCodecContext *avctx)
1197{
1198 MFContext *c = avctx->priv_data;
1199
1200 if (c->codec_api)
1201 ICodecAPI_Release(c->codec_api);
1202
1203 if (c->async_events)
1204 IMFMediaEventGenerator_Release(c->async_events);
1205
Trystan Mata1cb601a2022-05-25 10:54:011206#if !HAVE_UWP
1207 if (c->library)
1208 ff_free_mf(&c->functions, &c->mft);
1209
Martin Storsjö9fba0b82022-05-25 21:31:051210 dlclose(c->library);
Trystan Mata1cb601a2022-05-25 10:54:011211 c->library = NULL;
1212#else
1213 ff_free_mf(&c->functions, &c->mft);
1214#endif
wm4050b72a2017-04-04 05:45:411215
James Almer827d6fe2020-06-09 21:31:321216 av_frame_free(&c->frame);
1217
wm4050b72a2017-04-04 05:45:411218 av_freep(&avctx->extradata);
1219 avctx->extradata_size = 0;
1220
1221 return 0;
1222}
1223
Andreas Rheinhardt5ad86d52025-03-06 17:28:571224static av_cold int mf_init(AVCodecContext *avctx)
Trystan Mata1cb601a2022-05-25 10:54:011225{
1226 int ret;
1227 if ((ret = mf_load_library(avctx)) == 0) {
1228 if ((ret = mf_init_encoder(avctx)) == 0) {
1229 return 0;
1230 }
1231 }
Trystan Mata1cb601a2022-05-25 10:54:011232 return ret;
1233}
1234
// Byte offset of a private option field inside MFContext.
#define OFFSET(x) offsetof(MFContext, x)

// Defines one MediaFoundation encoder: an AVClass named "<NAME>_mf" and the
// matching FFCodec ff_<NAME>_mf_encoder.
//   MEDIATYPE - AUDIO or VIDEO (pasted onto AVMEDIA_TYPE_)
//   NAME      - lower-case codec name used in identifiers and the codec name
//   ID        - codec id suffix (pasted onto AV_CODEC_ID_), also used in the
//               long name
//   OPTS      - AVOption table for the private class, or NULL
//   FMTS/CAPS - initializer fragments (each ending in a comma) supplying the
//               supported formats and the public capabilities
//   DEFAULTS  - FFCodecDefault table, or NULL
#define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, FMTS, CAPS, DEFAULTS) \
    static const AVClass ff_ ## NAME ## _mf_encoder_class = {                  \
        .class_name = #NAME "_mf",                                             \
        .item_name  = av_default_item_name,                                    \
        .option     = OPTS,                                                    \
        .version    = LIBAVUTIL_VERSION_INT,                                   \
    };                                                                         \
    const FFCodec ff_ ## NAME ## _mf_encoder = {                               \
        /* public codec description */                                         \
        .p.name         = #NAME "_mf",                                         \
        CODEC_LONG_NAME(#ID " via MediaFoundation"),                           \
        .p.type         = AVMEDIA_TYPE_ ## MEDIATYPE,                          \
        .p.id           = AV_CODEC_ID_ ## ID,                                  \
        .p.priv_class   = &ff_ ## NAME ## _mf_encoder_class,                   \
        FMTS                                                                   \
        CAPS                                                                   \
        /* private state and callbacks */                                      \
        .priv_data_size = sizeof(MFContext),                                   \
        .defaults       = DEFAULTS,                                            \
        .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,                           \
        .init           = mf_init,                                             \
        FF_CODEC_RECEIVE_PACKET_CB(mf_receive_packet),                         \
        .close          = mf_close,                                            \
    };
1259
1260#define AFMTS \
Andreas Rheinhardt0971fcf2025-03-07 00:19:271261 CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_S16),
Gyan Doshi56419422022-06-17 05:42:051262#define ACAPS \
1263 .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID | \
1264 AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE,
wm4050b72a2017-04-04 05:45:411265
Mark Samuelsonc52869f2024-03-28 12:34:301266MF_ENCODER(AUDIO, aac, AAC, NULL, AFMTS, ACAPS, NULL);
1267MF_ENCODER(AUDIO, ac3, AC3, NULL, AFMTS, ACAPS, NULL);
1268MF_ENCODER(AUDIO, mp3, MP3, NULL, AFMTS, ACAPS, NULL);
wm4050b72a2017-04-04 05:45:411269
1270#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1271static const AVOption venc_opts[] = {
Anton Khirnov1e7d2002024-02-11 14:41:051272 {"rate_control", "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, .unit = "rate_control"},
1273 { "default", "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, .unit = "rate_control"},
1274 { "cbr", "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, .unit = "rate_control"},
1275 { "pc_vbr", "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, .unit = "rate_control"},
1276 { "u_vbr", "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, .unit = "rate_control"},
1277 { "quality", "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, .unit = "rate_control" },
wm4050b72a2017-04-04 05:45:411278 // The following rate_control modes require Windows 8.
Anton Khirnov1e7d2002024-02-11 14:41:051279 { "ld_vbr", "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, .unit = "rate_control"},
1280 { "g_vbr", "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, .unit = "rate_control" },
1281 { "gld_vbr", "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, .unit = "rate_control"},
wm4050b72a2017-04-04 05:45:411282
Anton Khirnov1e7d2002024-02-11 14:41:051283 {"scenario", "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, .unit = "scenario"},
1284 { "default", "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, .unit = "scenario"},
1285 { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, .unit = "scenario"},
1286 { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, .unit = "scenario"},
1287 { "archive", "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, .unit = "scenario"},
1288 { "live_streaming", "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, .unit = "scenario"},
1289 { "camera_record", "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, .unit = "scenario"},
1290 { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, .unit = "scenario"},
wm4050b72a2017-04-04 05:45:411291
1292 {"quality", "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
1293 {"hw_encoding", "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
1294 {NULL}
1295};
1296
Mark Samuelsonc52869f2024-03-28 12:34:301297static const FFCodecDefault defaults[] = {
1298 { "g", "0" },
1299 { NULL },
1300};
1301
wm4050b72a2017-04-04 05:45:411302#define VFMTS \
Andreas Rheinhardt0971fcf2025-03-07 00:19:271303 CODEC_PIXFMTS(AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P),
Gyan Doshi56419422022-06-17 05:42:051304#define VCAPS \
1305 .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID | \
1306 AV_CODEC_CAP_DR1,
wm4050b72a2017-04-04 05:45:411307
Mark Samuelsonc52869f2024-03-28 12:34:301308MF_ENCODER(VIDEO, h264, H264, venc_opts, VFMTS, VCAPS, defaults);
1309MF_ENCODER(VIDEO, hevc, HEVC, venc_opts, VFMTS, VCAPS, defaults);
Min Chen37842f22024-10-04 17:34:041310MF_ENCODER(VIDEO, av1, AV1, venc_opts, VFMTS, VCAPS, defaults);