blob: 1e94a486e9e4ef018f01fd0a3290bd8188266181 [file] [log] [blame]
/*
 * OSQ audio decoder
 * Copyright (c) 2023 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/internal.h"
23#include "libavutil/intreadwrite.h"
Andreas Rheinhardt790f7932024-03-25 00:30:3724#include "libavutil/mem.h"
Paul B Mahol7ef9d312023-06-27 17:54:2525#include "avcodec.h"
26#include "codec_internal.h"
27#include "decode.h"
28#include "internal.h"
29#define BITSTREAM_READER_LE
30#include "get_bits.h"
31#include "unary.h"
32
/* Number of history slots kept in front of sample 0 of every decode buffer;
 * the predictors address them with the negative indices A..E. */
#define OFFSET 5
34
/**
 * Per-channel decoding state, re-read from the bitstream for every block.
 */
typedef struct OSQChannel {
    unsigned prediction;        /* predictor selector, valid range 0..14 */
    unsigned coding_mode;       /* 0: zero residual, 1: Rice, 2: Rice with
                                 * running parameter update, 3: raw signed bits */
    unsigned residue_parameter; /* Rice parameter used by modes 1 and 2 */
    unsigned residue_bits;      /* bit width used by mode 3 */
    unsigned history[3];        /* ring of recent magnitudes feeding 'sum' */
    unsigned pos;               /* next slot of 'history' to overwrite */
    unsigned count;             /* values accounted for since the last reset */
    double sum;                 /* running sum over 'history' */
    int32_t prev;               /* previous raw residual of this channel */
} OSQChannel;
45
46typedef struct OSQContext {
47 GetBitContext gb;
48 OSQChannel ch[2];
49
50 uint8_t *bitstream;
51 size_t max_framesize;
52 size_t bitstream_size;
53
Paul B Maholc4ab17a2023-09-04 12:13:4554 int factor;
Paul B Mahol7ef9d312023-06-27 17:54:2555 int decorrelate;
56 int frame_samples;
Paul B Mahol87b8c102023-09-04 12:05:4457 uint64_t nb_samples;
Paul B Mahol7ef9d312023-06-27 17:54:2558
59 int32_t *decode_buffer[2];
60
61 AVPacket *pkt;
62 int pkt_offset;
63} OSQContext;
64
Michael Niedermayerc75fccd2023-09-20 21:53:2165static void osq_flush(AVCodecContext *avctx)
66{
67 OSQContext *s = avctx->priv_data;
68
69 s->bitstream_size = 0;
70 s->pkt_offset = 0;
71}
72
Paul B Mahol7ef9d312023-06-27 17:54:2573static av_cold int osq_close(AVCodecContext *avctx)
74{
75 OSQContext *s = avctx->priv_data;
76
77 av_freep(&s->bitstream);
78 s->bitstream_size = 0;
79
80 for (int ch = 0; ch < FF_ARRAY_ELEMS(s->decode_buffer); ch++)
81 av_freep(&s->decode_buffer[ch]);
82
83 return 0;
84}
85
86static av_cold int osq_init(AVCodecContext *avctx)
87{
88 OSQContext *s = avctx->priv_data;
89
90 if (avctx->extradata_size < 48)
91 return AVERROR(EINVAL);
92
93 if (avctx->extradata[0] != 1) {
94 av_log(avctx, AV_LOG_ERROR, "Unsupported version.\n");
95 return AVERROR_INVALIDDATA;
96 }
97
98 avctx->sample_rate = AV_RL32(avctx->extradata + 4);
99 if (avctx->sample_rate < 1)
100 return AVERROR_INVALIDDATA;
101
102 av_channel_layout_uninit(&avctx->ch_layout);
103 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
104 avctx->ch_layout.nb_channels = avctx->extradata[3];
105 if (avctx->ch_layout.nb_channels < 1)
106 return AVERROR_INVALIDDATA;
107 if (avctx->ch_layout.nb_channels > FF_ARRAY_ELEMS(s->decode_buffer))
108 return AVERROR_INVALIDDATA;
109
Paul B Maholc4ab17a2023-09-04 12:13:45110 s->factor = 1;
Paul B Mahol7ef9d312023-06-27 17:54:25111 switch (avctx->extradata[2]) {
112 case 8: avctx->sample_fmt = AV_SAMPLE_FMT_U8P; break;
113 case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P; break;
114 case 20:
Paul B Mahol8cb2c6a2023-09-04 22:03:18115 case 24: s->factor = 256;
Paul B Maholc4ab17a2023-09-04 12:13:45116 avctx->sample_fmt = AV_SAMPLE_FMT_S32P; break;
Paul B Mahol7ef9d312023-06-27 17:54:25117 default: return AVERROR_INVALIDDATA;
118 }
119
Paul B Maholc4ab17a2023-09-04 12:13:45120 avctx->bits_per_raw_sample = avctx->extradata[2];
Paul B Mahol7ef9d312023-06-27 17:54:25121 s->nb_samples = AV_RL64(avctx->extradata + 16);
122 s->frame_samples = AV_RL16(avctx->extradata + 8);
123 s->max_framesize = (s->frame_samples * 16 + 1024) * avctx->ch_layout.nb_channels;
124
125 s->bitstream = av_calloc(s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE, sizeof(*s->bitstream));
126 if (!s->bitstream)
127 return AVERROR(ENOMEM);
128
129 for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
130 s->decode_buffer[ch] = av_calloc(s->frame_samples + OFFSET,
131 sizeof(*s->decode_buffer[ch]));
132 if (!s->decode_buffer[ch])
133 return AVERROR(ENOMEM);
134 }
135
136 s->pkt = avctx->internal->in_pkt;
137
138 return 0;
139}
140
141static void reset_stats(OSQChannel *cb)
142{
143 memset(cb->history, 0, sizeof(cb->history));
144 cb->pos = cb->count = cb->sum = 0;
145}
146
147static void update_stats(OSQChannel *cb, int val)
148{
149 cb->sum += FFABS(val) - cb->history[cb->pos];
150 cb->history[cb->pos] = FFABS(val);
151 cb->pos++;
152 cb->count++;
153 if (cb->pos >= FF_ARRAY_ELEMS(cb->history))
154 cb->pos = 0;
155}
156
157static int update_residue_parameter(OSQChannel *cb)
158{
159 double sum, x;
160 int rice_k;
161
162 sum = cb->sum;
Michael Niedermayerad35eaf2024-09-19 17:43:03163 if (!sum)
164 return 0;
Paul B Mahol7ef9d312023-06-27 17:54:25165 x = sum / cb->count;
Michael Niedermayer56c334d2023-09-14 22:49:41166 rice_k = ceil(log2(x));
Paul B Mahol7ef9d312023-06-27 17:54:25167 if (rice_k >= 30) {
Michael Niedermayer56c334d2023-09-14 22:49:41168 double f = floor(sum / 1.4426952 + 0.5);
169 if (f <= 1) {
Paul B Mahol7ef9d312023-06-27 17:54:25170 rice_k = 1;
Michael Niedermayer56c334d2023-09-14 22:49:41171 } else if (f >= 31) {
172 rice_k = 31;
173 } else
174 rice_k = f;
Paul B Mahol7ef9d312023-06-27 17:54:25175 }
176
177 return rice_k;
178}
179
180static uint32_t get_urice(GetBitContext *gb, int k)
181{
182 uint32_t z, x, b;
183
184 x = get_unary(gb, 1, 512);
185 b = get_bits_long(gb, k);
186 z = b | x << k;
187
188 return z;
189}
190
191static int32_t get_srice(GetBitContext *gb, int x)
192{
Michael Niedermayerc6a889f2025-02-04 02:58:45193 uint32_t y = get_urice(gb, x);
Paul B Mahol7ef9d312023-06-27 17:54:25194 return get_bits1(gb) ? -y : y;
195}
196
197static int osq_channel_parameters(AVCodecContext *avctx, int ch)
198{
199 OSQContext *s = avctx->priv_data;
200 OSQChannel *cb = &s->ch[ch];
201 GetBitContext *gb = &s->gb;
202
203 cb->prev = 0;
204 cb->prediction = get_urice(gb, 5);
205 cb->coding_mode = get_urice(gb, 3);
206 if (cb->prediction >= 15)
207 return AVERROR_INVALIDDATA;
208 if (cb->coding_mode > 0 && cb->coding_mode < 3) {
209 cb->residue_parameter = get_urice(gb, 4);
210 if (!cb->residue_parameter || cb->residue_parameter >= 31)
211 return AVERROR_INVALIDDATA;
212 } else if (cb->coding_mode == 3) {
213 cb->residue_bits = get_urice(gb, 4);
214 if (!cb->residue_bits || cb->residue_bits >= 31)
215 return AVERROR_INVALIDDATA;
216 } else if (cb->coding_mode) {
217 return AVERROR_INVALIDDATA;
218 }
219
220 if (cb->coding_mode == 2)
221 reset_stats(cb);
222
223 return 0;
224}
225
/* Indices of the five history samples stored in front of dst[0];
 * A is the most recent one, E the oldest. */
#define A (-1)
#define B (-2)
#define C (-3)
#define D (-4)
#define E (-5)

/* Predictor terms built from the history of a local 'dst' pointer. They are
 * evaluated in unsigned arithmetic so that wrap-around is well defined. */
#define P2 (((unsigned)dst[A] + dst[A]) - dst[B])
#define P3 (((unsigned)dst[A] - dst[B]) * 3 + dst[C])
Paul B Mahol7ef9d312023-06-27 17:54:25233
234static int do_decode(AVCodecContext *avctx, AVFrame *frame, int decorrelate, int downsample)
235{
236 OSQContext *s = avctx->priv_data;
237 const int nb_channels = avctx->ch_layout.nb_channels;
238 const int nb_samples = frame->nb_samples;
239 GetBitContext *gb = &s->gb;
240
241 for (int n = 0; n < nb_samples; n++) {
242 for (int ch = 0; ch < nb_channels; ch++) {
243 OSQChannel *cb = &s->ch[ch];
244 int32_t *dst = s->decode_buffer[ch] + OFFSET;
245 int32_t p, prev = cb->prev;
246
247 if (nb_channels == 2 && ch == 1 && decorrelate != s->decorrelate) {
248 if (!decorrelate) {
249 s->decode_buffer[1][OFFSET+A] += s->decode_buffer[0][OFFSET+B];
250 s->decode_buffer[1][OFFSET+B] += s->decode_buffer[0][OFFSET+C];
251 s->decode_buffer[1][OFFSET+C] += s->decode_buffer[0][OFFSET+D];
252 s->decode_buffer[1][OFFSET+D] += s->decode_buffer[0][OFFSET+E];
253 } else {
254 s->decode_buffer[1][OFFSET+A] -= s->decode_buffer[0][OFFSET+B];
255 s->decode_buffer[1][OFFSET+B] -= s->decode_buffer[0][OFFSET+C];
256 s->decode_buffer[1][OFFSET+C] -= s->decode_buffer[0][OFFSET+D];
257 s->decode_buffer[1][OFFSET+D] -= s->decode_buffer[0][OFFSET+E];
258 }
259 s->decorrelate = decorrelate;
260 }
261
262 if (!cb->coding_mode) {
263 dst[n] = 0;
264 } else if (cb->coding_mode == 3) {
265 dst[n] = get_sbits_long(gb, cb->residue_bits);
266 } else {
267 dst[n] = get_srice(gb, cb->residue_parameter);
268 }
269
270 if (get_bits_left(gb) < 0) {
271 av_log(avctx, AV_LOG_ERROR, "overread!\n");
272 return AVERROR_INVALIDDATA;
273 }
274
275 p = prev / 2;
276 prev = dst[n];
277
278 switch (cb->prediction) {
279 case 0:
280 break;
281 case 1:
Michael Niedermayerb54c9a92023-12-25 23:33:02282 dst[n] += (unsigned)dst[A];
Paul B Mahol7ef9d312023-06-27 17:54:25283 break;
284 case 2:
Michael Niedermayerb54c9a92023-12-25 23:33:02285 dst[n] += (unsigned)dst[A] + p;
Paul B Mahol7ef9d312023-06-27 17:54:25286 break;
287 case 3:
288 dst[n] += P2;
289 break;
290 case 4:
291 dst[n] += P2 + p;
292 break;
293 case 5:
294 dst[n] += P3;
295 break;
296 case 6:
297 dst[n] += P3 + p;
298 break;
299 case 7:
Michael Niedermayerb54c9a92023-12-25 23:33:02300 dst[n] += (int)(P2 + P3) / 2 + (unsigned)p;
Paul B Mahol7ef9d312023-06-27 17:54:25301 break;
302 case 8:
Michael Niedermayer0f511b42024-12-01 02:31:56303 dst[n] += (int)(P2 + P3) / 2 + 0U;
Paul B Mahol7ef9d312023-06-27 17:54:25304 break;
305 case 9:
Michael Niedermayerb54c9a92023-12-25 23:33:02306 dst[n] += (int)(P2 * 2 + P3) / 3 + (unsigned)p;
Paul B Mahol7ef9d312023-06-27 17:54:25307 break;
308 case 10:
Michael Niedermayerb54c9a92023-12-25 23:33:02309 dst[n] += (int)(P2 + P3 * 2) / 3 + (unsigned)p;
Paul B Mahol7ef9d312023-06-27 17:54:25310 break;
311 case 11:
Michael Niedermayer0f511b42024-12-01 02:31:56312 dst[n] += (int)((unsigned)dst[A] + dst[B]) / 2 + 0U;
Paul B Mahol7ef9d312023-06-27 17:54:25313 break;
314 case 12:
Michael Niedermayerb54c9a92023-12-25 23:33:02315 dst[n] += (unsigned)dst[B];
Paul B Mahol7ef9d312023-06-27 17:54:25316 break;
317 case 13:
Michael Niedermayer0f511b42024-12-01 02:31:56318 dst[n] += (int)((unsigned)dst[D] + dst[B]) / 2 + 0U;
Paul B Mahol7ef9d312023-06-27 17:54:25319 break;
320 case 14:
Michael Niedermayerb54c9a92023-12-25 23:33:02321 dst[n] += (int)((unsigned)P2 + dst[A]) / 2 + (unsigned)p;
Paul B Mahol7ef9d312023-06-27 17:54:25322 break;
323 default:
324 return AVERROR_INVALIDDATA;
325 }
326
327 cb->prev = prev;
328
329 if (downsample)
Michael Niedermayered34b0c2024-06-21 19:35:48330 dst[n] *= 256U;
Paul B Mahol7ef9d312023-06-27 17:54:25331
332 dst[E] = dst[D];
333 dst[D] = dst[C];
334 dst[C] = dst[B];
335 dst[B] = dst[A];
336 dst[A] = dst[n];
337
338 if (cb->coding_mode == 2) {
339 update_stats(cb, dst[n]);
340 cb->residue_parameter = update_residue_parameter(cb);
341 }
342
343 if (nb_channels == 2 && ch == 1) {
344 if (decorrelate)
Michael Niedermayere9f588a2024-09-19 20:03:37345 dst[n] += (unsigned)s->decode_buffer[0][OFFSET+n];
Paul B Mahol7ef9d312023-06-27 17:54:25346 }
347
348 if (downsample)
349 dst[A] /= 256;
350 }
351 }
352
353 return 0;
354}
355
356static int osq_decode_block(AVCodecContext *avctx, AVFrame *frame)
357{
358 const int nb_channels = avctx->ch_layout.nb_channels;
Paul B Maholc4ab17a2023-09-04 12:13:45359 const int nb_samples = frame->nb_samples;
Paul B Mahol7ef9d312023-06-27 17:54:25360 OSQContext *s = avctx->priv_data;
Michael Niedermayer6420c1b2023-09-14 22:49:41361 const unsigned factor = s->factor;
Paul B Mahol7ef9d312023-06-27 17:54:25362 int ret, decorrelate, downsample;
363 GetBitContext *gb = &s->gb;
364
365 skip_bits1(gb);
366 decorrelate = get_bits1(gb);
367 downsample = get_bits1(gb);
368
369 for (int ch = 0; ch < nb_channels; ch++) {
370 if ((ret = osq_channel_parameters(avctx, ch)) < 0) {
371 av_log(avctx, AV_LOG_ERROR, "invalid channel parameters\n");
372 return ret;
373 }
374 }
375
376 if ((ret = do_decode(avctx, frame, decorrelate, downsample)) < 0)
377 return ret;
378
379 align_get_bits(gb);
380
381 switch (avctx->sample_fmt) {
382 case AV_SAMPLE_FMT_U8P:
383 for (int ch = 0; ch < nb_channels; ch++) {
384 uint8_t *dst = (uint8_t *)frame->extended_data[ch];
385 int32_t *src = s->decode_buffer[ch] + OFFSET;
386
Paul B Maholc4ab17a2023-09-04 12:13:45387 for (int n = 0; n < nb_samples; n++)
Paul B Mahol7ef9d312023-06-27 17:54:25388 dst[n] = av_clip_uint8(src[n] + 0x80);
389 }
390 break;
391 case AV_SAMPLE_FMT_S16P:
392 for (int ch = 0; ch < nb_channels; ch++) {
393 int16_t *dst = (int16_t *)frame->extended_data[ch];
394 int32_t *src = s->decode_buffer[ch] + OFFSET;
395
Paul B Maholc4ab17a2023-09-04 12:13:45396 for (int n = 0; n < nb_samples; n++)
Paul B Mahol7ef9d312023-06-27 17:54:25397 dst[n] = (int16_t)src[n];
398 }
399 break;
400 case AV_SAMPLE_FMT_S32P:
401 for (int ch = 0; ch < nb_channels; ch++) {
402 int32_t *dst = (int32_t *)frame->extended_data[ch];
403 int32_t *src = s->decode_buffer[ch] + OFFSET;
404
Paul B Maholc4ab17a2023-09-04 12:13:45405 for (int n = 0; n < nb_samples; n++)
406 dst[n] = src[n] * factor;
Paul B Mahol7ef9d312023-06-27 17:54:25407 }
408 break;
409 default:
410 return AVERROR_BUG;
411 }
412
413 return 0;
414}
415
416static int osq_receive_frame(AVCodecContext *avctx, AVFrame *frame)
417{
418 OSQContext *s = avctx->priv_data;
419 GetBitContext *gb = &s->gb;
420 int ret, n;
421
422 while (s->bitstream_size < s->max_framesize) {
423 int size;
424
425 if (!s->pkt->data) {
426 ret = ff_decode_get_packet(avctx, s->pkt);
427 if (ret == AVERROR_EOF && s->bitstream_size > 0)
428 break;
Paul B Maholea063172023-09-21 18:25:37429 if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
Paul B Mahol7ef9d312023-06-27 17:54:25430 return ret;
Paul B Maholea063172023-09-21 18:25:37431 if (ret < 0)
432 goto fail;
Paul B Mahol7ef9d312023-06-27 17:54:25433 }
434
435 size = FFMIN(s->pkt->size - s->pkt_offset, s->max_framesize - s->bitstream_size);
436 memcpy(s->bitstream + s->bitstream_size, s->pkt->data + s->pkt_offset, size);
437 s->bitstream_size += size;
438 s->pkt_offset += size;
439
440 if (s->pkt_offset == s->pkt->size) {
441 av_packet_unref(s->pkt);
442 s->pkt_offset = 0;
443 }
444 }
445
446 frame->nb_samples = FFMIN(s->frame_samples, s->nb_samples);
447 if (frame->nb_samples <= 0)
448 return AVERROR_EOF;
449
450 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
451 goto fail;
452
453 if ((ret = init_get_bits8(gb, s->bitstream, s->bitstream_size)) < 0)
454 goto fail;
455
456 if ((ret = osq_decode_block(avctx, frame)) < 0)
457 goto fail;
458
459 s->nb_samples -= frame->nb_samples;
460
461 n = get_bits_count(gb) / 8;
462 if (n > s->bitstream_size) {
463 ret = AVERROR_INVALIDDATA;
464 goto fail;
465 }
466
467 memmove(s->bitstream, &s->bitstream[n], s->bitstream_size - n);
468 s->bitstream_size -= n;
469
470 return 0;
471
472fail:
473 s->bitstream_size = 0;
474 s->pkt_offset = 0;
475 av_packet_unref(s->pkt);
476
477 return ret;
478}
479
480const FFCodec ff_osq_decoder = {
481 .p.name = "osq",
482 CODEC_LONG_NAME("OSQ (Original Sound Quality)"),
483 .p.type = AVMEDIA_TYPE_AUDIO,
484 .p.id = AV_CODEC_ID_OSQ,
485 .priv_data_size = sizeof(OSQContext),
486 .init = osq_init,
487 FF_CODEC_RECEIVE_FRAME_CB(osq_receive_frame),
488 .close = osq_close,
489 .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
490 AV_CODEC_CAP_DR1,
491 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
Andreas Rheinhardt0971fcf2025-03-07 00:19:27492 CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_U8P, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S32P),
Michael Niedermayerc75fccd2023-09-20 21:53:21493 .flush = osq_flush,
Paul B Mahol7ef9d312023-06-27 17:54:25494};