/*
 * OSQ audio decoder
 * Copyright (c) 2023 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/internal.h"
23#include "libavutil/intreadwrite.h"
24#include "avcodec.h"
25#include "codec_internal.h"
26#include "decode.h"
27#include "internal.h"
28#define BITSTREAM_READER_LE
29#include "get_bits.h"
30#include "unary.h"
31
32#define OFFSET 5
33
/**
 * Per-channel decoding state.
 */
typedef struct OSQChannel {
    /* Block parameters, re-read for every block by osq_channel_parameters(). */
    unsigned prediction;        /* predictor selector; do_decode() handles 0..14 */
    unsigned coding_mode;       /* 0: zero residual, 1/2: Rice coded, 3: raw signed bits */
    unsigned residue_parameter; /* Rice parameter used by coding modes 1 and 2 */
    unsigned residue_bits;      /* bits per residual in coding mode 3 */

    /* Running statistics maintained by update_stats() in coding mode 2. */
    unsigned history[3];        /* ring buffer of the most recent magnitudes */
    unsigned pos;               /* next slot of history[] to overwrite */
    unsigned count;             /* number of values seen since reset_stats() */
    double   sum;               /* sum of the magnitudes held in history[] */

    int32_t  prev;              /* previous residual; half of it feeds some predictors */
} OSQChannel;
44
45typedef struct OSQContext {
46 GetBitContext gb;
47 OSQChannel ch[2];
48
49 uint8_t *bitstream;
50 size_t max_framesize;
51 size_t bitstream_size;
52
53 int decorrelate;
54 int frame_samples;
55 int64_t nb_samples;
56
57 int32_t *decode_buffer[2];
58
59 AVPacket *pkt;
60 int pkt_offset;
61} OSQContext;
62
63static av_cold int osq_close(AVCodecContext *avctx)
64{
65 OSQContext *s = avctx->priv_data;
66
67 av_freep(&s->bitstream);
68 s->bitstream_size = 0;
69
70 for (int ch = 0; ch < FF_ARRAY_ELEMS(s->decode_buffer); ch++)
71 av_freep(&s->decode_buffer[ch]);
72
73 return 0;
74}
75
76static av_cold int osq_init(AVCodecContext *avctx)
77{
78 OSQContext *s = avctx->priv_data;
79
80 if (avctx->extradata_size < 48)
81 return AVERROR(EINVAL);
82
83 if (avctx->extradata[0] != 1) {
84 av_log(avctx, AV_LOG_ERROR, "Unsupported version.\n");
85 return AVERROR_INVALIDDATA;
86 }
87
88 avctx->sample_rate = AV_RL32(avctx->extradata + 4);
89 if (avctx->sample_rate < 1)
90 return AVERROR_INVALIDDATA;
91
92 av_channel_layout_uninit(&avctx->ch_layout);
93 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
94 avctx->ch_layout.nb_channels = avctx->extradata[3];
95 if (avctx->ch_layout.nb_channels < 1)
96 return AVERROR_INVALIDDATA;
97 if (avctx->ch_layout.nb_channels > FF_ARRAY_ELEMS(s->decode_buffer))
98 return AVERROR_INVALIDDATA;
99
100 switch (avctx->extradata[2]) {
101 case 8: avctx->sample_fmt = AV_SAMPLE_FMT_U8P; break;
102 case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P; break;
103 case 20:
104 case 24:
105 case 28:
106 case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P; break;
107 default: return AVERROR_INVALIDDATA;
108 }
109
110 s->nb_samples = AV_RL64(avctx->extradata + 16);
111 s->frame_samples = AV_RL16(avctx->extradata + 8);
112 s->max_framesize = (s->frame_samples * 16 + 1024) * avctx->ch_layout.nb_channels;
113
114 s->bitstream = av_calloc(s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE, sizeof(*s->bitstream));
115 if (!s->bitstream)
116 return AVERROR(ENOMEM);
117
118 for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
119 s->decode_buffer[ch] = av_calloc(s->frame_samples + OFFSET,
120 sizeof(*s->decode_buffer[ch]));
121 if (!s->decode_buffer[ch])
122 return AVERROR(ENOMEM);
123 }
124
125 s->pkt = avctx->internal->in_pkt;
126
127 return 0;
128}
129
130static void reset_stats(OSQChannel *cb)
131{
132 memset(cb->history, 0, sizeof(cb->history));
133 cb->pos = cb->count = cb->sum = 0;
134}
135
136static void update_stats(OSQChannel *cb, int val)
137{
138 cb->sum += FFABS(val) - cb->history[cb->pos];
139 cb->history[cb->pos] = FFABS(val);
140 cb->pos++;
141 cb->count++;
142 if (cb->pos >= FF_ARRAY_ELEMS(cb->history))
143 cb->pos = 0;
144}
145
146static int update_residue_parameter(OSQChannel *cb)
147{
148 double sum, x;
149 int rice_k;
150
151 sum = cb->sum;
152 x = sum / cb->count;
153 rice_k = av_ceil_log2(x);
154 if (rice_k >= 30) {
155 rice_k = floor(sum / 1.4426952 + 0.5);
156 if (rice_k < 1)
157 rice_k = 1;
158 }
159
160 return rice_k;
161}
162
163static uint32_t get_urice(GetBitContext *gb, int k)
164{
165 uint32_t z, x, b;
166
167 x = get_unary(gb, 1, 512);
168 b = get_bits_long(gb, k);
169 z = b | x << k;
170
171 return z;
172}
173
174static int32_t get_srice(GetBitContext *gb, int x)
175{
176 int32_t y = get_urice(gb, x);
177 return get_bits1(gb) ? -y : y;
178}
179
180static int osq_channel_parameters(AVCodecContext *avctx, int ch)
181{
182 OSQContext *s = avctx->priv_data;
183 OSQChannel *cb = &s->ch[ch];
184 GetBitContext *gb = &s->gb;
185
186 cb->prev = 0;
187 cb->prediction = get_urice(gb, 5);
188 cb->coding_mode = get_urice(gb, 3);
189 if (cb->prediction >= 15)
190 return AVERROR_INVALIDDATA;
191 if (cb->coding_mode > 0 && cb->coding_mode < 3) {
192 cb->residue_parameter = get_urice(gb, 4);
193 if (!cb->residue_parameter || cb->residue_parameter >= 31)
194 return AVERROR_INVALIDDATA;
195 } else if (cb->coding_mode == 3) {
196 cb->residue_bits = get_urice(gb, 4);
197 if (!cb->residue_bits || cb->residue_bits >= 31)
198 return AVERROR_INVALIDDATA;
199 } else if (cb->coding_mode) {
200 return AVERROR_INVALIDDATA;
201 }
202
203 if (cb->coding_mode == 2)
204 reset_stats(cb);
205
206 return 0;
207}
208
209#define A (-1)
210#define B (-2)
211#define C (-3)
212#define D (-4)
213#define E (-5)
214#define P2 ((dst[A] + dst[A]) - dst[B])
215#define P3 ((dst[A] - dst[B]) * 3 + dst[C])
216
217static int do_decode(AVCodecContext *avctx, AVFrame *frame, int decorrelate, int downsample)
218{
219 OSQContext *s = avctx->priv_data;
220 const int nb_channels = avctx->ch_layout.nb_channels;
221 const int nb_samples = frame->nb_samples;
222 GetBitContext *gb = &s->gb;
223
224 for (int n = 0; n < nb_samples; n++) {
225 for (int ch = 0; ch < nb_channels; ch++) {
226 OSQChannel *cb = &s->ch[ch];
227 int32_t *dst = s->decode_buffer[ch] + OFFSET;
228 int32_t p, prev = cb->prev;
229
230 if (nb_channels == 2 && ch == 1 && decorrelate != s->decorrelate) {
231 if (!decorrelate) {
232 s->decode_buffer[1][OFFSET+A] += s->decode_buffer[0][OFFSET+B];
233 s->decode_buffer[1][OFFSET+B] += s->decode_buffer[0][OFFSET+C];
234 s->decode_buffer[1][OFFSET+C] += s->decode_buffer[0][OFFSET+D];
235 s->decode_buffer[1][OFFSET+D] += s->decode_buffer[0][OFFSET+E];
236 } else {
237 s->decode_buffer[1][OFFSET+A] -= s->decode_buffer[0][OFFSET+B];
238 s->decode_buffer[1][OFFSET+B] -= s->decode_buffer[0][OFFSET+C];
239 s->decode_buffer[1][OFFSET+C] -= s->decode_buffer[0][OFFSET+D];
240 s->decode_buffer[1][OFFSET+D] -= s->decode_buffer[0][OFFSET+E];
241 }
242 s->decorrelate = decorrelate;
243 }
244
245 if (!cb->coding_mode) {
246 dst[n] = 0;
247 } else if (cb->coding_mode == 3) {
248 dst[n] = get_sbits_long(gb, cb->residue_bits);
249 } else {
250 dst[n] = get_srice(gb, cb->residue_parameter);
251 }
252
253 if (get_bits_left(gb) < 0) {
254 av_log(avctx, AV_LOG_ERROR, "overread!\n");
255 return AVERROR_INVALIDDATA;
256 }
257
258 p = prev / 2;
259 prev = dst[n];
260
261 switch (cb->prediction) {
262 case 0:
263 break;
264 case 1:
265 dst[n] += dst[A];
266 break;
267 case 2:
268 dst[n] += dst[A] + p;
269 break;
270 case 3:
271 dst[n] += P2;
272 break;
273 case 4:
274 dst[n] += P2 + p;
275 break;
276 case 5:
277 dst[n] += P3;
278 break;
279 case 6:
280 dst[n] += P3 + p;
281 break;
282 case 7:
283 dst[n] += (P2 + P3) / 2 + p;
284 break;
285 case 8:
286 dst[n] += (P2 + P3) / 2;
287 break;
288 case 9:
289 dst[n] += (P2 * 2 + P3) / 3 + p;
290 break;
291 case 10:
292 dst[n] += (P2 + P3 * 2) / 3 + p;
293 break;
294 case 11:
295 dst[n] += (dst[A] + dst[B]) / 2;
296 break;
297 case 12:
298 dst[n] += dst[B];
299 break;
300 case 13:
301 dst[n] += (dst[D] + dst[B]) / 2;
302 break;
303 case 14:
304 dst[n] += (P2 + dst[A]) / 2 + p;
305 break;
306 default:
307 return AVERROR_INVALIDDATA;
308 }
309
310 cb->prev = prev;
311
312 if (downsample)
313 dst[n] *= 256;
314
315 dst[E] = dst[D];
316 dst[D] = dst[C];
317 dst[C] = dst[B];
318 dst[B] = dst[A];
319 dst[A] = dst[n];
320
321 if (cb->coding_mode == 2) {
322 update_stats(cb, dst[n]);
323 cb->residue_parameter = update_residue_parameter(cb);
324 }
325
326 if (nb_channels == 2 && ch == 1) {
327 if (decorrelate)
328 dst[n] += s->decode_buffer[0][OFFSET+n];
329 }
330
331 if (downsample)
332 dst[A] /= 256;
333 }
334 }
335
336 return 0;
337}
338
339static int osq_decode_block(AVCodecContext *avctx, AVFrame *frame)
340{
341 const int nb_channels = avctx->ch_layout.nb_channels;
342 OSQContext *s = avctx->priv_data;
343 int ret, decorrelate, downsample;
344 GetBitContext *gb = &s->gb;
345
346 skip_bits1(gb);
347 decorrelate = get_bits1(gb);
348 downsample = get_bits1(gb);
349
350 for (int ch = 0; ch < nb_channels; ch++) {
351 if ((ret = osq_channel_parameters(avctx, ch)) < 0) {
352 av_log(avctx, AV_LOG_ERROR, "invalid channel parameters\n");
353 return ret;
354 }
355 }
356
357 if ((ret = do_decode(avctx, frame, decorrelate, downsample)) < 0)
358 return ret;
359
360 align_get_bits(gb);
361
362 switch (avctx->sample_fmt) {
363 case AV_SAMPLE_FMT_U8P:
364 for (int ch = 0; ch < nb_channels; ch++) {
365 uint8_t *dst = (uint8_t *)frame->extended_data[ch];
366 int32_t *src = s->decode_buffer[ch] + OFFSET;
367
368 for (int n = 0; n < frame->nb_samples; n++)
369 dst[n] = av_clip_uint8(src[n] + 0x80);
370 }
371 break;
372 case AV_SAMPLE_FMT_S16P:
373 for (int ch = 0; ch < nb_channels; ch++) {
374 int16_t *dst = (int16_t *)frame->extended_data[ch];
375 int32_t *src = s->decode_buffer[ch] + OFFSET;
376
377 for (int n = 0; n < frame->nb_samples; n++)
378 dst[n] = (int16_t)src[n];
379 }
380 break;
381 case AV_SAMPLE_FMT_S32P:
382 for (int ch = 0; ch < nb_channels; ch++) {
383 int32_t *dst = (int32_t *)frame->extended_data[ch];
384 int32_t *src = s->decode_buffer[ch] + OFFSET;
385
386 for (int n = 0; n < frame->nb_samples; n++)
387 dst[n] = src[n];
388 }
389 break;
390 default:
391 return AVERROR_BUG;
392 }
393
394 return 0;
395}
396
397static int osq_receive_frame(AVCodecContext *avctx, AVFrame *frame)
398{
399 OSQContext *s = avctx->priv_data;
400 GetBitContext *gb = &s->gb;
401 int ret, n;
402
403 while (s->bitstream_size < s->max_framesize) {
404 int size;
405
406 if (!s->pkt->data) {
407 ret = ff_decode_get_packet(avctx, s->pkt);
408 if (ret == AVERROR_EOF && s->bitstream_size > 0)
409 break;
410 if (ret < 0)
411 return ret;
412 }
413
414 size = FFMIN(s->pkt->size - s->pkt_offset, s->max_framesize - s->bitstream_size);
415 memcpy(s->bitstream + s->bitstream_size, s->pkt->data + s->pkt_offset, size);
416 s->bitstream_size += size;
417 s->pkt_offset += size;
418
419 if (s->pkt_offset == s->pkt->size) {
420 av_packet_unref(s->pkt);
421 s->pkt_offset = 0;
422 }
423 }
424
425 frame->nb_samples = FFMIN(s->frame_samples, s->nb_samples);
426 if (frame->nb_samples <= 0)
427 return AVERROR_EOF;
428
429 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
430 goto fail;
431
432 if ((ret = init_get_bits8(gb, s->bitstream, s->bitstream_size)) < 0)
433 goto fail;
434
435 if ((ret = osq_decode_block(avctx, frame)) < 0)
436 goto fail;
437
438 s->nb_samples -= frame->nb_samples;
439
440 n = get_bits_count(gb) / 8;
441 if (n > s->bitstream_size) {
442 ret = AVERROR_INVALIDDATA;
443 goto fail;
444 }
445
446 memmove(s->bitstream, &s->bitstream[n], s->bitstream_size - n);
447 s->bitstream_size -= n;
448
449 return 0;
450
451fail:
452 s->bitstream_size = 0;
453 s->pkt_offset = 0;
454 av_packet_unref(s->pkt);
455
456 return ret;
457}
458
459const FFCodec ff_osq_decoder = {
460 .p.name = "osq",
461 CODEC_LONG_NAME("OSQ (Original Sound Quality)"),
462 .p.type = AVMEDIA_TYPE_AUDIO,
463 .p.id = AV_CODEC_ID_OSQ,
464 .priv_data_size = sizeof(OSQContext),
465 .init = osq_init,
466 FF_CODEC_RECEIVE_FRAME_CB(osq_receive_frame),
467 .close = osq_close,
468 .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
469 AV_CODEC_CAP_DR1,
470 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
471 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
472 AV_SAMPLE_FMT_S16P,
473 AV_SAMPLE_FMT_S32P,
474 AV_SAMPLE_FMT_NONE },
475};