blob: 5253afc6df587e7fc66f134b4c9f1e8edebefb94 [file] [log] [blame]
Arpi37e8dcd2001-12-09 12:39:541/*
Fabrice Bellardff4ec492002-05-25 22:45:332 * Simple IDCT
3 *
4 * Copyright (c) 2001 Michael Niedermayer <[email protected]>
5 *
Diego Biurrunb78e7192006-10-07 15:30:466 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
Fabrice Bellardff4ec492002-05-25 22:45:339 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
Diego Biurrunb78e7192006-10-07 15:30:4611 * version 2.1 of the License, or (at your option) any later version.
Fabrice Bellardff4ec492002-05-25 22:45:3312 *
Diego Biurrunb78e7192006-10-07 15:30:4613 * FFmpeg is distributed in the hope that it will be useful,
Fabrice Bellardff4ec492002-05-25 22:45:3314 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
Diego Biurrunb78e7192006-10-07 15:30:4619 * License along with FFmpeg; if not, write to the Free Software
Diego Biurrun5509bff2006-01-12 22:43:2620 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Fabrice Bellardff4ec492002-05-25 22:45:3321 */
Diego Biurrun115329f2005-12-17 18:14:3822
Michael Niedermayer983e3242003-03-06 11:32:0423/**
Diego Biurrunba87f082010-04-20 14:45:3424 * @file
Michael Niedermayer983e3242003-03-06 11:32:0425 * simpleidct in C.
26 */
Diego Biurrun115329f2005-12-17 18:14:3827
Mans Rullgard3e9409b2011-07-18 15:04:2528#include "libavutil/intreadwrite.h"
Måns Rullgårdedf7c2b2008-12-27 16:25:3629#include "mathops.h"
Arpi37e8dcd2001-12-09 12:39:5430#include "simple_idct.h"
31
Kieran Kunhya699fa8f2017-12-27 01:08:3932#define IN_IDCT_DEPTH 16
33
Mans Rullgarde7a972e2011-07-20 15:05:0534#define BIT_DEPTH 8
35#include "simple_idct_template.c"
36#undef BIT_DEPTH
Michael Niedermayer49739712002-01-14 04:39:5937
Mans Rullgarde7a972e2011-07-20 15:05:0538#define BIT_DEPTH 10
39#include "simple_idct_template.c"
Mans Rullgarde7a972e2011-07-20 15:05:0540#undef BIT_DEPTH
Michael Niedermayer86748db2002-09-01 09:33:0541
Michael Niedermayerae57e822013-09-05 22:10:4542#define BIT_DEPTH 12
43#include "simple_idct_template.c"
44#undef BIT_DEPTH
Kieran Kunhya699fa8f2017-12-27 01:08:3945#undef IN_IDCT_DEPTH
46
47#define IN_IDCT_DEPTH 32
48#define BIT_DEPTH 10
49#include "simple_idct_template.c"
50#undef BIT_DEPTH
51#undef IN_IDCT_DEPTH
Michael Niedermayerae57e822013-09-05 22:10:4552
/* 2x4x8 idct */

/* Number of fractional bits of the fixed-point column constants. */
#define CN_SHIFT 12
/* Scale a real constant by 2^CN_SHIFT; the + 0.5 rounds positive values to
   the nearest integer before the truncating cast. */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* The row IDCT is multiplied by 16 * sqrt(2.0), the column IDCT4 is
   normalized, and the butterfly must be multiplied by 0.5 * sqrt(2.0). */
#define C_SHIFT (4 + 1 + 12)
Fabrice Bellard9bf71512002-10-03 13:41:3363
Diego Biurrun2ec9fa52016-09-07 15:02:0664static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
Fabrice Bellard9bf71512002-10-03 13:41:3365{
66 int c0, c1, c2, c3, a0, a1, a2, a3;
Fabrice Bellard9bf71512002-10-03 13:41:3367
68 a0 = col[8*0];
69 a1 = col[8*2];
70 a2 = col[8*4];
71 a3 = col[8*6];
Michael Niedermayer28dc6e72017-02-21 02:14:4972 c0 = ((a0 + a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
73 c2 = ((a0 - a2) * (1 << CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
Fabrice Bellard9bf71512002-10-03 13:41:3374 c1 = a1 * C1 + a3 * C2;
75 c3 = a1 * C2 - a3 * C1;
Michael Niedermayer689f6512012-03-02 21:09:4476 dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT);
Fabrice Bellard9bf71512002-10-03 13:41:3377 dest += line_size;
Michael Niedermayer689f6512012-03-02 21:09:4478 dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT);
Fabrice Bellard9bf71512002-10-03 13:41:3379 dest += line_size;
Michael Niedermayer689f6512012-03-02 21:09:4480 dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT);
Fabrice Bellard9bf71512002-10-03 13:41:3381 dest += line_size;
Michael Niedermayer689f6512012-03-02 21:09:4482 dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT);
Fabrice Bellard9bf71512002-10-03 13:41:3383}
84
/*
 * In-place butterfly between element k and element 8 + k of the array that
 * 'ptr' points to: ptr[k] receives the sum and ptr[8 + k] the difference of
 * the two original values.
 *
 * The macro relies on a variable named 'ptr' being in scope where it is
 * expanded. The argument is evaluated several times, so it must not have
 * side effects. The body is wrapped in do { } while (0) so that "BF(k);"
 * behaves like a single statement, including inside an unbraced if/else.
 */
#define BF(k) \
do {\
    const int bf_a0 = ptr[(k)];\
    const int bf_a1 = ptr[8 + (k)];\
    ptr[(k)]     = bf_a0 + bf_a1;\
    ptr[8 + (k)] = bf_a0 - bf_a1;\
} while (0)
93
/**
 * 2-4-8 IDCT that stores 8-bit samples; only used by the DV codec.
 *
 * The input must be interlaced. 128 is added to the pixels before clamping
 * to avoid a systematic error (an offset of 1024*sqrt(2) would be needed
 * otherwise).
 * NOTE(review): that addition is not visible in this function -- confirm
 * where it is applied.
 *
 * XXX: I think a 1.0/sqrt(2) normalization should be needed to compensate
 * for the extra butterfly stage - I don't have the full DV specification.
 *
 * Steps, all operating on an 8x8 coefficient block stored row by row:
 *  1. each pair of adjacent rows (0/1, 2/3, 4/5, 6/7) is replaced in place
 *     by its element-wise sum and difference,
 *  2. idctRowCondDC_int16_8bit() is run on each of the 8 rows,
 *  3. for every column, idct4col_put() is run once on rows 0, 2, 4, 6 and
 *     once on rows 1, 3, 5, 7; the first result goes to output lines
 *     0, 2, 4, 6 and the second to output lines 1, 3, 5, 7.
 *
 * @param dest      top-left output sample
 * @param line_size distance between two output lines
 * @param block     64 coefficients, modified in place
 */
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *ptr = block;   /* name required by the BF() macro */
    int pair, k;

    /* butterfly */
    for (pair = 0; pair < 4; pair++) {
        for (k = 0; k < 8; k++) {
            BF(k);
        }
        ptr += 2 * 8;       /* advance to the next pair of rows */
    }

    /* IDCT8 on each line */
    for (k = 0; k < 8; k++) {
        idctRowCondDC_int16_8bit(&block[8 * k], 0);
    }

    /* IDCT4 and store */
    for (k = 0; k < 8; k++) {
        const int16_t *even_rows = block + k;
        const int16_t *odd_rows  = block + 8 + k;

        idct4col_put(dest + k,             2 * line_size, even_rows);
        idct4col_put(dest + line_size + k, 2 * line_size, odd_rows);
    }
}
Michael Niedermayer1457ab52002-12-27 23:51:46130
/* 8x4 & 4x8 WMV2 IDCT */

/* Drop the constants of the 2-4-8 IDCT above before redefining them. */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2

/* Number of fractional bits of the fixed-point column constants. */
#define CN_SHIFT 12
/* Scale a real constant by sqrt(2) * 2^CN_SHIFT; the + 0.5 rounds positive
   values to the nearest integer before the truncating cast. */
#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* Right shift applied to the column results in idct4col_add(). */
#define C_SHIFT (4 + 1 + 12)
Diego Biurrun2ec9fa52016-09-07 15:02:06143static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
Michael Niedermayer1457ab52002-12-27 23:51:46144{
145 int c0, c1, c2, c3, a0, a1, a2, a3;
Michael Niedermayer1457ab52002-12-27 23:51:46146
147 a0 = col[8*0];
148 a1 = col[8*1];
149 a2 = col[8*2];
150 a3 = col[8*3];
151 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
152 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
153 c1 = a1 * C1 + a3 * C2;
154 c3 = a1 * C2 - a3 * C1;
Ronald S. Bultjec23acba2012-03-06 00:01:19155 dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT));
Michael Niedermayer1457ab52002-12-27 23:51:46156 dest += line_size;
Ronald S. Bultjec23acba2012-03-06 00:01:19157 dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT));
Michael Niedermayer1457ab52002-12-27 23:51:46158 dest += line_size;
Ronald S. Bultjec23acba2012-03-06 00:01:19159 dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT));
Michael Niedermayer1457ab52002-12-27 23:51:46160 dest += line_size;
Ronald S. Bultjec23acba2012-03-06 00:01:19161 dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT));
Michael Niedermayer1457ab52002-12-27 23:51:46162}
163
/* Number of fractional bits of the fixed-point row constants. */
#define RN_SHIFT 15
/* Scale a real constant by sqrt(2) * 2^RN_SHIFT; the + 0.5 rounds positive
   values to the nearest integer before the truncating cast. */
#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* Right shift applied to the row results in idct4row(). */
#define R_SHIFT 11
Diego Biurrun88bd7fd2013-01-20 00:02:29170static inline void idct4row(int16_t *row)
Michael Niedermayer1457ab52002-12-27 23:51:46171{
Michael Niedermayer57f7e5c2020-12-17 23:31:08172 unsigned c0, c1, c2, c3;
173 int a0, a1, a2, a3;
Michael Niedermayer1457ab52002-12-27 23:51:46174
175 a0 = row[0];
176 a1 = row[1];
177 a2 = row[2];
178 a3 = row[3];
179 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
180 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
181 c1 = a1 * R1 + a3 * R2;
182 c3 = a1 * R2 - a3 * R1;
183 row[0]= (c0 + c1) >> R_SHIFT;
184 row[1]= (c2 + c3) >> R_SHIFT;
185 row[2]= (c2 - c3) >> R_SHIFT;
186 row[3]= (c0 - c1) >> R_SHIFT;
187}
188
/**
 * 8x4 IDCT whose result is added to the destination samples.
 *
 * Runs idctRowCondDC_int16_8bit() on the first 4 rows of the block (rows
 * are 8 coefficients apart), then idct4col_add() on each of the 8 columns.
 *
 * @param dest      top-left sample to update
 * @param line_size distance between two lines of dest
 * @param block     coefficients, modified in place by the row pass
 */
void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* IDCT8 on each line */
    for (row = 0; row < 4; row++)
        idctRowCondDC_int16_8bit(&block[8 * row], 0);

    /* IDCT4 and store */
    for (col = 0; col < 8; col++)
        idct4col_add(&dest[col], line_size, &block[col]);
}
203
/**
 * 4x8 IDCT whose result is added to the destination samples.
 *
 * Runs idct4row() on each of the 8 rows of the block (rows are 8
 * coefficients apart), then idctSparseColAdd_int16_8bit() on each of the
 * first 4 columns.
 *
 * @param dest      top-left sample to update
 * @param line_size distance between two lines of dest
 * @param block     coefficients, modified in place by the row pass
 */
void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* IDCT4 on each line */
    for (row = 0; row < 8; row++)
        idct4row(&block[8 * row]);

    /* IDCT8 and store */
    for (col = 0; col < 4; col++)
        idctSparseColAdd_int16_8bit(&dest[col], line_size, &block[col]);
}
218
/**
 * 4x4 IDCT whose result is added to the destination samples.
 *
 * Runs idct4row() on the first 4 rows of the block (rows are 8 coefficients
 * apart), then idct4col_add() on each of the first 4 columns.
 *
 * @param dest      top-left sample to update
 * @param line_size distance between two lines of dest
 * @param block     coefficients, modified in place by the row pass
 */
void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* IDCT4 on each line */
    for (row = 0; row < 4; row++)
        idct4row(&block[8 * row]);

    /* IDCT4 and store */
    for (col = 0; col < 4; col++)
        idct4col_add(&dest[col], line_size, &block[col]);
}