blob: 0ffbfcaf3399453c47405bd7b5aa0b428b1e6f18 [file] [log] [blame]
/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <[email protected]>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
Diego Biurrun115329f2005-12-17 18:14:3822
/**
 * @file
 * simpleidct in C.
 */
Diego Biurrun115329f2005-12-17 18:14:3827
/*
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
  written by Aaron Holtzman <[email protected]>)
*/
Mans Rullgard3e9409b2011-07-18 15:04:2532
33#include "libavutil/intreadwrite.h"
Fabrice Bellard6000abf2002-05-18 23:03:2934#include "avcodec.h"
Fabrice Bellardd36a2462002-06-05 18:46:2535#include "dsputil.h"
Måns Rullgårdedf7c2b2008-12-27 16:25:3636#include "mathops.h"
Arpi37e8dcd2001-12-09 12:39:5437#include "simple_idct.h"
38
/*
 * Fixed-point IDCT coefficients.
 * Wi = (int)(cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5) for i = 1..7,
 * except W4: the formula yields 16384 but the value used here is 16383.
 * NOTE(review): the reason for the W4 adjustment is not stated in this
 * file; do not "correct" it without checking the output against the
 * other IDCT implementations.
 */
#define W1 22725 /* i = 1 */
#define W2 21407 /* i = 2 */
#define W3 19266 /* i = 3 */
#define W4 16383 /* i = 4, one less than the formula result (16384) */
#define W5 12873 /* i = 5 */
#define W6 8867  /* i = 6 */
#define W7 4520  /* i = 7 */

/* Right shift applied to the results of the row pass. */
#define ROW_SHIFT 11
/* Right shift applied to the results of the column pass.
   The original line carried the bare annotation "6"; its meaning is not
   evident from this file. */
#define COL_SHIFT 20
Michael Niedermayer49739712002-01-14 04:39:5948
Michael Niedermayer0e153842003-01-15 19:21:2149static inline void idctRowCondDC (DCTELEM * row)
Michael Niedermayer49739712002-01-14 04:39:5950{
Diego Biurrunbb270c02005-12-22 01:10:1151 int a0, a1, a2, a3, b0, b1, b2, b3;
Michael Niedermayer49739712002-01-14 04:39:5952
Aurelien Jacobsb250f9c2009-01-13 23:44:1653#if HAVE_FAST_64BIT
Mans Rullgard3e9409b2011-07-18 15:04:2554#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
Mans Rullgarddbb5ee82011-07-18 14:23:2255 if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
Mans Rullgard3e9409b2011-07-18 15:04:2556 uint64_t temp = (row[0] << 3) & 0xffff;
Mans Rullgarddbb5ee82011-07-18 14:23:2257 temp += temp << 16;
58 temp += temp << 32;
59 ((uint64_t *)row)[0] = temp;
60 ((uint64_t *)row)[1] = temp;
61 return;
Michael Niedermayer0e153842003-01-15 19:21:2162 }
Fabrice Bellard412ba502002-06-05 14:25:0663#else
Mans Rullgarddbb5ee82011-07-18 14:23:2264 if (!(((uint32_t*)row)[1] |
65 ((uint32_t*)row)[2] |
66 ((uint32_t*)row)[3] |
67 row[1])) {
Mans Rullgard3e9409b2011-07-18 15:04:2568 uint32_t temp = (row[0] << 3) & 0xffff;
Mans Rullgarddbb5ee82011-07-18 14:23:2269 temp += temp << 16;
70 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
Michael Niedermayer0e153842003-01-15 19:21:2171 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
Mans Rullgarddbb5ee82011-07-18 14:23:2272 return;
Michael Niedermayer0e153842003-01-15 19:21:2173 }
Fabrice Bellard412ba502002-06-05 14:25:0674#endif
Michael Niedermayer49739712002-01-14 04:39:5975
Fabrice Bellard412ba502002-06-05 14:25:0676 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
Diego Biurrunbb270c02005-12-22 01:10:1177 a1 = a0;
78 a2 = a0;
79 a3 = a0;
Michael Niedermayer49739712002-01-14 04:39:5980
Fabrice Bellard412ba502002-06-05 14:25:0681 /* no need to optimize : gcc does it */
82 a0 += W2 * row[2];
83 a1 += W6 * row[2];
84 a2 -= W6 * row[2];
85 a3 -= W2 * row[2];
Michael Niedermayer49739712002-01-14 04:39:5986
Måns Rullgårdedf7c2b2008-12-27 16:25:3687 b0 = MUL16(W1, row[1]);
Fabrice Bellard412ba502002-06-05 14:25:0688 MAC16(b0, W3, row[3]);
Måns Rullgårdedf7c2b2008-12-27 16:25:3689 b1 = MUL16(W3, row[1]);
Fabrice Bellard412ba502002-06-05 14:25:0690 MAC16(b1, -W7, row[3]);
Måns Rullgårdedf7c2b2008-12-27 16:25:3691 b2 = MUL16(W5, row[1]);
Fabrice Bellard412ba502002-06-05 14:25:0692 MAC16(b2, -W1, row[3]);
Måns Rullgårdedf7c2b2008-12-27 16:25:3693 b3 = MUL16(W7, row[1]);
Fabrice Bellard412ba502002-06-05 14:25:0694 MAC16(b3, -W5, row[3]);
95
Mans Rullgard3e9409b2011-07-18 15:04:2596 if (AV_RN64A(row + 4)) {
Fabrice Bellard412ba502002-06-05 14:25:0697 a0 += W4*row[4] + W6*row[6];
98 a1 += - W4*row[4] - W2*row[6];
99 a2 += - W4*row[4] + W2*row[6];
100 a3 += W4*row[4] - W6*row[6];
101
102 MAC16(b0, W5, row[5]);
103 MAC16(b0, W7, row[7]);
Diego Biurrun115329f2005-12-17 18:14:38104
Fabrice Bellard412ba502002-06-05 14:25:06105 MAC16(b1, -W1, row[5]);
106 MAC16(b1, -W5, row[7]);
Diego Biurrun115329f2005-12-17 18:14:38107
Fabrice Bellard412ba502002-06-05 14:25:06108 MAC16(b2, W7, row[5]);
109 MAC16(b2, W3, row[7]);
Diego Biurrun115329f2005-12-17 18:14:38110
Fabrice Bellard412ba502002-06-05 14:25:06111 MAC16(b3, W3, row[5]);
112 MAC16(b3, -W1, row[7]);
Diego Biurrunbb270c02005-12-22 01:10:11113 }
Michael Niedermayer49739712002-01-14 04:39:59114
Diego Biurrunbb270c02005-12-22 01:10:11115 row[0] = (a0 + b0) >> ROW_SHIFT;
116 row[7] = (a0 - b0) >> ROW_SHIFT;
117 row[1] = (a1 + b1) >> ROW_SHIFT;
118 row[6] = (a1 - b1) >> ROW_SHIFT;
119 row[2] = (a2 + b2) >> ROW_SHIFT;
120 row[5] = (a2 - b2) >> ROW_SHIFT;
121 row[3] = (a3 + b3) >> ROW_SHIFT;
122 row[4] = (a3 - b3) >> ROW_SHIFT;
Arpi37e8dcd2001-12-09 12:39:54123}
124
Diego Biurrun115329f2005-12-17 18:14:38125static inline void idctSparseColPut (uint8_t *dest, int line_size,
Michael Niedermayer0e153842003-01-15 19:21:21126 DCTELEM * col)
Arpi37e8dcd2001-12-09 12:39:54127{
Diego Biurrunbb270c02005-12-22 01:10:11128 int a0, a1, a2, a3, b0, b1, b2, b3;
Måns Rullgård55fde952006-11-12 20:08:09129 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
Michael Niedermayer49739712002-01-14 04:39:59130
Fabrice Bellard412ba502002-06-05 14:25:06131 /* XXX: I did that only to give same values as previous code */
Diego Biurrunbb270c02005-12-22 01:10:11132 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
133 a1 = a0;
134 a2 = a0;
135 a3 = a0;
Fabrice Bellard412ba502002-06-05 14:25:06136
137 a0 += + W2*col[8*2];
138 a1 += + W6*col[8*2];
139 a2 += - W6*col[8*2];
140 a3 += - W2*col[8*2];
141
Måns Rullgårdedf7c2b2008-12-27 16:25:36142 b0 = MUL16(W1, col[8*1]);
143 b1 = MUL16(W3, col[8*1]);
144 b2 = MUL16(W5, col[8*1]);
145 b3 = MUL16(W7, col[8*1]);
Fabrice Bellard412ba502002-06-05 14:25:06146
147 MAC16(b0, + W3, col[8*3]);
148 MAC16(b1, - W7, col[8*3]);
149 MAC16(b2, - W1, col[8*3]);
150 MAC16(b3, - W5, col[8*3]);
Arpi37e8dcd2001-12-09 12:39:54151
Diego Biurrunbb270c02005-12-22 01:10:11152 if(col[8*4]){
Fabrice Bellard412ba502002-06-05 14:25:06153 a0 += + W4*col[8*4];
154 a1 += - W4*col[8*4];
155 a2 += - W4*col[8*4];
156 a3 += + W4*col[8*4];
Diego Biurrunbb270c02005-12-22 01:10:11157 }
Fabrice Bellard412ba502002-06-05 14:25:06158
Diego Biurrunbb270c02005-12-22 01:10:11159 if (col[8*5]) {
Fabrice Bellard412ba502002-06-05 14:25:06160 MAC16(b0, + W5, col[8*5]);
161 MAC16(b1, - W1, col[8*5]);
162 MAC16(b2, + W7, col[8*5]);
163 MAC16(b3, + W3, col[8*5]);
Diego Biurrunbb270c02005-12-22 01:10:11164 }
Arpi37e8dcd2001-12-09 12:39:54165
Diego Biurrunbb270c02005-12-22 01:10:11166 if(col[8*6]){
Fabrice Bellard412ba502002-06-05 14:25:06167 a0 += + W6*col[8*6];
168 a1 += - W2*col[8*6];
169 a2 += + W2*col[8*6];
170 a3 += - W6*col[8*6];
Diego Biurrunbb270c02005-12-22 01:10:11171 }
Michael Niedermayer49739712002-01-14 04:39:59172
Diego Biurrunbb270c02005-12-22 01:10:11173 if (col[8*7]) {
Fabrice Bellard412ba502002-06-05 14:25:06174 MAC16(b0, + W7, col[8*7]);
175 MAC16(b1, - W5, col[8*7]);
176 MAC16(b2, + W3, col[8*7]);
177 MAC16(b3, - W1, col[8*7]);
Diego Biurrunbb270c02005-12-22 01:10:11178 }
Michael Niedermayer49739712002-01-14 04:39:59179
Fabrice Bellardd36a2462002-06-05 18:46:25180 dest[0] = cm[(a0 + b0) >> COL_SHIFT];
181 dest += line_size;
182 dest[0] = cm[(a1 + b1) >> COL_SHIFT];
183 dest += line_size;
184 dest[0] = cm[(a2 + b2) >> COL_SHIFT];
185 dest += line_size;
186 dest[0] = cm[(a3 + b3) >> COL_SHIFT];
187 dest += line_size;
188 dest[0] = cm[(a3 - b3) >> COL_SHIFT];
189 dest += line_size;
190 dest[0] = cm[(a2 - b2) >> COL_SHIFT];
191 dest += line_size;
192 dest[0] = cm[(a1 - b1) >> COL_SHIFT];
193 dest += line_size;
194 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
195}
196
Diego Biurrun115329f2005-12-17 18:14:38197static inline void idctSparseColAdd (uint8_t *dest, int line_size,
Michael Niedermayer0e153842003-01-15 19:21:21198 DCTELEM * col)
Fabrice Bellardd36a2462002-06-05 18:46:25199{
Diego Biurrunbb270c02005-12-22 01:10:11200 int a0, a1, a2, a3, b0, b1, b2, b3;
Måns Rullgård55fde952006-11-12 20:08:09201 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
Fabrice Bellardd36a2462002-06-05 18:46:25202
203 /* XXX: I did that only to give same values as previous code */
Diego Biurrunbb270c02005-12-22 01:10:11204 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
205 a1 = a0;
206 a2 = a0;
207 a3 = a0;
Fabrice Bellardd36a2462002-06-05 18:46:25208
209 a0 += + W2*col[8*2];
210 a1 += + W6*col[8*2];
211 a2 += - W6*col[8*2];
212 a3 += - W2*col[8*2];
213
Måns Rullgårdedf7c2b2008-12-27 16:25:36214 b0 = MUL16(W1, col[8*1]);
215 b1 = MUL16(W3, col[8*1]);
216 b2 = MUL16(W5, col[8*1]);
217 b3 = MUL16(W7, col[8*1]);
Fabrice Bellardd36a2462002-06-05 18:46:25218
219 MAC16(b0, + W3, col[8*3]);
220 MAC16(b1, - W7, col[8*3]);
221 MAC16(b2, - W1, col[8*3]);
222 MAC16(b3, - W5, col[8*3]);
223
Diego Biurrunbb270c02005-12-22 01:10:11224 if(col[8*4]){
Fabrice Bellardd36a2462002-06-05 18:46:25225 a0 += + W4*col[8*4];
226 a1 += - W4*col[8*4];
227 a2 += - W4*col[8*4];
228 a3 += + W4*col[8*4];
Diego Biurrunbb270c02005-12-22 01:10:11229 }
Fabrice Bellardd36a2462002-06-05 18:46:25230
Diego Biurrunbb270c02005-12-22 01:10:11231 if (col[8*5]) {
Fabrice Bellardd36a2462002-06-05 18:46:25232 MAC16(b0, + W5, col[8*5]);
233 MAC16(b1, - W1, col[8*5]);
234 MAC16(b2, + W7, col[8*5]);
235 MAC16(b3, + W3, col[8*5]);
Diego Biurrunbb270c02005-12-22 01:10:11236 }
Fabrice Bellardd36a2462002-06-05 18:46:25237
Diego Biurrunbb270c02005-12-22 01:10:11238 if(col[8*6]){
Fabrice Bellardd36a2462002-06-05 18:46:25239 a0 += + W6*col[8*6];
240 a1 += - W2*col[8*6];
241 a2 += + W2*col[8*6];
242 a3 += - W6*col[8*6];
Diego Biurrunbb270c02005-12-22 01:10:11243 }
Fabrice Bellardd36a2462002-06-05 18:46:25244
Diego Biurrunbb270c02005-12-22 01:10:11245 if (col[8*7]) {
Fabrice Bellardd36a2462002-06-05 18:46:25246 MAC16(b0, + W7, col[8*7]);
247 MAC16(b1, - W5, col[8*7]);
248 MAC16(b2, + W3, col[8*7]);
249 MAC16(b3, - W1, col[8*7]);
Diego Biurrunbb270c02005-12-22 01:10:11250 }
Fabrice Bellardd36a2462002-06-05 18:46:25251
252 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
253 dest += line_size;
254 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
255 dest += line_size;
256 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
257 dest += line_size;
258 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
259 dest += line_size;
260 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
261 dest += line_size;
262 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
263 dest += line_size;
264 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
265 dest += line_size;
266 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
Arpi37e8dcd2001-12-09 12:39:54267}
268
Michael Niedermayer0e153842003-01-15 19:21:21269static inline void idctSparseCol (DCTELEM * col)
Michael Niedermayer86748db2002-09-01 09:33:05270{
Diego Biurrunbb270c02005-12-22 01:10:11271 int a0, a1, a2, a3, b0, b1, b2, b3;
Michael Niedermayer86748db2002-09-01 09:33:05272
273 /* XXX: I did that only to give same values as previous code */
Diego Biurrunbb270c02005-12-22 01:10:11274 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
275 a1 = a0;
276 a2 = a0;
277 a3 = a0;
Michael Niedermayer86748db2002-09-01 09:33:05278
279 a0 += + W2*col[8*2];
280 a1 += + W6*col[8*2];
281 a2 += - W6*col[8*2];
282 a3 += - W2*col[8*2];
283
Måns Rullgårdedf7c2b2008-12-27 16:25:36284 b0 = MUL16(W1, col[8*1]);
285 b1 = MUL16(W3, col[8*1]);
286 b2 = MUL16(W5, col[8*1]);
287 b3 = MUL16(W7, col[8*1]);
Michael Niedermayer86748db2002-09-01 09:33:05288
289 MAC16(b0, + W3, col[8*3]);
290 MAC16(b1, - W7, col[8*3]);
291 MAC16(b2, - W1, col[8*3]);
292 MAC16(b3, - W5, col[8*3]);
293
Diego Biurrunbb270c02005-12-22 01:10:11294 if(col[8*4]){
Michael Niedermayer86748db2002-09-01 09:33:05295 a0 += + W4*col[8*4];
296 a1 += - W4*col[8*4];
297 a2 += - W4*col[8*4];
298 a3 += + W4*col[8*4];
Diego Biurrunbb270c02005-12-22 01:10:11299 }
Michael Niedermayer86748db2002-09-01 09:33:05300
Diego Biurrunbb270c02005-12-22 01:10:11301 if (col[8*5]) {
Michael Niedermayer86748db2002-09-01 09:33:05302 MAC16(b0, + W5, col[8*5]);
303 MAC16(b1, - W1, col[8*5]);
304 MAC16(b2, + W7, col[8*5]);
305 MAC16(b3, + W3, col[8*5]);
Diego Biurrunbb270c02005-12-22 01:10:11306 }
Michael Niedermayer86748db2002-09-01 09:33:05307
Diego Biurrunbb270c02005-12-22 01:10:11308 if(col[8*6]){
Michael Niedermayer86748db2002-09-01 09:33:05309 a0 += + W6*col[8*6];
310 a1 += - W2*col[8*6];
311 a2 += + W2*col[8*6];
312 a3 += - W6*col[8*6];
Diego Biurrunbb270c02005-12-22 01:10:11313 }
Michael Niedermayer86748db2002-09-01 09:33:05314
Diego Biurrunbb270c02005-12-22 01:10:11315 if (col[8*7]) {
Michael Niedermayer86748db2002-09-01 09:33:05316 MAC16(b0, + W7, col[8*7]);
317 MAC16(b1, - W5, col[8*7]);
318 MAC16(b2, + W3, col[8*7]);
319 MAC16(b3, - W1, col[8*7]);
Diego Biurrunbb270c02005-12-22 01:10:11320 }
Michael Niedermayer86748db2002-09-01 09:33:05321
322 col[0 ] = ((a0 + b0) >> COL_SHIFT);
323 col[8 ] = ((a1 + b1) >> COL_SHIFT);
324 col[16] = ((a2 + b2) >> COL_SHIFT);
325 col[24] = ((a3 + b3) >> COL_SHIFT);
326 col[32] = ((a3 - b3) >> COL_SHIFT);
327 col[40] = ((a2 - b2) >> COL_SHIFT);
328 col[48] = ((a1 - b1) >> COL_SHIFT);
329 col[56] = ((a0 - b0) >> COL_SHIFT);
330}
331
Aurelien Jacobs59e6f602007-12-08 21:21:11332void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
Fabrice Bellard412ba502002-06-05 14:25:06333{
334 int i;
335 for(i=0; i<8; i++)
336 idctRowCondDC(block + i*8);
Diego Biurrun115329f2005-12-17 18:14:38337
Fabrice Bellard412ba502002-06-05 14:25:06338 for(i=0; i<8; i++)
Fabrice Bellardd36a2462002-06-05 18:46:25339 idctSparseColPut(dest + i, line_size, block + i);
340}
341
Aurelien Jacobs59e6f602007-12-08 21:21:11342void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
Fabrice Bellardd36a2462002-06-05 18:46:25343{
344 int i;
345 for(i=0; i<8; i++)
346 idctRowCondDC(block + i*8);
Diego Biurrun115329f2005-12-17 18:14:38347
Fabrice Bellardd36a2462002-06-05 18:46:25348 for(i=0; i<8; i++)
349 idctSparseColAdd(dest + i, line_size, block + i);
Fabrice Bellard412ba502002-06-05 14:25:06350}
351
Aurelien Jacobs59e6f602007-12-08 21:21:11352void ff_simple_idct(DCTELEM *block)
Michael Niedermayer86748db2002-09-01 09:33:05353{
354 int i;
355 for(i=0; i<8; i++)
356 idctRowCondDC(block + i*8);
Diego Biurrun115329f2005-12-17 18:14:38357
Michael Niedermayer86748db2002-09-01 09:33:05358 for(i=0; i<8; i++)
359 idctSparseCol(block + i);
360}
361
/* 2x4x8 idct */

/* Number of fractional bits in the C1/C2 fixed-point constants. */
#define CN_SHIFT 12
/* Converts a real-valued coefficient to CN_SHIFT-bit fixed point,
   rounding to nearest. */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* The row idct is multiplied by 16 * sqrt(2.0), the col idct4 is
   normalized, and the butterfly must be multiplied by 0.5 * sqrt(2.0). */
#define C_SHIFT (4+1+12)
Fabrice Bellard9bf71512002-10-03 13:41:33372
Kostya Shishkovfda767b2007-12-03 06:59:48373static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
Fabrice Bellard9bf71512002-10-03 13:41:33374{
375 int c0, c1, c2, c3, a0, a1, a2, a3;
Måns Rullgård55fde952006-11-12 20:08:09376 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
Fabrice Bellard9bf71512002-10-03 13:41:33377
378 a0 = col[8*0];
379 a1 = col[8*2];
380 a2 = col[8*4];
381 a3 = col[8*6];
Fabrice Bellard652f0192002-10-03 21:03:59382 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
383 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
Fabrice Bellard9bf71512002-10-03 13:41:33384 c1 = a1 * C1 + a3 * C2;
385 c3 = a1 * C2 - a3 * C1;
386 dest[0] = cm[(c0 + c1) >> C_SHIFT];
387 dest += line_size;
388 dest[0] = cm[(c2 + c3) >> C_SHIFT];
389 dest += line_size;
390 dest[0] = cm[(c2 - c3) >> C_SHIFT];
391 dest += line_size;
392 dest[0] = cm[(c0 - c1) >> C_SHIFT];
393}
394
/*
 * Butterfly on elements k and 8 + k of the array named `ptr` in the
 * invoking scope: ptr[k] becomes the sum and ptr[8 + k] the difference
 * of the two original values.
 * Wrapped in do { } while (0) so that "BF(k);" is a single statement and
 * stays valid inside an unbraced if/else.
 */
#define BF(k) \
do {\
    int bf_lo = ptr[(k)];\
    int bf_hi = ptr[8 + (k)];\
    ptr[(k)]     = bf_lo + bf_hi;\
    ptr[8 + (k)] = bf_lo - bf_hi;\
} while (0)
403
404/* only used by DV codec. The input must be interlaced. 128 is added
405 to the pixels before clamping to avoid systematic error
406 (1024*sqrt(2)) offset would be needed otherwise. */
407/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
408 compensate the extra butterfly stage - I don't have the full DV
409 specification */
Aurelien Jacobs59e6f602007-12-08 21:21:11410void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
Fabrice Bellard9bf71512002-10-03 13:41:33411{
412 int i;
Michael Niedermayer0e153842003-01-15 19:21:21413 DCTELEM *ptr;
Diego Biurrun115329f2005-12-17 18:14:38414
Fabrice Bellard9bf71512002-10-03 13:41:33415 /* butterfly */
416 ptr = block;
417 for(i=0;i<4;i++) {
418 BF(0);
419 BF(1);
420 BF(2);
421 BF(3);
422 BF(4);
423 BF(5);
424 BF(6);
425 BF(7);
426 ptr += 2 * 8;
427 }
428
429 /* IDCT8 on each line */
430 for(i=0; i<8; i++) {
431 idctRowCondDC(block + i*8);
432 }
433
434 /* IDCT4 and store */
435 for(i=0;i<8;i++) {
Kostya Shishkovfda767b2007-12-03 06:59:48436 idct4col_put(dest + i, 2 * line_size, block + i);
437 idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
Fabrice Bellard9bf71512002-10-03 13:41:33438 }
439}
Michael Niedermayer1457ab52002-12-27 23:51:46440
/* 8x4 & 4x8 WMV2 IDCT */

/* Drop the constants of the 2x4x8 idct before redefining them for the
   WMV2 column transform. */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
/* Number of fractional bits in the C1/C2/C3 fixed-point constants. */
#define CN_SHIFT 12
/* Converts a real-valued coefficient, scaled by sqrt(2), to CN_SHIFT-bit
   fixed point, rounding to nearest. */
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* Right shift applied to the results of the column transform. */
#define C_SHIFT (4+1+12)
Zdenek Kabelac0c1a9ed2003-02-11 16:35:48453static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
Michael Niedermayer1457ab52002-12-27 23:51:46454{
455 int c0, c1, c2, c3, a0, a1, a2, a3;
Måns Rullgård55fde952006-11-12 20:08:09456 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
Michael Niedermayer1457ab52002-12-27 23:51:46457
458 a0 = col[8*0];
459 a1 = col[8*1];
460 a2 = col[8*2];
461 a3 = col[8*3];
462 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
463 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
464 c1 = a1 * C1 + a3 * C2;
465 c3 = a1 * C2 - a3 * C1;
466 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
467 dest += line_size;
468 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
469 dest += line_size;
470 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
471 dest += line_size;
472 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
473}
474
/* Number of fractional bits in the R1/R2/R3 fixed-point constants. */
#define RN_SHIFT 15
/* Converts a real-valued coefficient, scaled by sqrt(2), to RN_SHIFT-bit
   fixed point, rounding to nearest. */
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* Right shift applied to the results of the 4-point row transform. */
#define R_SHIFT 11
Michael Niedermayer0e153842003-01-15 19:21:21481static inline void idct4row(DCTELEM *row)
Michael Niedermayer1457ab52002-12-27 23:51:46482{
483 int c0, c1, c2, c3, a0, a1, a2, a3;
Måns Rullgård55fde952006-11-12 20:08:09484 //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
Michael Niedermayer1457ab52002-12-27 23:51:46485
486 a0 = row[0];
487 a1 = row[1];
488 a2 = row[2];
489 a3 = row[3];
490 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
491 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
492 c1 = a1 * R1 + a3 * R2;
493 c3 = a1 * R2 - a3 * R1;
494 row[0]= (c0 + c1) >> R_SHIFT;
495 row[1]= (c2 + c3) >> R_SHIFT;
496 row[2]= (c2 - c3) >> R_SHIFT;
497 row[3]= (c0 - c1) >> R_SHIFT;
498}
499
Aurelien Jacobs59e6f602007-12-08 21:21:11500void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
Michael Niedermayer1457ab52002-12-27 23:51:46501{
502 int i;
503
504 /* IDCT8 on each line */
505 for(i=0; i<4; i++) {
506 idctRowCondDC(block + i*8);
507 }
508
509 /* IDCT4 and store */
510 for(i=0;i<8;i++) {
511 idct4col_add(dest + i, line_size, block + i);
512 }
513}
514
Aurelien Jacobs59e6f602007-12-08 21:21:11515void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
Michael Niedermayer1457ab52002-12-27 23:51:46516{
517 int i;
518
519 /* IDCT4 on each line */
520 for(i=0; i<8; i++) {
521 idct4row(block + i*8);
522 }
523
524 /* IDCT8 and store */
525 for(i=0; i<4; i++){
526 idctSparseColAdd(dest + i, line_size, block + i);
527 }
528}
529
Kostya Shishkov91823a62007-12-09 05:54:59530void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
531{
532 int i;
533
534 /* IDCT4 on each line */
535 for(i=0; i<4; i++) {
536 idct4row(block + i*8);
537 }
538
539 /* IDCT4 and store */
540 for(i=0; i<4; i++){
541 idct4col_add(dest + i, line_size, block + i);
542 }
543}