Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 1 | /* |
Fabrice Bellard | ff4ec49 | 2002-05-25 22:45:33 | [diff] [blame] | 2 | * Simple IDCT |
| 3 | * |
| 4 | * Copyright (c) 2001 Michael Niedermayer <[email protected]> |
| 5 | * |
Mans Rullgard | 2912e87 | 2011-03-18 17:35:10 | [diff] [blame] | 6 | * This file is part of Libav. |
Diego Biurrun | b78e719 | 2006-10-07 15:30:46 | [diff] [blame] | 7 | * |
Mans Rullgard | 2912e87 | 2011-03-18 17:35:10 | [diff] [blame] | 8 | * Libav is free software; you can redistribute it and/or |
Fabrice Bellard | ff4ec49 | 2002-05-25 22:45:33 | [diff] [blame] | 9 | * modify it under the terms of the GNU Lesser General Public |
| 10 | * License as published by the Free Software Foundation; either |
Diego Biurrun | b78e719 | 2006-10-07 15:30:46 | [diff] [blame] | 11 | * version 2.1 of the License, or (at your option) any later version. |
Fabrice Bellard | ff4ec49 | 2002-05-25 22:45:33 | [diff] [blame] | 12 | * |
Mans Rullgard | 2912e87 | 2011-03-18 17:35:10 | [diff] [blame] | 13 | * Libav is distributed in the hope that it will be useful, |
Fabrice Bellard | ff4ec49 | 2002-05-25 22:45:33 | [diff] [blame] | 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 | * Lesser General Public License for more details. |
| 17 | * |
| 18 | * You should have received a copy of the GNU Lesser General Public |
Mans Rullgard | 2912e87 | 2011-03-18 17:35:10 | [diff] [blame] | 19 | * License along with Libav; if not, write to the Free Software |
Diego Biurrun | 5509bff | 2006-01-12 22:43:26 | [diff] [blame] | 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
Fabrice Bellard | ff4ec49 | 2002-05-25 22:45:33 | [diff] [blame] | 21 | */ |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 22 | |
Michael Niedermayer | 983e324 | 2003-03-06 11:32:04 | [diff] [blame] | 23 | /** |
Diego Biurrun | ba87f08 | 2010-04-20 14:45:34 | [diff] [blame] | 24 | * @file |
Michael Niedermayer | 983e324 | 2003-03-06 11:32:04 | [diff] [blame] | 25 | * simpleidct in C. |
| 26 | */ |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 27 | |
/*
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
  written by Aaron Holtzman <[email protected]>)
*/
Mans Rullgard | 3e9409b | 2011-07-18 15:04:25 | [diff] [blame] | 32 | |
| 33 | #include "libavutil/intreadwrite.h" |
Fabrice Bellard | 6000abf | 2002-05-18 23:03:29 | [diff] [blame] | 34 | #include "avcodec.h" |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 35 | #include "dsputil.h" |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 36 | #include "mathops.h" |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 37 | #include "simple_idct.h" |
| 38 | |
/*
 * Fixed-point IDCT weights, derived from
 *     Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 * truncated to an integer.
 */
#define W1 22725 /* i = 1 */
#define W2 21407 /* i = 2 */
#define W3 19266 /* i = 3 */
#define W4 16383 /* i = 4; the formula itself evaluates to 16384, one less is used here */
#define W5 12873 /* i = 5 */
#define W6 8867  /* i = 6 */
#define W7 4520  /* i = 7 */
/* right shift applied to the sums produced by the row pass */
#define ROW_SHIFT 11
/* right shift applied to the sums produced by the column pass */
#define COL_SHIFT 20 // 6
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 48 | |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 49 | static inline void idctRowCondDC (DCTELEM * row) |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 50 | { |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 51 | int a0, a1, a2, a3, b0, b1, b2, b3; |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 52 | |
Aurelien Jacobs | b250f9c | 2009-01-13 23:44:16 | [diff] [blame] | 53 | #if HAVE_FAST_64BIT |
Mans Rullgard | 3e9409b | 2011-07-18 15:04:25 | [diff] [blame] | 54 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) |
Mans Rullgard | dbb5ee8 | 2011-07-18 14:23:22 | [diff] [blame] | 55 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { |
Mans Rullgard | 3e9409b | 2011-07-18 15:04:25 | [diff] [blame] | 56 | uint64_t temp = (row[0] << 3) & 0xffff; |
Mans Rullgard | dbb5ee8 | 2011-07-18 14:23:22 | [diff] [blame] | 57 | temp += temp << 16; |
| 58 | temp += temp << 32; |
| 59 | ((uint64_t *)row)[0] = temp; |
| 60 | ((uint64_t *)row)[1] = temp; |
| 61 | return; |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 62 | } |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 63 | #else |
Mans Rullgard | dbb5ee8 | 2011-07-18 14:23:22 | [diff] [blame] | 64 | if (!(((uint32_t*)row)[1] | |
| 65 | ((uint32_t*)row)[2] | |
| 66 | ((uint32_t*)row)[3] | |
| 67 | row[1])) { |
Mans Rullgard | 3e9409b | 2011-07-18 15:04:25 | [diff] [blame] | 68 | uint32_t temp = (row[0] << 3) & 0xffff; |
Mans Rullgard | dbb5ee8 | 2011-07-18 14:23:22 | [diff] [blame] | 69 | temp += temp << 16; |
| 70 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 71 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
Mans Rullgard | dbb5ee8 | 2011-07-18 14:23:22 | [diff] [blame] | 72 | return; |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 73 | } |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 74 | #endif |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 75 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 76 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 77 | a1 = a0; |
| 78 | a2 = a0; |
| 79 | a3 = a0; |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 80 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 81 | /* no need to optimize : gcc does it */ |
| 82 | a0 += W2 * row[2]; |
| 83 | a1 += W6 * row[2]; |
| 84 | a2 -= W6 * row[2]; |
| 85 | a3 -= W2 * row[2]; |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 86 | |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 87 | b0 = MUL16(W1, row[1]); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 88 | MAC16(b0, W3, row[3]); |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 89 | b1 = MUL16(W3, row[1]); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 90 | MAC16(b1, -W7, row[3]); |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 91 | b2 = MUL16(W5, row[1]); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 92 | MAC16(b2, -W1, row[3]); |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 93 | b3 = MUL16(W7, row[1]); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 94 | MAC16(b3, -W5, row[3]); |
| 95 | |
Mans Rullgard | 3e9409b | 2011-07-18 15:04:25 | [diff] [blame] | 96 | if (AV_RN64A(row + 4)) { |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 97 | a0 += W4*row[4] + W6*row[6]; |
| 98 | a1 += - W4*row[4] - W2*row[6]; |
| 99 | a2 += - W4*row[4] + W2*row[6]; |
| 100 | a3 += W4*row[4] - W6*row[6]; |
| 101 | |
| 102 | MAC16(b0, W5, row[5]); |
| 103 | MAC16(b0, W7, row[7]); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 104 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 105 | MAC16(b1, -W1, row[5]); |
| 106 | MAC16(b1, -W5, row[7]); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 107 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 108 | MAC16(b2, W7, row[5]); |
| 109 | MAC16(b2, W3, row[7]); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 110 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 111 | MAC16(b3, W3, row[5]); |
| 112 | MAC16(b3, -W1, row[7]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 113 | } |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 114 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 115 | row[0] = (a0 + b0) >> ROW_SHIFT; |
| 116 | row[7] = (a0 - b0) >> ROW_SHIFT; |
| 117 | row[1] = (a1 + b1) >> ROW_SHIFT; |
| 118 | row[6] = (a1 - b1) >> ROW_SHIFT; |
| 119 | row[2] = (a2 + b2) >> ROW_SHIFT; |
| 120 | row[5] = (a2 - b2) >> ROW_SHIFT; |
| 121 | row[3] = (a3 + b3) >> ROW_SHIFT; |
| 122 | row[4] = (a3 - b3) >> ROW_SHIFT; |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 123 | } |
| 124 | |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 125 | static inline void idctSparseColPut (uint8_t *dest, int line_size, |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 126 | DCTELEM * col) |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 127 | { |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 128 | int a0, a1, a2, a3, b0, b1, b2, b3; |
Måns Rullgård | 55fde95 | 2006-11-12 20:08:09 | [diff] [blame] | 129 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 130 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 131 | /* XXX: I did that only to give same values as previous code */ |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 132 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
| 133 | a1 = a0; |
| 134 | a2 = a0; |
| 135 | a3 = a0; |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 136 | |
| 137 | a0 += + W2*col[8*2]; |
| 138 | a1 += + W6*col[8*2]; |
| 139 | a2 += - W6*col[8*2]; |
| 140 | a3 += - W2*col[8*2]; |
| 141 | |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 142 | b0 = MUL16(W1, col[8*1]); |
| 143 | b1 = MUL16(W3, col[8*1]); |
| 144 | b2 = MUL16(W5, col[8*1]); |
| 145 | b3 = MUL16(W7, col[8*1]); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 146 | |
| 147 | MAC16(b0, + W3, col[8*3]); |
| 148 | MAC16(b1, - W7, col[8*3]); |
| 149 | MAC16(b2, - W1, col[8*3]); |
| 150 | MAC16(b3, - W5, col[8*3]); |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 151 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 152 | if(col[8*4]){ |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 153 | a0 += + W4*col[8*4]; |
| 154 | a1 += - W4*col[8*4]; |
| 155 | a2 += - W4*col[8*4]; |
| 156 | a3 += + W4*col[8*4]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 157 | } |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 158 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 159 | if (col[8*5]) { |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 160 | MAC16(b0, + W5, col[8*5]); |
| 161 | MAC16(b1, - W1, col[8*5]); |
| 162 | MAC16(b2, + W7, col[8*5]); |
| 163 | MAC16(b3, + W3, col[8*5]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 164 | } |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 165 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 166 | if(col[8*6]){ |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 167 | a0 += + W6*col[8*6]; |
| 168 | a1 += - W2*col[8*6]; |
| 169 | a2 += + W2*col[8*6]; |
| 170 | a3 += - W6*col[8*6]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 171 | } |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 172 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 173 | if (col[8*7]) { |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 174 | MAC16(b0, + W7, col[8*7]); |
| 175 | MAC16(b1, - W5, col[8*7]); |
| 176 | MAC16(b2, + W3, col[8*7]); |
| 177 | MAC16(b3, - W1, col[8*7]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 178 | } |
Michael Niedermayer | 4973971 | 2002-01-14 04:39:59 | [diff] [blame] | 179 | |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 180 | dest[0] = cm[(a0 + b0) >> COL_SHIFT]; |
| 181 | dest += line_size; |
| 182 | dest[0] = cm[(a1 + b1) >> COL_SHIFT]; |
| 183 | dest += line_size; |
| 184 | dest[0] = cm[(a2 + b2) >> COL_SHIFT]; |
| 185 | dest += line_size; |
| 186 | dest[0] = cm[(a3 + b3) >> COL_SHIFT]; |
| 187 | dest += line_size; |
| 188 | dest[0] = cm[(a3 - b3) >> COL_SHIFT]; |
| 189 | dest += line_size; |
| 190 | dest[0] = cm[(a2 - b2) >> COL_SHIFT]; |
| 191 | dest += line_size; |
| 192 | dest[0] = cm[(a1 - b1) >> COL_SHIFT]; |
| 193 | dest += line_size; |
| 194 | dest[0] = cm[(a0 - b0) >> COL_SHIFT]; |
| 195 | } |
| 196 | |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 197 | static inline void idctSparseColAdd (uint8_t *dest, int line_size, |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 198 | DCTELEM * col) |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 199 | { |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 200 | int a0, a1, a2, a3, b0, b1, b2, b3; |
Måns Rullgård | 55fde95 | 2006-11-12 20:08:09 | [diff] [blame] | 201 | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 202 | |
| 203 | /* XXX: I did that only to give same values as previous code */ |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 204 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
| 205 | a1 = a0; |
| 206 | a2 = a0; |
| 207 | a3 = a0; |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 208 | |
| 209 | a0 += + W2*col[8*2]; |
| 210 | a1 += + W6*col[8*2]; |
| 211 | a2 += - W6*col[8*2]; |
| 212 | a3 += - W2*col[8*2]; |
| 213 | |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 214 | b0 = MUL16(W1, col[8*1]); |
| 215 | b1 = MUL16(W3, col[8*1]); |
| 216 | b2 = MUL16(W5, col[8*1]); |
| 217 | b3 = MUL16(W7, col[8*1]); |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 218 | |
| 219 | MAC16(b0, + W3, col[8*3]); |
| 220 | MAC16(b1, - W7, col[8*3]); |
| 221 | MAC16(b2, - W1, col[8*3]); |
| 222 | MAC16(b3, - W5, col[8*3]); |
| 223 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 224 | if(col[8*4]){ |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 225 | a0 += + W4*col[8*4]; |
| 226 | a1 += - W4*col[8*4]; |
| 227 | a2 += - W4*col[8*4]; |
| 228 | a3 += + W4*col[8*4]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 229 | } |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 230 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 231 | if (col[8*5]) { |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 232 | MAC16(b0, + W5, col[8*5]); |
| 233 | MAC16(b1, - W1, col[8*5]); |
| 234 | MAC16(b2, + W7, col[8*5]); |
| 235 | MAC16(b3, + W3, col[8*5]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 236 | } |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 237 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 238 | if(col[8*6]){ |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 239 | a0 += + W6*col[8*6]; |
| 240 | a1 += - W2*col[8*6]; |
| 241 | a2 += + W2*col[8*6]; |
| 242 | a3 += - W6*col[8*6]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 243 | } |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 244 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 245 | if (col[8*7]) { |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 246 | MAC16(b0, + W7, col[8*7]); |
| 247 | MAC16(b1, - W5, col[8*7]); |
| 248 | MAC16(b2, + W3, col[8*7]); |
| 249 | MAC16(b3, - W1, col[8*7]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 250 | } |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 251 | |
| 252 | dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; |
| 253 | dest += line_size; |
| 254 | dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; |
| 255 | dest += line_size; |
| 256 | dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; |
| 257 | dest += line_size; |
| 258 | dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; |
| 259 | dest += line_size; |
| 260 | dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; |
| 261 | dest += line_size; |
| 262 | dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; |
| 263 | dest += line_size; |
| 264 | dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
| 265 | dest += line_size; |
| 266 | dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
Arpi | 37e8dcd | 2001-12-09 12:39:54 | [diff] [blame] | 267 | } |
| 268 | |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 269 | static inline void idctSparseCol (DCTELEM * col) |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 270 | { |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 271 | int a0, a1, a2, a3, b0, b1, b2, b3; |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 272 | |
| 273 | /* XXX: I did that only to give same values as previous code */ |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 274 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
| 275 | a1 = a0; |
| 276 | a2 = a0; |
| 277 | a3 = a0; |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 278 | |
| 279 | a0 += + W2*col[8*2]; |
| 280 | a1 += + W6*col[8*2]; |
| 281 | a2 += - W6*col[8*2]; |
| 282 | a3 += - W2*col[8*2]; |
| 283 | |
Måns Rullgård | edf7c2b | 2008-12-27 16:25:36 | [diff] [blame] | 284 | b0 = MUL16(W1, col[8*1]); |
| 285 | b1 = MUL16(W3, col[8*1]); |
| 286 | b2 = MUL16(W5, col[8*1]); |
| 287 | b3 = MUL16(W7, col[8*1]); |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 288 | |
| 289 | MAC16(b0, + W3, col[8*3]); |
| 290 | MAC16(b1, - W7, col[8*3]); |
| 291 | MAC16(b2, - W1, col[8*3]); |
| 292 | MAC16(b3, - W5, col[8*3]); |
| 293 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 294 | if(col[8*4]){ |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 295 | a0 += + W4*col[8*4]; |
| 296 | a1 += - W4*col[8*4]; |
| 297 | a2 += - W4*col[8*4]; |
| 298 | a3 += + W4*col[8*4]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 299 | } |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 300 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 301 | if (col[8*5]) { |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 302 | MAC16(b0, + W5, col[8*5]); |
| 303 | MAC16(b1, - W1, col[8*5]); |
| 304 | MAC16(b2, + W7, col[8*5]); |
| 305 | MAC16(b3, + W3, col[8*5]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 306 | } |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 307 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 308 | if(col[8*6]){ |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 309 | a0 += + W6*col[8*6]; |
| 310 | a1 += - W2*col[8*6]; |
| 311 | a2 += + W2*col[8*6]; |
| 312 | a3 += - W6*col[8*6]; |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 313 | } |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 314 | |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 315 | if (col[8*7]) { |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 316 | MAC16(b0, + W7, col[8*7]); |
| 317 | MAC16(b1, - W5, col[8*7]); |
| 318 | MAC16(b2, + W3, col[8*7]); |
| 319 | MAC16(b3, - W1, col[8*7]); |
Diego Biurrun | bb270c0 | 2005-12-22 01:10:11 | [diff] [blame] | 320 | } |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 321 | |
| 322 | col[0 ] = ((a0 + b0) >> COL_SHIFT); |
| 323 | col[8 ] = ((a1 + b1) >> COL_SHIFT); |
| 324 | col[16] = ((a2 + b2) >> COL_SHIFT); |
| 325 | col[24] = ((a3 + b3) >> COL_SHIFT); |
| 326 | col[32] = ((a3 - b3) >> COL_SHIFT); |
| 327 | col[40] = ((a2 - b2) >> COL_SHIFT); |
| 328 | col[48] = ((a1 - b1) >> COL_SHIFT); |
| 329 | col[56] = ((a0 - b0) >> COL_SHIFT); |
| 330 | } |
| 331 | |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 332 | void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 333 | { |
| 334 | int i; |
| 335 | for(i=0; i<8; i++) |
| 336 | idctRowCondDC(block + i*8); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 337 | |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 338 | for(i=0; i<8; i++) |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 339 | idctSparseColPut(dest + i, line_size, block + i); |
| 340 | } |
| 341 | |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 342 | void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 343 | { |
| 344 | int i; |
| 345 | for(i=0; i<8; i++) |
| 346 | idctRowCondDC(block + i*8); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 347 | |
Fabrice Bellard | d36a246 | 2002-06-05 18:46:25 | [diff] [blame] | 348 | for(i=0; i<8; i++) |
| 349 | idctSparseColAdd(dest + i, line_size, block + i); |
Fabrice Bellard | 412ba50 | 2002-06-05 14:25:06 | [diff] [blame] | 350 | } |
| 351 | |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 352 | void ff_simple_idct(DCTELEM *block) |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 353 | { |
| 354 | int i; |
| 355 | for(i=0; i<8; i++) |
| 356 | idctRowCondDC(block + i*8); |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 357 | |
Michael Niedermayer | 86748db | 2002-09-01 09:33:05 | [diff] [blame] | 358 | for(i=0; i<8; i++) |
| 359 | idctSparseCol(block + i); |
| 360 | } |
| 361 | |
/* 2x4x8 idct */

/* number of fractional bits of the 4-point column constants */
#define CN_SHIFT 12
/* convert a real constant to fixed point with CN_SHIFT fractional bits */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* The row idct output is multiplied by 16 * sqrt(2.0), the col idct4 is
   normalized, and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 372 | |
Kostya Shishkov | fda767b | 2007-12-03 06:59:48 | [diff] [blame] | 373 | static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col) |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 374 | { |
| 375 | int c0, c1, c2, c3, a0, a1, a2, a3; |
Måns Rullgård | 55fde95 | 2006-11-12 20:08:09 | [diff] [blame] | 376 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 377 | |
| 378 | a0 = col[8*0]; |
| 379 | a1 = col[8*2]; |
| 380 | a2 = col[8*4]; |
| 381 | a3 = col[8*6]; |
Fabrice Bellard | 652f019 | 2002-10-03 21:03:59 | [diff] [blame] | 382 | c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
| 383 | c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 384 | c1 = a1 * C1 + a3 * C2; |
| 385 | c3 = a1 * C2 - a3 * C1; |
| 386 | dest[0] = cm[(c0 + c1) >> C_SHIFT]; |
| 387 | dest += line_size; |
| 388 | dest[0] = cm[(c2 + c3) >> C_SHIFT]; |
| 389 | dest += line_size; |
| 390 | dest[0] = cm[(c2 - c3) >> C_SHIFT]; |
| 391 | dest += line_size; |
| 392 | dest[0] = cm[(c0 - c1) >> C_SHIFT]; |
| 393 | } |
| 394 | |
/* Butterfly between element k of the row at ptr and element k of the row
   below it (8 elements further): the first receives the sum, the second
   the difference. Expects a variable named ptr in the enclosing scope. */
#define BF(k) \
    do {\
        const int bf_upper = ptr[k];\
        const int bf_lower = ptr[8 + k];\
        ptr[k]     = bf_upper + bf_lower;\
        ptr[8 + k] = bf_upper - bf_lower;\
    } while (0)
| 403 | |
| 404 | /* only used by DV codec. The input must be interlaced. 128 is added |
| 405 | to the pixels before clamping to avoid systematic error |
| 406 | (1024*sqrt(2)) offset would be needed otherwise. */ |
| 407 | /* XXX: I think a 1.0/sqrt(2) normalization should be needed to |
| 408 | compensate the extra butterfly stage - I don't have the full DV |
| 409 | specification */ |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 410 | void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 411 | { |
| 412 | int i; |
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 413 | DCTELEM *ptr; |
Diego Biurrun | 115329f | 2005-12-17 18:14:38 | [diff] [blame] | 414 | |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 415 | /* butterfly */ |
| 416 | ptr = block; |
| 417 | for(i=0;i<4;i++) { |
| 418 | BF(0); |
| 419 | BF(1); |
| 420 | BF(2); |
| 421 | BF(3); |
| 422 | BF(4); |
| 423 | BF(5); |
| 424 | BF(6); |
| 425 | BF(7); |
| 426 | ptr += 2 * 8; |
| 427 | } |
| 428 | |
| 429 | /* IDCT8 on each line */ |
| 430 | for(i=0; i<8; i++) { |
| 431 | idctRowCondDC(block + i*8); |
| 432 | } |
| 433 | |
| 434 | /* IDCT4 and store */ |
| 435 | for(i=0;i<8;i++) { |
Kostya Shishkov | fda767b | 2007-12-03 06:59:48 | [diff] [blame] | 436 | idct4col_put(dest + i, 2 * line_size, block + i); |
| 437 | idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i); |
Fabrice Bellard | 9bf7151 | 2002-10-03 13:41:33 | [diff] [blame] | 438 | } |
| 439 | } |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 440 | |
/* 8x4 & 4x8 WMV2 IDCT */

/* drop the constants of the 2x4x8 idct before redefining them */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2

/* number of fractional bits of the 4-point column constants */
#define CN_SHIFT 12
/* fixed-point conversion that additionally scales by 1.414213562 */
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* final right shift of the 4-point column pass */
#define C_SHIFT (4+1+12)
Zdenek Kabelac | 0c1a9ed | 2003-02-11 16:35:48 | [diff] [blame] | 453 | static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 454 | { |
| 455 | int c0, c1, c2, c3, a0, a1, a2, a3; |
Måns Rullgård | 55fde95 | 2006-11-12 20:08:09 | [diff] [blame] | 456 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 457 | |
| 458 | a0 = col[8*0]; |
| 459 | a1 = col[8*1]; |
| 460 | a2 = col[8*2]; |
| 461 | a3 = col[8*3]; |
| 462 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); |
| 463 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); |
| 464 | c1 = a1 * C1 + a3 * C2; |
| 465 | c3 = a1 * C2 - a3 * C1; |
| 466 | dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; |
| 467 | dest += line_size; |
| 468 | dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; |
| 469 | dest += line_size; |
| 470 | dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; |
| 471 | dest += line_size; |
| 472 | dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; |
| 473 | } |
| 474 | |
/* number of fractional bits of the 4-point row constants */
#define RN_SHIFT 15
/* fixed-point conversion that additionally scales by 1.414213562 */
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* final right shift of the 4-point row pass */
#define R_SHIFT 11
Michael Niedermayer | 0e15384 | 2003-01-15 19:21:21 | [diff] [blame] | 481 | static inline void idct4row(DCTELEM *row) |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 482 | { |
| 483 | int c0, c1, c2, c3, a0, a1, a2, a3; |
Måns Rullgård | 55fde95 | 2006-11-12 20:08:09 | [diff] [blame] | 484 | //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 485 | |
| 486 | a0 = row[0]; |
| 487 | a1 = row[1]; |
| 488 | a2 = row[2]; |
| 489 | a3 = row[3]; |
| 490 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); |
| 491 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); |
| 492 | c1 = a1 * R1 + a3 * R2; |
| 493 | c3 = a1 * R2 - a3 * R1; |
| 494 | row[0]= (c0 + c1) >> R_SHIFT; |
| 495 | row[1]= (c2 + c3) >> R_SHIFT; |
| 496 | row[2]= (c2 - c3) >> R_SHIFT; |
| 497 | row[3]= (c0 - c1) >> R_SHIFT; |
| 498 | } |
| 499 | |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 500 | void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 501 | { |
| 502 | int i; |
| 503 | |
| 504 | /* IDCT8 on each line */ |
| 505 | for(i=0; i<4; i++) { |
| 506 | idctRowCondDC(block + i*8); |
| 507 | } |
| 508 | |
| 509 | /* IDCT4 and store */ |
| 510 | for(i=0;i<8;i++) { |
| 511 | idct4col_add(dest + i, line_size, block + i); |
| 512 | } |
| 513 | } |
| 514 | |
Aurelien Jacobs | 59e6f60 | 2007-12-08 21:21:11 | [diff] [blame] | 515 | void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) |
Michael Niedermayer | 1457ab5 | 2002-12-27 23:51:46 | [diff] [blame] | 516 | { |
| 517 | int i; |
| 518 | |
| 519 | /* IDCT4 on each line */ |
| 520 | for(i=0; i<8; i++) { |
| 521 | idct4row(block + i*8); |
| 522 | } |
| 523 | |
| 524 | /* IDCT8 and store */ |
| 525 | for(i=0; i<4; i++){ |
| 526 | idctSparseColAdd(dest + i, line_size, block + i); |
| 527 | } |
| 528 | } |
| 529 | |
Kostya Shishkov | 91823a6 | 2007-12-09 05:54:59 | [diff] [blame] | 530 | void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block) |
| 531 | { |
| 532 | int i; |
| 533 | |
| 534 | /* IDCT4 on each line */ |
| 535 | for(i=0; i<4; i++) { |
| 536 | idct4row(block + i*8); |
| 537 | } |
| 538 | |
| 539 | /* IDCT4 and store */ |
| 540 | for(i=0; i<4; i++){ |
| 541 | idct4col_add(dest + i, line_size, block + i); |
| 542 | } |
| 543 | } |