## ffmpeg / libavcodec / jrevdct.c @ 8dbcc9f2

History | View | Annotate | Download (33.2 KB)

1 | de6d9b64 | Fabrice Bellard | ```
/*
``` |
---|---|---|---|

2 | ```
* jrevdct.c
``` |
||

3 | ```
*
``` |
||

4 | ```
* Copyright (C) 1991, 1992, Thomas G. Lane.
``` |
||

5 | ```
* This file is part of the Independent JPEG Group's software.
``` |
||

6 | ```
* For conditions of distribution and use, see the accompanying README file.
``` |
||

7 | ```
*
``` |
||

8 | ```
* This file contains the basic inverse-DCT transformation subroutine.
``` |
||

9 | ```
*
``` |
||

10 | ```
* This implementation is based on an algorithm described in
``` |
||

11 | ```
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
``` |
||

12 | ```
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
``` |
||

13 | ```
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
``` |
||

14 | ```
* The primary algorithm described there uses 11 multiplies and 29 adds.
``` |
||

15 | ```
* We use their alternate method with 12 multiplies and 32 adds.
``` |
||

16 | ```
* The advantage of this method is that no data path contains more than one
``` |
||

17 | ```
* multiplication; this allows a very simple and accurate implementation in
``` |
||

18 | ```
* scaled fixed-point arithmetic, with a minimal number of shifts.
``` |
||

19 | ```
*
``` |
||

20 | ```
* I've made lots of modifications to attempt to take advantage of the
``` |
||

21 | ```
* sparse nature of the DCT matrices we're getting. Although the logic
``` |
||

22 | ```
* is cumbersome, it's straightforward and the resulting code is much
``` |
||

23 | ```
* faster.
``` |
||

24 | ```
*
``` |
||

25 | ```
* A better way to do this would be to pass in the DCT block as a sparse
``` |
||

26 | ```
* matrix, perhaps with the difference cases encoded.
``` |
||

27 | ```
*/
``` |
||

28 | 983e3246 | Michael Niedermayer | |

29 | ```
/**
``` |
||

30 | ```
* @file jrevdct.c
``` |
||

31 | ```
* Independent JPEG Group's LLM idct.
``` |
||

32 | ```
*/
``` |
||

33 | |||

34 | de6d9b64 | Fabrice Bellard | #include "common.h" |

35 | #include "dsputil.h" |
||

36 | |||

37 | ```
#define EIGHT_BIT_SAMPLES
``` |
||

38 | |||

39 | #define DCTSIZE 8 |
||

40 | #define DCTSIZE2 64 |
||

41 | |||

42 | ```
#define GLOBAL
``` |
||

43 | |||

44 | ```
#define RIGHT_SHIFT(x, n) ((x) >> (n))
``` |
||

45 | |||

46 | ```
typedef DCTELEM DCTBLOCK[DCTSIZE2];
``` |
||

47 | |||

48 | #define CONST_BITS 13 |
||

49 | |||

50 | ```
/*
``` |
||

51 | ```
* This routine is specialized to the case DCTSIZE = 8.
``` |
||

52 | ```
*/
``` |
||

53 | |||

54 | #if DCTSIZE != 8 |
||

55 | Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
||

56 | ```
#endif
``` |
||

57 | |||

58 | |||

59 | ```
/*
``` |
||

60 | ```
* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
``` |
||

61 | ```
* on each column. Direct algorithms are also available, but they are
``` |
||

62 | ```
* much more complex and seem not to be any faster when reduced to code.
``` |
||

63 | ```
*
``` |
||

64 | ```
* The poop on this scaling stuff is as follows:
``` |
||

65 | ```
*
``` |
||

66 | ```
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
``` |
||

67 | ```
* larger than the true IDCT outputs. The final outputs are therefore
``` |
||

68 | ```
* a factor of N larger than desired; since N=8 this can be cured by
``` |
||

69 | ```
* a simple right shift at the end of the algorithm. The advantage of
``` |
||

70 | ```
* this arrangement is that we save two multiplications per 1-D IDCT,
``` |
||

71 | ```
* because the y0 and y4 inputs need not be divided by sqrt(N).
``` |
||

72 | ```
*
``` |
||

73 | ```
* We have to do addition and subtraction of the integer inputs, which
``` |
||

74 | ```
* is no problem, and multiplication by fractional constants, which is
``` |
||

75 | ```
* a problem to do in integer arithmetic. We multiply all the constants
``` |
||

76 | ```
* by CONST_SCALE and convert them to integer constants (thus retaining
``` |
||

77 | ```
* CONST_BITS bits of precision in the constants). After doing a
``` |
||

78 | ```
* multiplication we have to divide the product by CONST_SCALE, with proper
``` |
||

79 | ```
* rounding, to produce the correct output. This division can be done
``` |
||

80 | ```
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
``` |
||

81 | ```
* as long as possible so that partial sums can be added together with
``` |
||

82 | ```
* full fractional precision.
``` |
||

83 | ```
*
``` |
||

84 | ```
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
``` |
||

85 | ```
* they are represented to better-than-integral precision. These outputs
``` |
||

86 | ```
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
``` |
||

87 | ```
* with the recommended scaling. (To scale up 12-bit sample data further, an
``` |
||

88 | ```
* intermediate int32 array would be needed.)
``` |
||

89 | ```
*
``` |
||

90 | ```
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
``` |
||

91 | ```
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
``` |
||

92 | ```
* shows that the values given below are the most effective.
``` |
||

93 | ```
*/
``` |
||

94 | |||

95 | ```
#ifdef EIGHT_BIT_SAMPLES
``` |
||

96 | #define PASS1_BITS 2 |
||

97 | ```
#else
``` |
||

98 | #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ |
||

99 | ```
#endif
``` |
||

100 | |||

101 | 0c1a9eda | Zdenek Kabelac | #define ONE ((int32_t) 1) |

102 | de6d9b64 | Fabrice Bellard | |

103 | ```
#define CONST_SCALE (ONE << CONST_BITS)
``` |
||

104 | |||

105 | ```
/* Convert a positive real constant to an integer scaled by CONST_SCALE.
``` |
||

106 | ```
* IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
``` |
||

107 | ```
* you will pay a significant penalty in run time. In that case, figure
``` |
||

108 | ```
* the correct integer constant values and insert them by hand.
``` |
||

109 | ```
*/
``` |
||

110 | |||

111 | ```
/* Actually FIX is no longer used, we precomputed them all */
``` |
||

112 | 0c1a9eda | Zdenek Kabelac | #define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) |

113 | de6d9b64 | Fabrice Bellard | |

114 | 0c1a9eda | Zdenek Kabelac | ```
/* Descale and correctly round an int32_t value that's scaled by N bits.
``` |

115 | de6d9b64 | Fabrice Bellard | ```
* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
``` |

116 | ```
* the fudge factor is correct for either sign of X.
``` |
||

117 | ```
*/
``` |
||

118 | |||

119 | #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) |
||

120 | |||

121 | 0c1a9eda | Zdenek Kabelac | ```
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
``` |

122 | de6d9b64 | Fabrice Bellard | ```
* For 8-bit samples with the recommended scaling, all the variable
``` |

123 | ```
* and constant values involved are no more than 16 bits wide, so a
``` |
||

124 | ```
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
``` |
||

125 | ```
* this provides a useful speedup on many machines.
``` |
||

126 | ```
* There is no way to specify a 16x16->32 multiply in portable C, but
``` |
||

127 | ```
* some C compilers will do the right thing if you provide the correct
``` |
||

128 | ```
* combination of casts.
``` |
||

129 | ```
* NB: for 12-bit samples, a full 32-bit multiplication will be needed.
``` |
||

130 | ```
*/
``` |
||

131 | |||

132 | ```
#ifdef EIGHT_BIT_SAMPLES
``` |
||

133 | #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ |
||

134 | 0c1a9eda | Zdenek Kabelac | #define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) |

135 | de6d9b64 | Fabrice Bellard | ```
#endif
``` |

136 | #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ |
||

137 | 0c1a9eda | Zdenek Kabelac | #define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) |

138 | de6d9b64 | Fabrice Bellard | ```
#endif
``` |

139 | ```
#endif
``` |
||

140 | |||

141 | #ifndef MULTIPLY /* default definition */ |
||

142 | #define MULTIPLY(var,const) ((var) * (const)) |
||

143 | ```
#endif
``` |
||

144 | |||

145 | |||

146 | ```
/*
``` |
||

147 | ```
Unlike our decoder where we approximate the FIXes, we need to use exact
``` |
||

148 | ```
ones here or successive P-frames will drift too much with Reference frame coding
``` |
||

149 | ```
*/
``` |
||

150 | #define FIX_0_211164243 1730 |
||

151 | #define FIX_0_275899380 2260 |
||

152 | #define FIX_0_298631336 2446 |
||

153 | #define FIX_0_390180644 3196 |
||

154 | #define FIX_0_509795579 4176 |
||

155 | #define FIX_0_541196100 4433 |
||

156 | #define FIX_0_601344887 4926 |
||

157 | #define FIX_0_765366865 6270 |
||

158 | #define FIX_0_785694958 6436 |
||

159 | #define FIX_0_899976223 7373 |
||

160 | #define FIX_1_061594337 8697 |
||

161 | #define FIX_1_111140466 9102 |
||

162 | #define FIX_1_175875602 9633 |
||

163 | #define FIX_1_306562965 10703 |
||

164 | #define FIX_1_387039845 11363 |
||

165 | #define FIX_1_451774981 11893 |
||

166 | #define FIX_1_501321110 12299 |
||

167 | #define FIX_1_662939225 13623 |
||

168 | #define FIX_1_847759065 15137 |
||

169 | #define FIX_1_961570560 16069 |
||

170 | #define FIX_2_053119869 16819 |
||

171 | #define FIX_2_172734803 17799 |
||

172 | #define FIX_2_562915447 20995 |
||

173 | #define FIX_3_072711026 25172 |
||

174 | |||

175 | ```
/*
``` |
||

176 | ```
* Perform the inverse DCT on one block of coefficients.
``` |
||

177 | ```
*/
``` |
||

178 | |||

179 | ```
void j_rev_dct(DCTBLOCK data)
``` |
||

180 | { |
||

181 | 0c1a9eda | Zdenek Kabelac | int32_t tmp0, tmp1, tmp2, tmp3; |

182 | int32_t tmp10, tmp11, tmp12, tmp13; |
||

183 | int32_t z1, z2, z3, z4, z5; |
||

184 | int32_t d0, d1, d2, d3, d4, d5, d6, d7; |
||

185 | de6d9b64 | Fabrice Bellard | ```
register DCTELEM *dataptr;
``` |

186 | ```
int rowctr;
``` |
||

187 | |||

188 | ```
/* Pass 1: process rows. */
``` |
||

189 | ```
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */
``` |
||

190 | ```
/* furthermore, we scale the results by 2**PASS1_BITS. */
``` |
||

191 | |||

192 | dataptr = data; |
||

193 | |||

194 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||

195 | ```
/* Due to quantization, we will usually find that many of the input
``` |
||

196 | ```
* coefficients are zero, especially the AC terms. We can exploit this
``` |
||

197 | ```
* by short-circuiting the IDCT calculation for any row in which all
``` |
||

198 | ```
* the AC terms are zero. In that case each output is equal to the
``` |
||

199 | ```
* DC coefficient (with scale factor as needed).
``` |
||

200 | ```
* With typical images and quantization tables, half or more of the
``` |
||

201 | ```
* row DCT calculations can be simplified this way.
``` |
||

202 | ```
*/
``` |
||

203 | |||

204 | register int *idataptr = (int*)dataptr; |
||

205 | |||

206 | 13b54752 | Fabrice Bellard | ```
/* WARNING: we do the same permutation as MMX idct to simplify the
``` |

207 | ```
video core */
``` |
||

208 | de6d9b64 | Fabrice Bellard | ```
d0 = dataptr[0];
``` |

209 | 13b54752 | Fabrice Bellard | ```
d2 = dataptr[1];
``` |

210 | ```
d4 = dataptr[2];
``` |
||

211 | ```
d6 = dataptr[3];
``` |
||

212 | ```
d1 = dataptr[4];
``` |
||

213 | ```
d3 = dataptr[5];
``` |
||

214 | ```
d5 = dataptr[6];
``` |
||

215 | de6d9b64 | Fabrice Bellard | ```
d7 = dataptr[7];
``` |

216 | |||

217 | 13b54752 | Fabrice Bellard | if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { |

218 | de6d9b64 | Fabrice Bellard | ```
/* AC terms all zero */
``` |

219 | ```
if (d0) {
``` |
||

220 | ```
/* Compute a 32 bit value to assign. */
``` |
||

221 | DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); |
||

222 | register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); |
||

223 | |||

224 | ```
idataptr[0] = v;
``` |
||

225 | ```
idataptr[1] = v;
``` |
||

226 | ```
idataptr[2] = v;
``` |
||

227 | ```
idataptr[3] = v;
``` |
||

228 | } |
||

229 | |||

230 | ```
dataptr += DCTSIZE; /* advance pointer to next row */
``` |
||

231 | ```
continue;
``` |
||

232 | } |
||

233 | |||

234 | ```
/* Even part: reverse the even part of the forward DCT. */
``` |
||

235 | ```
/* The rotator is sqrt(2)*c(-6). */
``` |
||

236 | { |
||

237 | ```
if (d6) {
``` |
||

238 | ```
if (d4) {
``` |
||

239 | ```
if (d2) {
``` |
||

240 | ```
if (d0) {
``` |
||

241 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

242 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

243 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

244 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

245 | |||

246 | tmp0 = (d0 + d4) << CONST_BITS; |
||

247 | tmp1 = (d0 - d4) << CONST_BITS; |
||

248 | |||

249 | tmp10 = tmp0 + tmp3; |
||

250 | tmp13 = tmp0 - tmp3; |
||

251 | tmp11 = tmp1 + tmp2; |
||

252 | tmp12 = tmp1 - tmp2; |
||

253 | ```
} else {
``` |
||

254 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

255 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

256 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

257 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

258 | |||

259 | tmp0 = d4 << CONST_BITS; |
||

260 | |||

261 | tmp10 = tmp0 + tmp3; |
||

262 | tmp13 = tmp0 - tmp3; |
||

263 | tmp11 = tmp2 - tmp0; |
||

264 | tmp12 = -(tmp0 + tmp2); |
||

265 | } |
||

266 | ```
} else {
``` |
||

267 | ```
if (d0) {
``` |
||

268 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

269 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

270 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

271 | |||

272 | tmp0 = (d0 + d4) << CONST_BITS; |
||

273 | tmp1 = (d0 - d4) << CONST_BITS; |
||

274 | |||

275 | tmp10 = tmp0 + tmp3; |
||

276 | tmp13 = tmp0 - tmp3; |
||

277 | tmp11 = tmp1 + tmp2; |
||

278 | tmp12 = tmp1 - tmp2; |
||

279 | ```
} else {
``` |
||

280 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

281 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

282 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

283 | |||

284 | tmp0 = d4 << CONST_BITS; |
||

285 | |||

286 | tmp10 = tmp0 + tmp3; |
||

287 | tmp13 = tmp0 - tmp3; |
||

288 | tmp11 = tmp2 - tmp0; |
||

289 | tmp12 = -(tmp0 + tmp2); |
||

290 | } |
||

291 | } |
||

292 | ```
} else {
``` |
||

293 | ```
if (d2) {
``` |
||

294 | ```
if (d0) {
``` |
||

295 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

296 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

297 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

298 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

299 | |||

300 | tmp0 = d0 << CONST_BITS; |
||

301 | |||

302 | tmp10 = tmp0 + tmp3; |
||

303 | tmp13 = tmp0 - tmp3; |
||

304 | tmp11 = tmp0 + tmp2; |
||

305 | tmp12 = tmp0 - tmp2; |
||

306 | ```
} else {
``` |
||

307 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

308 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

309 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

310 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

311 | |||

312 | tmp10 = tmp3; |
||

313 | tmp13 = -tmp3; |
||

314 | tmp11 = tmp2; |
||

315 | tmp12 = -tmp2; |
||

316 | } |
||

317 | ```
} else {
``` |
||

318 | ```
if (d0) {
``` |
||

319 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

320 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

321 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

322 | |||

323 | tmp0 = d0 << CONST_BITS; |
||

324 | |||

325 | tmp10 = tmp0 + tmp3; |
||

326 | tmp13 = tmp0 - tmp3; |
||

327 | tmp11 = tmp0 + tmp2; |
||

328 | tmp12 = tmp0 - tmp2; |
||

329 | ```
} else {
``` |
||

330 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

331 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

332 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

333 | |||

334 | tmp10 = tmp3; |
||

335 | tmp13 = -tmp3; |
||

336 | tmp11 = tmp2; |
||

337 | tmp12 = -tmp2; |
||

338 | } |
||

339 | } |
||

340 | } |
||

341 | ```
} else {
``` |
||

342 | ```
if (d4) {
``` |
||

343 | ```
if (d2) {
``` |
||

344 | ```
if (d0) {
``` |
||

345 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

346 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

347 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

348 | |||

349 | tmp0 = (d0 + d4) << CONST_BITS; |
||

350 | tmp1 = (d0 - d4) << CONST_BITS; |
||

351 | |||

352 | tmp10 = tmp0 + tmp3; |
||

353 | tmp13 = tmp0 - tmp3; |
||

354 | tmp11 = tmp1 + tmp2; |
||

355 | tmp12 = tmp1 - tmp2; |
||

356 | ```
} else {
``` |
||

357 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

358 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

359 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

360 | |||

361 | tmp0 = d4 << CONST_BITS; |
||

362 | |||

363 | tmp10 = tmp0 + tmp3; |
||

364 | tmp13 = tmp0 - tmp3; |
||

365 | tmp11 = tmp2 - tmp0; |
||

366 | tmp12 = -(tmp0 + tmp2); |
||

367 | } |
||

368 | ```
} else {
``` |
||

369 | ```
if (d0) {
``` |
||

370 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

371 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||

372 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||

373 | ```
} else {
``` |
||

374 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

375 | tmp10 = tmp13 = d4 << CONST_BITS; |
||

376 | tmp11 = tmp12 = -tmp10; |
||

377 | } |
||

378 | } |
||

379 | ```
} else {
``` |
||

380 | ```
if (d2) {
``` |
||

381 | ```
if (d0) {
``` |
||

382 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

383 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

384 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

385 | |||

386 | tmp0 = d0 << CONST_BITS; |
||

387 | |||

388 | tmp10 = tmp0 + tmp3; |
||

389 | tmp13 = tmp0 - tmp3; |
||

390 | tmp11 = tmp0 + tmp2; |
||

391 | tmp12 = tmp0 - tmp2; |
||

392 | ```
} else {
``` |
||

393 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

394 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

395 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

396 | |||

397 | tmp10 = tmp3; |
||

398 | tmp13 = -tmp3; |
||

399 | tmp11 = tmp2; |
||

400 | tmp12 = -tmp2; |
||

401 | } |
||

402 | ```
} else {
``` |
||

403 | ```
if (d0) {
``` |
||

404 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

405 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||

406 | ```
} else {
``` |
||

407 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

408 | ```
tmp10 = tmp13 = tmp11 = tmp12 = 0;
``` |
||

409 | } |
||

410 | } |
||

411 | } |
||

412 | } |
||

413 | |||

414 | ```
/* Odd part per figure 8; the matrix is unitary and hence its
``` |
||

415 | ```
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
``` |
||

416 | ```
*/
``` |
||

417 | |||

418 | ```
if (d7) {
``` |
||

419 | ```
if (d5) {
``` |
||

420 | ```
if (d3) {
``` |
||

421 | ```
if (d1) {
``` |
||

422 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

423 | z1 = d7 + d1; |
||

424 | z2 = d5 + d3; |
||

425 | z3 = d7 + d3; |
||

426 | z4 = d5 + d1; |
||

427 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

428 | |||

429 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

430 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

431 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

432 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

433 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

434 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

435 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

436 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

437 | |||

438 | z3 += z5; |
||

439 | z4 += z5; |
||

440 | |||

441 | tmp0 += z1 + z3; |
||

442 | tmp1 += z2 + z4; |
||

443 | tmp2 += z2 + z3; |
||

444 | tmp3 += z1 + z4; |
||

445 | ```
} else {
``` |
||

446 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

447 | z2 = d5 + d3; |
||

448 | z3 = d7 + d3; |
||

449 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
||

450 | |||

451 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

452 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

453 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

454 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

455 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

456 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

457 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

458 | |||

459 | z3 += z5; |
||

460 | z4 += z5; |
||

461 | |||

462 | tmp0 += z1 + z3; |
||

463 | tmp1 += z2 + z4; |
||

464 | tmp2 += z2 + z3; |
||

465 | tmp3 = z1 + z4; |
||

466 | } |
||

467 | ```
} else {
``` |
||

468 | ```
if (d1) {
``` |
||

469 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

470 | z1 = d7 + d1; |
||

471 | z4 = d5 + d1; |
||

472 | z5 = MULTIPLY(d7 + z4, FIX_1_175875602); |
||

473 | |||

474 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

475 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

476 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

477 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

478 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

479 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

480 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

481 | |||

482 | z3 += z5; |
||

483 | z4 += z5; |
||

484 | |||

485 | tmp0 += z1 + z3; |
||

486 | tmp1 += z2 + z4; |
||

487 | tmp2 = z2 + z3; |
||

488 | tmp3 += z1 + z4; |
||

489 | ```
} else {
``` |
||

490 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

491 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

492 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

493 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

494 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

495 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

496 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

497 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
||

498 | |||

499 | z3 += z5; |
||

500 | z4 += z5; |
||

501 | |||

502 | tmp0 += z3; |
||

503 | tmp1 += z4; |
||

504 | tmp2 = z2 + z3; |
||

505 | tmp3 = z1 + z4; |
||

506 | } |
||

507 | } |
||

508 | ```
} else {
``` |
||

509 | ```
if (d3) {
``` |
||

510 | ```
if (d1) {
``` |
||

511 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

512 | z1 = d7 + d1; |
||

513 | z3 = d7 + d3; |
||

514 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
||

515 | |||

516 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

517 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

518 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

519 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

520 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

521 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

522 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

523 | |||

524 | z3 += z5; |
||

525 | z4 += z5; |
||

526 | |||

527 | tmp0 += z1 + z3; |
||

528 | tmp1 = z2 + z4; |
||

529 | tmp2 += z2 + z3; |
||

530 | tmp3 += z1 + z4; |
||

531 | ```
} else {
``` |
||

532 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

533 | z3 = d7 + d3; |
||

534 | |||

535 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

536 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

537 | tmp2 = MULTIPLY(d3, FIX_0_509795579); |
||

538 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

539 | z5 = MULTIPLY(z3, FIX_1_175875602); |
||

540 | z3 = MULTIPLY(-z3, FIX_0_785694958); |
||

541 | |||

542 | tmp0 += z3; |
||

543 | tmp1 = z2 + z5; |
||

544 | tmp2 += z3; |
||

545 | tmp3 = z1 + z5; |
||

546 | } |
||

547 | ```
} else {
``` |
||

548 | ```
if (d1) {
``` |
||

549 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

550 | z1 = d7 + d1; |
||

551 | z5 = MULTIPLY(z1, FIX_1_175875602); |
||

552 | |||

553 | z1 = MULTIPLY(z1, FIX_0_275899380); |
||

554 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

555 | tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
||

556 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

557 | tmp3 = MULTIPLY(d1, FIX_1_111140466); |
||

558 | |||

559 | tmp0 += z1; |
||

560 | tmp1 = z4 + z5; |
||

561 | tmp2 = z3 + z5; |
||

562 | tmp3 += z1; |
||

563 | ```
} else {
``` |
||

564 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

565 | tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
||

566 | tmp1 = MULTIPLY(d7, FIX_1_175875602); |
||

567 | tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
||

568 | tmp3 = MULTIPLY(d7, FIX_0_275899380); |
||

569 | } |
||

570 | } |
||

571 | } |
||

572 | ```
} else {
``` |
||

573 | ```
if (d5) {
``` |
||

574 | ```
if (d3) {
``` |
||

575 | ```
if (d1) {
``` |
||

576 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

577 | z2 = d5 + d3; |
||

578 | z4 = d5 + d1; |
||

579 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
||

580 | |||

581 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

582 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

583 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

584 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

585 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

586 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

587 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

588 | |||

589 | z3 += z5; |
||

590 | z4 += z5; |
||

591 | |||

592 | tmp0 = z1 + z3; |
||

593 | tmp1 += z2 + z4; |
||

594 | tmp2 += z2 + z3; |
||

595 | tmp3 += z1 + z4; |
||

596 | ```
} else {
``` |
||

597 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

598 | z2 = d5 + d3; |
||

599 | |||

600 | z5 = MULTIPLY(z2, FIX_1_175875602); |
||

601 | tmp1 = MULTIPLY(d5, FIX_1_662939225); |
||

602 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

603 | z2 = MULTIPLY(-z2, FIX_1_387039845); |
||

604 | tmp2 = MULTIPLY(d3, FIX_1_111140466); |
||

605 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

606 | |||

607 | tmp0 = z3 + z5; |
||

608 | tmp1 += z2; |
||

609 | tmp2 += z2; |
||

610 | tmp3 = z4 + z5; |
||

611 | } |
||

612 | ```
} else {
``` |
||

613 | ```
if (d1) {
``` |
||

614 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

615 | z4 = d5 + d1; |
||

616 | |||

617 | z5 = MULTIPLY(z4, FIX_1_175875602); |
||

618 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

619 | tmp3 = MULTIPLY(d1, FIX_0_601344887); |
||

620 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

621 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

622 | z4 = MULTIPLY(z4, FIX_0_785694958); |
||

623 | |||

624 | tmp0 = z1 + z5; |
||

625 | tmp1 += z4; |
||

626 | tmp2 = z2 + z5; |
||

627 | tmp3 += z4; |
||

628 | ```
} else {
``` |
||

629 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

630 | tmp0 = MULTIPLY(d5, FIX_1_175875602); |
||

631 | tmp1 = MULTIPLY(d5, FIX_0_275899380); |
||

632 | tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
||

633 | tmp3 = MULTIPLY(d5, FIX_0_785694958); |
||

634 | } |
||

635 | } |
||

636 | ```
} else {
``` |
||

637 | ```
if (d3) {
``` |
||

638 | ```
if (d1) {
``` |
||

639 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

640 | z5 = d1 + d3; |
||

641 | tmp3 = MULTIPLY(d1, FIX_0_211164243); |
||

642 | tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
||

643 | z1 = MULTIPLY(d1, FIX_1_061594337); |
||

644 | z2 = MULTIPLY(-d3, FIX_2_172734803); |
||

645 | z4 = MULTIPLY(z5, FIX_0_785694958); |
||

646 | z5 = MULTIPLY(z5, FIX_1_175875602); |
||

647 | |||

648 | tmp0 = z1 - z4; |
||

649 | tmp1 = z2 + z4; |
||

650 | tmp2 += z5; |
||

651 | tmp3 += z5; |
||

652 | ```
} else {
``` |
||

653 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

654 | tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
||

655 | tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
||

656 | tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
||

657 | tmp3 = MULTIPLY(d3, FIX_1_175875602); |
||

658 | } |
||

659 | ```
} else {
``` |
||

660 | ```
if (d1) {
``` |
||

661 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

662 | tmp0 = MULTIPLY(d1, FIX_0_275899380); |
||

663 | tmp1 = MULTIPLY(d1, FIX_0_785694958); |
||

664 | tmp2 = MULTIPLY(d1, FIX_1_175875602); |
||

665 | tmp3 = MULTIPLY(d1, FIX_1_387039845); |
||

666 | ```
} else {
``` |
||

667 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

668 | ```
tmp0 = tmp1 = tmp2 = tmp3 = 0;
``` |
||

669 | } |
||

670 | } |
||

671 | } |
||

672 | } |
||

673 | } |
||

674 | ```
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
``` |
||

675 | |||

676 | ```
dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
``` |
||

677 | ```
dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
``` |
||

678 | ```
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
``` |
||

679 | ```
dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
``` |
||

680 | ```
dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
``` |
||

681 | ```
dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
``` |
||

682 | ```
dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
``` |
||

683 | ```
dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
``` |
||

684 | |||

685 | ```
dataptr += DCTSIZE; /* advance pointer to next row */
``` |
||

686 | } |
||

687 | |||

688 | ```
/* Pass 2: process columns. */
``` |
||

689 | ```
/* Note that we must descale the results by a factor of 8 == 2**3, */
``` |
||

690 | ```
/* and also undo the PASS1_BITS scaling. */
``` |
||

691 | |||

692 | dataptr = data; |
||

693 | for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { |
||

694 | ```
/* Columns of zeroes can be exploited in the same way as we did with rows.
``` |
||

695 | ```
* However, the row calculation has created many nonzero AC terms, so the
``` |
||

696 | ```
* simplification applies less often (typically 5% to 10% of the time).
``` |
||

697 | ```
* On machines with very fast multiplication, it's possible that the
``` |
||

698 | ```
* test takes more time than it's worth. In that case this section
``` |
||

699 | ```
* may be commented out.
``` |
||

700 | ```
*/
``` |
||

701 | |||

702 | ```
d0 = dataptr[DCTSIZE*0];
``` |
||

703 | ```
d1 = dataptr[DCTSIZE*1];
``` |
||

704 | ```
d2 = dataptr[DCTSIZE*2];
``` |
||

705 | ```
d3 = dataptr[DCTSIZE*3];
``` |
||

706 | ```
d4 = dataptr[DCTSIZE*4];
``` |
||

707 | ```
d5 = dataptr[DCTSIZE*5];
``` |
||

708 | ```
d6 = dataptr[DCTSIZE*6];
``` |
||

709 | ```
d7 = dataptr[DCTSIZE*7];
``` |
||

710 | |||

711 | ```
/* Even part: reverse the even part of the forward DCT. */
``` |
||

712 | ```
/* The rotator is sqrt(2)*c(-6). */
``` |
||

713 | ```
if (d6) {
``` |
||

714 | ```
if (d4) {
``` |
||

715 | ```
if (d2) {
``` |
||

716 | ```
if (d0) {
``` |
||

717 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

718 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

719 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

720 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

721 | |||

722 | tmp0 = (d0 + d4) << CONST_BITS; |
||

723 | tmp1 = (d0 - d4) << CONST_BITS; |
||

724 | |||

725 | tmp10 = tmp0 + tmp3; |
||

726 | tmp13 = tmp0 - tmp3; |
||

727 | tmp11 = tmp1 + tmp2; |
||

728 | tmp12 = tmp1 - tmp2; |
||

729 | ```
} else {
``` |
||

730 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
``` |
||

731 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

732 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

733 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

734 | |||

735 | tmp0 = d4 << CONST_BITS; |
||

736 | |||

737 | tmp10 = tmp0 + tmp3; |
||

738 | tmp13 = tmp0 - tmp3; |
||

739 | tmp11 = tmp2 - tmp0; |
||

740 | tmp12 = -(tmp0 + tmp2); |
||

741 | } |
||

742 | ```
} else {
``` |
||

743 | ```
if (d0) {
``` |
||

744 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

745 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

746 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

747 | |||

748 | tmp0 = (d0 + d4) << CONST_BITS; |
||

749 | tmp1 = (d0 - d4) << CONST_BITS; |
||

750 | |||

751 | tmp10 = tmp0 + tmp3; |
||

752 | tmp13 = tmp0 - tmp3; |
||

753 | tmp11 = tmp1 + tmp2; |
||

754 | tmp12 = tmp1 - tmp2; |
||

755 | ```
} else {
``` |
||

756 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
``` |
||

757 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

758 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

759 | |||

760 | tmp0 = d4 << CONST_BITS; |
||

761 | |||

762 | tmp10 = tmp0 + tmp3; |
||

763 | tmp13 = tmp0 - tmp3; |
||

764 | tmp11 = tmp2 - tmp0; |
||

765 | tmp12 = -(tmp0 + tmp2); |
||

766 | } |
||

767 | } |
||

768 | ```
} else {
``` |
||

769 | ```
if (d2) {
``` |
||

770 | ```
if (d0) {
``` |
||

771 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

772 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

773 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

774 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

775 | |||

776 | tmp0 = d0 << CONST_BITS; |
||

777 | |||

778 | tmp10 = tmp0 + tmp3; |
||

779 | tmp13 = tmp0 - tmp3; |
||

780 | tmp11 = tmp0 + tmp2; |
||

781 | tmp12 = tmp0 - tmp2; |
||

782 | ```
} else {
``` |
||

783 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
``` |
||

784 | z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
||

785 | tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
||

786 | tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
||

787 | |||

788 | tmp10 = tmp3; |
||

789 | tmp13 = -tmp3; |
||

790 | tmp11 = tmp2; |
||

791 | tmp12 = -tmp2; |
||

792 | } |
||

793 | ```
} else {
``` |
||

794 | ```
if (d0) {
``` |
||

795 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

796 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

797 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

798 | |||

799 | tmp0 = d0 << CONST_BITS; |
||

800 | |||

801 | tmp10 = tmp0 + tmp3; |
||

802 | tmp13 = tmp0 - tmp3; |
||

803 | tmp11 = tmp0 + tmp2; |
||

804 | tmp12 = tmp0 - tmp2; |
||

805 | ```
} else {
``` |
||

806 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
``` |
||

807 | tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
||

808 | tmp3 = MULTIPLY(d6, FIX_0_541196100); |
||

809 | |||

810 | tmp10 = tmp3; |
||

811 | tmp13 = -tmp3; |
||

812 | tmp11 = tmp2; |
||

813 | tmp12 = -tmp2; |
||

814 | } |
||

815 | } |
||

816 | } |
||

817 | ```
} else {
``` |
||

818 | ```
if (d4) {
``` |
||

819 | ```
if (d2) {
``` |
||

820 | ```
if (d0) {
``` |
||

821 | ```
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

822 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

823 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

824 | |||

825 | tmp0 = (d0 + d4) << CONST_BITS; |
||

826 | tmp1 = (d0 - d4) << CONST_BITS; |
||

827 | |||

828 | tmp10 = tmp0 + tmp3; |
||

829 | tmp13 = tmp0 - tmp3; |
||

830 | tmp11 = tmp1 + tmp2; |
||

831 | tmp12 = tmp1 - tmp2; |
||

832 | ```
} else {
``` |
||

833 | ```
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
``` |
||

834 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

835 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

836 | |||

837 | tmp0 = d4 << CONST_BITS; |
||

838 | |||

839 | tmp10 = tmp0 + tmp3; |
||

840 | tmp13 = tmp0 - tmp3; |
||

841 | tmp11 = tmp2 - tmp0; |
||

842 | tmp12 = -(tmp0 + tmp2); |
||

843 | } |
||

844 | ```
} else {
``` |
||

845 | ```
if (d0) {
``` |
||

846 | ```
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

847 | tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
||

848 | tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
||

849 | ```
} else {
``` |
||

850 | ```
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
``` |
||

851 | tmp10 = tmp13 = d4 << CONST_BITS; |
||

852 | tmp11 = tmp12 = -tmp10; |
||

853 | } |
||

854 | } |
||

855 | ```
} else {
``` |
||

856 | ```
if (d2) {
``` |
||

857 | ```
if (d0) {
``` |
||

858 | ```
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

859 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

860 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

861 | |||

862 | tmp0 = d0 << CONST_BITS; |
||

863 | |||

864 | tmp10 = tmp0 + tmp3; |
||

865 | tmp13 = tmp0 - tmp3; |
||

866 | tmp11 = tmp0 + tmp2; |
||

867 | tmp12 = tmp0 - tmp2; |
||

868 | ```
} else {
``` |
||

869 | ```
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
``` |
||

870 | tmp2 = MULTIPLY(d2, FIX_0_541196100); |
||

871 | tmp3 = MULTIPLY(d2, FIX_1_306562965); |
||

872 | |||

873 | tmp10 = tmp3; |
||

874 | tmp13 = -tmp3; |
||

875 | tmp11 = tmp2; |
||

876 | tmp12 = -tmp2; |
||

877 | } |
||

878 | ```
} else {
``` |
||

879 | ```
if (d0) {
``` |
||

880 | ```
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

881 | tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; |
||

882 | ```
} else {
``` |
||

883 | ```
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
``` |
||

884 | ```
tmp10 = tmp13 = tmp11 = tmp12 = 0;
``` |
||

885 | } |
||

886 | } |
||

887 | } |
||

888 | } |
||

889 | |||

890 | ```
/* Odd part per figure 8; the matrix is unitary and hence its
``` |
||

891 | ```
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
``` |
||

892 | ```
*/
``` |
||

893 | ```
if (d7) {
``` |
||

894 | ```
if (d5) {
``` |
||

895 | ```
if (d3) {
``` |
||

896 | ```
if (d1) {
``` |
||

897 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

898 | z1 = d7 + d1; |
||

899 | z2 = d5 + d3; |
||

900 | z3 = d7 + d3; |
||

901 | z4 = d5 + d1; |
||

902 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

903 | |||

904 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

905 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

906 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

907 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

908 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

909 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

910 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

911 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

912 | |||

913 | z3 += z5; |
||

914 | z4 += z5; |
||

915 | |||

916 | tmp0 += z1 + z3; |
||

917 | tmp1 += z2 + z4; |
||

918 | tmp2 += z2 + z3; |
||

919 | tmp3 += z1 + z4; |
||

920 | ```
} else {
``` |
||

921 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
``` |
||

922 | z1 = d7; |
||

923 | z2 = d5 + d3; |
||

924 | z3 = d7 + d3; |
||

925 | z5 = MULTIPLY(z3 + d5, FIX_1_175875602); |
||

926 | |||

927 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

928 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

929 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

930 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

931 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

932 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

933 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

934 | |||

935 | z3 += z5; |
||

936 | z4 += z5; |
||

937 | |||

938 | tmp0 += z1 + z3; |
||

939 | tmp1 += z2 + z4; |
||

940 | tmp2 += z2 + z3; |
||

941 | tmp3 = z1 + z4; |
||

942 | } |
||

943 | ```
} else {
``` |
||

944 | ```
if (d1) {
``` |
||

945 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

946 | z1 = d7 + d1; |
||

947 | z2 = d5; |
||

948 | z3 = d7; |
||

949 | z4 = d5 + d1; |
||

950 | z5 = MULTIPLY(z3 + z4, FIX_1_175875602); |
||

951 | |||

952 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

953 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

954 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

955 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

956 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

957 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

958 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

959 | |||

960 | z3 += z5; |
||

961 | z4 += z5; |
||

962 | |||

963 | tmp0 += z1 + z3; |
||

964 | tmp1 += z2 + z4; |
||

965 | tmp2 = z2 + z3; |
||

966 | tmp3 += z1 + z4; |
||

967 | ```
} else {
``` |
||

968 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
``` |
||

969 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

970 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

971 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

972 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

973 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

974 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

975 | z5 = MULTIPLY(d5 + d7, FIX_1_175875602); |
||

976 | |||

977 | z3 += z5; |
||

978 | z4 += z5; |
||

979 | |||

980 | tmp0 += z3; |
||

981 | tmp1 += z4; |
||

982 | tmp2 = z2 + z3; |
||

983 | tmp3 = z1 + z4; |
||

984 | } |
||

985 | } |
||

986 | ```
} else {
``` |
||

987 | ```
if (d3) {
``` |
||

988 | ```
if (d1) {
``` |
||

989 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

990 | z1 = d7 + d1; |
||

991 | z3 = d7 + d3; |
||

992 | z5 = MULTIPLY(z3 + d1, FIX_1_175875602); |
||

993 | |||

994 | tmp0 = MULTIPLY(d7, FIX_0_298631336); |
||

995 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

996 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

997 | z1 = MULTIPLY(-z1, FIX_0_899976223); |
||

998 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

999 | z3 = MULTIPLY(-z3, FIX_1_961570560); |
||

1000 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

1001 | |||

1002 | z3 += z5; |
||

1003 | z4 += z5; |
||

1004 | |||

1005 | tmp0 += z1 + z3; |
||

1006 | tmp1 = z2 + z4; |
||

1007 | tmp2 += z2 + z3; |
||

1008 | tmp3 += z1 + z4; |
||

1009 | ```
} else {
``` |
||

1010 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
``` |
||

1011 | z3 = d7 + d3; |
||

1012 | |||

1013 | tmp0 = MULTIPLY(-d7, FIX_0_601344887); |
||

1014 | z1 = MULTIPLY(-d7, FIX_0_899976223); |
||

1015 | tmp2 = MULTIPLY(d3, FIX_0_509795579); |
||

1016 | z2 = MULTIPLY(-d3, FIX_2_562915447); |
||

1017 | z5 = MULTIPLY(z3, FIX_1_175875602); |
||

1018 | z3 = MULTIPLY(-z3, FIX_0_785694958); |
||

1019 | |||

1020 | tmp0 += z3; |
||

1021 | tmp1 = z2 + z5; |
||

1022 | tmp2 += z3; |
||

1023 | tmp3 = z1 + z5; |
||

1024 | } |
||

1025 | ```
} else {
``` |
||

1026 | ```
if (d1) {
``` |
||

1027 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

1028 | z1 = d7 + d1; |
||

1029 | z5 = MULTIPLY(z1, FIX_1_175875602); |
||

1030 | |||

1031 | z1 = MULTIPLY(z1, FIX_0_275899380); |
||

1032 | z3 = MULTIPLY(-d7, FIX_1_961570560); |
||

1033 | tmp0 = MULTIPLY(-d7, FIX_1_662939225); |
||

1034 | z4 = MULTIPLY(-d1, FIX_0_390180644); |
||

1035 | tmp3 = MULTIPLY(d1, FIX_1_111140466); |
||

1036 | |||

1037 | tmp0 += z1; |
||

1038 | tmp1 = z4 + z5; |
||

1039 | tmp2 = z3 + z5; |
||

1040 | tmp3 += z1; |
||

1041 | ```
} else {
``` |
||

1042 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
``` |
||

1043 | tmp0 = MULTIPLY(-d7, FIX_1_387039845); |
||

1044 | tmp1 = MULTIPLY(d7, FIX_1_175875602); |
||

1045 | tmp2 = MULTIPLY(-d7, FIX_0_785694958); |
||

1046 | tmp3 = MULTIPLY(d7, FIX_0_275899380); |
||

1047 | } |
||

1048 | } |
||

1049 | } |
||

1050 | ```
} else {
``` |
||

1051 | ```
if (d5) {
``` |
||

1052 | ```
if (d3) {
``` |
||

1053 | ```
if (d1) {
``` |
||

1054 | ```
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

1055 | z2 = d5 + d3; |
||

1056 | z4 = d5 + d1; |
||

1057 | z5 = MULTIPLY(d3 + z4, FIX_1_175875602); |
||

1058 | |||

1059 | tmp1 = MULTIPLY(d5, FIX_2_053119869); |
||

1060 | tmp2 = MULTIPLY(d3, FIX_3_072711026); |
||

1061 | tmp3 = MULTIPLY(d1, FIX_1_501321110); |
||

1062 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

1063 | z2 = MULTIPLY(-z2, FIX_2_562915447); |
||

1064 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

1065 | z4 = MULTIPLY(-z4, FIX_0_390180644); |
||

1066 | |||

1067 | z3 += z5; |
||

1068 | z4 += z5; |
||

1069 | |||

1070 | tmp0 = z1 + z3; |
||

1071 | tmp1 += z2 + z4; |
||

1072 | tmp2 += z2 + z3; |
||

1073 | tmp3 += z1 + z4; |
||

1074 | ```
} else {
``` |
||

1075 | ```
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
``` |
||

1076 | z2 = d5 + d3; |
||

1077 | |||

1078 | z5 = MULTIPLY(z2, FIX_1_175875602); |
||

1079 | tmp1 = MULTIPLY(d5, FIX_1_662939225); |
||

1080 | z4 = MULTIPLY(-d5, FIX_0_390180644); |
||

1081 | z2 = MULTIPLY(-z2, FIX_1_387039845); |
||

1082 | tmp2 = MULTIPLY(d3, FIX_1_111140466); |
||

1083 | z3 = MULTIPLY(-d3, FIX_1_961570560); |
||

1084 | |||

1085 | tmp0 = z3 + z5; |
||

1086 | tmp1 += z2; |
||

1087 | tmp2 += z2; |
||

1088 | tmp3 = z4 + z5; |
||

1089 | } |
||

1090 | ```
} else {
``` |
||

1091 | ```
if (d1) {
``` |
||

1092 | ```
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

1093 | z4 = d5 + d1; |
||

1094 | |||

1095 | z5 = MULTIPLY(z4, FIX_1_175875602); |
||

1096 | z1 = MULTIPLY(-d1, FIX_0_899976223); |
||

1097 | tmp3 = MULTIPLY(d1, FIX_0_601344887); |
||

1098 | tmp1 = MULTIPLY(-d5, FIX_0_509795579); |
||

1099 | z2 = MULTIPLY(-d5, FIX_2_562915447); |
||

1100 | z4 = MULTIPLY(z4, FIX_0_785694958); |
||

1101 | |||

1102 | tmp0 = z1 + z5; |
||

1103 | tmp1 += z4; |
||

1104 | tmp2 = z2 + z5; |
||

1105 | tmp3 += z4; |
||

1106 | ```
} else {
``` |
||

1107 | ```
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
``` |
||

1108 | tmp0 = MULTIPLY(d5, FIX_1_175875602); |
||

1109 | tmp1 = MULTIPLY(d5, FIX_0_275899380); |
||

1110 | tmp2 = MULTIPLY(-d5, FIX_1_387039845); |
||

1111 | tmp3 = MULTIPLY(d5, FIX_0_785694958); |
||

1112 | } |
||

1113 | } |
||

1114 | ```
} else {
``` |
||

1115 | ```
if (d3) {
``` |
||

1116 | ```
if (d1) {
``` |
||

1117 | ```
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

1118 | z5 = d1 + d3; |
||

1119 | tmp3 = MULTIPLY(d1, FIX_0_211164243); |
||

1120 | tmp2 = MULTIPLY(-d3, FIX_1_451774981); |
||

1121 | z1 = MULTIPLY(d1, FIX_1_061594337); |
||

1122 | z2 = MULTIPLY(-d3, FIX_2_172734803); |
||

1123 | z4 = MULTIPLY(z5, FIX_0_785694958); |
||

1124 | z5 = MULTIPLY(z5, FIX_1_175875602); |
||

1125 | |||

1126 | tmp0 = z1 - z4; |
||

1127 | tmp1 = z2 + z4; |
||

1128 | tmp2 += z5; |
||

1129 | tmp3 += z5; |
||

1130 | ```
} else {
``` |
||

1131 | ```
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
``` |
||

1132 | tmp0 = MULTIPLY(-d3, FIX_0_785694958); |
||

1133 | tmp1 = MULTIPLY(-d3, FIX_1_387039845); |
||

1134 | tmp2 = MULTIPLY(-d3, FIX_0_275899380); |
||

1135 | tmp3 = MULTIPLY(d3, FIX_1_175875602); |
||

1136 | } |
||

1137 | ```
} else {
``` |
||

1138 | ```
if (d1) {
``` |
||

1139 | ```
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

1140 | tmp0 = MULTIPLY(d1, FIX_0_275899380); |
||

1141 | tmp1 = MULTIPLY(d1, FIX_0_785694958); |
||

1142 | tmp2 = MULTIPLY(d1, FIX_1_175875602); |
||

1143 | tmp3 = MULTIPLY(d1, FIX_1_387039845); |
||

1144 | ```
} else {
``` |
||

1145 | ```
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
``` |
||

1146 | ```
tmp0 = tmp1 = tmp2 = tmp3 = 0;
``` |
||

1147 | } |
||

1148 | } |
||

1149 | } |
||

1150 | } |
||

1151 | |||

1152 | ```
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
``` |
||

1153 | |||

1154 | ```
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
``` |
||

1155 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1156 | ```
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
``` |
||

1157 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1158 | ```
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
``` |
||

1159 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1160 | ```
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
``` |
||

1161 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1162 | ```
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
``` |
||

1163 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1164 | ```
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
``` |
||

1165 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1166 | ```
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
``` |
||

1167 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1168 | ```
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
``` |
||

1169 | ```
CONST_BITS+PASS1_BITS+3);
``` |
||

1170 | |||

1171 | ```
dataptr++; /* advance pointer to next column */
``` |
||

1172 | } |
||

1173 | } |
||

1174 | |||

1175 | cd4af68a | Zdenek Kabelac | ```
#undef FIX
``` |

1176 | `#undef CONST_BITS` |