/*----------------------------------------------------------------------------/
/  TJpgDec - Tiny JPEG Decompressor R0.01c                     (C)ChaN, 2019
/-----------------------------------------------------------------------------/
/ The TJpgDec is a generic JPEG decompressor module for tiny embedded systems.
/ This is a free software that opened for education, research and commercial
/  developments under license policy of following terms.
/
/  Copyright (C) 2019, ChaN, all right reserved.
/
/ * The TJpgDec module is a free software and there is NO WARRANTY.
/ * No restriction on use. You can use, modify and redistribute it for
/   personal, non-profit or commercial products UNDER YOUR RESPONSIBILITY.
/ * Redistributions of source code must retain the above copyright notice.
/
/-----------------------------------------------------------------------------/
/ Oct 04, 2011 R0.01  First release.
/ Feb 19, 2012 R0.01a Fixed decompression fails when scan starts with an escape seq.
/ Sep 03, 2012 R0.01b Added JD_TBLCLIP option.
/ Mar 16, 2019 R0.01c Supprted stdint.h.
/-----------------------------------------------------------------------------/
/ original source is here : http://elm-chan.org/fsw/tjpgd/00index.html
/
/ Modified for LGFX  by lovyan03, 2020
/ add support grayscale jpeg
/ add bayer pattern
/ tweak for 32bit processor
/----------------------------------------------------------------------------*/

#include "lgfx_tjpgd.h"

#include <string.h> /* for memcpy, memmove, memset */


/*-----------------------------------------------*/
/* Zigzag-order to raster-order conversion table */
/*-----------------------------------------------*/

static const uint8_t Zig[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};


/*-------------------------------------------------*/
/* Input scale factor of Arai algorithm            */
/* (scaled up by 8192, i.e. 13 fractional bits,    */
/*  for fixed point operations)                    */
/*-------------------------------------------------*/

#define IPSF_Q13(x) ((uint16_t)((x) * 8192))

static const uint16_t Ipsf[64] = {  /* See also aa_idct.png */
    IPSF_Q13(1.00000), IPSF_Q13(1.38704), IPSF_Q13(1.30656), IPSF_Q13(1.17588), IPSF_Q13(1.00000), IPSF_Q13(0.78570), IPSF_Q13(0.54120), IPSF_Q13(0.27590),
    IPSF_Q13(1.38704), IPSF_Q13(1.92388), IPSF_Q13(1.81226), IPSF_Q13(1.63099), IPSF_Q13(1.38704), IPSF_Q13(1.08979), IPSF_Q13(0.75066), IPSF_Q13(0.38268),
    IPSF_Q13(1.30656), IPSF_Q13(1.81226), IPSF_Q13(1.70711), IPSF_Q13(1.53636), IPSF_Q13(1.30656), IPSF_Q13(1.02656), IPSF_Q13(0.70711), IPSF_Q13(0.36048),
    IPSF_Q13(1.17588), IPSF_Q13(1.63099), IPSF_Q13(1.53636), IPSF_Q13(1.38268), IPSF_Q13(1.17588), IPSF_Q13(0.92388), IPSF_Q13(0.63638), IPSF_Q13(0.32442),
    IPSF_Q13(1.00000), IPSF_Q13(1.38704), IPSF_Q13(1.30656), IPSF_Q13(1.17588), IPSF_Q13(1.00000), IPSF_Q13(0.78570), IPSF_Q13(0.54120), IPSF_Q13(0.27590),
    IPSF_Q13(0.78570), IPSF_Q13(1.08979), IPSF_Q13(1.02656), IPSF_Q13(0.92388), IPSF_Q13(0.78570), IPSF_Q13(0.61732), IPSF_Q13(0.42522), IPSF_Q13(0.21677),
    IPSF_Q13(0.54120), IPSF_Q13(0.75066), IPSF_Q13(0.70711), IPSF_Q13(0.63638), IPSF_Q13(0.54120), IPSF_Q13(0.42522), IPSF_Q13(0.29290), IPSF_Q13(0.14932),
    IPSF_Q13(0.27590), IPSF_Q13(0.38268), IPSF_Q13(0.36048), IPSF_Q13(0.32442), IPSF_Q13(0.27590), IPSF_Q13(0.21678), IPSF_Q13(0.14932), IPSF_Q13(0.07612)
};

#undef IPSF_Q13


/*---------------------------------------------*/
/* Conversion table for fast clipping process  */
/*---------------------------------------------*/

#if JD_TBLCLIP

/* Helper macros that spell out the 1024-entry table without 1024 literals. */
#define CLIP8_RAMP16(b) \
    (b),      (b) + 1,  (b) + 2,  (b) + 3,  (b) + 4,  (b) + 5,  (b) + 6,  (b) + 7, \
    (b) + 8,  (b) + 9,  (b) + 10, (b) + 11, (b) + 12, (b) + 13, (b) + 14, (b) + 15
#define CLIP8_SAT16 \
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
#define CLIP8_SAT64  CLIP8_SAT16, CLIP8_SAT16, CLIP8_SAT16, CLIP8_SAT16
#define CLIP8_SAT256 CLIP8_SAT64, CLIP8_SAT64, CLIP8_SAT64, CLIP8_SAT64

static const uint8_t Clip8[1024] = {
    /* 0..255: identity */
    CLIP8_RAMP16(0),   CLIP8_RAMP16(16),  CLIP8_RAMP16(32),  CLIP8_RAMP16(48),
    CLIP8_RAMP16(64),  CLIP8_RAMP16(80),  CLIP8_RAMP16(96),  CLIP8_RAMP16(112),
    CLIP8_RAMP16(128), CLIP8_RAMP16(144), CLIP8_RAMP16(160), CLIP8_RAMP16(176),
    CLIP8_RAMP16(192), CLIP8_RAMP16(208), CLIP8_RAMP16(224), CLIP8_RAMP16(240),
    /* 256..511: saturate to 255 */
    CLIP8_SAT256
    /* 512..1023 (i.e. -512..-1 modulo 1024): saturate to 0.
       These entries are left to the implicit zero-initialisation. */
};

#undef CLIP8_RAMP16
#undef CLIP8_SAT16
#undef CLIP8_SAT64
#undef CLIP8_SAT256

/* Clip a value in the range -512..511 to 0..255 using the lookup table.
   The value is reduced modulo 1024 so that negative inputs index the
   upper (all-zero) half of the table. */
static inline int32_t BYTECLIP (
    int32_t val
)
{
    return Clip8[(uint16_t)val & 0x3FF];
}

#else   /* JD_TBLCLIP */

/* Clip a value to the range 0..255 by comparison. */
static inline int32_t BYTECLIP (
    int32_t val
)
{
    return (val < 0) ? 0 : (val > 255) ? 255 : val;
}

#endif


/*---------------------------------------------*/
/* Output 4x4 bayer pattern table              */
/*---------------------------------------------*/

#if JD_BAYER
static const int8_t Bayer[16] = {
     0,  4,  1,  5,
    -2,  2, -1,  3,
     1,  5,  0,  4,
    -1,  3, -2,  2
};
#endif


/*-----------------------------------------------------------------------*/
/* Allocate a memory block from memory pool                              */
/*-----------------------------------------------------------------------*/

static uint8_t* alloc_pool (    /* Pointer to allocated memory block (NULL:no memory available) */
    lgfxJdec* jd,               /* Pointer to the decompressor object */
    uint_fast16_t nd            /* Number of bytes to allocate */
)
{
    uint8_t *rp = NULL;

    /* Rounding up must not wrap around (matters where uint_fast16_t is 16 bits wide) */
    if (nd > (uint_fast16_t)-1 - 3) return NULL;

    nd = (nd + 3) & ~(uint_fast16_t)3;  /* Align block size to the word boundary */

    if (jd->sz_pool >= nd) {
        jd->sz_pool -= nd;
        rp = jd->pool;          /* Get start of available memory pool */
        jd->pool = rp + nd;     /* Allocate required bytes */
    }

    return rp;  /* Return allocated memory block (NULL:no memory to allocate) */
}


/*-----------------------------------------------------------------------*/
/* Create de-quantization and prescaling tables with a DQT segment       */
/*-----------------------------------------------------------------------*/

static int32_t create_qt_tbl (  /* 0:OK, !0:Failed */
    lgfxJdec* jd,               /* Pointer to the decompressor object */
    const uint8_t* data,        /* Pointer to the quantizer tables */
    uint_fast16_t ndata         /* Size of input data */
)
{
    while (ndata) {     /* Process all tables in the segment */
        /* Each table is one property byte followed by 64 quantizer bytes */
        if (ndata < 65) return JDR_FMT1;    /* Err: table size is unaligned */
        ndata -= 65;

        uint_fast8_t d = *data++;           /* Get table property */
        if (d & 0xF0) return JDR_FMT1;      /* Err: not 8-bit resolution */

        int32_t *pb = (int32_t*)alloc_pool(jd, 64 * sizeof (int32_t));  /* Allocate a memory block for the table */
        if (!pb) return JDR_MEM1;           /* Err: not enough memory */
        jd->qttbl[d & 3] = pb;              /* Register the table */

        for (size_t i = 0; i < 64; ++i) {   /* Load the table */
            uint_fast8_t z = Zig[i];        /* Zigzag-order to raster-order conversion */
            pb[z] = (int32_t)((uint32_t)data[i] * Ipsf[z]); /* Apply scale factor of Arai algorithm to the de-quantizers */
        }
        data += 64;
    }

    return JDR_OK;
}


/*-----------------------------------------------------------------------*/
/* Create huffman code tables with a DHT segment                         */
/*-----------------------------------------------------------------------*/

static int32_t create_huffman_tbl ( /* 0:OK, !0:Failed */
    lgfxJdec* jd,                   /* Pointer to the decompressor object */
    const uint8_t* data,            /* Pointer to the packed huffman tables */
    int_fast16_t ndata              /* Size of input data */
)
{
    while (ndata > 0) {     /* Process all tables in the segment */
        /* Each table starts with one class/number byte and 16 code-length counts */
        if (ndata < 17) return JDR_FMT1;    /* Err: wrong data size */
        ndata -= 17;

        uint_fast8_t d = *data++;           /* Get table number and class */
        if (d & 0xEE) return JDR_FMT1;      /* Err: invalid class/number */
        uint_fast8_t cls = d >> 4;          /* class = dc(0)/ac(1) */
        uint_fast8_t num = d & 0x0F;        /* table number = 0/1 */

        uint8_t *pb = alloc_pool(jd, 16);   /* Allocate a memory block for the bit distribution table */
        if (!pb) return JDR_MEM1;           /* Err: not enough memory */
        /* All three table pointers are stored biased by -1 because the
           decoder (huffext) walks them with pre-increment. */
        jd->huffbits[num][cls] = pb - 1;

        uint_fast16_t np = 0;
        for (size_t i = 0; i < 16; ++i) {   /* Load number of patterns for 1 to 16-bit code */
            np += (pb[i] = data[i]);        /* Get sum of code words for each code */
        }
        data += 16;

        /* A table holds at most 256 symbols and all of them must be inside the segment */
        if (np > 256 || (int_fast16_t)np > ndata) return JDR_FMT1;  /* Err: wrong data size */
        ndata -= (int_fast16_t)np;

        uint16_t *ph = (uint16_t*)alloc_pool(jd, np * sizeof (uint16_t));   /* Allocate a memory block for the code word table */
        if (!ph) return JDR_MEM1;           /* Err: not enough memory */
        jd->huffcode[num][cls] = ph - 1;

        uint_fast16_t hc = 0;
        for (size_t i = 0; i < 16; ++i) {   /* Re-build huffman code word table */
            size_t b = pb[i];
            while (b--) *ph++ = hc++;
            hc <<= 1;
        }

        uint8_t *pd = alloc_pool(jd, np);   /* Allocate a memory block for the decoded data */
        if (!pd) return JDR_MEM1;           /* Err: not enough memory */
        jd->huffdata[num][cls] = pd - 1;

        memcpy(pd, data, np);               /* Load decoded data corresponds to each code word */
        data += np;
    }

    return JDR_OK;
}


/*-----------------------------------------------------------------------*/
/* Extract N bits from input stream                                      */
/*-----------------------------------------------------------------------*/ static int32_t bitext ( /* >=0: extracted data, <0: error code */ lgfxJdec* jd, /* Pointer to the decompressor object */ uint_fast8_t nbit /* Number of bits to extract (1 to 11) */ ) { uint_fast8_t msk = jd->dbit; uint8_t *dp = jd->dptr; uint32_t w = *dp; if (msk < nbit) { do { /* Next byte? */ uint8_t *dpend = jd->dpend; if (++dp == dpend) { /* No input data is available, re-fill input buffer */ dp = jd->inbuf; /* Top of input buffer */ dpend = dp + jd->infunc(jd->device, dp, JD_SZBUF); if (dp == dpend) return 0 - (int32_t)JDR_INP; /* Err: read error or wrong stream termination */ jd->dpend = dpend; } uint_fast8_t s = *dp; w = (w << 8) + s; if (s == 0xff) { /* Is start of flag sequence? */ if (++dp == dpend) { /* No input data is available, re-fill input buffer */ dp = jd->inbuf; /* Top of input buffer */ dpend = dp + jd->infunc(jd->device, dp, JD_SZBUF); if (dp == dpend) return 0 - (int32_t)JDR_INP; /* Err: read error or wrong stream termination */ jd->dpend = dpend; } if (*dp != 0) return 0 - (int32_t)JDR_FMT1; /* Err: unexpected flag is detected (may be collapted data) */ *dp = 0xff; /* The flag is a data 0xFF */ } jd->dptr = dp; msk += 8; /* Read from MSB */ } while (msk < nbit); } msk -= nbit; jd->dbit = msk; return (w >> msk) & ((1 << nbit) - 1); /* Get bits */ } /*-----------------------------------------------------------------------*/ /* Extract a huffman decoded data from input stream */ /*-----------------------------------------------------------------------*/ static int32_t huffext ( /* >=0: decoded data, <0: error code */ lgfxJdec* jd, /* Pointer to the decompressor object */ const uint8_t* hb, /* Pointer to the bit distribution table */ const uint16_t* hc, /* Pointer to the code word table */ const uint8_t* hd /* Pointer to the data table */ ) { const uint8_t* hb_end = hb + 16 + 1; uint_fast8_t msk = jd->dbit; uint32_t w = *jd->dptr & ((1ul << msk) - 1); for (;;) { if (!msk) 
{ /* Next byte? */ uint8_t *dp = jd->dptr; uint8_t *dpend = jd->dpend; msk = 8; if (++dp == dpend) { /* No input data is available, re-fill input buffer */ dp = jd->inbuf; /* Top of input buffer */ jd->dpend = dpend = dp + jd->infunc(jd->device, dp, JD_SZBUF); if (dp == dpend) return 0 - (int32_t)JDR_INP; /* Err: read error or wrong stream termination */ } uint_fast8_t s = *dp; w = (w << 8) + s; if (*dp == 0xff) { /* Is start of flag sequence? */ if (++dp == dpend) { /* No input data is available, re-fill input buffer */ dp = jd->inbuf; /* Top of input buffer */ jd->dpend = dpend = dp + jd->infunc(jd->device, dp, JD_SZBUF); if (dp == dpend) return 0 - (int32_t)JDR_INP; /* Err: read error or wrong stream termination */ } if (*dp != 0) return 0 - (int32_t)JDR_FMT1; /* Err: unexpected flag is detected (may be collapted data) */ *dp = 0xff; /* The flag is a data 0xFF */ } jd->dptr = dp; } do { uint_fast16_t v = w >> --msk; uint_fast8_t nc = *++hb; if (hb == hb_end) return 0 - (int32_t)JDR_FMT1; /* Err: code not found (may be collapted data) */ if (nc) { const uint8_t* hd_end = hd + nc; do { /* Search the code word in this bit length */ if (v == *++hc) goto huffext_match; /* Matched? 
*/ } while (++hd != hd_end); } } while (msk); } huffext_match: jd->dbit = msk; return *++hd; /* Return the decoded data */ } /*-----------------------------------------------------------------------*/ /* Apply Inverse-DCT in Arai Algorithm (see also aa_idct.png) */ /*-----------------------------------------------------------------------*/ static void block_idct ( int32_t* src, /* Input block data (de-quantized and pre-scaled for Arai Algorithm) */ int16_t* dst /* Pointer to the destination to store the block as byte array */ ) { const int32_t M13 = (int32_t)(1.41421*256), M2 = (int32_t)(1.08239*256), M4 = (int32_t)(2.61313*256), M5 = (int32_t)(1.84776*256); int32_t v0, v1, v2, v3, v4, v5, v6, v7; int32_t t10, t11, t12, t13; /* Process columns */ for (int i = 0; i < 8; ++i) { /* Get and Process the odd elements */ v4 = src[8 * 7]; v5 = src[8 * 1]; v6 = src[8 * 5]; v7 = src[8 * 3]; t10 = v5 - v4; t11 = v5 + v4; t12 = v6 - v7; v7 += v6; v5 = (t11 - v7) * M13 >> 8; t13 = (t10 + t12) * M5 >> 8; v6 = t13 - ((t12 * M4 >> 8) + (v7 += t11)); v4 = t13 - ((t10 * M2 >> 8) + (v5 -= v6)); /* Get and Process the even elements */ v0 = src[8 * 0]; v2 = src[8 * 4]; t10 = v0 + v2; t12 = v0 - v2; v1 = src[8 * 2]; v3 = src[8 * 6]; t11 = (v1 - v3) * M13 >> 8; v3 += v1; t11 -= v3; v0 = t10 + v3; v3 = t10 - v3; v1 = t12 + t11; v2 = t12 - t11; /* Write-back transformed values */ src[8 * 0] = v0 + v7; src[8 * 7] = v0 - v7; src[8 * 1] = v1 + v6; src[8 * 6] = v1 - v6; src[8 * 2] = v2 + v5; src[8 * 5] = v2 - v5; src[8 * 3] = v3 + v4; src[8 * 4] = v3 - v4; ++src; /* Next column */ } /* Process rows */ src -= 8; for (int i = 0; i < 8; ++i) { /* Get and Process the odd elements */ v4 = src[1]; v5 = src[7] + v4; v4 = (v4 << 1) - v5; v6 = src[5]; v7 = src[3] + v6; v6 = (v6 << 1) - v7; v7 += v5; v5 = (v5 << 1) - v7; t13 = v4 + v6; t13 = t13 * M5 >> 8; v6 = v6 * M4 >> 8; v6 += v7; v6 = t13 - v6; v5 = v5 * M13 >> 8; v5 -= v6; v4 = v4 * M2 >> 8; v4 += v5; v4 = t13 - v4; /* Get and Process the even 
elements */ v0 = src[0] + (128L << 8); /* remove DC offset (-128) here */ v2 = src[4]; t10 = v0 + v2; t12 = v0 - v2; v1 = src[2]; v3 = src[6] + v1; t11 = (v1 << 1) - v3; t11 = t11 * M13 >> 8; t11 -= v3; v0 = t10 + v3; v3 = t10 - v3; v1 = t12 + t11; v2 = t12 - t11; dst[0] = (v0 + v7) >> 8; dst[7] = (v0 - v7) >> 8; dst[1] = (v1 + v6) >> 8; dst[6] = (v1 - v6) >> 8; dst[2] = (v2 + v5) >> 8; dst[5] = (v2 - v5) >> 8; dst[3] = (v3 + v4) >> 8; dst[4] = (v3 - v4) >> 8; dst += 8; src += 8; /* Next row */ } } /*-----------------------------------------------------------------------*/ /* Load all blocks in the MCU into working buffer */ /*-----------------------------------------------------------------------*/ static JRESULT mcu_load ( lgfxJdec* jd /* Pointer to the decompressor object */ ) { int32_t *tmp = (int32_t*)jd->workbuf; /* Block working buffer for de-quantize and IDCT */ int32_t b, d, e; uint32_t blk, nby, nbc; int16_t *bp; const uint8_t *hb, *hd; const uint16_t *hc; nby = jd->msx * jd->msy; /* Number of Y blocks (1, 2 or 4) */ nbc = jd->comps_in_frame - 1; /* Number of C blocks (2 or 0(grayscale)) */ bp = jd->mcubuf; /* Pointer to the first block */ for (blk = 0; blk < nby + nbc; ++blk) { size_t cmp = (blk < nby) ? 0 : blk - nby + 1; /* Component number 0:Y, 1:Cb, 2:Cr */ size_t id = cmp ? 
1 : 0; /* Huffman table ID of the component */ /* Extract a DC element from input stream */ hb = jd->huffbits[id][0]; /* Huffman table for the DC element */ hc = jd->huffcode[id][0]; hd = jd->huffdata[id][0]; b = huffext(jd, hb, hc, hd); /* Extract a huffman coded data (bit length) */ if (b < 0) return (JRESULT)(-b); /* Err: invalid code or input */ d = jd->dcv[cmp]; /* DC value of previous block */ if (b) { /* If there is any difference from previous block */ e = bitext(jd, b); /* Extract data bits */ if (e < 0) return (JRESULT)(-e); /* Err: input */ b = 1 << (b - 1); /* MSB position */ if (!(e & b)) e -= (b << 1) - 1; /* Restore sign if needed */ d += e; /* Get current value */ jd->dcv[cmp] = d; /* Save current DC value for next block */ } const int32_t *dqf = jd->qttbl[jd->qtid[cmp]]; /* De-quantizer table ID for this component */ tmp[0] = d * dqf[0] >> 8; /* De-quantize, apply scale factor of Arai algorithm and descale 8 bits */ /* Extract following 63 AC elements from input stream */ memset(&tmp[1], 0, 63*sizeof(int32_t)); /* Clear rest of elements */ hb = jd->huffbits[id][1]; /* Huffman table for the AC elements */ hc = jd->huffcode[id][1]; hd = jd->huffdata[id][1]; uint_fast8_t i = 1; /* Top of the AC elements */ do { b = huffext(jd, hb, hc, hd); /* Extract a huffman coded value (zero runs and bit length) */ if (b == 0) break; /* EOB? 
*/ if (b < 0) return (JRESULT)(-b); /* Err: invalid code or input error */ i += b >> 4; /* Number of leading zero elements Skip zero elements */ if (b &= 0x0F) { /* Bit length */ d = bitext(jd, b); /* Extract data bits */ if (d < 0) return (JRESULT)(-d);/* Err: input device */ b = 1 << (b - 1); /* MSB position */ if (!(d & b)) d -= (b << 1) - 1;/* Restore negative value if needed */ uint_fast8_t z = Zig[i]; /* Zigzag-order to raster-order converted index */ tmp[z] = d * dqf[z] >> 8; /* De-quantize, apply scale factor of Arai algorithm and descale 8 bits */ } } while (++i < 64); /* Next AC element */ if (i == 1 || (JD_USE_SCALE && jd->scale == 3)) { d = (int16_t)((*tmp >> 8) + 128); /* If scale ratio is 1/8, IDCT can be ommited and only DC element is used */ for (i = 0; i < 64; bp[i++] = d) ; } else { block_idct(tmp, bp); /* Apply IDCT and store the block to the MCU buffer */ } bp += 64; /* Next block */ } return JDR_OK; /* All blocks have been loaded successfully */ } /*-----------------------------------------------------------------------*/ /* Output an MCU: Convert YCrCb to RGB and output it in RGB form */ /*-----------------------------------------------------------------------*/ static JRESULT mcu_output ( lgfxJdec* jd, /* Pointer to the decompressor object */ uint32_t (*outfunc)(void*, void*, JRECT*), /* RGB output function */ uint32_t x, /* MCU position in the image (left of the MCU) */ uint32_t y /* MCU position in the image (top of the MCU) */ ) { const int_fast16_t FP_SHIFT = 8; uint32_t ix, iy, mx, my, rx, ry; int32_t yy, cb, cr; int16_t *py, *pc; uint8_t *rgb24; JRECT rect; mx = jd->msx << 3; my = jd->msy << 3; /* MCU size (pixel) */ rx = (mx < jd->width - x) ? mx : jd->width - x; /* Output rectangular size (it may be clipped at right/bottom end) */ ry = (my < jd->height - y) ? 
my : jd->height - y; if (JD_USE_SCALE) { rx >>= jd->scale; ry >>= jd->scale; if (!rx || !ry) return JDR_OK; /* Skip this MCU if all pixel is to be rounded off */ x >>= jd->scale; y >>= jd->scale; } rect.left = x; rect.right = x + rx - 1; /* Rectangular area in the frame buffer */ rect.top = y; rect.bottom = y + ry - 1; uint8_t* workbuf = (uint8_t*)jd->workbuf; if (!JD_USE_SCALE || jd->scale != 3) { /* Not for 1/8 scaling */ uint_fast8_t ixshift = (mx == 16); uint_fast8_t iyshift = (my == 16); /* Build an RGB MCU from discrete comopnents */ rgb24 = workbuf; iy = 0; do { #if JD_BAYER const int8_t* btbl = &Bayer[(iy & 3) << 2]; #endif py = &jd->mcubuf[((iy & 8) + iy) << 3]; pc = &jd->mcubuf[((mx << iyshift) + (iy >> iyshift)) << 3]; ix = 0; do { do { cb = (pc[ 0] - 128); /* Get Cb/Cr component and restore right level */ cr = (pc[64] - 128); ++pc; /* Convert CbCr to RGB */ int32_t rr = ((int32_t)(1.402 * (1<> FP_SHIFT; int32_t gg = ((int32_t)(0.34414 * (1<> FP_SHIFT; int32_t bb = ((int32_t)(1.772 * (1<> FP_SHIFT; do { #if JD_BAYER yy = *py + btbl[ix & 3]; /* Get Y component */ #else yy = *py; /* Get Y component */ #endif ++py; /* Convert YCbCr to RGB */ rgb24[0] = BYTECLIP(yy + rr); rgb24[1] = BYTECLIP(yy - gg); rgb24[2] = BYTECLIP(yy + bb); rgb24 += 3; } while (++ix & ixshift); } while (ix & 7); py += 64 - 8; /* Jump to next block if double block heigt */ } while (ix != mx); } while (++iy < my); /* Descale the MCU rectangular if needed */ if (JD_USE_SCALE && jd->scale) { uint32_t x_, y_, r_, g_, b_, s_, w_; uint8_t *op; /* Get averaged RGB value of each square correcponds to a pixel */ s_ = jd->scale * 2; /* Bumber of shifts for averaging */ w_ = 1 << jd->scale; /* Width of square */ op = workbuf; iy = 0; do { ix = 0; do { rgb24 = &workbuf[(iy * mx + ix) * 3]; r_ = g_ = b_ = 0; y_ = 0; do { /* Accumulate RGB value in the square */ x_ = 0; do { r_ += rgb24[x_*3 ]; g_ += rgb24[x_*3+1]; b_ += rgb24[x_*3+2]; } while (++x_ < w_); rgb24 += mx * 3; } while (++y_ < w_); /* 
Put the averaged RGB value as a pixel */ op[0] = r_ >> s_; op[1] = g_ >> s_; op[2] = b_ >> s_; op += 3; } while ((ix += w_) < mx); } while ((iy += w_) < my); } } else { /* For only 1/8 scaling (left-top pixel in each block are the DC value of the block) */ /* Build a 1/8 descaled RGB MCU from discrete comopnents */ rgb24 = workbuf; pc = jd->mcubuf + mx * my; cb = pc[0] - 128; /* Get Cb/Cr component and restore right level */ cr = pc[64] - 128; iy = 0; do { py = jd->mcubuf; if (iy == 8) py += 64 * 2; ix = 0; do { yy = *py; /* Get Y component */ py += 64; /* Convert YCbCr to RGB */ rgb24[0] = BYTECLIP(yy + (((int32_t)(1.402 * (1<> FP_SHIFT)); rgb24[1] = BYTECLIP(yy - (((int32_t)(0.34414 * (1<> FP_SHIFT)); rgb24[2] = BYTECLIP(yy + (((int32_t)(1.772 * (1<> FP_SHIFT)); rgb24 += 3; } while ((ix += 8) < mx); } while ((iy += 8) < my); } /* Squeeze up pixel table if a part of MCU is to be truncated */ mx >>= jd->scale; if (rx < mx) { uint8_t *s_, *d; s_ = d = workbuf; for (size_t y_ = 1; y_ < ry; ++y_) { memcpy(d += rx * 3, s_ += mx * 3, rx * 3); /* Copy effective pixels */ } } /* Convert RGB888 to RGB565 if needed */ if (JD_FORMAT == 1) { uint8_t *s = workbuf; uint16_t *d = (uint16_t*)s; uint_fast16_t w; uint_fast16_t n = rx * ry; do { w = (*s++ & 0xF8) << 8; /* RRRRR----------- */ w |= (*s++ & 0xFC) << 3; /* -----GGGGGG----- */ w |= *s++ >> 3; /* -----------BBBBB */ *d++ = w; } while (--n); } /* Output the RGB rectangular */ return outfunc(jd->device, workbuf, &rect) ? 
JDR_OK : JDR_INTR; } /*-----------------------------------------------------------------------*/ /* Process restart interval */ /*-----------------------------------------------------------------------*/ static JRESULT restart ( lgfxJdec* jd, /* Pointer to the decompressor object */ uint16_t rstn /* Expected restert sequense number */ ) { uint16_t d; uint8_t *dp = jd->dptr, *dpend = jd->dpend; /* Discard padding bits and get two bytes from the input stream */ d = 0; for (int i = 0; i < 2; ++i) { if (++dp == dpend) { /* No input data is available, re-fill input buffer */ dp = jd->inbuf; jd->dpend = dpend = dp + jd->infunc(jd->device, dp, JD_SZBUF); if (dp == dpend) return JDR_INP; } d = (d << 8) | *dp; /* Get a byte */ } jd->dptr = dp; jd->dbit = 0; /* Check the marker */ if ((d & 0xFFD8) != 0xFFD0 || (d & 7) != (rstn & 7)) { return JDR_FMT1; /* Err: expected RSTn marker is not detected (may be collapted data) */ } /* Reset DC offset */ jd->dcv[2] = jd->dcv[1] = jd->dcv[0] = 0; return JDR_OK; } /*-----------------------------------------------------------------------*/ /* Analyze the JPEG image and Initialize decompressor object */ /*-----------------------------------------------------------------------*/ //#define LDB_WORD(ptr) (uint16_t)(((uint16_t)*((uint8_t*)(ptr))<<8)|(uint16_t)*(uint8_t*)((ptr)+1)) static inline uint16_t LDB_WORD(uint8_t* ptr) { return ptr[0]<<8 | ptr[1]; } JRESULT lgfx_jd_prepare ( lgfxJdec* jd, /* Blank decompressor object */ uint32_t (*infunc)(void*, uint8_t*, uint32_t), /* JPEG strem input function */ void* pool, /* Working buffer for the decompression session */ uint_fast16_t sz_pool, /* Size of working buffer */ void* dev /* I/O device identifier for the session */ ) { uint8_t *seg; uint32_t ofs; size_t n; int32_t rc; if (!pool) return JDR_PAR; jd->pool = (uint8_t*)pool; /* Work memroy */ jd->sz_pool = sz_pool; /* Size of given work memory */ jd->infunc = infunc; /* Stream input function */ jd->device = dev; /* I/O device identifier */ 
jd->nrst = 0; /* No restart interval (default) */ // memset(jd->huffbits, 0, sizeof(uint8_t*) * 4); /* Nulls pointers */ // memset(jd->huffcode, 0, sizeof(uint16_t*) * 4); // memset(jd->huffdata, 0, sizeof(uint8_t*) * 4); // memset(jd->qttbl, 0, sizeof(uint32_t*) * 4); jd->inbuf = seg = alloc_pool(jd, JD_SZBUF); /* Allocate stream input buffer */ if (!seg) return JDR_MEM1; if (infunc(dev, seg, 2) != 2) return JDR_INP;/* Check SOI marker */ if (LDB_WORD(seg) != 0xFFD8) return JDR_FMT1; /* Err: SOI is not detected */ ofs = 2; for (;;) { if (infunc(dev, seg, 1) != 1) return JDR_INP; if (seg[0] != 0xFF) return JDR_FMT1; /* Check a JPEG marker */ do { if (infunc(dev, &seg[1], 1) != 1) return JDR_INP; } while (seg[1] == 0xFF); if (infunc(dev, &seg[2], 2) != 2) return JDR_INP; uint_fast16_t len = LDB_WORD(seg + 2) - 2; /* Length field */ ofs += 4 + len; /* Number of bytes loaded */ switch (seg[1]) { /* Marker */ case 0xC0: /* SOF0 (baseline JPEG) */ {/* Load segment data */ if (len > JD_SZBUF) return JDR_MEM2; if (infunc(dev, seg, len) != len) return JDR_INP; jd->width = LDB_WORD(seg+3); /* Image width in unit of pixel */ jd->height = LDB_WORD(seg+1); /* Image height in unit of pixel */ jd->comps_in_frame = seg[5]; if (seg[5] != 1 && seg[5] != 3) return JDR_FMT3; /* Err: Supports only Y/Cb/Cr or Y(Grayscale) format */ /* Check three image components */ for (size_t i = 0; i < seg[5]; ++i) { uint_fast8_t b = seg[7 + 3 * i]; /* Get sampling factor */ if (!i) { /* Y component */ if (b != 0x11 && b != 0x22 && b != 0x21) { /* Check sampling factor */ return JDR_FMT3; /* Err: Supports only 4:4:4, 4:2:0 or 4:2:2 */ } jd->msx = b >> 4; jd->msy = b & 15; /* Size of MCU [blocks] */ } else { /* Cb/Cr component */ if (b != 0x11) return JDR_FMT3; /* Err: Sampling factor of Cr/Cb must be 1 */ } b = seg[8 + 3 * i]; /* Get dequantizer table ID for this component */ if (b > 3) return JDR_FMT3; /* Err: Invalid ID */ jd->qtid[i] = b; } } break; case 0xDD: /* DRI */ {/* Load segment data */ 
if (len > JD_SZBUF) return JDR_MEM2; if (infunc(dev, seg, len) != len) return JDR_INP; /* Get restart interval (MCUs) */ jd->nrst = LDB_WORD(seg); } break; case 0xC4: /* DHT */ {/* Load segment data */ if (len > JD_SZBUF) return JDR_MEM2; if (infunc(dev, seg, len) != len) return JDR_INP; /* Create huffman tables */ rc = create_huffman_tbl(jd, seg, len); if (rc) return (JRESULT)rc; } break; case 0xDB: /* DQT */ {/* Load segment data */ if (len > JD_SZBUF) return JDR_MEM2; if (infunc(dev, seg, len) != len) return JDR_INP; /* Create de-quantizer tables */ rc = create_qt_tbl(jd, seg, len); if (rc) return (JRESULT)rc; } break; case 0xDA: /* SOS */ {/* Load segment data */ if (len > JD_SZBUF) return JDR_MEM2; if (infunc(dev, seg, len) != len) return JDR_INP; if (!jd->width || !jd->height) return JDR_FMT1; /* Err: Invalid image size */ if (seg[0] != jd->comps_in_frame) return JDR_FMT3; /* Err: Supports only three color or grayscale components format */ /* Check if all tables corresponding to each components have been loaded */ for (size_t i = 0; i < jd->comps_in_frame; ++i) { uint_fast8_t b = seg[2 + 2 * i]; /* Get huffman table ID */ if (b != 0x00 && b != 0x11) return JDR_FMT3; /* Err: Different table number for DC/AC element */ b = i ? 
1 : 0; if (!jd->huffbits[b][0] || !jd->huffbits[b][1]) { /* Check dc/ac huffman table for this component */ return JDR_FMT1; /* Err: Nnot loaded */ } if (!jd->qttbl[jd->qtid[i]]) { /* Check dequantizer table for this component */ return JDR_FMT1; /* Err: Not loaded */ } } /* Allocate working buffer for MCU and RGB */ n = jd->msy * jd->msx; /* Number of Y blocks in the MCU */ if (!n) return JDR_FMT1; /* Err: SOF0 has not been loaded */ len = n * 64 * 2 + 64; /* Allocate buffer for IDCT and RGB output */ if (len < 256) len = 256; /* but at least 256 byte is required for IDCT */ jd->workbuf = alloc_pool(jd, len); /* and it may occupy a part of following MCU working buffer for RGB output */ if (!jd->workbuf) return JDR_MEM1; /* Err: not enough memory */ size_t mcubuf_len = (n + 2) * 64; jd->mcubuf = (int16_t*)alloc_pool(jd, mcubuf_len * sizeof(int16_t)); /* Allocate MCU working buffer */ if (!jd->mcubuf) return JDR_MEM1; /* Err: not enough memory */ if (jd->comps_in_frame == 1) { for (size_t i = n * 16; i < mcubuf_len; ++i) { jd->mcubuf[i] = 128; /* Cb/Cr clear ( for grayscale )*/ } } /* Pre-load the JPEG data to extract it from the bit stream */ ofs %= JD_SZBUF; /* Align read offset to JD_SZBUF */ int32_t dc = infunc(dev, seg + ofs, JD_SZBUF - ofs); jd->dptr = seg + ofs - 1; jd->dpend = seg + ofs + dc; jd->dbit = 0; /* Prepare to read bit stream */ } return JDR_OK; /* Initialization succeeded. Ready to decompress the JPEG image. */ case 0xC1: /* SOF1 */ case 0xC2: /* SOF2 */ case 0xC3: /* SOF3 */ case 0xC5: /* SOF5 */ case 0xC6: /* SOF6 */ case 0xC7: /* SOF7 */ case 0xC9: /* SOF9 */ case 0xCA: /* SOF10 */ case 0xCB: /* SOF11 */ case 0xCD: /* SOF13 */ case 0xCE: /* SOF14 */ case 0xCF: /* SOF15 */ case 0xD9: /* EOI */ return JDR_FMT3; /* Unsuppoted JPEG standard (may be progressive JPEG) */ default: /* Unknown segment (comment, exif or etc..) 
*/ /* Skip segment data */ if (infunc(dev, 0, len) != len) { /* Null pointer specifies to skip bytes of stream */ return JDR_INP; } } } } /*-----------------------------------------------------------------------*/ /* Start to decompress the JPEG picture */ /*-----------------------------------------------------------------------*/ JRESULT lgfx_jd_decomp ( lgfxJdec* jd, /* Initialized decompression object */ uint32_t (*outfunc)(void*, void*, JRECT*), /* RGB output function */ uint_fast8_t scale /* Output de-scaling factor (0 to 3) */ ) { uint32_t x, y, mx, my; uint32_t nrst, rst, rsc; JRESULT rc; if (scale > (JD_USE_SCALE ? 3 : 0)) return JDR_PAR; jd->scale = scale; nrst = jd->nrst; mx = jd->msx << 3; my = jd->msy << 3; /* Size of the MCU (pixel) */ jd->dcv[2] = jd->dcv[1] = jd->dcv[0] = 0; /* Initialize DC values */ rst = rsc = 0; rc = JDR_OK; for (y = 0; y < jd->height; y += my) { /* Vertical loop of MCUs */ x = 0; do { /* Horizontal loop of MCUs */ if (nrst && rst++ == nrst) { /* Process restart interval if enabled */ rc = restart(jd, rsc++); if (rc != JDR_OK) return rc; rst = 1; } rc = mcu_load(jd); /* Load an MCU (decompress huffman coded stream and apply IDCT) */ if (rc != JDR_OK) return rc; rc = mcu_output(jd, outfunc, x, y); /* Output the MCU (color space conversion, scaling and output) */ if (rc != JDR_OK) return rc; } while ( (x += mx) < jd->width); } return rc; }