From d97df262f6d25971e05c19731179996041f9e07f Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com>
Date: Sat, 25 May 2024 13:03:12 +0300
Subject: [PATCH] Merge pull request #25623 from asmorkalov:as/jpegturbo_3.0.3

Libjpeg-turbo update to version 3.0.3 #25623

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 3rdparty/libjpeg-turbo/CMakeLists.txt         |  105 +-
 3rdparty/libjpeg-turbo/LICENSE.md             |   33 +-
 3rdparty/libjpeg-turbo/README.ijg             |   24 +-
 3rdparty/libjpeg-turbo/README.md              |   78 +-
 3rdparty/libjpeg-turbo/jconfig.h.in           |   68 +-
 3rdparty/libjpeg-turbo/jconfig.h.win.in       |   33 -
 3rdparty/libjpeg-turbo/jconfigint.h.in        |   44 +-
 .../{src/jversion.h => jversion.h.in}         |   14 +-
 3rdparty/libjpeg-turbo/src/cjpeg.c            |  841 +++++
 3rdparty/libjpeg-turbo/src/cmyk.h             |   61 +
 3rdparty/libjpeg-turbo/src/djpeg.c            |  932 +++++
 3rdparty/libjpeg-turbo/src/example.c          |  643 ++++
 3rdparty/libjpeg-turbo/src/jcapimin.c         |   27 +-
 3rdparty/libjpeg-turbo/src/jcapistd.c         |   53 +-
 3rdparty/libjpeg-turbo/src/jccoefct.c         |   47 +-
 3rdparty/libjpeg-turbo/src/jccolext.c         |   64 +-
 3rdparty/libjpeg-turbo/src/jccolor.c          |  248 +-
 3rdparty/libjpeg-turbo/src/jcdctmgr.c         |   94 +-
 3rdparty/libjpeg-turbo/src/jcdiffct.c         |  411 +++
 3rdparty/libjpeg-turbo/src/jchuff.c           |  202 +-
 3rdparty/libjpeg-turbo/src/jchuff.h           |   16 +-
 3rdparty/libjpeg-turbo/src/jcinit.c           |  121 +-
 3rdparty/libjpeg-turbo/src/jclhuff.c          |  587 +++
 3rdparty/libjpeg-turbo/src/jclossls.c         |  319 ++
 3rdparty/libjpeg-turbo/src/jcmainct.c         |   45 +-
 3rdparty/libjpeg-turbo/src/jcmarker.c         |   36 +-
 3rdparty/libjpeg-turbo/src/jcmaster.c         |  312 +-
 3rdparty/libjpeg-turbo/src/jcmaster.h         |   43 +
 3rdparty/libjpeg-turbo/src/jcparam.c          |   52 +-
 3rdparty/libjpeg-turbo/src/jcphuff.c          |   90 +-
 3rdparty/libjpeg-turbo/src/jcprepct.c         |   88 +-
 3rdparty/libjpeg-turbo/src/jcsample.c         |  103 +-
 3rdparty/libjpeg-turbo/src/jcstest.c          |  126 +
 3rdparty/libjpeg-turbo/src/jctrans.c          |   16 +-
 3rdparty/libjpeg-turbo/src/jdapimin.c         |   21 +-
 3rdparty/libjpeg-turbo/src/jdapistd.c         |  162 +-
 3rdparty/libjpeg-turbo/src/jdatadst-tj.c      |  198 +
 3rdparty/libjpeg-turbo/src/jdatadst.c         |   10 -
 3rdparty/libjpeg-turbo/src/jdatasrc-tj.c      |  194 +
 3rdparty/libjpeg-turbo/src/jdatasrc.c         |    6 -
 3rdparty/libjpeg-turbo/src/jdcoefct.c         |   77 +-
 3rdparty/libjpeg-turbo/src/jdcoefct.h         |    5 +
 3rdparty/libjpeg-turbo/src/jdcol565.c         |   62 +-
 3rdparty/libjpeg-turbo/src/jdcolext.c         |   48 +-
 3rdparty/libjpeg-turbo/src/jdcolor.c          |  193 +-
 3rdparty/libjpeg-turbo/src/jdct.h             |  135 +-
 3rdparty/libjpeg-turbo/src/jddctmgr.c         |   61 +-
 3rdparty/libjpeg-turbo/src/jddiffct.c         |  403 ++
 3rdparty/libjpeg-turbo/src/jdhuff.c           |   20 +-
 3rdparty/libjpeg-turbo/src/jdhuff.h           |    7 +-
 3rdparty/libjpeg-turbo/src/jdinput.c          |   63 +-
 3rdparty/libjpeg-turbo/src/jdlhuff.c          |  302 ++
 3rdparty/libjpeg-turbo/src/jdlossls.c         |  289 ++
 3rdparty/libjpeg-turbo/src/jdmainct.c         |  112 +-
 3rdparty/libjpeg-turbo/src/jdmainct.h         |   15 +-
 3rdparty/libjpeg-turbo/src/jdmarker.c         |   26 +-
 3rdparty/libjpeg-turbo/src/jdmaster.c         |  643 ++--
 3rdparty/libjpeg-turbo/src/jdmerge.c          |   72 +-
 3rdparty/libjpeg-turbo/src/jdmerge.h          |    9 +-
 3rdparty/libjpeg-turbo/src/jdmrg565.c         |   43 +-
 3rdparty/libjpeg-turbo/src/jdmrgext.c         |   40 +-
 3rdparty/libjpeg-turbo/src/jdphuff.c          |    4 +-
 3rdparty/libjpeg-turbo/src/jdpostct.c         |  109 +-
 3rdparty/libjpeg-turbo/src/jdsample.c         |  128 +-
 3rdparty/libjpeg-turbo/src/jdsample.h         |    9 +-
 3rdparty/libjpeg-turbo/src/jdtrans.c          |   12 +-
 3rdparty/libjpeg-turbo/src/jerror.c           |   18 +-
 3rdparty/libjpeg-turbo/src/jerror.h           |   17 +-
 3rdparty/libjpeg-turbo/src/jfdctfst.c         |    2 +-
 3rdparty/libjpeg-turbo/src/jfdctint.c         |    4 +-
 3rdparty/libjpeg-turbo/src/jidctflt.c         |   14 +-
 3rdparty/libjpeg-turbo/src/jidctfst.c         |   18 +-
 3rdparty/libjpeg-turbo/src/jidctint.c         |  136 +-
 3rdparty/libjpeg-turbo/src/jidctred.c         |   38 +-
 3rdparty/libjpeg-turbo/src/jinclude.h         |   16 +-
 3rdparty/libjpeg-turbo/src/jlossls.h          |  101 +
 3rdparty/libjpeg-turbo/src/jmemmgr.c          |  192 +-
 3rdparty/libjpeg-turbo/src/jmemsys.h          |   31 -
 3rdparty/libjpeg-turbo/src/jmorecfg.h         |   41 +-
 .../src/{jpeg_nbits_table.h => jpeg_nbits.c}  |   38 +-
 3rdparty/libjpeg-turbo/src/jpeg_nbits.h       |   43 +
 .../src/{jpegcomp.h => jpegapicomp.h}         |    2 +-
 3rdparty/libjpeg-turbo/src/jpegint.h          |  226 +-
 3rdparty/libjpeg-turbo/src/jpeglib.h          |  153 +-
 3rdparty/libjpeg-turbo/src/jquant1.c          |  164 +-
 3rdparty/libjpeg-turbo/src/jquant2.c          |  130 +-
 3rdparty/libjpeg-turbo/src/jsamplecomp.h      |  336 ++
 3rdparty/libjpeg-turbo/src/jsimd.h            |   12 +-
 3rdparty/libjpeg-turbo/src/jsimd_none.c       |  431 ---
 3rdparty/libjpeg-turbo/src/jutils.c           |   25 +-
 3rdparty/libjpeg-turbo/src/jversion.h.in      |   12 +-
 3rdparty/libjpeg-turbo/src/libjpeg.map.in     |   11 +
 3rdparty/libjpeg-turbo/src/libjpeg.txt        | 3282 +++++++++++++++++
 3rdparty/libjpeg-turbo/src/rdbmp.c            |  689 ++++
 3rdparty/libjpeg-turbo/src/rdcolmap.c         |  261 ++
 3rdparty/libjpeg-turbo/src/rdgif.c            |  720 ++++
 3rdparty/libjpeg-turbo/src/rdjpgcom.c         |  493 +++
 3rdparty/libjpeg-turbo/src/rdppm.c            |  890 +++++
 3rdparty/libjpeg-turbo/src/rdswitch.c         |  428 +++
 3rdparty/libjpeg-turbo/src/rdtarga.c          |  507 +++
 .../libjpeg-turbo/src/simd/CMakeLists.txt     |   76 +-
 .../src/simd/arm/aarch32/jsimd.c              |   14 +-
 .../src/simd/arm/aarch64/jsimd.c              |   19 +-
 .../libjpeg-turbo/src/simd/arm/jcphuff-neon.c |  183 +-
 .../libjpeg-turbo/src/simd/arm/jdcolor-neon.c |    1 -
 .../libjpeg-turbo/src/simd/arm/jdmerge-neon.c |    1 -
 .../src/simd/arm/jidctint-neon.c              |    1 -
 .../src/simd/i386/jccolext-avx2.asm           |   22 +-
 .../src/simd/i386/jccolext-mmx.asm            |   22 +-
 .../src/simd/i386/jccolext-sse2.asm           |   22 +-
 .../src/simd/i386/jccolor-avx2.asm            |    6 +-
 .../src/simd/i386/jccolor-mmx.asm             |    6 +-
 .../src/simd/i386/jccolor-sse2.asm            |    6 +-
 .../src/simd/i386/jcgray-avx2.asm             |    6 +-
 .../src/simd/i386/jcgray-mmx.asm              |    6 +-
 .../src/simd/i386/jcgray-sse2.asm             |    6 +-
 .../src/simd/i386/jcgryext-avx2.asm           |   22 +-
 .../src/simd/i386/jcgryext-mmx.asm            |   22 +-
 .../src/simd/i386/jcgryext-sse2.asm           |   22 +-
 .../src/simd/i386/jchuff-sse2.asm             |   17 +-
 .../src/simd/i386/jcsample-avx2.asm           |   18 +-
 .../src/simd/i386/jcsample-mmx.asm            |   14 +-
 .../src/simd/i386/jcsample-sse2.asm           |   18 +-
 .../src/simd/i386/jdcolext-avx2.asm           |   20 +-
 .../src/simd/i386/jdcolext-mmx.asm            |   20 +-
 .../src/simd/i386/jdcolext-sse2.asm           |   20 +-
 .../src/simd/i386/jdcolor-avx2.asm            |    6 +-
 .../src/simd/i386/jdcolor-mmx.asm             |    6 +-
 .../src/simd/i386/jdcolor-sse2.asm            |    6 +-
 .../src/simd/i386/jdmerge-avx2.asm            |    6 +-
 .../src/simd/i386/jdmerge-mmx.asm             |    6 +-
 .../src/simd/i386/jdmerge-sse2.asm            |    6 +-
 .../src/simd/i386/jdmrgext-avx2.asm           |   20 +-
 .../src/simd/i386/jdmrgext-mmx.asm            |   20 +-
 .../src/simd/i386/jdmrgext-sse2.asm           |   20 +-
 .../src/simd/i386/jdsample-avx2.asm           |   58 +-
 .../src/simd/i386/jdsample-mmx.asm            |   58 +-
 .../src/simd/i386/jdsample-sse2.asm           |   58 +-
 .../src/simd/i386/jfdctflt-3dn.asm            |   16 +-
 .../src/simd/i386/jfdctflt-sse.asm            |   16 +-
 .../src/simd/i386/jfdctfst-mmx.asm            |   16 +-
 .../src/simd/i386/jfdctfst-sse2.asm           |   12 +-
 .../src/simd/i386/jfdctint-avx2.asm           |   24 +-
 .../src/simd/i386/jfdctint-mmx.asm            |   16 +-
 .../src/simd/i386/jfdctint-sse2.asm           |   12 +-
 .../src/simd/i386/jidctflt-3dn.asm            |   22 +-
 .../src/simd/i386/jidctflt-sse.asm            |   38 +-
 .../src/simd/i386/jidctflt-sse2.asm           |   38 +-
 .../src/simd/i386/jidctfst-mmx.asm            |   18 +-
 .../src/simd/i386/jidctfst-sse2.asm           |   14 +-
 .../src/simd/i386/jidctint-avx2.asm           |   26 +-
 .../src/simd/i386/jidctint-mmx.asm            |   18 +-
 .../src/simd/i386/jidctint-sse2.asm           |   14 +-
 .../src/simd/i386/jidctred-mmx.asm            |   18 +-
 .../src/simd/i386/jidctred-sse2.asm           |   16 +-
 .../src/simd/i386/jquant-3dn.asm              |    6 +-
 .../src/simd/i386/jquant-mmx.asm              |    8 +-
 .../src/simd/i386/jquant-sse.asm              |    6 +-
 .../src/simd/i386/jquantf-sse2.asm            |    6 +-
 .../src/simd/i386/jquanti-sse2.asm            |    6 +-
 3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c  |   84 +-
 3rdparty/libjpeg-turbo/src/simd/jsimd.h       |   12 +-
 3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c  |   14 +-
 .../libjpeg-turbo/src/simd/mips64/jsimd.c     |   14 +-
 .../libjpeg-turbo/src/simd/nasm/jsimdext.inc  |   66 +-
 .../libjpeg-turbo/src/simd/powerpc/jsimd.c    |   27 +-
 .../src/simd/x86_64/jccolext-avx2.asm         |   24 +-
 .../src/simd/x86_64/jccolext-sse2.asm         |   24 +-
 .../src/simd/x86_64/jccolor-avx2.asm          |    6 +-
 .../src/simd/x86_64/jccolor-sse2.asm          |    6 +-
 .../src/simd/x86_64/jcgray-avx2.asm           |    6 +-
 .../src/simd/x86_64/jcgray-sse2.asm           |    6 +-
 .../src/simd/x86_64/jcgryext-avx2.asm         |   24 +-
 .../src/simd/x86_64/jcgryext-sse2.asm         |   24 +-
 .../src/simd/x86_64/jchuff-sse2.asm           |   72 +-
 .../src/simd/x86_64/jcphuff-sse2.asm          |   42 +-
 .../src/simd/x86_64/jcsample-avx2.asm         |   14 +-
 .../src/simd/x86_64/jcsample-sse2.asm         |   14 +-
 .../src/simd/x86_64/jdcolext-avx2.asm         |   24 +-
 .../src/simd/x86_64/jdcolext-sse2.asm         |   24 +-
 .../src/simd/x86_64/jdcolor-avx2.asm          |    6 +-
 .../src/simd/x86_64/jdcolor-sse2.asm          |    6 +-
 .../src/simd/x86_64/jdmerge-avx2.asm          |    6 +-
 .../src/simd/x86_64/jdmerge-sse2.asm          |    6 +-
 .../src/simd/x86_64/jdmrgext-avx2.asm         |   30 +-
 .../src/simd/x86_64/jdmrgext-sse2.asm         |   30 +-
 .../src/simd/x86_64/jdsample-avx2.asm         |   56 +-
 .../src/simd/x86_64/jdsample-sse2.asm         |   46 +-
 .../src/simd/x86_64/jfdctflt-sse.asm          |   28 +-
 .../src/simd/x86_64/jfdctfst-sse2.asm         |   28 +-
 .../src/simd/x86_64/jfdctint-avx2.asm         |   24 +-
 .../src/simd/x86_64/jfdctint-sse2.asm         |   28 +-
 .../src/simd/x86_64/jidctflt-sse2.asm         |   48 +-
 .../src/simd/x86_64/jidctfst-sse2.asm         |   30 +-
 .../src/simd/x86_64/jidctint-avx2.asm         |   28 +-
 .../src/simd/x86_64/jidctint-sse2.asm         |   30 +-
 .../src/simd/x86_64/jidctred-sse2.asm         |   36 +-
 .../src/simd/x86_64/jquantf-sse2.asm          |   14 +-
 .../src/simd/x86_64/jquanti-avx2.asm          |   14 +-
 .../src/simd/x86_64/jquanti-sse2.asm          |   14 +-
 .../libjpeg-turbo/src/simd/x86_64/jsimd.c     |   60 +-
 .../src/simd/x86_64/jsimdcpu.asm              |    4 +
 3rdparty/libjpeg-turbo/src/structure.txt      |  981 +++++
 3rdparty/libjpeg-turbo/src/tjbench.c          | 1323 +++++++
 3rdparty/libjpeg-turbo/src/tjutil.c           |   70 +
 3rdparty/libjpeg-turbo/src/tjutil.h           |   53 +
 3rdparty/libjpeg-turbo/src/transupp.c         | 2377 ++++++++++++
 3rdparty/libjpeg-turbo/src/transupp.h         |  231 ++
 3rdparty/libjpeg-turbo/src/turbojpeg-jni.c    | 1400 +++++++
 3rdparty/libjpeg-turbo/src/turbojpeg-mapfile  |  108 +
 .../libjpeg-turbo/src/turbojpeg-mapfile.jni   |  142 +
 3rdparty/libjpeg-turbo/src/turbojpeg-mp.c     |  541 +++
 3rdparty/libjpeg-turbo/src/turbojpeg.c        | 2921 +++++++++++++++
 3rdparty/libjpeg-turbo/src/turbojpeg.h        | 2328 ++++++++++++
 214 files changed, 30212 insertions(+), 3523 deletions(-)
 delete mode 100644 3rdparty/libjpeg-turbo/jconfig.h.win.in
 rename 3rdparty/libjpeg-turbo/{src/jversion.h => jversion.h.in} (79%)
 create mode 100644 3rdparty/libjpeg-turbo/src/cjpeg.c
 create mode 100644 3rdparty/libjpeg-turbo/src/cmyk.h
 create mode 100644 3rdparty/libjpeg-turbo/src/djpeg.c
 create mode 100644 3rdparty/libjpeg-turbo/src/example.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jcdiffct.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jclhuff.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jclossls.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jcmaster.h
 create mode 100644 3rdparty/libjpeg-turbo/src/jcstest.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jdatadst-tj.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jdatasrc-tj.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jddiffct.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jdlhuff.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jdlossls.c
 create mode 100644 3rdparty/libjpeg-turbo/src/jlossls.h
 rename 3rdparty/libjpeg-turbo/src/{jpeg_nbits_table.h => jpeg_nbits.c} (99%)
 create mode 100644 3rdparty/libjpeg-turbo/src/jpeg_nbits.h
 rename 3rdparty/libjpeg-turbo/src/{jpegcomp.h => jpegapicomp.h} (98%)
 create mode 100644 3rdparty/libjpeg-turbo/src/jsamplecomp.h
 delete mode 100644 3rdparty/libjpeg-turbo/src/jsimd_none.c
 create mode 100644 3rdparty/libjpeg-turbo/src/libjpeg.map.in
 create mode 100644 3rdparty/libjpeg-turbo/src/libjpeg.txt
 create mode 100644 3rdparty/libjpeg-turbo/src/rdbmp.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdcolmap.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdgif.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdjpgcom.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdppm.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdswitch.c
 create mode 100644 3rdparty/libjpeg-turbo/src/rdtarga.c
 create mode 100644 3rdparty/libjpeg-turbo/src/structure.txt
 create mode 100644 3rdparty/libjpeg-turbo/src/tjbench.c
 create mode 100644 3rdparty/libjpeg-turbo/src/tjutil.c
 create mode 100644 3rdparty/libjpeg-turbo/src/tjutil.h
 create mode 100644 3rdparty/libjpeg-turbo/src/transupp.c
 create mode 100644 3rdparty/libjpeg-turbo/src/transupp.h
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg-jni.c
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg-mapfile
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg-mapfile.jni
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg-mp.c
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg.c
 create mode 100644 3rdparty/libjpeg-turbo/src/turbojpeg.h

diff --git a/3rdparty/libjpeg-turbo/CMakeLists.txt b/3rdparty/libjpeg-turbo/CMakeLists.txt
index a995707852..79ffab9cd3 100644
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@@ -1,12 +1,43 @@
 project(${JPEG_LIBRARY} C)
 
-ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough)
+macro(boolean_number var)
+  if(${var})
+    set(${var} 1 ${ARGN})
+  else()
+    set(${var} 0 ${ARGN})
+  endif()
+endmacro()
 
-set(VERSION_MAJOR 2)
-set(VERSION_MINOR 1)
-set(VERSION_REVISION 3)
-set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2001003)
+ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-64-to-32 -Wimplicit-fallthrough)
+if(APPLE)
+  ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-variable) # NEON flags are not used on Mac
+endif()
+
+if(CV_GCC AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
+  # src/jchuff.c:1042:22: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
+  ocv_warnings_disable(CMAKE_C_FLAGS -Wstringop-overflow)
+endif()
+
+set(VERSION 3.0.3)
+set(COPYRIGHT_YEAR "1991-2024")
+string(REPLACE "." ";" VERSION_TRIPLET ${VERSION})
+list(GET VERSION_TRIPLET 0 VERSION_MAJOR)
+list(GET VERSION_TRIPLET 1 VERSION_MINOR)
+list(GET VERSION_TRIPLET 2 VERSION_REVISION)
+function(pad_number NUMBER OUTPUT_LEN)
+  string(LENGTH "${${NUMBER}}" INPUT_LEN)
+  if(INPUT_LEN LESS OUTPUT_LEN)
+    math(EXPR ZEROES "${OUTPUT_LEN} - ${INPUT_LEN} - 1")
+    set(NUM ${${NUMBER}})
+    foreach(C RANGE ${ZEROES})
+      set(NUM "0${NUM}")
+    endforeach()
+    set(${NUMBER} ${NUM} PARENT_SCOPE)
+  endif()
+endfunction()
+pad_number(VERSION_MINOR 3)
+pad_number(VERSION_REVISION 3)
+set(LIBJPEG_TURBO_VERSION_NUMBER ${VERSION_MAJOR}${VERSION_MINOR}${VERSION_REVISION})
 
 string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -66,8 +97,8 @@ include(CheckCSourceCompiles)
 include(CheckIncludeFiles)
 include(CheckTypeSize)
 
-check_type_size("size_t" SIZEOF_SIZE_T)
-check_type_size("unsigned long" SIZEOF_UNSIGNED_LONG)
+check_type_size("size_t" SIZE_T)
+check_type_size("unsigned long" UNSIGNED_LONG)
 
 if(SIZEOF_SIZE_T EQUAL SIZEOF_UNSIGNED_LONG)
   check_c_source_compiles("int main(int argc, char **argv) { unsigned long a = argc;  return __builtin_ctzl(a); }"
@@ -110,26 +141,27 @@ set(JPEG_LIB_VERSION "${VERSION}-${JPEG_LIB_VERSION}" PARENT_SCOPE)
 
 set(THREAD_LOCAL "")  # WITH_TURBOJPEG is not used
 
+add_definitions(-DNO_GETENV -DNO_PUTENV)
+
 if(MSVC)
   add_definitions(-W3 -wd4996 -wd4018)
 endif()
 
-if(WIN32)
-  configure_file(jconfig.h.win.in jconfig.h)
-else()
-  configure_file(jconfig.h.in jconfig.h)
-endif()
-configure_file(jconfigint.h.in jconfigint.h)
-
 include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
-        jcicc.c jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c
-        jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
-        jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
-        jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
-        jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
+set(JPEG16_SOURCES jcapistd.c jccolor.c jcdiffct.c jclossls.c jcmainct.c
+    jcprepct.c jcsample.c jdapistd.c jdcolor.c jddiffct.c jdlossls.c jdmainct.c
+    jdpostct.c jdsample.c jutils.c)
+
+set(JPEG12_SOURCES ${JPEG16_SOURCES} jccoefct.c jcdctmgr.c jdcoefct.c
+    jddctmgr.c jdmerge.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c
+    jidctred.c jquant1.c jquant2.c)
+
+set(JPEG_SOURCES ${JPEG12_SOURCES} jcapimin.c jchuff.c jcicc.c jcinit.c
+    jclhuff.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c jctrans.c
+    jdapimin.c jdatadst.c jdatasrc.c jdhuff.c jdicc.c jdinput.c jdlhuff.c
+    jdmarker.c jdmaster.c jdphuff.c jdtrans.c jerror.c jfdctflt.c jmemmgr.c
+    jmemnobs.c jpeg_nbits.c)
 
 if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
   set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
@@ -143,7 +175,7 @@ if(WITH_ARITH_DEC)
   set(JPEG_SOURCES ${JPEG_SOURCES} jdarith.c)
 endif()
 
-if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang")
+if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
   # Use the maximum optimization level for release builds
   foreach(var CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO)
     if(${var} MATCHES "-O2")
@@ -166,6 +198,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
   endif()
 endif()
 
+include(CheckTypeSize)
+check_type_size("size_t" SIZE_T)
+check_type_size("unsigned long" UNSIGNED_LONG)
+
 if(ENABLE_LIBJPEG_TURBO_SIMD)
   add_subdirectory(src/simd)
   if(NEON_INTRINSICS)
@@ -181,19 +217,28 @@ if(WITH_SIMD)
   if(MSVC_IDE OR XCODE)
     set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
   endif()
-else()
-  add_library(jsimd OBJECT src/jsimd_none.c)
-  set_target_properties(jsimd PROPERTIES FOLDER "3rdparty")
-  if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
-    set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
-  endif()
+  set(SIMD_TARGET_OBJECTS $<TARGET_OBJECTS:simd>)
 endif()
 
+configure_file(jversion.h.in jversion.h)
+configure_file(jconfig.h.in jconfig.h)
+configure_file(jconfigint.h.in jconfigint.h)
+
+ocv_list_add_prefix(JPEG16_SOURCES src/)
+ocv_list_add_prefix(JPEG12_SOURCES src/)
 ocv_list_add_prefix(JPEG_SOURCES src/)
 
 set(JPEG_SOURCES ${JPEG_SOURCES} ${SIMD_OBJS})
 
-add_library(${JPEG_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${JPEG_SOURCES} $<TARGET_OBJECTS:jsimd> ${SIMD_OBJS})
+add_library(jpeg12-static OBJECT ${JPEG12_SOURCES})
+set_property(TARGET jpeg12-static PROPERTY COMPILE_FLAGS
+  "-DBITS_IN_JSAMPLE=12")
+add_library(jpeg16-static OBJECT ${JPEG16_SOURCES})
+set_property(TARGET jpeg16-static PROPERTY COMPILE_FLAGS
+  "-DBITS_IN_JSAMPLE=16")
+add_library(${JPEG_LIBRARY} STATIC ${JPEG_SOURCES} ${SIMD_TARGET_OBJECTS}
+  ${SIMD_OBJS} $<TARGET_OBJECTS:jpeg12-static>
+  $<TARGET_OBJECTS:jpeg16-static>)
 
 set_target_properties(${JPEG_LIBRARY}
   PROPERTIES OUTPUT_NAME ${JPEG_LIBRARY}
diff --git a/3rdparty/libjpeg-turbo/LICENSE.md b/3rdparty/libjpeg-turbo/LICENSE.md
index d753e1d76a..2204864fa1 100644
--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@@ -1,30 +1,33 @@
 libjpeg-turbo Licenses
 ======================
 
-libjpeg-turbo is covered by three compatible BSD-style open source licenses:
+libjpeg-turbo is covered by two compatible BSD-style open source licenses:
 
 - The IJG (Independent JPEG Group) License, which is listed in
   [README.ijg](README.ijg)
 
-  This license applies to the libjpeg API library and associated programs
-  (any code inherited from libjpeg, and any modifications to that code.)
+  This license applies to the libjpeg API library and associated programs,
+  including any code inherited from libjpeg and any modifications to that
+  code.  Note that the libjpeg-turbo SIMD source code bears the
+  [zlib License](https://opensource.org/licenses/Zlib), but in the context of
+  the overall libjpeg API library, the terms of the zlib License are subsumed
+  by the terms of the IJG License.
 
 - The Modified (3-clause) BSD License, which is listed below
 
-  This license covers the TurboJPEG API library and associated programs, as
-  well as the build system.
-
-- The [zlib License](https://opensource.org/licenses/Zlib)
-
-  This license is a subset of the other two, and it covers the libjpeg-turbo
-  SIMD extensions.
+  This license applies to the TurboJPEG API library and associated programs, as
+  well as the build system.  Note that the TurboJPEG API library wraps the
+  libjpeg API library, so in the context of the overall TurboJPEG API library,
+  both the terms of the IJG License and the terms of the Modified (3-clause)
+  BSD License apply.
 
 
 Complying with the libjpeg-turbo Licenses
 =========================================
 
 This section provides a roll-up of the libjpeg-turbo licensing terms, to the
-best of our understanding.
+best of our understanding.  This is not a license in and of itself.  It is
+intended solely for clarification.
 
 1.  If you are distributing a modified version of the libjpeg-turbo source,
     then:
@@ -38,7 +41,7 @@ best of our understanding.
         - Clauses 1 and 3 of the zlib License
 
     2.  You must add your own copyright notice to the header of each source
-        file you modified, so others can tell that you modified that file (if
+        file you modified, so others can tell that you modified that file.  (If
         there is not an existing copyright header in that file, then you can
         simply add a notice stating that you modified the file.)
 
@@ -91,7 +94,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================
 
-Copyright (C)2009-2022 D. R. Commander.  All Rights Reserved.<br>
+Copyright (C)2009-2023 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -119,8 +122,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
 
-Why Three Licenses?
-===================
+Why Two Licenses?
+=================
 
 The zlib License could have been used instead of the Modified (3-clause) BSD
 License, and since the IJG License effectively subsumes the distribution
diff --git a/3rdparty/libjpeg-turbo/README.ijg b/3rdparty/libjpeg-turbo/README.ijg
index 9453c19501..8f3768265f 100644
--- a/3rdparty/libjpeg-turbo/README.ijg
+++ b/3rdparty/libjpeg-turbo/README.ijg
@@ -43,7 +43,7 @@ User documentation:
   change.log        Version-to-version change highlights.
 Programmer and internal documentation:
   libjpeg.txt       How to use the JPEG library in your own programs.
-  example.txt       Sample code for calling the JPEG library.
+  example.c         Sample code for calling the JPEG library.
   structure.txt     Overview of the JPEG library's internal structure.
   coderules.txt     Coding style rules --- please read if you contribute code.
 
@@ -68,17 +68,17 @@ other abrupt features may not compress well with JPEG, and a higher JPEG
 quality may have to be used to avoid visible compression artifacts with such
 images.
 
-JPEG is lossy, meaning that the output pixels are not necessarily identical to
-the input pixels.  However, on photographic content and other "smooth" images,
-very good compression ratios can be obtained with no visible compression
-artifacts, and extremely high compression ratios are possible if you are
-willing to sacrifice image quality (by reducing the "quality" setting in the
-compressor.)
+JPEG is normally lossy, meaning that the output pixels are not necessarily
+identical to the input pixels.  However, on photographic content and other
+"smooth" images, very good compression ratios can be obtained with no visible
+compression artifacts, and extremely high compression ratios are possible if
+you are willing to sacrifice image quality (by reducing the "quality" setting
+in the compressor.)
 
-This software implements JPEG baseline, extended-sequential, and progressive
-compression processes.  Provision is made for supporting all variants of these
-processes, although some uncommon parameter settings aren't implemented yet.
-We have made no provision for supporting the hierarchical or lossless
+This software implements JPEG baseline, extended-sequential, progressive, and
+lossless compression processes.  Provision is made for supporting all variants
+of these processes, although some uncommon parameter settings aren't
+implemented yet.  We have made no provision for supporting the hierarchical
 processes defined in the standard.
 
 We provide a set of library routines for reading and writing JPEG image files,
@@ -241,7 +241,7 @@ This software implements ITU T.81 | ISO/IEC 10918 with some extensions from
 ITU T.871 | ISO/IEC 10918-5 (JPEG File Interchange Format-- see REFERENCES).
 Informally, the term "JPEG image" or "JPEG file" most often refers to JFIF or
 a subset thereof, but there are other formats containing the name "JPEG" that
-are incompatible with the DCT-based JPEG standard or with JFIF (for instance,
+are incompatible with the original JPEG standard or with JFIF (for instance,
 JPEG 2000 and JPEG XR).  This software therefore does not support these
 formats.  Indeed, one of the original reasons for developing this free software
 was to help force convergence on a common, interoperable format standard for
diff --git a/3rdparty/libjpeg-turbo/README.md b/3rdparty/libjpeg-turbo/README.md
index 01e391ea7c..923e61d231 100644
--- a/3rdparty/libjpeg-turbo/README.md
+++ b/3rdparty/libjpeg-turbo/README.md
@@ -21,7 +21,26 @@ derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
 VirtualGL projects made numerous enhancements to the codec in 2009, and in
 early 2010, libjpeg-turbo spun off into an independent project, with the goal
 of making high-speed JPEG compression/decompression technology available to a
-broader range of users and developers.
+broader range of users and developers.  libjpeg-turbo is an ISO/IEC and ITU-T
+reference implementation of the JPEG standard.
+
+More information about libjpeg-turbo can be found at
+<https://libjpeg-turbo.org>.
+
+
+Funding
+=======
+
+libjpeg-turbo is an independent open source project, but we rely on patronage
+and funded development in order to maintain that independence.  The easiest way
+to ensure that libjpeg-turbo remains community-focused and free of any one
+organization's agenda is to
+[sponsor our project through GitHub](https://github.com/sponsors/libjpeg-turbo).
+All sponsorship money goes directly toward funding the labor necessary to
+maintain libjpeg-turbo, support the user community, and implement bug fixes and
+strategically important features.
+
+[![Sponsor libjpeg-turbo](https://img.shields.io/github/sponsors/libjpeg-turbo?label=Sponsor&logo=GitHub)](https://github.com/sponsors/libjpeg-turbo)
 
 
 License
@@ -245,16 +264,6 @@ programs that need them, without breaking ABI compatibility for programs that
 don't, and it allows those functions to be provided in the "official"
 libjpeg-turbo binaries.
 
-Those who are concerned about maintaining strict conformance with the libjpeg
-v6b or v7 API can pass an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to
-building libjpeg-turbo.  This will restore the pre-1.3 behavior, in which
-`jpeg_mem_src()` and `jpeg_mem_dest()` are only included when emulating the
-libjpeg v8 API/ABI.
-
-On Un*x systems, including the in-memory source/destination managers changes
-the dynamic library version from 62.2.0 to 62.3.0 if using libjpeg v6b API/ABI
-emulation and from 7.2.0 to 7.3.0 if using libjpeg v7 API/ABI emulation.
-
 Note that, on most Un*x systems, the dynamic linker will not look for a
 function in a library until that function is actually used.  Thus, if a program
 is built against libjpeg-turbo 1.3+ and uses `jpeg_mem_src()` or
@@ -274,30 +283,35 @@ Mathematical Compatibility
 ==========================
 
 For the most part, libjpeg-turbo should produce identical output to libjpeg
-v6b.  The one exception to this is when using the floating point DCT/IDCT, in
-which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the
-following reasons:
+v6b.  There are two exceptions:
 
-- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so
-  slightly more accurate than the implementation in libjpeg v6b, but not by
-  any amount perceptible to human vision (generally in the range of 0.01 to
-  0.08 dB gain in PNSR.)
+1. When decompressing a JPEG image that uses 4:4:0 chrominance subsampling, the
+outputs of libjpeg v6b and libjpeg-turbo can differ because libjpeg-turbo
+implements a "fancy" (smooth) 4:4:0 upsampling algorithm and libjpeg did not.
 
-- When not using the SIMD extensions, libjpeg-turbo uses the more accurate
-  (and slightly faster) floating point IDCT algorithm introduced in libjpeg
-  v8a as opposed to the algorithm used in libjpeg v6b.  It should be noted,
-  however, that this algorithm basically brings the accuracy of the floating
-  point IDCT in line with the accuracy of the accurate integer IDCT.  The
-  floating point DCT/IDCT algorithms are mainly a legacy feature, and they do
-  not produce significantly more accuracy than the accurate integer algorithms
-  (to put numbers on this, the typical difference in PNSR between the two
-  algorithms is less than 0.10 dB, whereas changing the quality level by 1 in
-  the upper range of the quality scale is typically more like a 1.0 dB
-  difference.)
+2. When using the floating point DCT/IDCT, the outputs of libjpeg v6b and
+libjpeg-turbo can differ for the following reasons:
 
-- If the floating point algorithms in libjpeg-turbo are not implemented using
-  SIMD instructions on a particular platform, then the accuracy of the
-  floating point DCT/IDCT can depend on the compiler settings.
+    - The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever
+      so slightly more accurate than the implementation in libjpeg v6b, but not
+      by any amount perceptible to human vision (generally in the range of 0.01
+      to 0.08 dB gain in PNSR.)
+
+    - When not using the SIMD extensions, libjpeg-turbo uses the more accurate
+      (and slightly faster) floating point IDCT algorithm introduced in libjpeg
+      v8a as opposed to the algorithm used in libjpeg v6b.  It should be noted,
+      however, that this algorithm basically brings the accuracy of the
+      floating point IDCT in line with the accuracy of the accurate integer
+      IDCT.  The floating point DCT/IDCT algorithms are mainly a legacy
+      feature, and they do not produce significantly more accuracy than the
+      accurate integer algorithms.  (To put numbers on this, the typical
+      difference in PNSR between the two algorithms is less than 0.10 dB,
+      whereas changing the quality level by 1 in the upper range of the quality
+      scale is typically more like a 1.0 dB difference.)
+
+    - If the floating point algorithms in libjpeg-turbo are not implemented
+      using SIMD instructions on a particular platform, then the accuracy of
+      the floating point DCT/IDCT can depend on the compiler settings.
 
 While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood it is
 still using the same algorithms as libjpeg v6b, so there are several specific
diff --git a/3rdparty/libjpeg-turbo/jconfig.h.in b/3rdparty/libjpeg-turbo/jconfig.h.in
index d4284d97b8..6cb82962ff 100644
--- a/3rdparty/libjpeg-turbo/jconfig.h.in
+++ b/3rdparty/libjpeg-turbo/jconfig.h.in
@@ -9,60 +9,52 @@
 /* libjpeg-turbo version in integer form */
 #define LIBJPEG_TURBO_VERSION_NUMBER  @LIBJPEG_TURBO_VERSION_NUMBER@
 
-/* Support arithmetic encoding */
+/* Support arithmetic encoding when using 8-bit samples */
 #cmakedefine C_ARITH_CODING_SUPPORTED 1
 
-/* Support arithmetic decoding */
+/* Support arithmetic decoding when using 8-bit samples */
 #cmakedefine D_ARITH_CODING_SUPPORTED 1
 
 /* Support in-memory source/destination managers */
-#cmakedefine MEM_SRCDST_SUPPORTED 1
+#define MEM_SRCDST_SUPPORTED  1
 
-/* Use accelerated SIMD routines. */
+/* Use accelerated SIMD routines when using 8-bit samples */
 #cmakedefine WITH_SIMD 1
 
-/*
- * Define BITS_IN_JSAMPLE as either
- *   8   for 8-bit sample values (the usual setting)
- *   12  for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
+/* This version of libjpeg-turbo supports run-time selection of data precision,
+ * so BITS_IN_JSAMPLE is no longer used to specify the data precision at build
+ * time.  However, some downstream software expects the macro to be defined.
+ * Since 12-bit data precision is an opt-in feature that requires explicitly
+ * calling 12-bit-specific libjpeg API functions and using 12-bit-specific data
+ * types, the unmodified portion of the libjpeg API still behaves as if it were
+ * built for 8-bit precision, and JSAMPLE is still literally an 8-bit data
+ * type.  Thus, it is correct to define BITS_IN_JSAMPLE to 8 here.
  */
+#ifndef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  8
+#endif
 
-#define BITS_IN_JSAMPLE  @BITS_IN_JSAMPLE@      /* use 8 or 12 */
+#ifdef _WIN32
 
-/* Define to 1 if you have the <locale.h> header file. */
-#cmakedefine HAVE_LOCALE_H 1
+#undef RIGHT_SHIFT_IS_UNSIGNED
 
-/* Define to 1 if you have the <stddef.h> header file. */
-#cmakedefine HAVE_STDDEF_H 1
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
 
-/* Define to 1 if you have the <stdlib.h> header file. */
-#cmakedefine HAVE_STDLIB_H 1
+/* Define "INT32" as int, not long, per Windows custom */
+#if !(defined(_BASETSD_H_) || defined(_BASETSD_H))   /* don't conflict if basetsd.h already read */
+typedef short INT16;
+typedef signed int INT32;
+#endif
+#define XMD_H                   /* prevent jmorecfg.h from redefining it */
 
-/* Define if you need to include <sys/types.h> to get size_t. */
-#cmakedefine NEED_SYS_TYPES_H 1
-
-/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
-   memset/memcpy in <string.h>. */
-#cmakedefine NEED_BSD_STRINGS 1
-
-/* Define to 1 if the system has the type `unsigned char'. */
-#cmakedefine HAVE_UNSIGNED_CHAR 1
-
-/* Define to 1 if the system has the type `unsigned short'. */
-#cmakedefine HAVE_UNSIGNED_SHORT 1
-
-/* Compiler does not support pointers to undefined structures. */
-#cmakedefine INCOMPLETE_TYPES_BROKEN 1
+#else
 
 /* Define if your (broken) compiler shifts signed values as if they were
    unsigned. */
 #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
 
-/* Define to empty if `const' does not conform to ANSI C. */
-/* #undef const */
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-/* #undef size_t */
+#endif
diff --git a/3rdparty/libjpeg-turbo/jconfig.h.win.in b/3rdparty/libjpeg-turbo/jconfig.h.win.in
deleted file mode 100644
index 13cceef01d..0000000000
--- a/3rdparty/libjpeg-turbo/jconfig.h.win.in
+++ /dev/null
@@ -1,33 +0,0 @@
-#define JPEG_LIB_VERSION  @JPEG_LIB_VERSION@
-#define LIBJPEG_TURBO_VERSION  @VERSION@
-#define LIBJPEG_TURBO_VERSION_NUMBER  @LIBJPEG_TURBO_VERSION_NUMBER@
-
-#cmakedefine C_ARITH_CODING_SUPPORTED
-#cmakedefine D_ARITH_CODING_SUPPORTED
-#cmakedefine MEM_SRCDST_SUPPORTED
-#cmakedefine WITH_SIMD
-
-#define BITS_IN_JSAMPLE  @BITS_IN_JSAMPLE@      /* use 8 or 12 */
-
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_SYS_TYPES_H
-#undef NEED_BSD_STRINGS
-
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-#undef INCOMPLETE_TYPES_BROKEN
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-/* Define "boolean" as unsigned char, not int, per Windows custom */
-#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
-
-/* Define "INT32" as int, not long, per Windows custom */
-#if !(defined(_BASETSD_H_) || defined(_BASETSD_H))   /* don't conflict if basetsd.h already read */
-typedef short INT16;
-typedef signed int INT32;
-#endif
-#define XMD_H                   /* prevent jmorecfg.h from redefining it */
diff --git a/3rdparty/libjpeg-turbo/jconfigint.h.in b/3rdparty/libjpeg-turbo/jconfigint.h.in
index a46979df1e..5c14e32a1d 100644
--- a/3rdparty/libjpeg-turbo/jconfigint.h.in
+++ b/3rdparty/libjpeg-turbo/jconfigint.h.in
@@ -1,19 +1,14 @@
 /* libjpeg-turbo build number */
 #define BUILD  "@BUILD@"
 
+/* How to hide global symbols. */
+#define HIDDEN  @HIDDEN@
+
 /* Compiler's inline keyword */
 #undef inline
 
 /* How to obtain function inlining. */
-#ifndef INLINE
-#if defined(__GNUC__)
-#define INLINE inline __attribute__((always_inline))
-#elif defined(_MSC_VER)
-#define INLINE __forceinline
-#else
-#define INLINE
-#endif
-#endif
+#define INLINE  @INLINE@
 
 /* How to obtain thread-local storage */
 #define THREAD_LOCAL  @THREAD_LOCAL@
@@ -25,7 +20,7 @@
 #define VERSION  "@VERSION@"
 
 /* The size of `size_t', as computed by sizeof. */
-#define SIZEOF_SIZE_T  @SIZEOF_SIZE_T@
+#define SIZEOF_SIZE_T  @SIZE_T@
 
 /* Define if your compiler has __builtin_ctzl() and sizeof(unsigned long) == sizeof(size_t). */
 #cmakedefine HAVE_BUILTIN_CTZL
@@ -50,3 +45,32 @@
 #else
 #define FALLTHROUGH
 #endif
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ */
+
+#ifndef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
+#endif
+
+#undef C_ARITH_CODING_SUPPORTED
+#undef D_ARITH_CODING_SUPPORTED
+#undef WITH_SIMD
+
+#if BITS_IN_JSAMPLE == 8
+
+/* Support arithmetic encoding */
+#cmakedefine C_ARITH_CODING_SUPPORTED 1
+
+/* Support arithmetic decoding */
+#cmakedefine D_ARITH_CODING_SUPPORTED 1
+
+/* Use accelerated SIMD routines. */
+#cmakedefine WITH_SIMD 1
+
+#endif
diff --git a/3rdparty/libjpeg-turbo/src/jversion.h b/3rdparty/libjpeg-turbo/jversion.h.in
similarity index 79%
rename from 3rdparty/libjpeg-turbo/src/jversion.h
rename to 3rdparty/libjpeg-turbo/jversion.h.in
index 2ab534af41..fc0ce3e09e 100644
--- a/3rdparty/libjpeg-turbo/src/jversion.h
+++ b/3rdparty/libjpeg-turbo/jversion.h.in
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2021, D. R. Commander.
+ * Copyright (C) 2010, 2012-2024, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -36,19 +36,21 @@
  *   their code
  */
 
-#define JCOPYRIGHT \
-  "Copyright (C) 2009-2021 D. R. Commander\n" \
+#define JCOPYRIGHT1 \
+  "Copyright (C) 2009-2024 D. R. Commander\n" \
   "Copyright (C) 2015, 2020 Google, Inc.\n" \
   "Copyright (C) 2019-2020 Arm Limited\n" \
   "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
   "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
-  "Copyright (C) 2015 Intel Corporation\n" \
+  "Copyright (C) 2015 Intel Corporation\n"
+#define JCOPYRIGHT2 \
   "Copyright (C) 2013-2014 Linaro Limited\n" \
   "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
   "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
   "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
   "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1999 Ken Murchison\n" \
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding\n"
 
 #define JCOPYRIGHT_SHORT \
-  "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"
+  "Copyright (C) @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
diff --git a/3rdparty/libjpeg-turbo/src/cjpeg.c b/3rdparty/libjpeg-turbo/src/cjpeg.c
new file mode 100644
index 0000000000..44c39bec20
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/cjpeg.c
@@ -0,0 +1,841 @@
+/*
+ * cjpeg.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2011 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2013-2014, 2017, 2019-2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a command-line user interface for the JPEG compressor.
+ * It should work on any system with Unix- or MS-DOS-style command lines.
+ *
+ * Two different command line styles are permitted, depending on the
+ * compile-time switch TWO_FILE_COMMANDLINE:
+ *      cjpeg [options]  inputfile outputfile
+ *      cjpeg [options]  [inputfile]
+ * In the second style, output is always to standard output, which you'd
+ * normally redirect to a file or pipe to some other program.  Input is
+ * either from a named file or from standard input (typically redirected).
+ * The second style is convenient on Unix but is unhelpful on systems that
+ * don't support pipes.  Also, you MUST use the first style if your system
+ * doesn't do binary I/O to stdin/stdout.
+ * To simplify script writing, the "-outfile" switch is provided.  The syntax
+ *      cjpeg [options]  -outfile outputfile  inputfile
+ * works regardless of which command line style is used.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#ifdef CJPEG_FUZZER
+#define JPEG_INTERNALS
+#endif
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
+
+
+/* Create the add-on message string table. */
+
+#define JMESSAGE(code, string)  string,
+
+static const char * const cdjpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+
+/*
+ * This routine determines what format the input file is,
+ * and selects the appropriate input-reading module.
+ *
+ * To determine which family of input formats the file belongs to,
+ * we may look only at the first byte of the file, since C does not
+ * guarantee that more than one character can be pushed back with ungetc.
+ * Looking at additional bytes would require one of these approaches:
+ *     1) assume we can fseek() the input file (fails for piped input);
+ *     2) assume we can push back more than one character (works in
+ *        some C implementations, but unportable);
+ *     3) provide our own buffering (breaks input readers that want to use
+ *        stdio directly);
+ * or  4) don't put back the data, and modify the input_init methods to assume
+ *        they start reading after the start of file.
+ * #1 is attractive for MS-DOS but is untenable on Unix.
+ *
+ * The most portable solution for file types that can't be identified by their
+ * first byte is to make the user tell us what they are.  This is also the
+ * only approach for "raw" file types that contain only arbitrary values.
+ * We presently apply this method for Targa files.  Most of the time Targa
+ * files start with 0x00, so we recognize that case.  Potentially, however,
+ * a Targa file could start with any byte value (byte 0 is the length of the
+ * seldom-used ID field), so we provide a switch to force Targa input mode.
+ */
+
+static boolean is_targa;        /* records user -targa switch */
+
+
+LOCAL(cjpeg_source_ptr)
+select_file_type(j_compress_ptr cinfo, FILE *infile)
+{
+  int c;
+
+  if (is_targa) {
+#ifdef TARGA_SUPPORTED
+    return jinit_read_targa(cinfo);
+#else
+    ERREXIT(cinfo, JERR_TGA_NOTCOMP);
+#endif
+  }
+
+  if ((c = getc(infile)) == EOF)
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+  if (ungetc(c, infile) == EOF)
+    ERREXIT(cinfo, JERR_UNGETC_FAILED);
+
+  switch (c) {
+#ifdef BMP_SUPPORTED
+  case 'B':
+    return jinit_read_bmp(cinfo, TRUE);
+#endif
+#ifdef GIF_SUPPORTED
+  case 'G':
+    if (cinfo->data_precision == 16) {
+#ifdef C_LOSSLESS_SUPPORTED
+      return j16init_read_gif(cinfo);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      break;
+#endif
+    } else if (cinfo->data_precision == 12)
+      return j12init_read_gif(cinfo);
+    else
+      return jinit_read_gif(cinfo);
+#endif
+#ifdef PPM_SUPPORTED
+  case 'P':
+    if (cinfo->data_precision == 16) {
+#ifdef C_LOSSLESS_SUPPORTED
+      return j16init_read_ppm(cinfo);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      break;
+#endif
+    } else if (cinfo->data_precision == 12)
+      return j12init_read_ppm(cinfo);
+    else
+      return jinit_read_ppm(cinfo);
+#endif
+#ifdef TARGA_SUPPORTED
+  case 0x00:
+    return jinit_read_targa(cinfo);
+#endif
+  default:
+    ERREXIT(cinfo, JERR_UNKNOWN_FORMAT);
+    break;
+  }
+
+  return NULL;                  /* suppress compiler warnings */
+}
+
+
+/*
+ * Argument-parsing code.
+ * The switch parser is designed to be useful with DOS-style command line
+ * syntax, ie, intermixed switches and file names, where only the switches
+ * to the left of a given file name affect processing of that file.
+ * The main program in this file doesn't actually use this capability...
+ */
+
+
+static const char *progname;    /* program name for error messages */
+static char *icc_filename;      /* for -icc switch */
+static char *outfilename;       /* for -outfile switch */
+static boolean memdst;          /* for -memdst switch */
+static boolean report;          /* for -report switch */
+static boolean strict;          /* for -strict switch */
+
+
+#ifdef CJPEG_FUZZER
+
+#include <setjmp.h>
+
+struct my_error_mgr {
+  struct jpeg_error_mgr pub;
+  jmp_buf setjmp_buffer;
+};
+
+void my_error_exit(j_common_ptr cinfo)
+{
+  struct my_error_mgr *myerr = (struct my_error_mgr *)cinfo->err;
+
+  longjmp(myerr->setjmp_buffer, 1);
+}
+
+static void my_emit_message_fuzzer(j_common_ptr cinfo, int msg_level)
+{
+  if (msg_level < 0)
+    cinfo->err->num_warnings++;
+}
+
+#define HANDLE_ERROR() { \
+  if (cinfo.global_state > CSTATE_START) { \
+    if (memdst && outbuffer) \
+      (*cinfo.dest->term_destination) (&cinfo); \
+    jpeg_abort_compress(&cinfo); \
+  } \
+  jpeg_destroy_compress(&cinfo); \
+  if (input_file != stdin && input_file != NULL) \
+    fclose(input_file); \
+  if (memdst) \
+    free(outbuffer); \
+  return EXIT_FAILURE; \
+}
+
+#endif
+
+
+LOCAL(void)
+usage(void)
+/* complain about bad command line */
+{
+  fprintf(stderr, "usage: %s [switches] ", progname);
+#ifdef TWO_FILE_COMMANDLINE
+  fprintf(stderr, "inputfile outputfile\n");
+#else
+  fprintf(stderr, "[inputfile]\n");
+#endif
+
+  fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -quality N[,...]   Compression quality (0..100; 5-95 is most useful range,\n");
+  fprintf(stderr, "                     default is 75)\n");
+  fprintf(stderr, "  -grayscale     Create monochrome JPEG file\n");
+  fprintf(stderr, "  -rgb           Create RGB JPEG file\n");
+#ifdef ENTROPY_OPT_SUPPORTED
+  fprintf(stderr, "  -optimize      Optimize Huffman table (smaller file, but slow compression)\n");
+#endif
+#ifdef C_PROGRESSIVE_SUPPORTED
+  fprintf(stderr, "  -progressive   Create progressive JPEG file\n");
+#endif
+#ifdef TARGA_SUPPORTED
+  fprintf(stderr, "  -targa         Input file is Targa format (usually not needed)\n");
+#endif
+  fprintf(stderr, "Switches for advanced users:\n");
+  fprintf(stderr, "  -precision N   Create JPEG file with N-bit data precision\n");
+#ifdef C_LOSSLESS_SUPPORTED
+  fprintf(stderr, "                 (N is 8, 12, or 16; default is 8; if N is 16, then -lossless\n");
+  fprintf(stderr, "                 must also be specified)\n");
+#else
+  fprintf(stderr, "                 (N is 8 or 12; default is 8)\n");
+#endif
+#ifdef C_LOSSLESS_SUPPORTED
+  fprintf(stderr, "  -lossless psv[,Pt]  Create lossless JPEG file\n");
+#endif
+#ifdef C_ARITH_CODING_SUPPORTED
+  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
+#endif
+#ifdef DCT_ISLOW_SUPPORTED
+  fprintf(stderr, "  -dct int       Use accurate integer DCT method%s\n",
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  fprintf(stderr, "  -dct fast      Use less accurate integer DCT method [legacy feature]%s\n",
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  fprintf(stderr, "  -dct float     Use floating-point DCT method [legacy feature]%s\n",
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+#endif
+  fprintf(stderr, "  -icc FILE      Embed ICC profile contained in FILE\n");
+  fprintf(stderr, "  -restart N     Set restart interval in rows, or in blocks with B\n");
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  fprintf(stderr, "  -smooth N      Smooth dithered input (N=1..100 is strength)\n");
+#endif
+  fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
+  fprintf(stderr, "  -outfile name  Specify name for output file\n");
+  fprintf(stderr, "  -memdst        Compress to memory instead of file (useful for benchmarking)\n");
+  fprintf(stderr, "  -report        Report compression progress\n");
+  fprintf(stderr, "  -strict        Treat all warnings as fatal\n");
+  fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
+  fprintf(stderr, "  -version       Print version information and exit\n");
+  fprintf(stderr, "Switches for wizards:\n");
+  fprintf(stderr, "  -baseline      Force baseline quantization tables\n");
+  fprintf(stderr, "  -qtables FILE  Use quantization tables given in FILE\n");
+  fprintf(stderr, "  -qslots N[,...]    Set component quantization tables\n");
+  fprintf(stderr, "  -sample HxV[,...]  Set component sampling factors\n");
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  fprintf(stderr, "  -scans FILE    Create multi-scan JPEG per script FILE\n");
+#endif
+  exit(EXIT_FAILURE);
+}
+
+
+LOCAL(int)
+parse_switches(j_compress_ptr cinfo, int argc, char **argv,
+               int last_file_arg_seen, boolean for_real)
+/* Parse optional switches.
+ * Returns argv[] index of first file-name argument (== argc if none).
+ * Any file names with indexes <= last_file_arg_seen are ignored;
+ * they have presumably been processed in a previous iteration.
+ * (Pass 0 for last_file_arg_seen on the first or only iteration.)
+ * for_real is FALSE on the first (dummy) pass; we may skip any expensive
+ * processing.
+ */
+{
+  int argn;
+  char *arg;
+#ifdef C_LOSSLESS_SUPPORTED
+  int psv, pt = 0;
+#endif
+  boolean force_baseline;
+  boolean simple_progressive;
+  char *qualityarg = NULL;      /* saves -quality parm if any */
+  char *qtablefile = NULL;      /* saves -qtables filename if any */
+  char *qslotsarg = NULL;       /* saves -qslots parm if any */
+  char *samplearg = NULL;       /* saves -sample parm if any */
+  char *scansarg = NULL;        /* saves -scans parm if any */
+
+  /* Set up default JPEG parameters. */
+
+  force_baseline = FALSE;       /* by default, allow 16-bit quantizers */
+  simple_progressive = FALSE;
+  is_targa = FALSE;
+  icc_filename = NULL;
+  outfilename = NULL;
+  memdst = FALSE;
+  report = FALSE;
+  strict = FALSE;
+  cinfo->err->trace_level = 0;
+
+  /* Scan command line options, adjust parameters */
+
+  for (argn = 1; argn < argc; argn++) {
+    arg = argv[argn];
+    if (*arg != '-') {
+      /* Not a switch, must be a file name argument */
+      if (argn <= last_file_arg_seen) {
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
+      }
+      break;                    /* else done parsing switches */
+    }
+    arg++;                      /* advance past switch marker character */
+
+    if (keymatch(arg, "arithmetic", 1)) {
+      /* Use arithmetic coding. */
+#ifdef C_ARITH_CODING_SUPPORTED
+      cinfo->arith_code = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "baseline", 1)) {
+      /* Force baseline-compatible output (8-bit quantizer values). */
+      force_baseline = TRUE;
+
+    } else if (keymatch(arg, "dct", 2)) {
+      /* Select DCT algorithm. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (keymatch(argv[argn], "int", 1)) {
+        cinfo->dct_method = JDCT_ISLOW;
+      } else if (keymatch(argv[argn], "fast", 2)) {
+        cinfo->dct_method = JDCT_IFAST;
+      } else if (keymatch(argv[argn], "float", 2)) {
+        cinfo->dct_method = JDCT_FLOAT;
+      } else
+        usage();
+
+    } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
+      /* Enable debug printouts. */
+      /* On first -d, print version identification */
+      static boolean printed_version = FALSE;
+
+      if (!printed_version) {
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, JCOPYRIGHT1);
+        fprintf(stderr, JCOPYRIGHT2 "\n");
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
+      }
+      cinfo->err->trace_level++;
+
+    } else if (keymatch(arg, "version", 4)) {
+      fprintf(stderr, "%s version %s (build %s)\n",
+              PACKAGE_NAME, VERSION, BUILD);
+      exit(EXIT_SUCCESS);
+
+    } else if (keymatch(arg, "grayscale", 2) ||
+               keymatch(arg, "greyscale", 2)) {
+      /* Force a monochrome JPEG file to be generated. */
+      jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+
+    } else if (keymatch(arg, "rgb", 3)) {
+      /* Force an RGB JPEG file to be generated. */
+      jpeg_set_colorspace(cinfo, JCS_RGB);
+
+    } else if (keymatch(arg, "icc", 1)) {
+      /* Set ICC filename. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      icc_filename = argv[argn];
+
+    } else if (keymatch(arg, "lossless", 1)) {
+      /* Enable lossless mode. */
+#ifdef C_LOSSLESS_SUPPORTED
+      char ch = ',', *ptr;
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%d%c", &psv, &ch) < 1 || ch != ',')
+        usage();
+      ptr = argv[argn];
+      while (*ptr && *ptr++ != ','); /* advance to next segment of arg
+                                        string */
+      if (*ptr)
+        sscanf(ptr, "%d", &pt);
+      jpeg_enable_lossless(cinfo, psv, pt);
+#else
+      fprintf(stderr, "%s: sorry, lossless output was not compiled\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "maxmemory", 3)) {
+      /* Maximum memory in Kb (or Mb with 'm'). */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+        usage();
+      if (ch == 'm' || ch == 'M')
+        lval *= 1000L;
+      cinfo->mem->max_memory_to_use = lval * 1000L;
+
+    } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
+      /* Enable entropy parm optimization. */
+#ifdef ENTROPY_OPT_SUPPORTED
+      cinfo->optimize_coding = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "outfile", 4)) {
+      /* Set output file name. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
+
+    } else if (keymatch(arg, "precision", 3)) {
+      /* Set data precision. */
+      int val;
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%d", &val) != 1)
+        usage();
+#ifdef C_LOSSLESS_SUPPORTED
+      if (val != 8 && val != 12 && val != 16)
+#else
+      if (val != 8 && val != 12)
+#endif
+        usage();
+      cinfo->data_precision = val;
+
+    } else if (keymatch(arg, "progressive", 3)) {
+      /* Select simple progressive mode. */
+#ifdef C_PROGRESSIVE_SUPPORTED
+      simple_progressive = TRUE;
+      /* We must postpone execution until num_components is known. */
+#else
+      fprintf(stderr, "%s: sorry, progressive output was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "memdst", 2)) {
+      /* Use in-memory destination manager */
+      memdst = TRUE;
+
+    } else if (keymatch(arg, "quality", 1)) {
+      /* Quality ratings (quantization table scaling factors). */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      qualityarg = argv[argn];
+
+    } else if (keymatch(arg, "qslots", 2)) {
+      /* Quantization table slot numbers. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      qslotsarg = argv[argn];
+      /* Must delay setting qslots until after we have processed any
+       * colorspace-determining switches, since jpeg_set_colorspace sets
+       * default quant table numbers.
+       */
+
+    } else if (keymatch(arg, "qtables", 2)) {
+      /* Quantization tables fetched from file. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      qtablefile = argv[argn];
+      /* We postpone actually reading the file in case -quality comes later. */
+
+    } else if (keymatch(arg, "report", 3)) {
+      report = TRUE;
+
+    } else if (keymatch(arg, "restart", 1)) {
+      /* Restart interval in MCU rows (or in MCUs with 'b'). */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+        usage();
+      if (lval < 0 || lval > 65535L)
+        usage();
+      if (ch == 'b' || ch == 'B') {
+        cinfo->restart_interval = (unsigned int)lval;
+        cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+      } else {
+        cinfo->restart_in_rows = (int)lval;
+        /* restart_interval will be computed during startup */
+      }
+
+    } else if (keymatch(arg, "sample", 2)) {
+      /* Set sampling factors. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      samplearg = argv[argn];
+      /* Must delay setting sample factors until after we have processed any
+       * colorspace-determining switches, since jpeg_set_colorspace sets
+       * default sampling factors.
+       */
+
+    } else if (keymatch(arg, "scans", 4)) {
+      /* Set scan script. */
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      scansarg = argv[argn];
+      /* We must postpone reading the file in case -progressive appears. */
+#else
+      fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "smooth", 2)) {
+      /* Set input smoothing factor. */
+      int val;
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%d", &val) != 1)
+        usage();
+      if (val < 0 || val > 100)
+        usage();
+      cinfo->smoothing_factor = val;
+
+    } else if (keymatch(arg, "strict", 2)) {
+      strict = TRUE;
+
+    } else if (keymatch(arg, "targa", 1)) {
+      /* Input file is Targa format. */
+      is_targa = TRUE;
+
+    } else {
+      usage();                  /* bogus switch */
+    }
+  }
+
+  /* Post-switch-scanning cleanup */
+
+  if (for_real) {
+
+    /* Set quantization tables for selected quality. */
+    /* Some or all may be overridden if -qtables is present. */
+    if (qualityarg != NULL)     /* process -quality if it was present */
+      if (!set_quality_ratings(cinfo, qualityarg, force_baseline))
+        usage();
+
+    if (qtablefile != NULL)     /* process -qtables if it was present */
+      if (!read_quant_tables(cinfo, qtablefile, force_baseline))
+        usage();
+
+    if (qslotsarg != NULL)      /* process -qslots if it was present */
+      if (!set_quant_slots(cinfo, qslotsarg))
+        usage();
+
+    if (samplearg != NULL)      /* process -sample if it was present */
+      if (!set_sample_factors(cinfo, samplearg))
+        usage();
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+    if (simple_progressive)     /* process -progressive; -scans can override */
+      jpeg_simple_progression(cinfo);
+#endif
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    if (scansarg != NULL)       /* process -scans if it was present */
+      if (!read_scan_script(cinfo, scansarg))
+        usage();
+#endif
+  }
+
+  return argn;                  /* return index of next arg (file name) */
+}
+
+
+METHODDEF(void)
+my_emit_message(j_common_ptr cinfo, int msg_level)
+{
+  if (msg_level < 0) {
+    /* Treat warning as fatal */
+    cinfo->err->error_exit(cinfo);
+  } else {
+    if (cinfo->err->trace_level >= msg_level)
+      cinfo->err->output_message(cinfo);
+  }
+}
+
+
+/*
+ * The main program.
+ */
+
+int
+main(int argc, char **argv)
+{
+  struct jpeg_compress_struct cinfo;
+#ifdef CJPEG_FUZZER
+  struct my_error_mgr myerr;
+  struct jpeg_error_mgr &jerr = myerr.pub;
+#else
+  struct jpeg_error_mgr jerr;
+#endif
+  struct cdjpeg_progress_mgr progress;
+  int file_index;
+  cjpeg_source_ptr src_mgr;
+  FILE *input_file = NULL;
+  FILE *icc_file;
+  JOCTET *icc_profile = NULL;
+  long icc_len = 0;
+  FILE *output_file = NULL;
+  unsigned char *outbuffer = NULL;
+  unsigned long outsize = 0;
+  JDIMENSION num_scanlines;
+
+  progname = argv[0];
+  if (progname == NULL || progname[0] == 0)
+    progname = "cjpeg";         /* in case C library doesn't provide it */
+
+  /* Initialize the JPEG compression object with default error handling. */
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_compress(&cinfo);
+  /* Add some application-specific error messages (from cderror.h) */
+  jerr.addon_message_table = cdjpeg_message_table;
+  jerr.first_addon_message = JMSG_FIRSTADDONCODE;
+  jerr.last_addon_message = JMSG_LASTADDONCODE;
+
+  /* Initialize JPEG parameters.
+   * Much of this may be overridden later.
+   * In particular, we don't yet know the input file's color space,
+   * but we need to provide some value for jpeg_set_defaults() to work.
+   */
+
+  cinfo.in_color_space = JCS_RGB; /* arbitrary guess */
+  jpeg_set_defaults(&cinfo);
+
+  /* Scan command line to find file names.
+   * It is convenient to use just one switch-parsing routine, but the switch
+   * values read here are ignored; we will rescan the switches after opening
+   * the input file.
+   */
+
+  file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
+
+  if (strict)
+    jerr.emit_message = my_emit_message;
+
+#ifdef TWO_FILE_COMMANDLINE
+  if (!memdst) {
+    /* Must have either -outfile switch or explicit output file name */
+    if (outfilename == NULL) {
+      if (file_index != argc - 2) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
+      outfilename = argv[file_index + 1];
+    } else {
+      if (file_index != argc - 1) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
+    }
+  }
+#else
+  /* Unix style: expect zero or one file name */
+  if (file_index < argc - 1) {
+    fprintf(stderr, "%s: only one input file\n", progname);
+    usage();
+  }
+#endif /* TWO_FILE_COMMANDLINE */
+
+  /* Open the input file. */
+  if (file_index < argc) {
+    if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    /* default input file is stdin */
+    input_file = read_stdin();
+  }
+
+  /* Open the output file. */
+  if (outfilename != NULL) {
+    if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
+      exit(EXIT_FAILURE);
+    }
+  } else if (!memdst) {
+    /* default output file is stdout */
+    output_file = write_stdout();
+  }
+
+  if (icc_filename != NULL) {
+    if ((icc_file = fopen(icc_filename, READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, icc_filename);
+      exit(EXIT_FAILURE);
+    }
+    if (fseek(icc_file, 0, SEEK_END) < 0 ||
+        (icc_len = ftell(icc_file)) < 1 ||
+        fseek(icc_file, 0, SEEK_SET) < 0) {
+      fprintf(stderr, "%s: can't determine size of %s\n", progname,
+              icc_filename);
+      exit(EXIT_FAILURE);
+    }
+    if ((icc_profile = (JOCTET *)malloc(icc_len)) == NULL) {
+      fprintf(stderr, "%s: can't allocate memory for ICC profile\n", progname);
+      fclose(icc_file);
+      exit(EXIT_FAILURE);
+    }
+    if (fread(icc_profile, icc_len, 1, icc_file) < 1) {
+      fprintf(stderr, "%s: can't read ICC profile from %s\n", progname,
+              icc_filename);
+      free(icc_profile);
+      fclose(icc_file);
+      exit(EXIT_FAILURE);
+    }
+    fclose(icc_file);
+  }
+
+#ifdef CJPEG_FUZZER
+  jerr.error_exit = my_error_exit;
+  jerr.emit_message = my_emit_message_fuzzer;
+  if (setjmp(myerr.setjmp_buffer))
+    HANDLE_ERROR()
+#endif
+
+  if (report) {
+    start_progress_monitor((j_common_ptr)&cinfo, &progress);
+    progress.report = report;
+  }
+
+  /* Figure out the input file format, and set up to read it. */
+  src_mgr = select_file_type(&cinfo, input_file);
+  src_mgr->input_file = input_file;
+#ifdef CJPEG_FUZZER
+  src_mgr->max_pixels = 1048576;
+#endif
+
+  /* Read the input file header to obtain file size & colorspace. */
+  (*src_mgr->start_input) (&cinfo, src_mgr);
+
+  /* Now that we know input colorspace, fix colorspace-dependent defaults */
+  jpeg_default_colorspace(&cinfo);
+
+  /* Adjust default compression parameters by re-parsing the options */
+  file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
+
+  /* Specify data destination for compression */
+  if (memdst)
+    jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
+  else
+    jpeg_stdio_dest(&cinfo, output_file);
+
+#ifdef CJPEG_FUZZER
+  if (setjmp(myerr.setjmp_buffer))
+    HANDLE_ERROR()
+#endif
+
+  /* Start compressor */
+  jpeg_start_compress(&cinfo, TRUE);
+
+  if (icc_profile != NULL)
+    jpeg_write_icc_profile(&cinfo, icc_profile, (unsigned int)icc_len);
+
+  /* Process data */
+  if (cinfo.data_precision == 16) {
+#ifdef C_LOSSLESS_SUPPORTED
+    while (cinfo.next_scanline < cinfo.image_height) {
+      num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr);
+      (void)jpeg16_write_scanlines(&cinfo, src_mgr->buffer16, num_scanlines);
+    }
+#else
+    ERREXIT1(&cinfo, JERR_BAD_PRECISION, cinfo.data_precision);
+#endif
+  } else if (cinfo.data_precision == 12) {
+    while (cinfo.next_scanline < cinfo.image_height) {
+      num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr);
+      (void)jpeg12_write_scanlines(&cinfo, src_mgr->buffer12, num_scanlines);
+    }
+  } else {
+    while (cinfo.next_scanline < cinfo.image_height) {
+      num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr);
+      (void)jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines);
+    }
+  }
+
+  /* Finish compression and release memory */
+  (*src_mgr->finish_input) (&cinfo, src_mgr);
+  jpeg_finish_compress(&cinfo);
+  jpeg_destroy_compress(&cinfo);
+
+  /* Close files, if we opened them */
+  if (input_file != stdin)
+    fclose(input_file);
+  if (output_file != stdout && output_file != NULL)
+    fclose(output_file);
+
+  if (report)
+    end_progress_monitor((j_common_ptr)&cinfo);
+
+  if (memdst) {
+#ifndef CJPEG_FUZZER
+    fprintf(stderr, "Compressed size:  %lu bytes\n", outsize);
+#endif
+    free(outbuffer);
+  }
+
+  free(icc_profile);
+
+  /* All done. */
+  return (jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
+}
diff --git a/3rdparty/libjpeg-turbo/src/cmyk.h b/3rdparty/libjpeg-turbo/src/cmyk.h
new file mode 100644
index 0000000000..23891249cf
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/cmyk.h
@@ -0,0 +1,61 @@
+/*
+ * cmyk.h
+ *
+ * Copyright (C) 2017-2018, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains convenience functions for performing quick & dirty
+ * CMYK<->RGB conversion.  This algorithm is suitable for testing purposes
+ * only.  Properly converting between CMYK and RGB requires a color management
+ * system.
+ */
+
+#ifndef CMYK_H
+#define CMYK_H
+
+#include <jinclude.h>
+#define JPEG_INTERNALS
+#include <jpeglib.h>
+#include "jsamplecomp.h"
+
+
+/* Fully reversible */
+
+INLINE
+LOCAL(void)
+rgb_to_cmyk(_JSAMPLE r, _JSAMPLE g, _JSAMPLE b,
+            _JSAMPLE *c, _JSAMPLE *m, _JSAMPLE *y, _JSAMPLE *k)
+{
+  double ctmp = 1.0 - ((double)r / (double)_MAXJSAMPLE);
+  double mtmp = 1.0 - ((double)g / (double)_MAXJSAMPLE);
+  double ytmp = 1.0 - ((double)b / (double)_MAXJSAMPLE);
+  double ktmp = MIN(MIN(ctmp, mtmp), ytmp);
+
+  if (ktmp == 1.0) ctmp = mtmp = ytmp = 0.0;
+  else {
+    ctmp = (ctmp - ktmp) / (1.0 - ktmp);
+    mtmp = (mtmp - ktmp) / (1.0 - ktmp);
+    ytmp = (ytmp - ktmp) / (1.0 - ktmp);
+  }
+  *c = (_JSAMPLE)((double)_MAXJSAMPLE - ctmp * (double)_MAXJSAMPLE + 0.5);
+  *m = (_JSAMPLE)((double)_MAXJSAMPLE - mtmp * (double)_MAXJSAMPLE + 0.5);
+  *y = (_JSAMPLE)((double)_MAXJSAMPLE - ytmp * (double)_MAXJSAMPLE + 0.5);
+  *k = (_JSAMPLE)((double)_MAXJSAMPLE - ktmp * (double)_MAXJSAMPLE + 0.5);
+}
+
+
+/* Fully reversible only for C/M/Y/K values generated with rgb_to_cmyk() */
+
+INLINE
+LOCAL(void)
+cmyk_to_rgb(_JSAMPLE c, _JSAMPLE m, _JSAMPLE y, _JSAMPLE k,
+            _JSAMPLE *r, _JSAMPLE *g, _JSAMPLE *b)
+{
+  *r = (_JSAMPLE)((double)c * (double)k / (double)_MAXJSAMPLE + 0.5);
+  *g = (_JSAMPLE)((double)m * (double)k / (double)_MAXJSAMPLE + 0.5);
+  *b = (_JSAMPLE)((double)y * (double)k / (double)_MAXJSAMPLE + 0.5);
+}
+
+
+#endif /* CMYK_H */
diff --git a/3rdparty/libjpeg-turbo/src/djpeg.c b/3rdparty/libjpeg-turbo/src/djpeg.c
new file mode 100644
index 0000000000..1baedddeff
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/djpeg.c
@@ -0,0 +1,932 @@
+/*
+ * djpeg.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2013-2019 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010-2011, 2013-2017, 2019-2020, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a command-line user interface for the JPEG decompressor.
+ * It should work on any system with Unix- or MS-DOS-style command lines.
+ *
+ * Two different command line styles are permitted, depending on the
+ * compile-time switch TWO_FILE_COMMANDLINE:
+ *      djpeg [options]  inputfile outputfile
+ *      djpeg [options]  [inputfile]
+ * In the second style, output is always to standard output, which you'd
+ * normally redirect to a file or pipe to some other program.  Input is
+ * either from a named file or from standard input (typically redirected).
+ * The second style is convenient on Unix but is unhelpful on systems that
+ * don't support pipes.  Also, you MUST use the first style if your system
+ * doesn't do binary I/O to stdin/stdout.
+ * To simplify script writing, the "-outfile" switch is provided.  The syntax
+ *      djpeg [options]  -outfile outputfile  inputfile
+ * works regardless of which command line style is used.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
+
+#include <ctype.h>              /* to declare isprint() */
+
+
+/* Create the add-on message string table. */
+
+#define JMESSAGE(code, string)  string,
+
+static const char * const cdjpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+
+/*
+ * This list defines the known output image formats
+ * (not all of which need be supported by a given version).
+ * You can change the default output format by defining DEFAULT_FMT;
+ * indeed, you had better do so if you undefine PPM_SUPPORTED.
+ */
+
+typedef enum {
+  FMT_BMP,                      /* BMP format (Windows flavor) */
+  FMT_GIF,                      /* GIF format (LZW-compressed) */
+  FMT_GIF0,                     /* GIF format (uncompressed) */
+  FMT_OS2,                      /* BMP format (OS/2 flavor) */
+  FMT_PPM,                      /* PPM/PGM (PBMPLUS formats) */
+  FMT_TARGA,                    /* Targa format */
+  FMT_TIFF                      /* TIFF format */
+} IMAGE_FORMATS;
+
+#ifndef DEFAULT_FMT             /* so can override from CFLAGS in Makefile */
+#define DEFAULT_FMT     FMT_PPM
+#endif
+
+static IMAGE_FORMATS requested_fmt;
+
+
+/*
+ * Argument-parsing code.
+ * The switch parser is designed to be useful with DOS-style command line
+ * syntax, ie, intermixed switches and file names, where only the switches
+ * to the left of a given file name affect processing of that file.
+ * The main program in this file doesn't actually use this capability...
+ */
+
+
+static const char *progname;    /* program name for error messages */
+static char *icc_filename;      /* for -icc switch */
+static JDIMENSION max_scans;    /* for -maxscans switch */
+static char *outfilename;       /* for -outfile switch */
+static boolean memsrc;          /* for -memsrc switch */
+static boolean report;          /* for -report switch */
+static boolean skip, crop;
+static JDIMENSION skip_start, skip_end;
+static JDIMENSION crop_x, crop_y, crop_width, crop_height;
+static boolean strict;          /* for -strict switch */
+#define INPUT_BUF_SIZE  4096
+
+
+LOCAL(void)
+usage(void)
+/* complain about bad command line */
+{
+  fprintf(stderr, "usage: %s [switches] ", progname);
+#ifdef TWO_FILE_COMMANDLINE
+  fprintf(stderr, "inputfile outputfile\n");
+#else
+  fprintf(stderr, "[inputfile]\n");
+#endif
+
+  fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -colors N      Reduce image to no more than N colors\n");
+  fprintf(stderr, "  -fast          Fast, low-quality processing\n");
+  fprintf(stderr, "  -grayscale     Force grayscale output\n");
+  fprintf(stderr, "  -rgb           Force RGB output\n");
+  fprintf(stderr, "  -rgb565        Force RGB565 output\n");
+#ifdef IDCT_SCALING_SUPPORTED
+  fprintf(stderr, "  -scale M/N     Scale output image by fraction M/N, eg, 1/8\n");
+#endif
+#ifdef BMP_SUPPORTED
+  fprintf(stderr, "  -bmp           Select BMP output format (Windows style)%s\n",
+          (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
+#endif
+#ifdef GIF_SUPPORTED
+  fprintf(stderr, "  -gif           Select GIF output format (LZW-compressed)%s\n",
+          (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
+  fprintf(stderr, "  -gif0          Select GIF output format (uncompressed)%s\n",
+          (DEFAULT_FMT == FMT_GIF0 ? " (default)" : ""));
+#endif
+#ifdef BMP_SUPPORTED
+  fprintf(stderr, "  -os2           Select BMP output format (OS/2 style)%s\n",
+          (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
+#endif
+#ifdef PPM_SUPPORTED
+  fprintf(stderr, "  -pnm           Select PBMPLUS (PPM/PGM) output format%s\n",
+          (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
+#endif
+#ifdef TARGA_SUPPORTED
+  fprintf(stderr, "  -targa         Select Targa output format%s\n",
+          (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
+#endif
+  fprintf(stderr, "Switches for advanced users:\n");
+#ifdef DCT_ISLOW_SUPPORTED
+  fprintf(stderr, "  -dct int       Use accurate integer DCT method%s\n",
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  fprintf(stderr, "  -dct fast      Use less accurate integer DCT method [legacy feature]%s\n",
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  fprintf(stderr, "  -dct float     Use floating-point DCT method [legacy feature]%s\n",
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+#endif
+  fprintf(stderr, "  -dither fs     Use F-S dithering (default)\n");
+  fprintf(stderr, "  -dither none   Don't use dithering in quantization\n");
+  fprintf(stderr, "  -dither ordered  Use ordered dither (medium speed, quality)\n");
+  fprintf(stderr, "  -icc FILE      Extract ICC profile to FILE\n");
+#ifdef QUANT_2PASS_SUPPORTED
+  fprintf(stderr, "  -map FILE      Map to colors used in named image file\n");
+#endif
+  fprintf(stderr, "  -nosmooth      Don't use high-quality upsampling\n");
+#ifdef QUANT_1PASS_SUPPORTED
+  fprintf(stderr, "  -onepass       Use 1-pass quantization (fast, low quality)\n");
+#endif
+  fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
+  fprintf(stderr, "  -maxscans N    Maximum number of scans to allow in input file\n");
+  fprintf(stderr, "  -outfile name  Specify name for output file\n");
+  fprintf(stderr, "  -memsrc        Load input file into memory before decompressing\n");
+  fprintf(stderr, "  -report        Report decompression progress\n");
+  fprintf(stderr, "  -skip Y0,Y1    Decompress all rows except those between Y0 and Y1 (inclusive)\n");
+  fprintf(stderr, "  -crop WxH+X+Y  Decompress only a rectangular subregion of the image\n");
+  fprintf(stderr, "                 [requires PBMPLUS (PPM/PGM), GIF, or Targa output format]\n");
+  fprintf(stderr, "  -strict        Treat all warnings as fatal\n");
+  fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
+  fprintf(stderr, "  -version       Print version information and exit\n");
+  exit(EXIT_FAILURE);
+}
+
+
+LOCAL(int)
+parse_switches(j_decompress_ptr cinfo, int argc, char **argv,
+               int last_file_arg_seen, boolean for_real)
+/* Parse optional switches.
+ * Returns argv[] index of first file-name argument (== argc if none).
+ * Any file names with indexes <= last_file_arg_seen are ignored;
+ * they have presumably been processed in a previous iteration.
+ * (Pass 0 for last_file_arg_seen on the first or only iteration.)
+ * for_real is FALSE on the first (dummy) pass; we may skip any expensive
+ * processing.
+ */
+{
+  int argn;
+  char *arg;
+
+  /* Set up default JPEG parameters. */
+  requested_fmt = DEFAULT_FMT;  /* set default output file format */
+  icc_filename = NULL;
+  max_scans = 0;
+  outfilename = NULL;
+  memsrc = FALSE;
+  report = FALSE;
+  skip = FALSE;
+  crop = FALSE;
+  strict = FALSE;
+  cinfo->err->trace_level = 0;
+
+  /* Scan command line options, adjust parameters */
+
+  for (argn = 1; argn < argc; argn++) {
+    arg = argv[argn];
+    if (*arg != '-') {
+      /* Not a switch, must be a file name argument */
+      if (argn <= last_file_arg_seen) {
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
+      }
+      break;                    /* else done parsing switches */
+    }
+    arg++;                      /* advance past switch marker character */
+
+    if (keymatch(arg, "bmp", 1)) {
+      /* BMP output format (Windows flavor). */
+      requested_fmt = FMT_BMP;
+
+    } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) ||
+               keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
+      /* Do color quantization. */
+      int val;
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%d", &val) != 1)
+        usage();
+      cinfo->desired_number_of_colors = val;
+      cinfo->quantize_colors = TRUE;
+
+    } else if (keymatch(arg, "dct", 2)) {
+      /* Select IDCT algorithm. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (keymatch(argv[argn], "int", 1)) {
+        cinfo->dct_method = JDCT_ISLOW;
+      } else if (keymatch(argv[argn], "fast", 2)) {
+        cinfo->dct_method = JDCT_IFAST;
+      } else if (keymatch(argv[argn], "float", 2)) {
+        cinfo->dct_method = JDCT_FLOAT;
+      } else
+        usage();
+
+    } else if (keymatch(arg, "dither", 2)) {
+      /* Select dithering algorithm. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (keymatch(argv[argn], "fs", 2)) {
+        cinfo->dither_mode = JDITHER_FS;
+      } else if (keymatch(argv[argn], "none", 2)) {
+        cinfo->dither_mode = JDITHER_NONE;
+      } else if (keymatch(argv[argn], "ordered", 2)) {
+        cinfo->dither_mode = JDITHER_ORDERED;
+      } else
+        usage();
+
+    } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
+      /* Enable debug printouts. */
+      /* On first -d, print version identification */
+      static boolean printed_version = FALSE;
+
+      if (!printed_version) {
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, JCOPYRIGHT1);
+        fprintf(stderr, JCOPYRIGHT2 "\n");
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
+      }
+      cinfo->err->trace_level++;
+
+    } else if (keymatch(arg, "version", 4)) {
+      fprintf(stderr, "%s version %s (build %s)\n",
+              PACKAGE_NAME, VERSION, BUILD);
+      exit(EXIT_SUCCESS);
+
+    } else if (keymatch(arg, "fast", 1)) {
+      /* Select recommended processing options for quick-and-dirty output. */
+      cinfo->two_pass_quantize = FALSE;
+      cinfo->dither_mode = JDITHER_ORDERED;
+      if (!cinfo->quantize_colors) /* don't override an earlier -colors */
+        cinfo->desired_number_of_colors = 216;
+      cinfo->dct_method = JDCT_FASTEST;
+      cinfo->do_fancy_upsampling = FALSE;
+
+    } else if (keymatch(arg, "gif", 1)) {
+      /* GIF output format (LZW-compressed). */
+      requested_fmt = FMT_GIF;
+
+    } else if (keymatch(arg, "gif0", 4)) {
+      /* GIF output format (uncompressed). */
+      requested_fmt = FMT_GIF0;
+
+    } else if (keymatch(arg, "grayscale", 2) ||
+               keymatch(arg, "greyscale", 2)) {
+      /* Force monochrome output. */
+      cinfo->out_color_space = JCS_GRAYSCALE;
+
+    } else if (keymatch(arg, "rgb", 2)) {
+      /* Force RGB output. */
+      cinfo->out_color_space = JCS_RGB;
+
+    } else if (keymatch(arg, "rgb565", 2)) {
+      /* Force RGB565 output. */
+      cinfo->out_color_space = JCS_RGB565;
+
+    } else if (keymatch(arg, "icc", 1)) {
+      /* Set ICC filename. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      icc_filename = argv[argn];
+#ifdef SAVE_MARKERS_SUPPORTED
+      jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF);
+#endif
+
+    } else if (keymatch(arg, "map", 3)) {
+      /* Quantize to a color map taken from an input file. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (for_real) {           /* too expensive to do twice! */
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
+        FILE *mapfile;
+
+        if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
+          fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+          exit(EXIT_FAILURE);
+        }
+        if (cinfo->data_precision == 12)
+          read_color_map_12(cinfo, mapfile);
+        else
+          read_color_map(cinfo, mapfile);
+        fclose(mapfile);
+        cinfo->quantize_colors = TRUE;
+#else
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      }
+
+    } else if (keymatch(arg, "maxmemory", 3)) {
+      /* Maximum memory in Kb (or Mb with 'm'). */
+      long lval;
+      char ch = 'x';
+
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
+        usage();
+      if (ch == 'm' || ch == 'M')
+        lval *= 1000L;
+      cinfo->mem->max_memory_to_use = lval * 1000L;
+
+    } else if (keymatch(arg, "maxscans", 4)) {
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%u", &max_scans) != 1)
+        usage();
+
+    } else if (keymatch(arg, "nosmooth", 3)) {
+      /* Suppress fancy upsampling */
+      cinfo->do_fancy_upsampling = FALSE;
+
+    } else if (keymatch(arg, "onepass", 3)) {
+      /* Use fast one-pass quantization. */
+      cinfo->two_pass_quantize = FALSE;
+
+    } else if (keymatch(arg, "os2", 3)) {
+      /* BMP output format (OS/2 flavor). */
+      requested_fmt = FMT_OS2;
+
+    } else if (keymatch(arg, "outfile", 4)) {
+      /* Set output file name. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
+
+    } else if (keymatch(arg, "memsrc", 2)) {
+      /* Use in-memory source manager */
+      memsrc = TRUE;
+
+    } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
+      /* PPM/PGM output format. */
+      requested_fmt = FMT_PPM;
+
+    } else if (keymatch(arg, "report", 2)) {
+      report = TRUE;
+
+    } else if (keymatch(arg, "scale", 2)) {
+      /* Scale the output image by a fraction M/N. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (sscanf(argv[argn], "%u/%u",
+                 &cinfo->scale_num, &cinfo->scale_denom) != 2)
+        usage();
+
+    } else if (keymatch(arg, "skip", 2)) {
+      if (++argn >= argc)
+        usage();
+      if (sscanf(argv[argn], "%u,%u", &skip_start, &skip_end) != 2 ||
+          skip_start > skip_end)
+        usage();
+      skip = TRUE;
+
+    } else if (keymatch(arg, "crop", 2)) {
+      char c;
+      if (++argn >= argc)
+        usage();
+      if (sscanf(argv[argn], "%u%c%u+%u+%u", &crop_width, &c, &crop_height,
+                 &crop_x, &crop_y) != 5 ||
+          (c != 'X' && c != 'x') || crop_width < 1 || crop_height < 1)
+        usage();
+      crop = TRUE;
+
+    } else if (keymatch(arg, "strict", 2)) {
+      strict = TRUE;
+
+    } else if (keymatch(arg, "targa", 1)) {
+      /* Targa output format. */
+      requested_fmt = FMT_TARGA;
+
+    } else {
+      usage();                  /* bogus switch */
+    }
+  }
+
+  return argn;                  /* return index of next arg (file name) */
+}
+
+
+/*
+ * Marker processor for COM and interesting APPn markers.
+ * This replaces the library's built-in processor, which just skips the marker.
+ * We want to print out the marker as text, to the extent possible.
+ * Note this code relies on a non-suspending data source.
+ */
+
+LOCAL(unsigned int)
+jpeg_getc(j_decompress_ptr cinfo)
+/* Read next byte */
+{
+  struct jpeg_source_mgr *datasrc = cinfo->src;
+
+  if (datasrc->bytes_in_buffer == 0) {
+    if (!(*datasrc->fill_input_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  }
+  datasrc->bytes_in_buffer--;
+  return *datasrc->next_input_byte++;
+}
+
+
+METHODDEF(boolean)
+print_text_marker(j_decompress_ptr cinfo)
+{
+  boolean traceit = (cinfo->err->trace_level >= 1);
+  long length;
+  unsigned int ch;
+  unsigned int lastch = 0;
+
+  length = jpeg_getc(cinfo) << 8;
+  length += jpeg_getc(cinfo);
+  length -= 2;                  /* discount the length word itself */
+
+  if (traceit) {
+    if (cinfo->unread_marker == JPEG_COM)
+      fprintf(stderr, "Comment, length %ld:\n", (long)length);
+    else                        /* assume it is an APPn otherwise */
+      fprintf(stderr, "APP%d, length %ld:\n",
+              cinfo->unread_marker - JPEG_APP0, (long)length);
+  }
+
+  while (--length >= 0) {
+    ch = jpeg_getc(cinfo);
+    if (traceit) {
+      /* Emit the character in a readable form.
+       * Nonprintables are converted to \nnn form,
+       * while \ is converted to \\.
+       * Newlines in CR, CR/LF, or LF form will be printed as one newline.
+       */
+      if (ch == '\r') {
+        fprintf(stderr, "\n");
+      } else if (ch == '\n') {
+        if (lastch != '\r')
+          fprintf(stderr, "\n");
+      } else if (ch == '\\') {
+        fprintf(stderr, "\\\\");
+      } else if (isprint(ch)) {
+        putc(ch, stderr);
+      } else {
+        fprintf(stderr, "\\%03o", ch);
+      }
+      lastch = ch;
+    }
+  }
+
+  if (traceit)
+    fprintf(stderr, "\n");
+
+  return TRUE;
+}
+
+
+METHODDEF(void)
+my_emit_message(j_common_ptr cinfo, int msg_level)
+{
+  if (msg_level < 0) {
+    /* Treat warning as fatal */
+    cinfo->err->error_exit(cinfo);
+  } else {
+    if (cinfo->err->trace_level >= msg_level)
+      cinfo->err->output_message(cinfo);
+  }
+}
+
+
+/*
+ * The main program.
+ */
+
+int
+main(int argc, char **argv)
+{
+  struct jpeg_decompress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+  struct cdjpeg_progress_mgr progress;
+  int file_index;
+  djpeg_dest_ptr dest_mgr = NULL;
+  FILE *input_file;
+  FILE *output_file;
+  unsigned char *inbuffer = NULL;
+  unsigned long insize = 0;
+  JDIMENSION num_scanlines;
+
+  progname = argv[0];
+  if (progname == NULL || progname[0] == 0)
+    progname = "djpeg";         /* in case C library doesn't provide it */
+
+  /* Initialize the JPEG decompression object with default error handling. */
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_decompress(&cinfo);
+  /* Add some application-specific error messages (from cderror.h) */
+  jerr.addon_message_table = cdjpeg_message_table;
+  jerr.first_addon_message = JMSG_FIRSTADDONCODE;
+  jerr.last_addon_message = JMSG_LASTADDONCODE;
+
+  /* Insert custom marker processor for COM and APP12.
+   * APP12 is used by some digital camera makers for textual info,
+   * so we provide the ability to display it as text.
+   * If you like, additional APPn marker types can be selected for display,
+   * but don't try to override APP0 or APP14 this way (see libjpeg.txt).
+   */
+  jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker);
+  jpeg_set_marker_processor(&cinfo, JPEG_APP0 + 12, print_text_marker);
+
+  /* Scan command line to find file names. */
+  /* It is convenient to use just one switch-parsing routine, but the switch
+   * values read here are ignored; we will rescan the switches after opening
+   * the input file.
+   * (Exception: tracing level set here controls verbosity for COM markers
+   * found during jpeg_read_header...)
+   */
+
+  file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
+
+  if (strict)
+    jerr.emit_message = my_emit_message;
+
+#ifdef TWO_FILE_COMMANDLINE
+  /* Must have either -outfile switch or explicit output file name */
+  if (outfilename == NULL) {
+    if (file_index != argc - 2) {
+      fprintf(stderr, "%s: must name one input and one output file\n",
+              progname);
+      usage();
+    }
+    outfilename = argv[file_index + 1];
+  } else {
+    if (file_index != argc - 1) {
+      fprintf(stderr, "%s: must name one input and one output file\n",
+              progname);
+      usage();
+    }
+  }
+#else
+  /* Unix style: expect zero or one file name */
+  if (file_index < argc - 1) {
+    fprintf(stderr, "%s: only one input file\n", progname);
+    usage();
+  }
+#endif /* TWO_FILE_COMMANDLINE */
+
+  /* Open the input file. */
+  if (file_index < argc) {
+    if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    /* default input file is stdin */
+    input_file = read_stdin();
+  }
+
+  /* Open the output file. */
+  if (outfilename != NULL) {
+    if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    /* default output file is stdout */
+    output_file = write_stdout();
+  }
+
+  if (report || max_scans != 0) {
+    start_progress_monitor((j_common_ptr)&cinfo, &progress);
+    progress.report = report;
+    progress.max_scans = max_scans;
+  }
+
+  /* Specify data source for decompression */
+  if (memsrc) {
+    size_t nbytes;
+    do {
+      inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE);
+      if (inbuffer == NULL) {
+        fprintf(stderr, "%s: memory allocation failure\n", progname);
+        exit(EXIT_FAILURE);
+      }
+      nbytes = fread(&inbuffer[insize], 1, INPUT_BUF_SIZE, input_file);
+      if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) {
+        if (file_index < argc)
+          fprintf(stderr, "%s: can't read from %s\n", progname,
+                  argv[file_index]);
+        else
+          fprintf(stderr, "%s: can't read from stdin\n", progname);
+      }
+      insize += (unsigned long)nbytes;
+    } while (nbytes == INPUT_BUF_SIZE);
+    fprintf(stderr, "Compressed size:  %lu bytes\n", insize);
+    jpeg_mem_src(&cinfo, inbuffer, insize);
+  } else
+    jpeg_stdio_src(&cinfo, input_file);
+
+  /* Read file header, set default decompression parameters */
+  (void)jpeg_read_header(&cinfo, TRUE);
+
+  /* Adjust default decompression parameters by re-parsing the options */
+  file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
+
+  /* Initialize the output module now to let it override any crucial
+   * option settings (for instance, GIF wants to force color quantization).
+   */
+  switch (requested_fmt) {
+#ifdef BMP_SUPPORTED
+  case FMT_BMP:
+    dest_mgr = jinit_write_bmp(&cinfo, FALSE, TRUE);
+    break;
+  case FMT_OS2:
+    dest_mgr = jinit_write_bmp(&cinfo, TRUE, TRUE);
+    break;
+#endif
+#ifdef GIF_SUPPORTED
+  case FMT_GIF:
+    if (cinfo.data_precision == 16)
+      ERREXIT1(&cinfo, JERR_BAD_PRECISION, cinfo.data_precision);
+    else if (cinfo.data_precision == 12)
+      dest_mgr = j12init_write_gif(&cinfo, TRUE);
+    else
+      dest_mgr = jinit_write_gif(&cinfo, TRUE);
+    break;
+  case FMT_GIF0:
+    dest_mgr = jinit_write_gif(&cinfo, FALSE);
+    break;
+#endif
+#ifdef PPM_SUPPORTED
+  case FMT_PPM:
+    if (cinfo.data_precision == 16)
+#ifdef D_LOSSLESS_SUPPORTED
+      dest_mgr = j16init_write_ppm(&cinfo);
+#else
+      ERREXIT1(&cinfo, JERR_BAD_PRECISION, cinfo.data_precision);
+#endif
+    else if (cinfo.data_precision == 12)
+      dest_mgr = j12init_write_ppm(&cinfo);
+    else
+      dest_mgr = jinit_write_ppm(&cinfo);
+    break;
+#endif
+#ifdef TARGA_SUPPORTED
+  case FMT_TARGA:
+    dest_mgr = jinit_write_targa(&cinfo);
+    break;
+#endif
+  default:
+    ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT);
+    break;
+  }
+  dest_mgr->output_file = output_file;
+
+  /* Start decompressor */
+  (void)jpeg_start_decompress(&cinfo);
+
+  /* Skip rows */
+  if (skip) {
+    JDIMENSION tmp;
+
+    /* Check for valid skip_end.  We cannot check this value until after
+     * jpeg_start_decompress() is called.  Note that we have already verified
+     * that skip_start <= skip_end.
+     */
+    if (skip_end > cinfo.output_height - 1) {
+      fprintf(stderr, "%s: skip region exceeds image height %u\n", progname,
+              cinfo.output_height);
+      exit(EXIT_FAILURE);
+    }
+
+    /* Write output file header.  This is a hack to ensure that the destination
+     * manager creates an output image of the proper size.
+     */
+    tmp = cinfo.output_height;
+    cinfo.output_height -= (skip_end - skip_start + 1);
+    (*dest_mgr->start_output) (&cinfo, dest_mgr);
+    cinfo.output_height = tmp;
+
+    if (cinfo.data_precision == 16)
+      ERREXIT(&cinfo, JERR_NOTIMPL);
+    else if (cinfo.data_precision == 12) {
+      /* Process data */
+      while (cinfo.output_scanline < skip_start) {
+        num_scanlines = jpeg12_read_scanlines(&cinfo, dest_mgr->buffer12,
+                                              dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+      if ((tmp = jpeg12_skip_scanlines(&cinfo, skip_end - skip_start + 1)) !=
+          skip_end - skip_start + 1) {
+        fprintf(stderr, "%s: jpeg12_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, skip_end - skip_start + 1);
+        exit(EXIT_FAILURE);
+      }
+      while (cinfo.output_scanline < cinfo.output_height) {
+        num_scanlines = jpeg12_read_scanlines(&cinfo, dest_mgr->buffer12,
+                                              dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+    } else {
+      /* Process data */
+      while (cinfo.output_scanline < skip_start) {
+        num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                            dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+      if ((tmp = jpeg_skip_scanlines(&cinfo, skip_end - skip_start + 1)) !=
+          skip_end - skip_start + 1) {
+        fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, skip_end - skip_start + 1);
+        exit(EXIT_FAILURE);
+      }
+      while (cinfo.output_scanline < cinfo.output_height) {
+        num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                            dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+    }
+
+  /* Decompress a subregion */
+  } else if (crop) {
+    JDIMENSION tmp;
+
+    /* Check for valid crop dimensions.  We cannot check these values until
+     * after jpeg_start_decompress() is called.
+     */
+    if (crop_x + crop_width > cinfo.output_width ||
+        crop_y + crop_height > cinfo.output_height) {
+      fprintf(stderr, "%s: crop dimensions exceed image dimensions %u x %u\n",
+              progname, cinfo.output_width, cinfo.output_height);
+      exit(EXIT_FAILURE);
+    }
+
+    if (cinfo.data_precision == 16)
+      ERREXIT(&cinfo, JERR_NOTIMPL);
+    else if (cinfo.data_precision == 12)
+      jpeg12_crop_scanline(&cinfo, &crop_x, &crop_width);
+    else
+      jpeg_crop_scanline(&cinfo, &crop_x, &crop_width);
+    if (dest_mgr->calc_buffer_dimensions)
+      (*dest_mgr->calc_buffer_dimensions) (&cinfo, dest_mgr);
+    else
+      ERREXIT(&cinfo, JERR_UNSUPPORTED_FORMAT);
+
+    /* Write output file header.  This is a hack to ensure that the destination
+     * manager creates an output image of the proper size.
+     */
+    tmp = cinfo.output_height;
+    cinfo.output_height = crop_height;
+    (*dest_mgr->start_output) (&cinfo, dest_mgr);
+    cinfo.output_height = tmp;
+
+    if (cinfo.data_precision == 16)
+      ERREXIT(&cinfo, JERR_NOTIMPL);
+    else if (cinfo.data_precision == 12) {
+      /* Process data */
+      if ((tmp = jpeg12_skip_scanlines(&cinfo, crop_y)) != crop_y) {
+        fprintf(stderr, "%s: jpeg12_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, crop_y);
+        exit(EXIT_FAILURE);
+      }
+      while (cinfo.output_scanline < crop_y + crop_height) {
+        num_scanlines = jpeg12_read_scanlines(&cinfo, dest_mgr->buffer12,
+                                              dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+      if ((tmp =
+           jpeg12_skip_scanlines(&cinfo, cinfo.output_height - crop_y -
+                                         crop_height)) !=
+          cinfo.output_height - crop_y - crop_height) {
+        fprintf(stderr, "%s: jpeg12_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, cinfo.output_height - crop_y - crop_height);
+        exit(EXIT_FAILURE);
+      }
+    } else {
+      /* Process data */
+      if ((tmp = jpeg_skip_scanlines(&cinfo, crop_y)) != crop_y) {
+        fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, crop_y);
+        exit(EXIT_FAILURE);
+      }
+      while (cinfo.output_scanline < crop_y + crop_height) {
+        num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                            dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+      if ((tmp =
+           jpeg_skip_scanlines(&cinfo,
+                               cinfo.output_height - crop_y - crop_height)) !=
+          cinfo.output_height - crop_y - crop_height) {
+        fprintf(stderr, "%s: jpeg_skip_scanlines() returned %u rather than %u\n",
+                progname, tmp, cinfo.output_height - crop_y - crop_height);
+        exit(EXIT_FAILURE);
+      }
+    }
+
+  /* Normal full-image decompress */
+  } else {
+    /* Write output file header */
+    (*dest_mgr->start_output) (&cinfo, dest_mgr);
+
+    if (cinfo.data_precision == 16) {
+#ifdef D_LOSSLESS_SUPPORTED
+      /* Process data */
+      while (cinfo.output_scanline < cinfo.output_height) {
+        num_scanlines = jpeg16_read_scanlines(&cinfo, dest_mgr->buffer16,
+                                              dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+#else
+      ERREXIT1(&cinfo, JERR_BAD_PRECISION, cinfo.data_precision);
+#endif
+    } else if (cinfo.data_precision == 12) {
+      /* Process data */
+      while (cinfo.output_scanline < cinfo.output_height) {
+        num_scanlines = jpeg12_read_scanlines(&cinfo, dest_mgr->buffer12,
+                                              dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+    } else {
+      /* Process data */
+      while (cinfo.output_scanline < cinfo.output_height) {
+        num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
+                                            dest_mgr->buffer_height);
+        (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
+      }
+    }
+  }
+
+  /* Hack: count final pass as done in case finish_output does an extra pass.
+   * The library won't have updated completed_passes.
+   */
+  if (report || max_scans != 0)
+    progress.pub.completed_passes = progress.pub.total_passes;
+
+  if (icc_filename != NULL) {
+    FILE *icc_file;
+    JOCTET *icc_profile;
+    unsigned int icc_len;
+
+    if ((icc_file = fopen(icc_filename, WRITE_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, icc_filename);
+      exit(EXIT_FAILURE);
+    }
+    if (jpeg_read_icc_profile(&cinfo, &icc_profile, &icc_len)) {
+      if (fwrite(icc_profile, icc_len, 1, icc_file) < 1) {
+        fprintf(stderr, "%s: can't read ICC profile from %s\n", progname,
+                icc_filename);
+        free(icc_profile);
+        fclose(icc_file);
+        exit(EXIT_FAILURE);
+      }
+      free(icc_profile);
+      fclose(icc_file);
+    } else if (cinfo.err->msg_code != JWRN_BOGUS_ICC)
+      fprintf(stderr, "%s: no ICC profile data in JPEG file\n", progname);
+  }
+
+  /* Finish decompression and release memory.
+   * I must do it in this order because output module has allocated memory
+   * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory.
+   */
+  (*dest_mgr->finish_output) (&cinfo, dest_mgr);
+  (void)jpeg_finish_decompress(&cinfo);
+  jpeg_destroy_decompress(&cinfo);
+
+  /* Close files, if we opened them */
+  if (input_file != stdin)
+    fclose(input_file);
+  if (output_file != stdout)
+    fclose(output_file);
+
+  if (report || max_scans != 0)
+    end_progress_monitor((j_common_ptr)&cinfo);
+
+  if (memsrc)
+    free(inbuffer);
+
+  /* All done. */
+  exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
+  return 0;                     /* suppress no-return-value warnings */
+}
diff --git a/3rdparty/libjpeg-turbo/src/example.c b/3rdparty/libjpeg-turbo/src/example.c
new file mode 100644
index 0000000000..78b658a0ab
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/example.c
@@ -0,0 +1,643 @@
+/*
+ * example.c
+ *
+ * This file was part of the Independent JPEG Group's software.
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2017, 2019, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file illustrates how to use the IJG code as a subroutine library
+ * to read or write JPEG image files with 8-bit or 12-bit data precision.  You
+ * should look at this code in conjunction with the documentation file
+ * libjpeg.txt.
+ *
+ * We present these routines in the same coding style used in the JPEG code
+ * (ANSI function definitions, etc); but you are of course free to code your
+ * routines in a different style if you prefer.
+ */
+
+/* First-time users of libjpeg-turbo might be better served by looking at
+ * tjexample.c, which uses the more straightforward TurboJPEG API.  Note that
+ * this example, like cjpeg and djpeg, interleaves disk I/O with JPEG
+ * compression/decompression, so it is not suitable for benchmarking purposes.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _WIN32
+#define strcasecmp  stricmp
+#define strncasecmp  strnicmp
+#endif
+
+/*
+ * Include file for users of JPEG library.
+ * You will need to have included system headers that define at least
+ * the typedefs FILE and size_t before you can include jpeglib.h.
+ * (stdio.h is sufficient on ANSI-conforming systems.)
+ * You may also wish to include "jerror.h".
+ */
+
+#include "jpeglib.h"
+#include "jerror.h"
+
+/*
+ * <setjmp.h> is used for the optional error recovery mechanism shown in
+ * the second part of the example.
+ */
+
+#include <setjmp.h>
+
+
+
+/******************** JPEG COMPRESSION SAMPLE INTERFACE *******************/
+
+/* This half of the example shows how to feed data into the JPEG compressor.
+ * We present a minimal version that does not worry about refinements such
+ * as error recovery (the JPEG code will just exit() if it gets an error).
+ */
+
+
+/*
+ * IMAGE DATA FORMATS:
+ *
+ * The standard input image format is a rectangular array of pixels, with
+ * each pixel having the same number of "component" values (color channels).
+ * Each pixel row is an array of JSAMPLEs (which typically are unsigned chars)
+ * or J12SAMPLEs (which typically are shorts).  If you are working with color
+ * data, then the color values for each pixel must be adjacent in the row; for
+ * example, R,G,B,R,G,B,R,G,B,... for 24-bit RGB color.
+ *
+ * For this example, we'll assume that this data structure matches the way
+ * our application has stored the image in memory, so we can just pass a
+ * pointer to our image buffer.  In particular, let's say that the image is
+ * RGB color and is described by:
+ */
+
+#define WIDTH  640              /* Number of columns in image */
+#define HEIGHT  480             /* Number of rows in image */
+
+
+/*
+ * Sample routine for JPEG compression.  We assume that the target file name,
+ * a compression quality factor, and a data precision are passed in.
+ */
+
+METHODDEF(void)
+write_JPEG_file(char *filename, int quality, int data_precision)
+{
+  /* This struct contains the JPEG compression parameters and pointers to
+   * working space (which is allocated as needed by the JPEG library).
+   * It is possible to have several such structures, representing multiple
+   * compression/decompression processes, in existence at once.  We refer
+   * to any one struct (and its associated working data) as a "JPEG object".
+   */
+  struct jpeg_compress_struct cinfo;
+  /* This struct represents a JPEG error handler.  It is declared separately
+   * because applications often want to supply a specialized error handler
+   * (see the second half of this file for an example).  But here we just
+   * take the easy way out and use the standard error handler, which will
+   * print a message on stderr and call exit() if compression fails.
+   * Note that this struct must live as long as the main JPEG parameter
+   * struct, to avoid dangling-pointer problems.
+   */
+  struct jpeg_error_mgr jerr;
+  /* More stuff */
+  FILE *outfile;                /* target file */
+  JSAMPARRAY image_buffer = NULL;
+                                /* Points to large array of R,G,B-order data */
+  JSAMPROW row_pointer[1];      /* pointer to JSAMPLE row[s] */
+  J12SAMPARRAY image_buffer12 = NULL;
+                                /* Points to large array of R,G,B-order 12-bit
+                                   data */
+  J12SAMPROW row_pointer12[1];  /* pointer to J12SAMPLE row[s] */
+  int row_stride;               /* physical row width in image buffer */
+  int row, col;
+
+  /* Step 1: allocate and initialize JPEG compression object */
+
+  /* We have to set up the error handler first, in case the initialization
+   * step fails.  (Unlikely, but it could happen if you are out of memory.)
+   * This routine fills in the contents of struct jerr, and returns jerr's
+   * address which we place into the link field in cinfo.
+   */
+  cinfo.err = jpeg_std_error(&jerr);
+  /* Now we can initialize the JPEG compression object. */
+  jpeg_create_compress(&cinfo);
+
+  /* Step 2: specify data destination (eg, a file) */
+  /* Note: steps 2 and 3 can be done in either order. */
+
+  /* Here we use the library-supplied code to send compressed data to a
+   * stdio stream.  You can also write your own code to do something else.
+   * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
+   * requires it in order to write binary files.
+   */
+  if ((outfile = fopen(filename, "wb")) == NULL)
+    ERREXIT(&cinfo, JERR_FILE_WRITE);
+  jpeg_stdio_dest(&cinfo, outfile);
+
+  /* Step 3: set parameters for compression */
+
+  /* First we supply a description of the input image.
+   * Four fields of the cinfo struct must be filled in:
+   */
+  cinfo.image_width = WIDTH;            /* image width and height, in pixels */
+  cinfo.image_height = HEIGHT;
+  cinfo.input_components = 3;           /* # of color components per pixel */
+  cinfo.in_color_space = JCS_RGB;       /* colorspace of input image */
+  cinfo.data_precision = data_precision; /* data precision of input image */
+  /* Now use the library's routine to set default compression parameters.
+   * (You must set at least cinfo.in_color_space before calling this,
+   * since the defaults depend on the source color space.)
+   */
+  jpeg_set_defaults(&cinfo);
+  /* Now you can set any non-default parameters you wish to.
+   * Here we just illustrate the use of quality (quantization table) scaling:
+   */
+  jpeg_set_quality(&cinfo, quality, TRUE /* limit to baseline-JPEG values */);
+  /* Use 4:4:4 subsampling (default is 4:2:0) */
+  cinfo.comp_info[0].h_samp_factor = cinfo.comp_info[0].v_samp_factor = 1;
+
+  /* Step 4: Start compressor */
+
+  /* TRUE ensures that we will write a complete interchange-JPEG file.
+   * Pass TRUE unless you are very sure of what you're doing.
+   */
+  jpeg_start_compress(&cinfo, TRUE);
+
+  /* Step 5: allocate and initialize image buffer */
+
+  row_stride = WIDTH * 3;       /* J[12]SAMPLEs per row in image_buffer */
+  /* Make a sample array that will go away when done with image.  Note that,
+   * for the purposes of this example, we could also create a one-row-high
+   * sample array and initialize it for each successive scanline written in the
+   * scanline loop below.
+   */
+  if (cinfo.data_precision == 12) {
+    image_buffer12 = (J12SAMPARRAY)(*cinfo.mem->alloc_sarray)
+      ((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, HEIGHT);
+
+    /* Initialize image buffer with a repeating pattern */
+    for (row = 0; row < HEIGHT; row++) {
+      for (col = 0; col < WIDTH; col++) {
+        image_buffer12[row][col * 3] =
+          (col * (MAXJ12SAMPLE + 1) / WIDTH) % (MAXJ12SAMPLE + 1);
+        image_buffer12[row][col * 3 + 1] =
+          (row * (MAXJ12SAMPLE + 1) / HEIGHT) % (MAXJ12SAMPLE + 1);
+        image_buffer12[row][col * 3 + 2] =
+          (row * (MAXJ12SAMPLE + 1) / HEIGHT +
+           col * (MAXJ12SAMPLE + 1) / WIDTH) % (MAXJ12SAMPLE + 1);
+      }
+    }
+  } else {
+    image_buffer = (*cinfo.mem->alloc_sarray)
+      ((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, HEIGHT);
+
+    for (row = 0; row < HEIGHT; row++) {
+      for (col = 0; col < WIDTH; col++) {
+        image_buffer[row][col * 3] =
+          (col * (MAXJSAMPLE + 1) / WIDTH) % (MAXJSAMPLE + 1);
+        image_buffer[row][col * 3 + 1] =
+          (row * (MAXJSAMPLE + 1) / HEIGHT) % (MAXJSAMPLE + 1);
+        image_buffer[row][col * 3 + 2] =
+          (row * (MAXJSAMPLE + 1) / HEIGHT + col * (MAXJSAMPLE + 1) / WIDTH) %
+          (MAXJSAMPLE + 1);
+      }
+    }
+  }
+
+  /* Step 6: while (scan lines remain to be written) */
+  /*           jpeg_write_scanlines(...); */
+
+  /* Here we use the library's state variable cinfo.next_scanline as the
+   * loop counter, so that we don't have to keep track ourselves.
+   * To keep things simple, we pass one scanline per call; you can pass
+   * more if you wish, though.
+   */
+  if (cinfo.data_precision == 12) {
+    while (cinfo.next_scanline < cinfo.image_height) {
+      /* jpeg12_write_scanlines expects an array of pointers to scanlines.
+       * Here the array is only one element long, but you could pass
+       * more than one scanline at a time if that's more convenient.
+       */
+      row_pointer12[0] = image_buffer12[cinfo.next_scanline];
+      (void)jpeg12_write_scanlines(&cinfo, row_pointer12, 1);
+    }
+  } else {
+    while (cinfo.next_scanline < cinfo.image_height) {
+      /* jpeg_write_scanlines expects an array of pointers to scanlines.
+       * Here the array is only one element long, but you could pass
+       * more than one scanline at a time if that's more convenient.
+       */
+      row_pointer[0] = image_buffer[cinfo.next_scanline];
+      (void)jpeg_write_scanlines(&cinfo, row_pointer, 1);
+    }
+  }
+
+  /* Step 7: Finish compression */
+
+  jpeg_finish_compress(&cinfo);
+  /* After finish_compress, we can close the output file. */
+  fclose(outfile);
+
+  /* Step 8: release JPEG compression object */
+
+  /* This is an important step since it will release a good deal of memory. */
+  jpeg_destroy_compress(&cinfo);
+
+  /* And we're done! */
+}
+
+
+/*
+ * SOME FINE POINTS:
+ *
+ * In the above loop, we ignored the return value of jpeg_write_scanlines,
+ * which is the number of scanlines actually written.  We could get away
+ * with this because we were only relying on the value of cinfo.next_scanline,
+ * which will be incremented correctly.  If you maintain additional loop
+ * variables then you should be careful to increment them properly.
+ * Actually, for output to a stdio stream you needn't worry, because
+ * then jpeg_write_scanlines will write all the lines passed (or else exit
+ * with a fatal error).  Partial writes can only occur if you use a data
+ * destination module that can demand suspension of the compressor.
+ * (If you don't know what that's for, you don't need it.)
+ *
+ * Scanlines MUST be supplied in top-to-bottom order if you want your JPEG
+ * files to be compatible with everyone else's.  If you cannot readily read
+ * your data in that order, you'll need an intermediate array to hold the
+ * image.  See rdtarga.c or rdbmp.c for examples of handling bottom-to-top
+ * source data using the JPEG code's internal virtual-array mechanisms.
+ */
+
+
+
+/******************** JPEG DECOMPRESSION SAMPLE INTERFACE *******************/
+
+/* This half of the example shows how to read data from the JPEG decompressor.
+ * It's a bit more refined than the above, in that we show:
+ *   (a) how to modify the JPEG library's standard error-reporting behavior;
+ *   (b) how to allocate workspace using the library's memory manager.
+ *
+ * Just to make this example a little different from the first one, we'll
+ * assume that we do not intend to put the whole image into an in-memory
+ * buffer, but to send it line-by-line someplace else.  We need a one-
+ * scanline-high JSAMPLE or J12SAMPLE array as a work buffer, and we will let
+ * the JPEG memory manager allocate it for us.  This approach is actually quite
+ * useful because we don't need to remember to deallocate the buffer
+ * separately: it will go away automatically when the JPEG object is cleaned
+ * up.
+ */
+
+
+/*
+ * ERROR HANDLING:
+ *
+ * The JPEG library's standard error handler (jerror.c) is divided into
+ * several "methods" which you can override individually.  This lets you
+ * adjust the behavior without duplicating a lot of code, which you might
+ * have to update with each future release.
+ *
+ * Our example here shows how to override the "error_exit" method so that
+ * control is returned to the library's caller when a fatal error occurs,
+ * rather than calling exit() as the standard error_exit method does.
+ *
+ * We use C's setjmp/longjmp facility to return control.  This means that the
+ * routine which calls the JPEG library must first execute a setjmp() call to
+ * establish the return point.  We want the replacement error_exit to do a
+ * longjmp().  But we need to make the setjmp buffer accessible to the
+ * error_exit routine.  To do this, we make a private extension of the
+ * standard JPEG error handler object.  (If we were using C++, we'd say we
+ * were making a subclass of the regular error handler.)
+ *
+ * Here's the extended error handler struct:
+ */
+
+struct my_error_mgr {
+  struct jpeg_error_mgr pub;    /* "public" fields */
+
+  jmp_buf setjmp_buffer;        /* for return to caller */
+};
+
+typedef struct my_error_mgr *my_error_ptr;
+
+/*
+ * Here's the routine that will replace the standard error_exit method:
+ */
+
+METHODDEF(void)
+my_error_exit(j_common_ptr cinfo)
+{
+  /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */
+  my_error_ptr myerr = (my_error_ptr)cinfo->err;
+
+  /* Always display the message. */
+  /* We could postpone this until after returning, if we chose. */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Return control to the setjmp point */
+  longjmp(myerr->setjmp_buffer, 1);
+}
+
+
+METHODDEF(int) do_read_JPEG_file(struct jpeg_decompress_struct *cinfo,
+                                 char *infilename, char *outfilename);
+
+/*
+ * Sample routine for JPEG decompression.  We assume that the source file name
+ * is passed in.  We want to return 1 on success, 0 on error.
+ */
+
+METHODDEF(int)
+read_JPEG_file(char *infilename, char *outfilename)
+{
+  /* This struct contains the JPEG decompression parameters and pointers to
+   * working space (which is allocated as needed by the JPEG library).
+   */
+  struct jpeg_decompress_struct cinfo;
+
+  return do_read_JPEG_file(&cinfo, infilename, outfilename);
+}
+
+/*
+ * We call the libjpeg API from within a separate function, because modifying
+ * the local non-volatile jpeg_decompress_struct instance below the setjmp()
+ * return point and then accessing the instance after setjmp() returns would
+ * result in undefined behavior that may potentially overwrite all or part of
+ * the structure.
+ */
+
+METHODDEF(int)
+do_read_JPEG_file(struct jpeg_decompress_struct *cinfo, char *infilename,
+                  char *outfilename)
+{
+  /* We use our private extension JPEG error handler.
+   * Note that this struct must live as long as the main JPEG parameter
+   * struct, to avoid dangling-pointer problems.
+   */
+  struct my_error_mgr jerr;
+  /* More stuff */
+  FILE *infile;                 /* source file */
+  FILE *outfile;                /* output file */
+  JSAMPARRAY buffer = NULL;     /* Output row buffer */
+  J12SAMPARRAY buffer12 = NULL; /* 12-bit output row buffer */
+  int col;
+  int row_stride;               /* physical row width in output buffer */
+  int little_endian = 1;
+
+  /* In this example we want to open the input and output files before doing
+   * anything else, so that the setjmp() error recovery below can assume the
+   * files are open.
+   *
+   * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
+   * requires it in order to read/write binary files.
+   */
+
+  if ((infile = fopen(infilename, "rb")) == NULL) {
+    fprintf(stderr, "can't open %s\n", infilename);
+    return 0;
+  }
+  if ((outfile = fopen(outfilename, "wb")) == NULL) {
+    fprintf(stderr, "can't open %s\n", outfilename);
+    fclose(infile);
+    return 0;
+  }
+
+  /* Step 1: allocate and initialize JPEG decompression object */
+
+  /* We set up the normal JPEG error routines, then override error_exit. */
+  cinfo->err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = my_error_exit;
+  /* Establish the setjmp return context for my_error_exit to use. */
+  if (setjmp(jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error.
+     * We need to clean up the JPEG object, close the input file, and return.
+     */
+    jpeg_destroy_decompress(cinfo);
+    fclose(infile);
+    fclose(outfile);
+    return 0;
+  }
+  /* Now we can initialize the JPEG decompression object. */
+  jpeg_create_decompress(cinfo);
+
+  /* Step 2: specify data source (eg, a file) */
+
+  jpeg_stdio_src(cinfo, infile);
+
+  /* Step 3: read file parameters with jpeg_read_header() */
+
+  (void)jpeg_read_header(cinfo, TRUE);
+  /* We can ignore the return value from jpeg_read_header since
+   *   (a) suspension is not possible with the stdio data source, and
+   *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
+   * See libjpeg.txt for more info.
+   */
+
+  /* emit header for raw PPM format */
+  fprintf(outfile, "P6\n%d %d\n%d\n", WIDTH, HEIGHT,
+          cinfo->data_precision == 12 ? MAXJ12SAMPLE : MAXJSAMPLE);
+
+  /* Step 4: set parameters for decompression */
+
+  /* In this example, we don't need to change any of the defaults set by
+   * jpeg_read_header(), so we do nothing here.
+   */
+
+  /* Step 5: Start decompressor */
+
+  (void)jpeg_start_decompress(cinfo);
+  /* We can ignore the return value since suspension is not possible
+   * with the stdio data source.
+   */
+
+  /* We may need to do some setup of our own at this point before reading
+   * the data.  After jpeg_start_decompress() we have the correct scaled
+   * output image dimensions available, as well as the output colormap
+   * if we asked for color quantization.
+   * In this example, we need to make an output work buffer of the right size.
+   */
+  /* Samples per row in output buffer */
+  row_stride = cinfo->output_width * cinfo->output_components;
+  /* Make a one-row-high sample array that will go away when done with image */
+  if (cinfo->data_precision == 12)
+    buffer12 = (J12SAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, row_stride, 1);
+  else
+    buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, row_stride, 1);
+
+  /* Step 6: while (scan lines remain to be read) */
+  /*           jpeg_read_scanlines(...); */
+
+  /* Here we use the library's state variable cinfo->output_scanline as the
+   * loop counter, so that we don't have to keep track ourselves.
+   */
+  if (cinfo->data_precision == 12) {
+    while (cinfo->output_scanline < cinfo->output_height) {
+      /* jpeg12_read_scanlines expects an array of pointers to scanlines.
+       * Here the array is only one element long, but you could ask for
+       * more than one scanline at a time if that's more convenient.
+       */
+      (void)jpeg12_read_scanlines(cinfo, buffer12, 1);
+      if (*(char *)&little_endian == 1) {
+        /* Swap MSB and LSB in each sample */
+        for (col = 0; col < row_stride; col++)
+          buffer12[0][col] = ((buffer12[0][col] & 0xFF) << 8) |
+                             ((buffer12[0][col] >> 8) & 0xFF);
+      }
+      fwrite(buffer12[0], 1, row_stride * sizeof(J12SAMPLE), outfile);
+    }
+  } else {
+    while (cinfo->output_scanline < cinfo->output_height) {
+      /* jpeg_read_scanlines expects an array of pointers to scanlines.
+       * Here the array is only one element long, but you could ask for
+       * more than one scanline at a time if that's more convenient.
+       */
+      (void)jpeg_read_scanlines(cinfo, buffer, 1);
+      fwrite(buffer[0], 1, row_stride, outfile);
+    }
+  }
+
+  /* Step 7: Finish decompression */
+
+  (void)jpeg_finish_decompress(cinfo);
+  /* We can ignore the return value since suspension is not possible
+   * with the stdio data source.
+   */
+
+  /* Step 8: Release JPEG decompression object */
+
+  /* This is an important step since it will release a good deal of memory. */
+  jpeg_destroy_decompress(cinfo);
+
+  /* After finish_decompress, we can close the input and output files.
+   * Here we postpone it until after no more JPEG errors are possible,
+   * so as to simplify the setjmp error logic above.  (Actually, I don't
+   * think that jpeg_destroy can do an error exit, but why assume anything...)
+   */
+  fclose(infile);
+  fclose(outfile);
+
+  /* At this point you may want to check to see whether any corrupt-data
+   * warnings occurred (test whether jerr.pub.num_warnings is nonzero).
+   */
+
+  /* And we're done! */
+  return 1;
+}
+
+
+/*
+ * SOME FINE POINTS:
+ *
+ * In the above code, we ignored the return value of jpeg_read_scanlines,
+ * which is the number of scanlines actually read.  We could get away with
+ * this because we asked for only one line at a time and we weren't using
+ * a suspending data source.  See libjpeg.txt for more info.
+ *
+ * We cheated a bit by calling alloc_sarray() after jpeg_start_decompress();
+ * we should have done it beforehand to ensure that the space would be
+ * counted against the JPEG max_memory setting.  In some systems the above
+ * code would risk an out-of-memory error.  However, in general we don't
+ * know the output image dimensions before jpeg_start_decompress(), unless we
+ * call jpeg_calc_output_dimensions().  See libjpeg.txt for more about this.
+ *
+ * Scanlines are returned in the same order as they appear in the JPEG file,
+ * which is standardly top-to-bottom.  If you must emit data bottom-to-top,
+ * you can use one of the virtual arrays provided by the JPEG memory manager
+ * to invert the data.  See wrbmp.c for an example.
+ */
+
+
+LOCAL(void)
+usage(const char *progname)
+{
+  fprintf(stderr, "usage: %s compress [switches] outputfile[.jpg]\n",
+          progname);
+  fprintf(stderr, "       %s decompress inputfile[.jpg] outputfile[.ppm]\n",
+          progname);
+  fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -precision N   Create JPEG file with N-bit data precision\n");
+  fprintf(stderr, "                 (N is 8 or 12; default is 8)\n");
+  fprintf(stderr, "  -quality N     Compression quality (0..100; 5-95 is most useful range,\n");
+  fprintf(stderr, "                 default is 75)\n");
+
+  exit(EXIT_FAILURE);
+}
+
+
+typedef enum {
+  COMPRESS,
+  DECOMPRESS
+} EXAMPLE_MODE;
+
+
+int
+main(int argc, char **argv)
+{
+  int argn, quality = 75;
+  int data_precision = 8;
+  EXAMPLE_MODE mode = -1;
+  char *arg, *filename = NULL;
+
+  if (argc < 3)
+    usage(argv[0]);
+
+  if (!strcasecmp(argv[1], "compress"))
+    mode = COMPRESS;
+  else if (!strcasecmp(argv[1], "decompress"))
+    mode = DECOMPRESS;
+  else
+    usage(argv[0]);
+
+  for (argn = 2; argn < argc; argn++) {
+    arg = argv[argn];
+    if (*arg != '-') {
+      filename = arg;
+      /* Not a switch, must be a file name argument */
+      break;                    /* done parsing switches */
+    }
+    arg++;                      /* advance past switch marker character */
+
+    if (!strncasecmp(arg, "p", 1)) {
+      /* Set data precision. */
+      if (++argn >= argc)       /* advance to next argument */
+        usage(argv[0]);
+      if (sscanf(argv[argn], "%d", &data_precision) < 1 ||
+          (data_precision != 8 && data_precision != 12))
+        usage(argv[0]);
+    } else if (!strncasecmp(arg, "q", 1)) {
+      /* Quality rating (quantization table scaling factor). */
+      if (++argn >= argc)       /* advance to next argument */
+        usage(argv[0]);
+      if (sscanf(argv[argn], "%d", &quality) < 1 || quality < 0 ||
+          quality > 100)
+        usage(argv[0]);
+      if (quality < 1)
+        quality = 1;
+    }
+  }
+
+  if (!filename)
+    usage(argv[0]);
+
+  if (mode == COMPRESS)
+    write_JPEG_file(filename, quality, data_precision);
+  else if (mode == DECOMPRESS) {
+    if (argc - argn < 2)
+      usage(argv[0]);
+
+    read_JPEG_file(argv[argn], argv[argn + 1]);
+  }
+
+  return 0;
+}
diff --git a/3rdparty/libjpeg-turbo/src/jcapimin.c b/3rdparty/libjpeg-turbo/src/jcapimin.c
index 84e7ecc9a7..cbb3d13e1c 100644
--- a/3rdparty/libjpeg-turbo/src/jcapimin.c
+++ b/3rdparty/libjpeg-turbo/src/jcapimin.c
@@ -23,6 +23,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jcmaster.h"
 
 
 /*
@@ -90,8 +91,18 @@ jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize)
 
   cinfo->input_gamma = 1.0;     /* in case application forgets */
 
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+
   /* OK, I'm ready */
   cinfo->global_state = CSTATE_START;
+
+  /* The master struct is used to store extension parameters, so we allocate it
+   * here.
+   */
+  cinfo->master = (struct jpeg_comp_master *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_comp_master));
+  memset(cinfo->master, 0, sizeof(my_comp_master));
 }
 
 
@@ -183,8 +194,20 @@ jpeg_finish_compress(j_compress_ptr cinfo)
       /* We bypass the main controller and invoke coef controller directly;
        * all work is being done from the coefficient buffer.
        */
-      if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
-        ERREXIT(cinfo, JERR_CANT_SUSPEND);
+      if (cinfo->data_precision == 16) {
+#ifdef C_LOSSLESS_SUPPORTED
+        if (!(*cinfo->coef->compress_data_16) (cinfo, (J16SAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+#else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+      } else if (cinfo->data_precision == 12) {
+        if (!(*cinfo->coef->compress_data_12) (cinfo, (J12SAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+      } else {
+        if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+      }
     }
     (*cinfo->master->finish_pass) (cinfo);
   }
diff --git a/3rdparty/libjpeg-turbo/src/jcapistd.c b/3rdparty/libjpeg-turbo/src/jcapistd.c
index aa2aad9f66..2053028f2b 100644
--- a/3rdparty/libjpeg-turbo/src/jcapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jcapistd.c
@@ -1,8 +1,10 @@
 /*
  * jcapistd.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -18,8 +20,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE == 8
+
 /*
  * Compression initialization.
  * Before calling this, all parameters and a data destination must be set up.
@@ -51,13 +56,15 @@ jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables)
   jinit_compress_master(cinfo);
   /* Set up for the first pass */
   (*cinfo->master->prepare_for_pass) (cinfo);
-  /* Ready for application to drive first pass through jpeg_write_scanlines
-   * or jpeg_write_raw_data.
+  /* Ready for application to drive first pass through _jpeg_write_scanlines
+   * or _jpeg_write_raw_data.
    */
   cinfo->next_scanline = 0;
   cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
 }
 
+#endif
+
 
 /*
  * Write some scanlines of data to the JPEG compressor.
@@ -67,7 +74,7 @@ jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables)
  * the data destination module has requested suspension of the compressor,
  * or if more than image_height scanlines are passed in.
  *
- * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * Note: we warn about excess calls to _jpeg_write_scanlines() since
  * this likely signals an application programmer error.  However,
  * excess scanlines passed in the last valid call are *silently* ignored,
  * so that the application need not adjust num_lines for end-of-image
@@ -75,11 +82,15 @@ jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables)
  */
 
 GLOBAL(JDIMENSION)
-jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
-                     JDIMENSION num_lines)
+_jpeg_write_scanlines(j_compress_ptr cinfo, _JSAMPARRAY scanlines,
+                      JDIMENSION num_lines)
 {
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
   JDIMENSION row_ctr, rows_left;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   if (cinfo->global_state != CSTATE_SCANNING)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   if (cinfo->next_scanline >= cinfo->image_height)
@@ -93,9 +104,9 @@ jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
   }
 
   /* Give master control module another chance if this is first call to
-   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * _jpeg_write_scanlines.  This lets output of the frame/scan headers be
    * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_scanlines.
+   * jpeg_start_compress and _jpeg_write_scanlines.
    */
   if (cinfo->master->call_pass_startup)
     (*cinfo->master->pass_startup) (cinfo);
@@ -106,23 +117,35 @@ jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
     num_lines = rows_left;
 
   row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, num_lines);
   cinfo->next_scanline += row_ctr;
   return row_ctr;
+#else
+  ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  return 0;
+#endif
 }
 
 
+#if BITS_IN_JSAMPLE != 16
+
 /*
  * Alternate entry point to write raw data.
  * Processes exactly one iMCU row per call, unless suspended.
  */
 
 GLOBAL(JDIMENSION)
-jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
-                    JDIMENSION num_lines)
+_jpeg_write_raw_data(j_compress_ptr cinfo, _JSAMPIMAGE data,
+                     JDIMENSION num_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   if (cinfo->global_state != CSTATE_RAW_OK)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   if (cinfo->next_scanline >= cinfo->image_height) {
@@ -138,9 +161,9 @@ jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
   }
 
   /* Give master control module another chance if this is first call to
-   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * _jpeg_write_raw_data.  This lets output of the frame/scan headers be
    * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_raw_data.
+   * jpeg_start_compress and _jpeg_write_raw_data.
    */
   if (cinfo->master->call_pass_startup)
     (*cinfo->master->pass_startup) (cinfo);
@@ -151,7 +174,7 @@ jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
   /* Directly compress the row. */
-  if (!(*cinfo->coef->compress_data) (cinfo, data)) {
+  if (!(*cinfo->coef->_compress_data) (cinfo, data)) {
     /* If compressor did not consume the whole row, suspend processing. */
     return 0;
   }
@@ -160,3 +183,5 @@ jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
   cinfo->next_scanline += lines_per_iMCU_row;
   return lines_per_iMCU_row;
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 */
diff --git a/3rdparty/libjpeg-turbo/src/jccoefct.c b/3rdparty/libjpeg-turbo/src/jccoefct.c
index 068232a527..2a5dde2d07 100644
--- a/3rdparty/libjpeg-turbo/src/jccoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jccoefct.c
@@ -3,19 +3,20 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code and
- * information relevant to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
  * This file contains the coefficient buffer controller for compression.
- * This controller is the top level of the JPEG compressor proper.
+ * This controller is the top level of the lossy JPEG compressor proper.
  * The coefficient buffer lies between forward-DCT and entropy encoding steps.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
 /* We use a full-image coefficient buffer when doing Huffman optimization,
@@ -58,11 +59,12 @@ typedef my_coef_controller *my_coef_ptr;
 
 
 /* Forward declarations */
-METHODDEF(boolean) compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf);
+METHODDEF(boolean) compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf);
 #ifdef FULL_COEF_BUFFER_SUPPORTED
 METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo,
-                                       JSAMPIMAGE input_buf);
-METHODDEF(boolean) compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf);
+                                       _JSAMPIMAGE input_buf);
+METHODDEF(boolean) compress_output(j_compress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf);
 #endif
 
 
@@ -106,18 +108,18 @@ start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
   case JBUF_PASS_THRU:
     if (coef->whole_image[0] != NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_data;
+    coef->pub._compress_data = compress_data;
     break;
 #ifdef FULL_COEF_BUFFER_SUPPORTED
   case JBUF_SAVE_AND_PASS:
     if (coef->whole_image[0] == NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_first_pass;
+    coef->pub._compress_data = compress_first_pass;
     break;
   case JBUF_CRANK_DEST:
     if (coef->whole_image[0] == NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_output;
+    coef->pub._compress_data = compress_output;
     break;
 #endif
   default:
@@ -138,7 +140,7 @@ start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
  */
 
 METHODDEF(boolean)
-compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION MCU_col_num;       /* index of current MCU within row */
@@ -172,10 +174,10 @@ compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
         for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
           if (coef->iMCU_row_num < last_iMCU_row ||
               yoffset + yindex < compptr->last_row_height) {
-            (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-                                         input_buf[compptr->component_index],
-                                         coef->MCU_buffer[blkn],
-                                         ypos, xpos, (JDIMENSION)blockcnt);
+            (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
+                                          input_buf[compptr->component_index],
+                                          coef->MCU_buffer[blkn],
+                                          ypos, xpos, (JDIMENSION)blockcnt);
             if (blockcnt < compptr->MCU_width) {
               /* Create some dummy blocks at the right edge of the image. */
               jzero_far((void *)coef->MCU_buffer[blkn + blockcnt],
@@ -242,7 +244,7 @@ compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
  */
 
 METHODDEF(boolean)
-compress_first_pass(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+compress_first_pass(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
@@ -279,10 +281,10 @@ compress_first_pass(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
      */
     for (block_row = 0; block_row < block_rows; block_row++) {
       thisblockrow = buffer[block_row];
-      (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-                                   input_buf[ci], thisblockrow,
-                                   (JDIMENSION)(block_row * DCTSIZE),
-                                   (JDIMENSION)0, blocks_across);
+      (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
+                                    input_buf[ci], thisblockrow,
+                                    (JDIMENSION)(block_row * DCTSIZE),
+                                    (JDIMENSION)0, blocks_across);
       if (ndummy > 0) {
         /* Create dummy blocks at the right edge of the image. */
         thisblockrow += blocks_across; /* => first dummy block */
@@ -338,7 +340,7 @@ compress_first_pass(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
  */
 
 METHODDEF(boolean)
-compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+compress_output(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION MCU_col_num;       /* index of current MCU within row */
@@ -402,10 +404,13 @@ compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
  */
 
 GLOBAL(void)
-jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+_jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer)
 {
   my_coef_ptr coef;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_coef_controller));
diff --git a/3rdparty/libjpeg-turbo/src/jccolext.c b/3rdparty/libjpeg-turbo/src/jccolext.c
index 303b322ce6..8eba36c4df 100644
--- a/3rdparty/libjpeg-turbo/src/jccolext.c
+++ b/3rdparty/libjpeg-turbo/src/jccolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2012, 2015, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -29,15 +29,16 @@
 
 INLINE
 LOCAL(void)
-rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                         JSAMPIMAGE output_buf, JDIMENSION output_row,
+rgb_ycc_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                         _JSAMPIMAGE output_buf, JDIMENSION output_row,
                          int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int r, g, b;
   register JLONG *ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->image_width;
 
@@ -48,26 +49,29 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr2 = output_buf[2][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = inptr[RGB_RED];
-      g = inptr[RGB_GREEN];
-      b = inptr[RGB_BLUE];
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
       inptr += RGB_PIXELSIZE;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+      /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
        * Hence the value being shifted is never negative, and we don't
        * need the general RIGHT_SHIFT macro.
        */
       /* Y */
-      outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
-                                ctab[b + B_Y_OFF]) >> SCALEBITS);
+      outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                 ctab[b + B_Y_OFF]) >> SCALEBITS);
       /* Cb */
-      outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
-                                ctab[b + B_CB_OFF]) >> SCALEBITS);
+      outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
+                                 ctab[b + B_CB_OFF]) >> SCALEBITS);
       /* Cr */
-      outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
-                                ctab[b + B_CR_OFF]) >> SCALEBITS);
+      outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
+                                 ctab[b + B_CR_OFF]) >> SCALEBITS);
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -83,15 +87,16 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 
 INLINE
 LOCAL(void)
-rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                          JSAMPIMAGE output_buf, JDIMENSION output_row,
+rgb_gray_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                          _JSAMPIMAGE output_buf, JDIMENSION output_row,
                           int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int r, g, b;
   register JLONG *ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->image_width;
 
@@ -100,15 +105,18 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = inptr[RGB_RED];
-      g = inptr[RGB_GREEN];
-      b = inptr[RGB_BLUE];
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
       inptr += RGB_PIXELSIZE;
       /* Y */
-      outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
-                               ctab[b + B_Y_OFF]) >> SCALEBITS);
+      outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                ctab[b + B_Y_OFF]) >> SCALEBITS);
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -119,12 +127,12 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
 
 INLINE
 LOCAL(void)
-rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                         JSAMPIMAGE output_buf, JDIMENSION output_row,
+rgb_rgb_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                         _JSAMPIMAGE output_buf, JDIMENSION output_row,
                          int num_rows)
 {
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->image_width;
 
diff --git a/3rdparty/libjpeg-turbo/src/jccolor.c b/3rdparty/libjpeg-turbo/src/jccolor.c
index bdc563c723..cd3a6a7a56 100644
--- a/3rdparty/libjpeg-turbo/src/jccolor.c
+++ b/3rdparty/libjpeg-turbo/src/jccolor.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2012, 2015, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander.
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -17,16 +17,20 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
 /* Private subobject */
 
 typedef struct {
   struct jpeg_color_converter pub; /* public fields */
 
+#if BITS_IN_JSAMPLE != 16
   /* Private state for RGB->YCC conversion */
   JLONG *rgb_ycc_tab;           /* => table for RGB to YCbCr conversion */
+#endif
 } my_color_converter;
 
 typedef my_color_converter *my_cconvert_ptr;
@@ -36,14 +40,14 @@ typedef my_color_converter *my_cconvert_ptr;
 
 /*
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
  *      Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + _CENTERJSAMPLE
+ *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + _CENTERJSAMPLE
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
- * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
- * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * Note: older versions of the IJG code used a zero offset of _MAXJSAMPLE/2,
+ * rather than _CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
  * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
  * were not represented exactly.  Now we sacrifice exact representation of
  * maximum red and maximum blue in order to get exact grayscales.
@@ -54,16 +58,16 @@ typedef my_color_converter *my_cconvert_ptr;
  *
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times R,G,B for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * For 8-bit samples this is very reasonable (only 256 entries per table);
  * for 12-bit samples it is still acceptable.  It's not very reasonable for
  * 16-bit samples, but if you want lossless storage you shouldn't be changing
  * colorspace anyway.
- * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * The _CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
  * in the tables to save adding them separately in the inner loop.
  */
 
 #define SCALEBITS       16      /* speediest right-shift on some machines */
-#define CBCR_OFFSET     ((JLONG)CENTERJSAMPLE << SCALEBITS)
+#define CBCR_OFFSET     ((JLONG)_CENTERJSAMPLE << SCALEBITS)
 #define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
 #define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
 
@@ -74,15 +78,27 @@ typedef my_color_converter *my_cconvert_ptr;
  */
 
 #define R_Y_OFF         0                       /* offset to R => Y section */
-#define G_Y_OFF         (1 * (MAXJSAMPLE + 1))  /* offset to G => Y section */
-#define B_Y_OFF         (2 * (MAXJSAMPLE + 1))  /* etc. */
-#define R_CB_OFF        (3 * (MAXJSAMPLE + 1))
-#define G_CB_OFF        (4 * (MAXJSAMPLE + 1))
-#define B_CB_OFF        (5 * (MAXJSAMPLE + 1))
+#define G_Y_OFF         (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
+#define B_Y_OFF         (2 * (_MAXJSAMPLE + 1)) /* etc. */
+#define R_CB_OFF        (3 * (_MAXJSAMPLE + 1))
+#define G_CB_OFF        (4 * (_MAXJSAMPLE + 1))
+#define B_CB_OFF        (5 * (_MAXJSAMPLE + 1))
 #define R_CR_OFF        B_CB_OFF                /* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF        (6 * (MAXJSAMPLE + 1))
-#define B_CR_OFF        (7 * (MAXJSAMPLE + 1))
-#define TABLE_SIZE      (8 * (MAXJSAMPLE + 1))
+#define G_CR_OFF        (6 * (_MAXJSAMPLE + 1))
+#define B_CR_OFF        (7 * (_MAXJSAMPLE + 1))
+#define TABLE_SIZE      (8 * (_MAXJSAMPLE + 1))
+
+/* 12-bit samples use a 16-bit data type, so it is possible to pass
+ * out-of-range sample values (< 0 or > 4095) to jpeg_write_scanlines().
+ * Thus, we mask the incoming 12-bit samples to guard against overrunning
+ * or underrunning the conversion tables.
+ */
+
+#if BITS_IN_JSAMPLE == 12
+#define RANGE_LIMIT(value)  ((value) & 0xFFF)
+#else
+#define RANGE_LIMIT(value)  (value)
+#endif
 
 
 /* Include inline routines for colorspace extensions */
@@ -197,6 +213,7 @@ typedef my_color_converter *my_cconvert_ptr;
 METHODDEF(void)
 rgb_ycc_start(j_compress_ptr cinfo)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   JLONG *rgb_ycc_tab;
   JLONG i;
@@ -206,15 +223,15 @@ rgb_ycc_start(j_compress_ptr cinfo)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 (TABLE_SIZE * sizeof(JLONG)));
 
-  for (i = 0; i <= MAXJSAMPLE; i++) {
+  for (i = 0; i <= _MAXJSAMPLE; i++) {
     rgb_ycc_tab[i + R_Y_OFF] = FIX(0.29900) * i;
     rgb_ycc_tab[i + G_Y_OFF] = FIX(0.58700) * i;
     rgb_ycc_tab[i + B_Y_OFF] = FIX(0.11400) * i   + ONE_HALF;
     rgb_ycc_tab[i + R_CB_OFF] = (-FIX(0.16874)) * i;
     rgb_ycc_tab[i + G_CB_OFF] = (-FIX(0.33126)) * i;
     /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
-     * This ensures that the maximum output will round to MAXJSAMPLE
-     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     * This ensures that the maximum output will round to _MAXJSAMPLE
+     * not _MAXJSAMPLE+1, and thus that we don't have to range-limit.
      */
     rgb_ycc_tab[i + B_CB_OFF] = FIX(0.50000) * i  + CBCR_OFFSET + ONE_HALF - 1;
 /*  B=>Cb and R=>Cr tables are the same
@@ -223,6 +240,9 @@ rgb_ycc_start(j_compress_ptr cinfo)
     rgb_ycc_tab[i + G_CR_OFF] = (-FIX(0.41869)) * i;
     rgb_ycc_tab[i + B_CR_OFF] = (-FIX(0.08131)) * i;
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -231,8 +251,8 @@ rgb_ycc_start(j_compress_ptr cinfo)
  */
 
 METHODDEF(void)
-rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+rgb_ycc_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
@@ -279,8 +299,8 @@ rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                 JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+rgb_gray_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                 _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
@@ -324,8 +344,8 @@ rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-rgb_rgb_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+rgb_rgb_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
@@ -373,14 +393,15 @@ rgb_rgb_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                  JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+cmyk_ycck_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                  _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int r, g, b;
   register JLONG *ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2, outptr3;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->image_width;
 
@@ -392,28 +413,31 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr3 = output_buf[3][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - inptr[0];
-      g = MAXJSAMPLE - inptr[1];
-      b = MAXJSAMPLE - inptr[2];
+      r = _MAXJSAMPLE - RANGE_LIMIT(inptr[0]);
+      g = _MAXJSAMPLE - RANGE_LIMIT(inptr[1]);
+      b = _MAXJSAMPLE - RANGE_LIMIT(inptr[2]);
       /* K passes through as-is */
       outptr3[col] = inptr[3];
       inptr += 4;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+      /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
        * Hence the value being shifted is never negative, and we don't
        * need the general RIGHT_SHIFT macro.
        */
       /* Y */
-      outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
-                                ctab[b + B_Y_OFF]) >> SCALEBITS);
+      outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                 ctab[b + B_Y_OFF]) >> SCALEBITS);
       /* Cb */
-      outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
-                                ctab[b + B_CB_OFF]) >> SCALEBITS);
+      outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
+                                 ctab[b + B_CB_OFF]) >> SCALEBITS);
       /* Cr */
-      outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
-                                ctab[b + B_CR_OFF]) >> SCALEBITS);
+      outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
+                                 ctab[b + B_CR_OFF]) >> SCALEBITS);
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -424,11 +448,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                  JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+grayscale_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                  _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->image_width;
   int instride = cinfo->input_components;
@@ -452,11 +476,11 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-             JDIMENSION output_row, int num_rows)
+null_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+             _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
 {
-  register JSAMPROW inptr;
-  register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
   register JDIMENSION col;
   register int ci;
   int nc = cinfo->num_components;
@@ -524,10 +548,13 @@ null_method(j_compress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_color_converter(j_compress_ptr cinfo)
+_jinit_color_converter(j_compress_ptr cinfo)
 {
   my_cconvert_ptr cconvert;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_color_converter));
@@ -574,123 +601,116 @@ jinit_color_converter(j_compress_ptr cinfo)
     break;
   }
 
-  /* Check num_components, set conversion method based on requested space */
+  /* Check num_components, set conversion method based on requested space.
+   * NOTE: We do not allow any lossy color conversion algorithms in lossless
+   * mode.
+   */
   switch (cinfo->jpeg_color_space) {
   case JCS_GRAYSCALE:
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     if (cinfo->num_components != 1)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (cinfo->in_color_space == JCS_GRAYSCALE)
-      cconvert->pub.color_convert = grayscale_convert;
-    else if (cinfo->in_color_space == JCS_RGB ||
-             cinfo->in_color_space == JCS_EXT_RGB ||
-             cinfo->in_color_space == JCS_EXT_RGBX ||
-             cinfo->in_color_space == JCS_EXT_BGR ||
-             cinfo->in_color_space == JCS_EXT_BGRX ||
-             cinfo->in_color_space == JCS_EXT_XBGR ||
-             cinfo->in_color_space == JCS_EXT_XRGB ||
-             cinfo->in_color_space == JCS_EXT_RGBA ||
-             cinfo->in_color_space == JCS_EXT_BGRA ||
-             cinfo->in_color_space == JCS_EXT_ABGR ||
-             cinfo->in_color_space == JCS_EXT_ARGB) {
+      cconvert->pub._color_convert = grayscale_convert;
+    else if (IsExtRGB(cinfo->in_color_space)) {
+#ifdef WITH_SIMD
       if (jsimd_can_rgb_gray())
-        cconvert->pub.color_convert = jsimd_rgb_gray_convert;
-      else {
+        cconvert->pub._color_convert = jsimd_rgb_gray_convert;
+      else
+#endif
+      {
         cconvert->pub.start_pass = rgb_ycc_start;
-        cconvert->pub.color_convert = rgb_gray_convert;
+        cconvert->pub._color_convert = rgb_gray_convert;
       }
     } else if (cinfo->in_color_space == JCS_YCbCr)
-      cconvert->pub.color_convert = grayscale_convert;
+      cconvert->pub._color_convert = grayscale_convert;
     else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_RGB:
+    if (cinfo->master->lossless && !IsExtRGB(cinfo->in_color_space))
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (rgb_red[cinfo->in_color_space] == 0 &&
         rgb_green[cinfo->in_color_space] == 1 &&
         rgb_blue[cinfo->in_color_space] == 2 &&
         rgb_pixelsize[cinfo->in_color_space] == 3) {
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_c_can_null_convert())
-        cconvert->pub.color_convert = jsimd_c_null_convert;
+        cconvert->pub._color_convert = jsimd_c_null_convert;
       else
 #endif
-        cconvert->pub.color_convert = null_convert;
-    } else if (cinfo->in_color_space == JCS_RGB ||
-               cinfo->in_color_space == JCS_EXT_RGB ||
-               cinfo->in_color_space == JCS_EXT_RGBX ||
-               cinfo->in_color_space == JCS_EXT_BGR ||
-               cinfo->in_color_space == JCS_EXT_BGRX ||
-               cinfo->in_color_space == JCS_EXT_XBGR ||
-               cinfo->in_color_space == JCS_EXT_XRGB ||
-               cinfo->in_color_space == JCS_EXT_RGBA ||
-               cinfo->in_color_space == JCS_EXT_BGRA ||
-               cinfo->in_color_space == JCS_EXT_ABGR ||
-               cinfo->in_color_space == JCS_EXT_ARGB)
-      cconvert->pub.color_convert = rgb_rgb_convert;
+        cconvert->pub._color_convert = null_convert;
+    } else if (IsExtRGB(cinfo->in_color_space))
+      cconvert->pub._color_convert = rgb_rgb_convert;
     else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_YCbCr:
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB ||
-        cinfo->in_color_space == JCS_EXT_RGB ||
-        cinfo->in_color_space == JCS_EXT_RGBX ||
-        cinfo->in_color_space == JCS_EXT_BGR ||
-        cinfo->in_color_space == JCS_EXT_BGRX ||
-        cinfo->in_color_space == JCS_EXT_XBGR ||
-        cinfo->in_color_space == JCS_EXT_XRGB ||
-        cinfo->in_color_space == JCS_EXT_RGBA ||
-        cinfo->in_color_space == JCS_EXT_BGRA ||
-        cinfo->in_color_space == JCS_EXT_ABGR ||
-        cinfo->in_color_space == JCS_EXT_ARGB) {
+    if (IsExtRGB(cinfo->in_color_space)) {
+#ifdef WITH_SIMD
       if (jsimd_can_rgb_ycc())
-        cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
-      else {
-        cconvert->pub.start_pass = rgb_ycc_start;
-        cconvert->pub.color_convert = rgb_ycc_convert;
-      }
-    } else if (cinfo->in_color_space == JCS_YCbCr) {
-#if defined(__mips__)
-      if (jsimd_c_can_null_convert())
-        cconvert->pub.color_convert = jsimd_c_null_convert;
+        cconvert->pub._color_convert = jsimd_rgb_ycc_convert;
       else
 #endif
-        cconvert->pub.color_convert = null_convert;
+      {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub._color_convert = rgb_ycc_convert;
+      }
+    } else if (cinfo->in_color_space == JCS_YCbCr) {
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub._color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub._color_convert = null_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_CMYK:
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (cinfo->in_color_space == JCS_CMYK) {
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_c_can_null_convert())
-        cconvert->pub.color_convert = jsimd_c_null_convert;
+        cconvert->pub._color_convert = jsimd_c_null_convert;
       else
 #endif
-        cconvert->pub.color_convert = null_convert;
+        cconvert->pub._color_convert = null_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_YCCK:
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (cinfo->in_color_space == JCS_CMYK) {
       cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = cmyk_ycck_convert;
+      cconvert->pub._color_convert = cmyk_ycck_convert;
     } else if (cinfo->in_color_space == JCS_YCCK) {
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_c_can_null_convert())
-        cconvert->pub.color_convert = jsimd_c_null_convert;
+        cconvert->pub._color_convert = jsimd_c_null_convert;
       else
 #endif
-        cconvert->pub.color_convert = null_convert;
+        cconvert->pub._color_convert = null_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
@@ -699,12 +719,14 @@ jinit_color_converter(j_compress_ptr cinfo)
     if (cinfo->jpeg_color_space != cinfo->in_color_space ||
         cinfo->num_components != cinfo->input_components)
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
     if (jsimd_c_can_null_convert())
-      cconvert->pub.color_convert = jsimd_c_null_convert;
+      cconvert->pub._color_convert = jsimd_c_null_convert;
     else
 #endif
-      cconvert->pub.color_convert = null_convert;
+      cconvert->pub._color_convert = null_convert;
     break;
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jcdctmgr.c b/3rdparty/libjpeg-turbo/src/jcdctmgr.c
index 7dae17a6e1..7191ee7316 100644
--- a/3rdparty/libjpeg-turbo/src/jcdctmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jcdctmgr.c
@@ -6,7 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2011, 2014-2015, D. R. Commander.
+ * Copyright (C) 2011, 2014-2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -28,10 +28,10 @@
 typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
 typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
 
-typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
+typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
                                      JDIMENSION start_col,
                                      DCTELEM *workspace);
-typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
+typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
                                            JDIMENSION start_col,
                                            FAST_FLOAT *workspace);
 
@@ -265,9 +265,13 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
       dtbl = fdct->divisors[qtblno];
       for (i = 0; i < DCTSIZE2; i++) {
 #if BITS_IN_JSAMPLE == 8
+#ifdef WITH_SIMD
         if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
             fdct->quantize == jsimd_quantize)
           fdct->quantize = quantize;
+#else
+        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
+#endif
 #else
         dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
 #endif
@@ -305,12 +309,19 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
         dtbl = fdct->divisors[qtblno];
         for (i = 0; i < DCTSIZE2; i++) {
 #if BITS_IN_JSAMPLE == 8
+#ifdef WITH_SIMD
           if (!compute_reciprocal(
                 DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
                                       (JLONG)aanscales[i]),
                         CONST_BITS - 3), &dtbl[i]) &&
               fdct->quantize == jsimd_quantize)
             fdct->quantize = quantize;
+#else
+          compute_reciprocal(
+            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
+                                  (JLONG)aanscales[i]),
+                    CONST_BITS-3), &dtbl[i]);
+#endif
 #else
           dtbl[i] = (DCTELEM)
             DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
@@ -370,10 +381,10 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
  */
 
 METHODDEF(void)
-convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
+convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
 {
   register DCTELEM *workspaceptr;
-  register JSAMPROW elemptr;
+  register _JSAMPROW elemptr;
   register int elemr;
 
   workspaceptr = workspace;
@@ -381,19 +392,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
     elemptr = sample_data[elemr] + start_col;
 
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
 #else
     {
       register int elemc;
       for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
     }
 #endif
   }
@@ -488,7 +499,7 @@ quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 
 METHODDEF(void)
 forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
-            JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
             JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
 /* This version is used for integer DCT implementations. */
 {
@@ -522,30 +533,30 @@ forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
 #ifdef DCT_FLOAT_SUPPORTED
 
 METHODDEF(void)
-convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
+convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
                FAST_FLOAT *workspace)
 {
   register FAST_FLOAT *workspaceptr;
-  register JSAMPROW elemptr;
+  register _JSAMPROW elemptr;
   register int elemr;
 
   workspaceptr = workspace;
   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     elemptr = sample_data[elemr] + start_col;
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
 #else
     {
       register int elemc;
       for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
     }
 #endif
   }
@@ -577,7 +588,7 @@ quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
 
 METHODDEF(void)
 forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                  JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
                   JDIMENSION start_row, JDIMENSION start_col,
                   JDIMENSION num_blocks)
 /* This version is used for floating-point DCT implementations. */
@@ -617,11 +628,14 @@ forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jinit_forward_dct(j_compress_ptr cinfo)
+_jinit_forward_dct(j_compress_ptr cinfo)
 {
   my_fdct_ptr fdct;
   int i;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_fdct_controller));
@@ -632,28 +646,34 @@ jinit_forward_dct(j_compress_ptr cinfo)
   switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
   case JDCT_ISLOW:
-    fdct->pub.forward_DCT = forward_DCT;
+    fdct->pub._forward_DCT = forward_DCT;
+#ifdef WITH_SIMD
     if (jsimd_can_fdct_islow())
       fdct->dct = jsimd_fdct_islow;
     else
-      fdct->dct = jpeg_fdct_islow;
+#endif
+      fdct->dct = _jpeg_fdct_islow;
     break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   case JDCT_IFAST:
-    fdct->pub.forward_DCT = forward_DCT;
+    fdct->pub._forward_DCT = forward_DCT;
+#ifdef WITH_SIMD
     if (jsimd_can_fdct_ifast())
       fdct->dct = jsimd_fdct_ifast;
     else
-      fdct->dct = jpeg_fdct_ifast;
+#endif
+      fdct->dct = _jpeg_fdct_ifast;
     break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   case JDCT_FLOAT:
-    fdct->pub.forward_DCT = forward_DCT_float;
+    fdct->pub._forward_DCT = forward_DCT_float;
+#ifdef WITH_SIMD
     if (jsimd_can_fdct_float())
       fdct->float_dct = jsimd_fdct_float;
     else
+#endif
       fdct->float_dct = jpeg_fdct_float;
     break;
 #endif
@@ -671,25 +691,33 @@ jinit_forward_dct(j_compress_ptr cinfo)
   case JDCT_IFAST:
 #endif
 #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
+#ifdef WITH_SIMD
     if (jsimd_can_convsamp())
       fdct->convsamp = jsimd_convsamp;
     else
+#endif
       fdct->convsamp = convsamp;
+#ifdef WITH_SIMD
     if (jsimd_can_quantize())
       fdct->quantize = jsimd_quantize;
     else
+#endif
       fdct->quantize = quantize;
     break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   case JDCT_FLOAT:
+#ifdef WITH_SIMD
     if (jsimd_can_convsamp_float())
       fdct->float_convsamp = jsimd_convsamp_float;
     else
+#endif
       fdct->float_convsamp = convsamp_float;
+#ifdef WITH_SIMD
     if (jsimd_can_quantize_float())
       fdct->float_quantize = jsimd_quantize_float;
     else
+#endif
       fdct->float_quantize = quantize_float;
     break;
 #endif
diff --git a/3rdparty/libjpeg-turbo/src/jcdiffct.c b/3rdparty/libjpeg-turbo/src/jcdiffct.c
new file mode 100644
index 0000000000..0bae068919
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jcdiffct.c
@@ -0,0 +1,411 @@
+/*
+ * jcdiffct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the difference buffer controller for compression.
+ * This controller is the top level of the lossless JPEG compressor proper.
+ * The difference buffer lies between the prediction/differencing and entropy
+ * encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"            /* Private declarations for lossless codec */
+
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+/* We use a full-image sample buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the
+ * full-image buffer is filled during the first pass, and the scaling,
+ * prediction and differencing steps are run during subsequent passes.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_SAMP_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_SAMP_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  _JSAMPROW cur_row[MAX_COMPONENTS];    /* row of point-transformed samples */
+  _JSAMPROW prev_row[MAX_COMPONENTS];   /* previous row of Pt'd samples */
+  JDIFFARRAY diff_buf[MAX_COMPONENTS];  /* iMCU row of differences */
+
+  /* In multi-pass modes, we need a virtual sample array for each component. */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+} my_diff_controller;
+
+typedef my_diff_controller *my_diff_ptr;
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf);
+#ifdef FULL_SAMP_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo,
+                                       _JSAMPIMAGE input_buf);
+METHODDEF(boolean) compress_output(j_compress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf);
+#endif
+
+
+LOCAL(void)
+start_iMCU_row(j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    diff->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (diff->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  diff->mcu_ctr = 0;
+  diff->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_diff(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+
+  /* Because it is hitching a ride on the jpeg_forward_dct struct,
+   * start_pass_lossless() will be called at the start of the initial pass.
+   * This ensures that it will be called at the start of the Huffman
+   * optimization and output passes as well.
+   */
+  if (pass_mode == JBUF_CRANK_DEST)
+    (*cinfo->fdct->start_pass) (cinfo);
+
+  diff->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (diff->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    diff->pub._compress_data = compress_data;
+    break;
+#ifdef FULL_SAMP_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (diff->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    diff->pub._compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (diff->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    diff->pub._compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+#define SWAP_ROWS(rowa, rowb) { \
+  _JSAMPROW temp = rowa; \
+  rowa = rowb;  rowb = temp; \
+}
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  JDIMENSION MCU_count;         /* number of MCUs encoded */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int ci, compi, yoffset, samp_row, samp_rows, samps_across;
+  jpeg_component_info *compptr;
+
+  /* Loop to write as much as one whole iMCU row */
+  for (yoffset = diff->MCU_vert_offset; yoffset < diff->MCU_rows_per_iMCU_row;
+       yoffset++) {
+
+    MCU_col_num = diff->mcu_ctr;
+
+    /* Scale and predict each scanline of the MCU row separately.
+     *
+     * Note: We only do this if we are at the start of an MCU row, ie,
+     * we don't want to reprocess a row suspended by the output.
+     */
+    if (MCU_col_num == 0) {
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        compi = compptr->component_index;
+        if (diff->iMCU_row_num < last_iMCU_row)
+          samp_rows = compptr->v_samp_factor;
+        else {
+          /* NB: can't use last_row_height here, since may not be set! */
+          samp_rows =
+            (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+          if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
+          else {
+            /* Fill dummy difference rows at the bottom edge with zeros, which
+             * will encode to the smallest amount of data.
+             */
+            for (samp_row = samp_rows; samp_row < compptr->v_samp_factor;
+                 samp_row++)
+              memset(diff->diff_buf[compi][samp_row], 0,
+                     jround_up((long)compptr->width_in_blocks,
+                               (long)compptr->h_samp_factor) * sizeof(JDIFF));
+          }
+        }
+        samps_across = compptr->width_in_blocks;
+
+        for (samp_row = 0; samp_row < samp_rows; samp_row++) {
+          (*losslessc->scaler_scale) (cinfo,
+                                      input_buf[compi][samp_row],
+                                      diff->cur_row[compi],
+                                      samps_across);
+          (*losslessc->predict_difference[compi])
+            (cinfo, compi, diff->cur_row[compi], diff->prev_row[compi],
+             diff->diff_buf[compi][samp_row], samps_across);
+          SWAP_ROWS(diff->cur_row[compi], diff->prev_row[compi]);
+        }
+      }
+    }
+    /* Try to write the MCU row (or remaining portion of suspended MCU row). */
+    MCU_count =
+      (*cinfo->entropy->encode_mcus) (cinfo,
+                                      diff->diff_buf, yoffset, MCU_col_num,
+                                      cinfo->MCUs_per_row - MCU_col_num);
+    if (MCU_count != cinfo->MCUs_per_row - MCU_col_num) {
+      /* Suspension forced; update state counters and exit */
+      diff->MCU_vert_offset = yoffset;
+      diff->mcu_ctr += MCU_col_num;
+      return FALSE;
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    diff->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  diff->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_SAMP_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor rows for each component in the image.
+ * This amount of data is read from the source buffer and saved into the
+ * virtual arrays.
+ *
+ * We must also emit the data to the compressor.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All components
+ * are loaded into the virtual arrays in this pass.  However, it may be that
+ * only a subset of the components are emitted to the compressor during
+ * this first pass; be careful about looking at the scan-dependent variables
+ * (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION samps_across;
+  int ci, samp_row, samp_rows;
+  _JSAMPARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, diff->whole_image[ci],
+       diff->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+
+    /* Count non-dummy sample rows in this iMCU row. */
+    if (diff->iMCU_row_num < last_iMCU_row)
+      samp_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      samp_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+      if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
+    }
+    samps_across = compptr->width_in_blocks;
+
+    /* Perform point transform scaling and prediction/differencing for all
+     * non-dummy rows in this iMCU row.  Each call on these functions
+     * processes a complete row of samples.
+     */
+    for (samp_row = 0; samp_row < samp_rows; samp_row++) {
+      memcpy(buffer[samp_row], input_buf[ci][samp_row],
+             samps_across * sizeof(_JSAMPLE));
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the compressor, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the compressor.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  int ci, compi;
+  _JSAMPARRAY buffer[MAX_COMPS_IN_SCAN];
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    compi = compptr->component_index;
+    buffer[compi] = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, diff->whole_image[compi],
+       diff->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, FALSE);
+  }
+
+  return compress_data(cinfo, buffer);
+}
+
+#endif /* FULL_SAMP_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize difference buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_c_diff_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_diff_ptr diff;
+  int ci, row;
+  jpeg_component_info *compptr;
+
+  diff = (my_diff_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_diff_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *)diff;
+  diff->pub.start_pass = start_pass_diff;
+
+  /* Create the prediction row buffers. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    diff->cur_row[ci] = *(_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                             (long)compptr->h_samp_factor),
+       (JDIMENSION)1);
+    diff->prev_row[ci] = *(_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                             (long)compptr->h_samp_factor),
+       (JDIMENSION)1);
+  }
+
+  /* Create the difference buffer. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    diff->diff_buf[ci] =
+      ALLOC_DARRAY(JPOOL_IMAGE,
+                   (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                                         (long)compptr->h_samp_factor),
+                   (JDIMENSION)compptr->v_samp_factor);
+    /* Prefill difference rows with zeros.  We do this because only actual
+     * data is placed in the buffers during prediction/differencing, leaving
+     * any dummy differences at the right edge as zeros, which will encode
+     * to the smallest amount of data.
+     */
+    for (row = 0; row < compptr->v_samp_factor; row++)
+      memset(diff->diff_buf[ci][row], 0,
+             jround_up((long)compptr->width_in_blocks,
+                       (long)compptr->h_samp_factor) * sizeof(JDIFF));
+  }
+
+  /* Create the sample buffer. */
+  if (need_full_buffer) {
+#ifdef FULL_SAMP_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor differences in each direction. */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      diff->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+         (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                               (long)compptr->h_samp_factor),
+         (JDIMENSION)jround_up((long)compptr->height_in_blocks,
+                               (long)compptr->v_samp_factor),
+         (JDIMENSION)compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else
+    diff->whole_image[0] = NULL; /* flag for no virtual arrays */
+}
+
+#endif /* C_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jchuff.c b/3rdparty/libjpeg-turbo/src/jchuff.c
index f4dfa1cb54..488c9b5c3a 100644
--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@@ -3,11 +3,14 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2022, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
  * Copyright (C) 2015, Matthieu Darbois.
  * Copyright (C) 2018, Matthias Räncker.
  * Copyright (C) 2020, Arm Limited.
+ * Copyright (C) 2022, Felix Hanau.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -26,44 +29,13 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#ifdef WITH_SIMD
 #include "jsimd.h"
-#include "jconfigint.h"
+#else
+#include "jchuff.h"             /* Declarations shared with jc*huff.c */
+#endif
 #include <limits.h>
-
-/*
- * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
- * used for bit counting rather than the lookup table.  This will reduce the
- * memory footprint by 64k, which is important for some mobile applications
- * that create many isolated instances of libjpeg-turbo (web browsers, for
- * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on Arm processors, because some x86
- * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
- * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for Armv6, you can
- * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
- * flags (this defines __thumb__).
- */
-
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
-    defined(_M_ARM) || defined(_M_ARM64)
-#if !defined(__thumb__) || defined(__thumb2__)
-#define USE_CLZ_INTRINSIC
-#endif
-#endif
-
-#ifdef USE_CLZ_INTRINSIC
-#if defined(_MSC_VER) && !defined(__clang__)
-#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
-#else
-#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
-#endif
-#define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
-#else
-#include "jpeg_nbits_table.h"
-#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
-#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
-#endif
+#include "jpeg_nbits.h"
 
 
 /* Expanded entropy encoder object for Huffman encoding.
@@ -102,7 +74,9 @@ typedef bit_buf_type simd_bit_buf_type;
 typedef struct {
   union {
     bit_buf_type c;
+#ifdef WITH_SIMD
     simd_bit_buf_type simd;
+#endif
   } put_buffer;                         /* current bit accumulation buffer */
   int free_bits;                        /* # of bits available in it */
                                         /* (Neon GAS: # of bits now in it) */
@@ -127,7 +101,9 @@ typedef struct {
   long *ac_count_ptrs[NUM_HUFF_TBLS];
 #endif
 
+#ifdef WITH_SIMD
   int simd;
+#endif
 } huff_entropy_encoder;
 
 typedef huff_entropy_encoder *huff_entropy_ptr;
@@ -141,7 +117,9 @@ typedef struct {
   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
   savable_state cur;            /* Current bit buffer & DC state */
   j_compress_ptr cinfo;         /* dump_buffer needs access to this */
+#ifdef WITH_SIMD
   int simd;
+#endif
 } working_state;
 
 
@@ -180,7 +158,9 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
     entropy->pub.finish_pass = finish_pass_huff;
   }
 
+#ifdef WITH_SIMD
   entropy->simd = jsimd_can_huff_encode_one_block();
+#endif
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
@@ -220,6 +200,7 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
   }
 
   /* Initialize bit buffer to empty */
+#ifdef WITH_SIMD
   if (entropy->simd) {
     entropy->saved.put_buffer.simd = 0;
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
@@ -227,7 +208,9 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
 #else
     entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
 #endif
-  } else {
+  } else
+#endif
+  {
     entropy->saved.put_buffer.c = 0;
     entropy->saved.free_bits = BIT_BUF_SIZE;
   }
@@ -242,7 +225,7 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
  * Compute the derived values for a Huffman table.
  * This routine also performs some validation checks on the table.
  *
- * Note this is also used by jcphuff.c.
+ * Note this is also used by jcphuff.c and jclhuff.c.
  */
 
 GLOBAL(void)
@@ -318,12 +301,12 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
   memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));
   memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));
 
-  /* This is also a convenient place to check for out-of-range
-   * and duplicated VAL entries.  We allow 0..255 for AC symbols
-   * but only 0..15 for DC.  (We could constrain them further
-   * based on data depth and mode, but this seems enough.)
+  /* This is also a convenient place to check for out-of-range and duplicated
+   * VAL entries.  We allow 0..255 for AC symbols but only 0..15 for DC in
+   * lossy mode and 0..16 for DC in lossless mode.  (We could constrain them
+   * further based on data depth and mode, but this seems enough.)
    */
-  maxsymbol = isDC ? 15 : 255;
+  maxsymbol = isDC ? (cinfo->master->lossless ? 16 : 15) : 255;
 
   for (p = 0; p < lastp; p++) {
     i = htbl->huffval[p];
@@ -500,6 +483,7 @@ flush_bits(working_state *state)
   simd_bit_buf_type put_buffer;  int put_bits;
   int localbuf = 0;
 
+#ifdef WITH_SIMD
   if (state->simd) {
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
     put_bits = state->cur.free_bits;
@@ -507,7 +491,9 @@ flush_bits(working_state *state)
     put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
 #endif
     put_buffer = state->cur.put_buffer.simd;
-  } else {
+  } else
+#endif
+  {
     put_bits = BIT_BUF_SIZE - state->cur.free_bits;
     put_buffer = state->cur.put_buffer.c;
   }
@@ -525,6 +511,7 @@ flush_bits(working_state *state)
     EMIT_BYTE(temp)
   }
 
+#ifdef WITH_SIMD
   if (state->simd) {                    /* and reset bit buffer to empty */
     state->cur.put_buffer.simd = 0;
 #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
@@ -532,7 +519,9 @@ flush_bits(working_state *state)
 #else
     state->cur.free_bits = SIMD_BIT_BUF_SIZE;
 #endif
-  } else {
+  } else
+#endif
+  {
     state->cur.put_buffer.c = 0;
     state->cur.free_bits = BIT_BUF_SIZE;
   }
@@ -542,6 +531,8 @@ flush_bits(working_state *state)
 }
 
 
+#ifdef WITH_SIMD
+
 /* Encode a single block's worth of coefficients */
 
 LOCAL(boolean)
@@ -561,6 +552,8 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
   return TRUE;
 }
 
+#endif
+
 LOCAL(boolean)
 encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
                  c_derived_tbl *dctbl, c_derived_tbl *actbl)
@@ -569,6 +562,7 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
   bit_buf_type put_buffer;
   JOCTET _buffer[BUFSIZE], *buffer;
   int localbuf = 0;
+  int max_coef_bits = state->cinfo->data_precision + 2;
 
   free_bits = state->cur.free_bits;
   put_buffer = state->cur.put_buffer.c;
@@ -589,6 +583,11 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
 
   /* Find the number of bits needed for the magnitude of the coefficient */
   nbits = JPEG_NBITS(nbits);
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > max_coef_bits + 1)
+    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
 
   /* Emit the Huffman-coded symbol for the number of bits.
    * Emit that number of bits of the value, if positive,
@@ -614,6 +613,9 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
     temp += nbits; \
     nbits ^= temp; \
     nbits = JPEG_NBITS_NONZERO(nbits); \
+    /* Check for out-of-range coefficient values */ \
+    if (nbits > max_coef_bits) \
+      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); \
     /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
     while (r >= 16 * 16) { \
       r -= 16 * 16; \
@@ -695,7 +697,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   state.free_in_buffer = cinfo->dest->free_in_buffer;
   state.cur = entropy->saved;
   state.cinfo = cinfo;
+#ifdef WITH_SIMD
   state.simd = entropy->simd;
+#endif
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -705,6 +709,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   }
 
   /* Encode the MCU data blocks */
+#ifdef WITH_SIMD
   if (entropy->simd) {
     for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
       ci = cinfo->MCU_membership[blkn];
@@ -717,7 +722,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
       /* Update last_dc_val */
       state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
     }
-  } else {
+  } else
+#endif
+  {
     for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
       ci = cinfo->MCU_membership[blkn];
       compptr = cinfo->cur_comp_info[ci];
@@ -765,7 +772,9 @@ finish_pass_huff(j_compress_ptr cinfo)
   state.free_in_buffer = cinfo->dest->free_in_buffer;
   state.cur = entropy->saved;
   state.cinfo = cinfo;
+#ifdef WITH_SIMD
   state.simd = entropy->simd;
+#endif
 
   /* Flush out the last data */
   if (!flush_bits(&state))
@@ -801,6 +810,7 @@ htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
   register int temp;
   register int nbits;
   register int k, r;
+  int max_coef_bits = cinfo->data_precision + 2;
 
   /* Encode the DC coefficient difference per section F.1.2.1 */
 
@@ -817,7 +827,7 @@ htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
   /* Check for out-of-range coefficient values.
    * Since we're encoding a difference, the range limit is twice as much.
    */
-  if (nbits > MAX_COEF_BITS + 1)
+  if (nbits > max_coef_bits + 1)
     ERREXIT(cinfo, JERR_BAD_DCT_COEF);
 
   /* Count the Huffman symbol for the number of bits */
@@ -846,7 +856,7 @@ htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
       while ((temp >>= 1))
         nbits++;
       /* Check for out-of-range coefficient values */
-      if (nbits > MAX_COEF_BITS)
+      if (nbits > max_coef_bits)
         ERREXIT(cinfo, JERR_BAD_DCT_COEF);
 
       /* Count Huffman symbol for run length / number of bits */
@@ -901,7 +911,7 @@ encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 
 /*
  * Generate the best Huffman code table for the given counts, fill htbl.
- * Note this is also used by jcphuff.c.
+ * Note this is also used by jcphuff.c and jclhuff.c.
  *
  * The JPEG standard requires that no symbol be assigned a codeword of all
  * one bits (so that padding bits added at the end of a compressed segment
@@ -933,11 +943,15 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
 {
 #define MAX_CLEN  32            /* assumed maximum initial code length */
   UINT8 bits[MAX_CLEN + 1];     /* bits[k] = # of symbols with code length k */
+  int bit_pos[MAX_CLEN + 1];    /* # of symbols with smaller code length */
   int codesize[257];            /* codesize[k] = code length of symbol k */
+  int nz_index[257];            /* index of nonzero symbol in the original freq
+                                   array */
   int others[257];              /* next symbol in current branch of tree */
   int c1, c2;
   int p, i, j;
-  long v;
+  int num_nz_symbols;
+  long v, v2;
 
   /* This algorithm is explained in section K.2 of the JPEG standard */
 
@@ -952,28 +966,41 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
    * will be placed last in the largest codeword category.
    */
 
+  /* Group nonzero frequencies together so we can more easily find the
+   * smallest.
+   */
+  num_nz_symbols = 0;
+  for (i = 0; i < 257; i++) {
+    if (freq[i]) {
+      nz_index[num_nz_symbols] = i;
+      freq[num_nz_symbols] = freq[i];
+      num_nz_symbols++;
+    }
+  }
+
   /* Huffman's basic algorithm to assign optimal code lengths to symbols */
 
   for (;;) {
-    /* Find the smallest nonzero frequency, set c1 = its symbol */
-    /* In case of ties, take the larger symbol number */
+    /* Find the two smallest nonzero frequencies; set c1, c2 = their symbols */
+    /* In case of ties, take the larger symbol number.  Since we have grouped
+     * the nonzero symbols together, checking for zero symbols is not
+     * necessary.
+     */
     c1 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v) {
-        v = freq[i];
-        c1 = i;
-      }
-    }
-
-    /* Find the next smallest nonzero frequency, set c2 = its symbol */
-    /* In case of ties, take the larger symbol number */
     c2 = -1;
     v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v && i != c1) {
-        v = freq[i];
-        c2 = i;
+    v2 = 1000000000L;
+    for (i = 0; i < num_nz_symbols; i++) {
+      if (freq[i] <= v2) {
+        if (freq[i] <= v) {
+          c2 = c1;
+          v2 = v;
+          v = freq[i];
+          c1 = i;
+        } else {
+          v2 = freq[i];
+          c2 = i;
+        }
       }
     }
 
@@ -983,7 +1010,10 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
 
     /* Else merge the two counts/trees */
     freq[c1] += freq[c2];
-    freq[c2] = 0;
+    /* Set the frequency to a very high value instead of zero, so we don't have
+     * to check for zero values.
+     */
+    freq[c2] = 1000000001L;
 
     /* Increment the codesize of everything in c1's tree branch */
     codesize[c1]++;
@@ -1003,15 +1033,24 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
   }
 
   /* Now count the number of symbols of each code length */
-  for (i = 0; i <= 256; i++) {
-    if (codesize[i]) {
-      /* The JPEG standard seems to think that this can't happen, */
-      /* but I'm paranoid... */
-      if (codesize[i] > MAX_CLEN)
-        ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+  for (i = 0; i < num_nz_symbols; i++) {
+    /* The JPEG standard seems to think that this can't happen, */
+    /* but I'm paranoid... */
+    if (codesize[i] > MAX_CLEN)
+      ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
 
-      bits[codesize[i]]++;
-    }
+    bits[codesize[i]]++;
+  }
+
+  /* Count the number of symbols with a length smaller than i bits, so we can
+   * construct the symbol table more efficiently.  Note that this includes the
+   * pseudo-symbol 256, but since it is the last symbol, it will not affect the
+   * table.
+   */
+  p = 0;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    bit_pos[i] = p;
+    p += bits[i];
   }
 
   /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
@@ -1051,14 +1090,9 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
    * changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think
    * this works.
    */
-  p = 0;
-  for (i = 1; i <= MAX_CLEN; i++) {
-    for (j = 0; j <= 255; j++) {
-      if (codesize[j] == i) {
-        htbl->huffval[p] = (UINT8)j;
-        p++;
-      }
-    }
+  for (i = 0; i < num_nz_symbols - 1; i++) {
+    htbl->huffval[bit_pos[codesize[i]]] = (UINT8)nz_index[i];
+    bit_pos[codesize[i]]++;
   }
 
   /* Set sent_table FALSE so updated table will be written to JPEG file. */
diff --git a/3rdparty/libjpeg-turbo/src/jchuff.h b/3rdparty/libjpeg-turbo/src/jchuff.h
index 314a2325c9..21f17b89b0 100644
--- a/3rdparty/libjpeg-turbo/src/jchuff.h
+++ b/3rdparty/libjpeg-turbo/src/jchuff.h
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,11 +19,13 @@
  * Hence the magnitude should always fit in 10 or 14 bits respectively.
  */
 
-#if BITS_IN_JSAMPLE == 8
-#define MAX_COEF_BITS  10
-#else
-#define MAX_COEF_BITS  14
-#endif
+/* The progressive Huffman encoder uses an unsigned 16-bit data type to store
+ * absolute values of coefficients, because it is possible to inject a
+ * coefficient value of -32768 into the encoder by attempting to transform a
+ * malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
+ * a signed 16-bit integer.
+ */
+typedef unsigned short UJCOEF;
 
 /* Derived data constructed for each Huffman table */
 
diff --git a/3rdparty/libjpeg-turbo/src/jcinit.c b/3rdparty/libjpeg-turbo/src/jcinit.c
index 157353a22e..fe8a13a8d9 100644
--- a/3rdparty/libjpeg-turbo/src/jcinit.c
+++ b/3rdparty/libjpeg-turbo/src/jcinit.c
@@ -3,8 +3,10 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2020, D. R. Commander.
+ * Copyright (C) 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -21,7 +23,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 /*
@@ -38,34 +40,101 @@ jinit_compress_master(j_compress_ptr cinfo)
 
   /* Preprocessing */
   if (!cinfo->raw_data_in) {
-    jinit_color_converter(cinfo);
-    jinit_downsampler(cinfo);
-    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
-  }
-  /* Forward DCT */
-  jinit_forward_dct(cinfo);
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code) {
-#ifdef C_ARITH_CODING_SUPPORTED
-    jinit_arith_encoder(cinfo);
+    if (cinfo->data_precision == 16) {
+#ifdef C_LOSSLESS_SUPPORTED
+      j16init_color_converter(cinfo);
+      j16init_downsampler(cinfo);
+      j16init_c_prep_controller(cinfo,
+                                FALSE /* never need full buffer here */);
 #else
-    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 #endif
-  } else {
-    if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-      jinit_phuff_encoder(cinfo);
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    } else
-      jinit_huff_encoder(cinfo);
+    } else if (cinfo->data_precision == 12) {
+      j12init_color_converter(cinfo);
+      j12init_downsampler(cinfo);
+      j12init_c_prep_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+    } else {
+      jinit_color_converter(cinfo);
+      jinit_downsampler(cinfo);
+      jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+    }
   }
 
-  /* Need a full-image coefficient buffer in any multi-pass mode. */
-  jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
-                                           cinfo->optimize_coding));
-  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+  if (cinfo->master->lossless) {
+#ifdef C_LOSSLESS_SUPPORTED
+    /* Prediction, sample differencing, and point transform */
+    if (cinfo->data_precision == 16)
+      j16init_lossless_compressor(cinfo);
+    else if (cinfo->data_precision == 12)
+      j12init_lossless_compressor(cinfo);
+    else
+      jinit_lossless_compressor(cinfo);
+    /* Entropy encoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+    } else {
+      jinit_lhuff_encoder(cinfo);
+    }
+
+    /* Need a full-image difference buffer in any multi-pass mode. */
+    if (cinfo->data_precision == 16)
+      j16init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+    else if (cinfo->data_precision == 12)
+      j12init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+    else
+      jinit_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                               cinfo->optimize_coding));
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    if (cinfo->data_precision == 16)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+    /* Forward DCT */
+    if (cinfo->data_precision == 12)
+      j12init_forward_dct(cinfo);
+    else
+      jinit_forward_dct(cinfo);
+    /* Entropy encoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+      jinit_arith_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+    } else {
+      if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+        jinit_phuff_encoder(cinfo);
+#else
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      } else
+        jinit_huff_encoder(cinfo);
+    }
+
+    /* Need a full-image coefficient buffer in any multi-pass mode. */
+    if (cinfo->data_precision == 12)
+      j12init_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+    else
+      jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                               cinfo->optimize_coding));
+  }
+
+  if (cinfo->data_precision == 16)
+#ifdef C_LOSSLESS_SUPPORTED
+    j16init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+  else if (cinfo->data_precision == 12)
+    j12init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+  else
+    jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
 
   jinit_marker_writer(cinfo);
 
diff --git a/3rdparty/libjpeg-turbo/src/jclhuff.c b/3rdparty/libjpeg-turbo/src/jclhuff.c
new file mode 100644
index 0000000000..ae4154532e
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jclhuff.c
@@ -0,0 +1,587 @@
+/*
+ * jclhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy encoding routines for lossless JPEG.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"            /* Private declarations for lossless codec */
+#include "jchuff.h"             /* Declarations shared with jc*huff.c */
+
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+/* The legal range of a spatial difference is
+ * -32767 .. +32768.
+ * Hence the magnitude should always fit in 16 bits.
+ */
+
+#define MAX_DIFF_BITS  16
+
+
+/* Expanded entropy encoder object for Huffman encoding in lossless mode.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  size_t put_buffer;            /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
+} savable_state;
+
+
+typedef struct {
+  int ci, yoffset, MCU_width;
+} lhe_input_ptr_info;
+
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;          /* Bit buffer at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
+
+  /* Pointers to derived tables to be used for each data unit within an MCU */
+  c_derived_tbl *cur_tbls[C_MAX_BLOCKS_IN_MCU];
+
+#ifdef ENTROPY_OPT_SUPPORTED    /* Statistics tables for optimization */
+  long *count_ptrs[NUM_HUFF_TBLS];
+
+  /* Pointers to stats tables to be used for each data unit within an MCU */
+  long *cur_counts[C_MAX_BLOCKS_IN_MCU];
+#endif
+
+  /* Pointers to the proper input difference row for each group of data units
+   * within an MCU.  For each component, there are Vi groups of Hi data units.
+   */
+  JDIFFROW input_ptr[C_MAX_BLOCKS_IN_MCU];
+
+  /* Number of input pointers in use for the current MCU.  This is the sum
+   * of all Vi in the MCU.
+   */
+  int num_input_ptrs;
+
+  /* Information used for positioning the input pointers within the input
+   * difference rows.
+   */
+  lhe_input_ptr_info input_ptr_info[C_MAX_BLOCKS_IN_MCU];
+
+  /* Index of the proper input pointer for each data unit within an MCU */
+  int input_ptr_index[C_MAX_BLOCKS_IN_MCU];
+
+} lhuff_entropy_encoder;
+
+typedef lhuff_entropy_encoder *lhuff_entropy_ptr;
+
+/* Working state while writing an MCU.
+ * This struct contains all the fields that are needed by subroutines.
+ */
+
+typedef struct {
+  JOCTET *next_output_byte;     /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  savable_state cur;            /* Current bit buffer & DC state */
+  j_compress_ptr cinfo;         /* dump_buffer needs access to this */
+} working_state;
+
+
+/* Forward declarations */
+METHODDEF(JDIMENSION) encode_mcus_huff(j_compress_ptr cinfo,
+                                       JDIFFIMAGE diff_buf,
+                                       JDIMENSION MCU_row_num,
+                                       JDIMENSION MCU_col_num,
+                                       JDIMENSION nMCU);
+METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);
+#ifdef ENTROPY_OPT_SUPPORTED
+METHODDEF(JDIMENSION) encode_mcus_gather(j_compress_ptr cinfo,
+                                         JDIFFIMAGE diff_buf,
+                                         JDIMENSION MCU_row_num,
+                                         JDIMENSION MCU_col_num,
+                                         JDIMENSION nMCU);
+METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);
+#endif
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_lhuff(j_compress_ptr cinfo, boolean gather_statistics)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  int ci, dctbl, sampn, ptrn, yoffset, xoffset;
+  jpeg_component_info *compptr;
+
+  if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->pub.encode_mcus = encode_mcus_gather;
+    entropy->pub.finish_pass = finish_pass_gather;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    entropy->pub.encode_mcus = encode_mcus_huff;
+    entropy->pub.finish_pass = finish_pass_huff;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+      /* Check for invalid table indexes */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->count_ptrs[dctbl] == NULL)
+        entropy->count_ptrs[dctbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      memset(entropy->count_ptrs[dctbl], 0, 257 * sizeof(long));
+#endif
+    } else {
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
+                              &entropy->derived_tbls[dctbl]);
+    }
+  }
+
+  /* Precalculate encoding info for each sample in an MCU of this scan */
+  for (sampn = 0, ptrn = 0; sampn < cinfo->blocks_in_MCU;) {
+    compptr = cinfo->cur_comp_info[cinfo->MCU_membership[sampn]];
+    ci = compptr->component_index;
+    for (yoffset = 0; yoffset < compptr->MCU_height; yoffset++, ptrn++) {
+      /* Precalculate the setup info for each input pointer */
+      entropy->input_ptr_info[ptrn].ci = ci;
+      entropy->input_ptr_info[ptrn].yoffset = yoffset;
+      entropy->input_ptr_info[ptrn].MCU_width = compptr->MCU_width;
+      for (xoffset = 0; xoffset < compptr->MCU_width; xoffset++, sampn++) {
+        /* Precalculate the input pointer index for each sample */
+        entropy->input_ptr_index[sampn] = ptrn;
+        /* Precalculate which tables to use for each sample */
+        entropy->cur_tbls[sampn] = entropy->derived_tbls[compptr->dc_tbl_no];
+        entropy->cur_counts[sampn] = entropy->count_ptrs[compptr->dc_tbl_no];
+      }
+    }
+  }
+  entropy->num_input_ptrs = ptrn;
+
+  /* Initialize bit buffer to empty */
+  entropy->saved.put_buffer = 0;
+  entropy->saved.put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/* Outputting bytes to the file */
+
+/* Emit a byte, taking 'action' if must suspend. */
+#define emit_byte(state, val, action) { \
+  *(state)->next_output_byte++ = (JOCTET)(val); \
+  if (--(state)->free_in_buffer == 0) \
+    if (!dump_buffer(state)) \
+      { action; } \
+}
+
+
+LOCAL(boolean)
+dump_buffer(working_state *state)
+/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
+{
+  struct jpeg_destination_mgr *dest = state->cinfo->dest;
+
+  if (!(*dest->empty_output_buffer) (state->cinfo))
+    return FALSE;
+  /* After a successful buffer dump, must reset buffer pointers */
+  state->next_output_byte = dest->next_output_byte;
+  state->free_in_buffer = dest->free_in_buffer;
+  return TRUE;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(boolean)
+emit_bits(working_state *state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register size_t put_buffer = (size_t)code;
+  register int put_bits = state->cur.put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+
+  put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
+
+  put_bits += size;             /* new number of bits in buffer */
+
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
+
+  while (put_bits >= 8) {
+    int c = (int)((put_buffer >> 16) & 0xFF);
+
+    emit_byte(state, c, return FALSE);
+    if (c == 0xFF) {            /* need to stuff a zero byte? */
+      emit_byte(state, 0, return FALSE);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  state->cur.put_buffer = put_buffer; /* update state variables */
+  state->cur.put_bits = put_bits;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+flush_bits(working_state *state)
+{
+  if (!emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
+    return FALSE;
+  state->cur.put_buffer = 0;    /* and reset bit-buffer to empty */
+  state->cur.put_bits = 0;
+  return TRUE;
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(boolean)
+emit_restart(working_state *state, int restart_num)
+{
+  if (!flush_bits(state))
+    return FALSE;
+
+  emit_byte(state, 0xFF, return FALSE);
+  emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output nMCU MCUs' worth of Huffman-compressed differences.
+ */
+
+METHODDEF(JDIMENSION)
+encode_mcus_huff(j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
+                 JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                 JDIMENSION nMCU)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  working_state state;
+  int sampn, ci, yoffset, MCU_width, ptrn;
+  JDIMENSION mcu_num;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  state.cur = entropy->saved;
+  state.cinfo = cinfo;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!emit_restart(&state, entropy->next_restart_num))
+        return 0;
+  }
+
+  /* Set input pointer locations based on MCU_col_num */
+  for (ptrn = 0; ptrn < entropy->num_input_ptrs; ptrn++) {
+    ci = entropy->input_ptr_info[ptrn].ci;
+    yoffset = entropy->input_ptr_info[ptrn].yoffset;
+    MCU_width = entropy->input_ptr_info[ptrn].MCU_width;
+    entropy->input_ptr[ptrn] =
+      diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
+  }
+
+  for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
+
+    /* Inner loop handles the samples in the MCU */
+    for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
+      register int temp, temp2;
+      register int nbits;
+      c_derived_tbl *dctbl = entropy->cur_tbls[sampn];
+
+      /* Encode the difference per section H.1.2.2 */
+
+      /* Input the sample difference */
+      temp = *entropy->input_ptr[entropy->input_ptr_index[sampn]]++;
+
+      if (temp & 0x8000) {      /* instead of temp < 0 */
+        temp = (-temp) & 0x7FFF; /* absolute value, mod 2^16 */
+        if (temp == 0)          /* special case: magnitude = 32768 */
+          temp2 = temp = 0x8000;
+        temp2 = ~temp;          /* one's complement of magnitude */
+      } else {
+        temp &= 0x7FFF;         /* abs value mod 2^16 */
+        temp2 = temp;           /* magnitude */
+      }
+
+      /* Find the number of bits needed for the magnitude of the difference */
+      nbits = 0;
+      while (temp) {
+        nbits++;
+        temp >>= 1;
+      }
+      /* Check for out-of-range difference values.
+       */
+      if (nbits > MAX_DIFF_BITS)
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+      /* Emit the Huffman-coded symbol for the number of bits */
+      if (!emit_bits(&state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+        return mcu_num;
+
+      /* Emit that number of bits of the value, if positive, */
+      /* or the complement of its magnitude, if negative. */
+      if (nbits &&              /* emit_bits rejects calls with size 0 */
+          nbits != 16)          /* special case: no bits should be emitted */
+        if (!emit_bits(&state, (unsigned int)temp2, nbits))
+          return mcu_num;
+    }
+
+    /* Completed MCU, so update state */
+    cinfo->dest->next_output_byte = state.next_output_byte;
+    cinfo->dest->free_in_buffer = state.free_in_buffer;
+    entropy->saved = state.cur;
+
+    /* Update restart-interval state too */
+    if (cinfo->restart_interval) {
+      if (entropy->restarts_to_go == 0) {
+        entropy->restarts_to_go = cinfo->restart_interval;
+        entropy->next_restart_num++;
+        entropy->next_restart_num &= 7;
+      }
+      entropy->restarts_to_go--;
+    }
+
+  }
+
+  return nMCU;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff(j_compress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  working_state state;
+
+  /* Load up working state ... flush_bits needs it */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  state.cur = entropy->saved;
+  state.cinfo = cinfo;
+
+  /* Flush out the last data */
+  if (!flush_bits(&state))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  entropy->saved = state.cur;
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+#ifdef ENTROPY_OPT_SUPPORTED
+
+/*
+ * Trial-encode nMCU MCUs' worth of Huffman-compressed differences.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(JDIMENSION)
+encode_mcus_gather(j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
+                   JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                   JDIMENSION nMCU)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  int sampn, ci, yoffset, MCU_width, ptrn;
+  JDIMENSION mcu_num;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Set input pointer locations based on MCU_col_num */
+  for (ptrn = 0; ptrn < entropy->num_input_ptrs; ptrn++) {
+    ci = entropy->input_ptr_info[ptrn].ci;
+    yoffset = entropy->input_ptr_info[ptrn].yoffset;
+    MCU_width = entropy->input_ptr_info[ptrn].MCU_width;
+    entropy->input_ptr[ptrn] =
+      diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
+  }
+
+  for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
+
+    /* Inner loop handles the samples in the MCU */
+    for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
+      register int temp;
+      register int nbits;
+      long *counts = entropy->cur_counts[sampn];
+
+      /* Encode the difference per section H.1.2.2 */
+
+      /* Input the sample difference */
+      temp = *entropy->input_ptr[entropy->input_ptr_index[sampn]]++;
+
+      if (temp & 0x8000) {      /* instead of temp < 0 */
+        temp = (-temp) & 0x7FFF; /* absolute value, mod 2^16 */
+        if (temp == 0)          /* special case: magnitude = 32768 */
+          temp = 0x8000;
+      } else
+        temp &= 0x7FFF;         /* abs value mod 2^16 */
+
+      /* Find the number of bits needed for the magnitude of the difference */
+      nbits = 0;
+      while (temp) {
+        nbits++;
+        temp >>= 1;
+      }
+      /* Check for out-of-range difference values.
+       */
+      if (nbits > MAX_DIFF_BITS)
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+      /* Count the Huffman symbol for the number of bits */
+      counts[nbits]++;
+    }
+  }
+
+  return nMCU;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather(j_compress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  int ci, dctbl;
+  jpeg_component_info *compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  memset(did_dc, 0, sizeof(did_dc));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    if (!did_dc[dctbl]) {
+      htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];
+      if (*htblptr == NULL)
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[dctbl]);
+      did_dc[dctbl] = TRUE;
+    }
+  }
+}
+
+
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_lhuff_encoder(j_compress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy;
+  int i;
+
+  entropy = (lhuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(lhuff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
+  entropy->pub.start_pass = start_pass_lhuff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->count_ptrs[i] = NULL;
+#endif
+  }
+}
+
+#endif /* C_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jclossls.c b/3rdparty/libjpeg-turbo/src/jclossls.c
new file mode 100644
index 0000000000..e9ba92a7df
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jclossls.c
@@ -0,0 +1,319 @@
+/*
+ * jclossls.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains prediction, sample differencing, and point transform
+ * routines for the lossless JPEG compressor.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+
+/************************** Sample differencing **************************/
+
+/*
+ * In order to avoid a performance penalty for checking which predictor is
+ * being used and which row is being processed for each call of the
+ * undifferencer, and to promote optimization, we have separate differencing
+ * functions for each predictor selection value.
+ *
+ * We are able to avoid duplicating source code by implementing the predictors
+ * and differencers as macros.  Each of the differencing functions is simply a
+ * wrapper around a DIFFERENCE macro with the appropriate PREDICTOR macro
+ * passed as an argument.
+ */
+
+/* Forward declarations */
+LOCAL(void) reset_predictor(j_compress_ptr cinfo, int ci);
+
+
+/* Predictor for the first column of the first row: 2^(P-Pt-1) */
+#define INITIAL_PREDICTORx  (1 << (cinfo->data_precision - cinfo->Al - 1))
+
+/* Predictor for the first column of the remaining rows: Rb */
+#define INITIAL_PREDICTOR2  prev_row[0]
+
+
+/*
+ * 1-Dimensional differencer routine.
+ *
+ * This macro implements the 1-D horizontal predictor (1).  INITIAL_PREDICTOR
+ * is used as the special case predictor for the first column, which must be
+ * either INITIAL_PREDICTOR2 or INITIAL_PREDICTORx.  The remaining samples
+ * use PREDICTOR1.
+ */
+
+#define DIFFERENCE_1D(INITIAL_PREDICTOR) \
+  lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct; \
+  boolean restart = FALSE; \
+  int samp, Ra; \
+  \
+  samp = *input_buf++; \
+  *diff_buf++ = samp - INITIAL_PREDICTOR; \
+  \
+  while (--width) { \
+    Ra = samp; \
+    samp = *input_buf++; \
+    *diff_buf++ = samp - PREDICTOR1; \
+  } \
+  \
+  /* Account for restart interval (no-op if not using restarts) */ \
+  if (cinfo->restart_interval) { \
+    if (--(losslessc->restart_rows_to_go[ci]) == 0) { \
+      reset_predictor(cinfo, ci); \
+      restart = TRUE; \
+    } \
+  }
+
+
+/*
+ * 2-Dimensional differencer routine.
+ *
+ * This macro implements the 2-D horizontal predictors (#2-7).  PREDICTOR2 is
+ * used as the special case predictor for the first column.  The remaining
+ * samples use PREDICTOR, which is a function of Ra, Rb, and Rc.
+ *
+ * Because prev_row and output_buf may point to the same storage area (in an
+ * interleaved image with Vi=1, for example), we must take care to buffer Rb/Rc
+ * before writing the current reconstructed sample value into output_buf.
+ */
+
+#define DIFFERENCE_2D(PREDICTOR) \
+  lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct; \
+  int samp, Ra, Rb, Rc; \
+  \
+  Rb = *prev_row++; \
+  samp = *input_buf++; \
+  *diff_buf++ = samp - PREDICTOR2; \
+  \
+  while (--width) { \
+    Rc = Rb; \
+    Rb = *prev_row++; \
+    Ra = samp; \
+    samp = *input_buf++; \
+    *diff_buf++ = samp - PREDICTOR; \
+  } \
+  \
+  /* Account for restart interval (no-op if not using restarts) */ \
+  if (cinfo->restart_interval) { \
+    if (--losslessc->restart_rows_to_go[ci] == 0) \
+      reset_predictor(cinfo, ci); \
+  }
+
+
+/*
+ * Differencers for the second and subsequent rows in a scan or restart
+ * interval.  The first sample in the row is differenced using the vertical
+ * predictor (2).  The rest of the samples are differenced using the predictor
+ * specified in the scan header.
+ */
+
+METHODDEF(void)
+jpeg_difference1(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_1D(INITIAL_PREDICTOR2);
+  (void)(restart);
+}
+
+METHODDEF(void)
+jpeg_difference2(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR2);
+  (void)(Ra);
+  (void)(Rc);
+}
+
+METHODDEF(void)
+jpeg_difference3(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR3);
+  (void)(Ra);
+}
+
+METHODDEF(void)
+jpeg_difference4(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR4);
+}
+
+METHODDEF(void)
+jpeg_difference5(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR5);
+}
+
+METHODDEF(void)
+jpeg_difference6(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR6);
+}
+
+METHODDEF(void)
+jpeg_difference7(j_compress_ptr cinfo, int ci,
+                 _JSAMPROW input_buf, _JSAMPROW prev_row,
+                 JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_2D(PREDICTOR7);
+  (void)(Rc);
+}
+
+
+/*
+ * Differencer for the first row in a scan or restart interval.  The first
+ * sample in the row is differenced using the special predictor constant
+ * x = 2 ^ (P-Pt-1).  The rest of the samples are differenced using the
+ * 1-D horizontal predictor (1).
+ */
+
+METHODDEF(void)
+jpeg_difference_first_row(j_compress_ptr cinfo, int ci,
+                          _JSAMPROW input_buf, _JSAMPROW prev_row,
+                          JDIFFROW diff_buf, JDIMENSION width)
+{
+  DIFFERENCE_1D(INITIAL_PREDICTORx);
+
+  /*
+   * Now that we have differenced the first row, we want to use the
+   * differencer that corresponds to the predictor specified in the
+   * scan header.
+   *
+   * Note that we don't do this if we have just reset the predictor
+   * for a new restart interval.
+   */
+  if (!restart) {
+    switch (cinfo->Ss) {
+    case 1:
+      losslessc->predict_difference[ci] = jpeg_difference1;
+      break;
+    case 2:
+      losslessc->predict_difference[ci] = jpeg_difference2;
+      break;
+    case 3:
+      losslessc->predict_difference[ci] = jpeg_difference3;
+      break;
+    case 4:
+      losslessc->predict_difference[ci] = jpeg_difference4;
+      break;
+    case 5:
+      losslessc->predict_difference[ci] = jpeg_difference5;
+      break;
+    case 6:
+      losslessc->predict_difference[ci] = jpeg_difference6;
+      break;
+    case 7:
+      losslessc->predict_difference[ci] = jpeg_difference7;
+      break;
+    }
+  }
+}
+
+/*
+ * Reset predictor at the start of a pass or restart interval.
+ */
+
+LOCAL(void)
+reset_predictor(j_compress_ptr cinfo, int ci)
+{
+  lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
+
+  /* Initialize restart counter */
+  losslessc->restart_rows_to_go[ci] =
+    cinfo->restart_interval / cinfo->MCUs_per_row;
+
+  /* Set difference function to first row function */
+  losslessc->predict_difference[ci] = jpeg_difference_first_row;
+}
+
+
+/********************** Sample downscaling by 2^Pt ***********************/
+
+METHODDEF(void)
+simple_downscale(j_compress_ptr cinfo,
+                 _JSAMPROW input_buf, _JSAMPROW output_buf, JDIMENSION width)
+{
+  do {
+    *output_buf++ = (_JSAMPLE)RIGHT_SHIFT(*input_buf++, cinfo->Al);
+  } while (--width);
+}
+
+
+METHODDEF(void)
+noscale(j_compress_ptr cinfo,
+        _JSAMPROW input_buf, _JSAMPROW output_buf, JDIMENSION width)
+{
+  memcpy(output_buf, input_buf, width * sizeof(_JSAMPLE));
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_lossless(j_compress_ptr cinfo)
+{
+  lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
+  int ci;
+
+  /* Set scaler function based on Pt */
+  if (cinfo->Al)
+    losslessc->scaler_scale = simple_downscale;
+  else
+    losslessc->scaler_scale = noscale;
+
+  /* Check that the restart interval is an integer multiple of the number
+   * of MCUs in an MCU row.
+   */
+  if (cinfo->restart_interval % cinfo->MCUs_per_row != 0)
+    ERREXIT2(cinfo, JERR_BAD_RESTART,
+             cinfo->restart_interval, cinfo->MCUs_per_row);
+
+  /* Set predictors for start of pass */
+  for (ci = 0; ci < cinfo->num_components; ci++)
+    reset_predictor(cinfo, ci);
+}
+
+
+/*
+ * Initialize the lossless compressor.
+ */
+
+GLOBAL(void)
+_jinit_lossless_compressor(j_compress_ptr cinfo)
+{
+  lossless_comp_ptr losslessc;
+
+  /* Create subobject in permanent pool */
+  losslessc = (lossless_comp_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                sizeof(jpeg_lossless_compressor));
+  cinfo->fdct = (struct jpeg_forward_dct *)losslessc;
+  losslessc->pub.start_pass = start_pass_lossless;
+}
+
+#endif /* C_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jcmainct.c b/3rdparty/libjpeg-turbo/src/jcmainct.c
index 3f23028c46..fe8fc0b1ac 100644
--- a/3rdparty/libjpeg-turbo/src/jcmainct.c
+++ b/3rdparty/libjpeg-turbo/src/jcmainct.c
@@ -3,8 +3,10 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -16,8 +18,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
 /* Private buffer controller object */
 
 typedef struct {
@@ -32,7 +37,7 @@ typedef struct {
    * (we allocate one for each component).  In the full-image case, this
    * points to the currently accessible strips of the virtual arrays.
    */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
+  _JSAMPARRAY buffer[MAX_COMPONENTS];
 } my_main_controller;
 
 typedef my_main_controller *my_main_ptr;
@@ -40,7 +45,7 @@ typedef my_main_controller *my_main_ptr;
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main(j_compress_ptr cinfo,
-                                         JSAMPARRAY input_buf,
+                                         _JSAMPARRAY input_buf,
                                          JDIMENSION *in_row_ctr,
                                          JDIMENSION in_rows_avail);
 
@@ -65,7 +70,7 @@ start_pass_main(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
   main_ptr->rowgroup_ctr = 0;
   main_ptr->suspended = FALSE;
   main_ptr->pass_mode = pass_mode;      /* save mode for use by process_data */
-  main_ptr->pub.process_data = process_data_simple_main;
+  main_ptr->pub._process_data = process_data_simple_main;
 }
 
 
@@ -76,28 +81,28 @@ start_pass_main(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
  */
 
 METHODDEF(void)
-process_data_simple_main(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+process_data_simple_main(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
                          JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  JDIMENSION data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
     /* Read input data if we haven't filled the main buffer yet */
-    if (main_ptr->rowgroup_ctr < DCTSIZE)
-      (*cinfo->prep->pre_process_data) (cinfo, input_buf, in_row_ctr,
-                                        in_rows_avail, main_ptr->buffer,
-                                        &main_ptr->rowgroup_ctr,
-                                        (JDIMENSION)DCTSIZE);
+    if (main_ptr->rowgroup_ctr < data_unit)
+      (*cinfo->prep->_pre_process_data) (cinfo, input_buf, in_row_ctr,
+                                         in_rows_avail, main_ptr->buffer,
+                                         &main_ptr->rowgroup_ctr, data_unit);
 
     /* If we don't have a full iMCU row buffered, return to application for
      * more data.  Note that preprocessor will always pad to fill the iMCU row
      * at the bottom of the image.
      */
-    if (main_ptr->rowgroup_ctr != DCTSIZE)
+    if (main_ptr->rowgroup_ctr != data_unit)
       return;
 
     /* Send the completed row to the compressor */
-    if (!(*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
+    if (!(*cinfo->coef->_compress_data) (cinfo, main_ptr->buffer)) {
       /* If compressor did not consume the whole row, then we must need to
        * suspend processing and return to the application.  In this situation
        * we pretend we didn't yet consume the last input row; otherwise, if
@@ -128,11 +133,15 @@ process_data_simple_main(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 GLOBAL(void)
-jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+_jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
 {
   my_main_ptr main_ptr;
   int ci;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
@@ -153,10 +162,12 @@ jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
     /* Allocate a strip buffer for each component */
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
          ci++, compptr++) {
-      main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
         ((j_common_ptr)cinfo, JPOOL_IMAGE,
-         compptr->width_in_blocks * DCTSIZE,
-         (JDIMENSION)(compptr->v_samp_factor * DCTSIZE));
+         compptr->width_in_blocks * data_unit,
+         (JDIMENSION)(compptr->v_samp_factor * data_unit));
     }
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jcmarker.c b/3rdparty/libjpeg-turbo/src/jcmarker.c
index 801fbab4ef..a064d4dd9e 100644
--- a/3rdparty/libjpeg-turbo/src/jcmarker.c
+++ b/3rdparty/libjpeg-turbo/src/jcmarker.c
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2003-2010 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -15,7 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 typedef enum {                  /* JPEG marker codes */
@@ -497,25 +499,26 @@ write_file_header(j_compress_ptr cinfo)
 METHODDEF(void)
 write_frame_header(j_compress_ptr cinfo)
 {
-  int ci, prec;
+  int ci, prec = 0;
   boolean is_baseline;
   jpeg_component_info *compptr;
 
-  /* Emit DQT for each quantization table.
-   * Note that emit_dqt() suppresses any duplicate tables.
-   */
-  prec = 0;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  if (!cinfo->master->lossless) {
+    /* Emit DQT for each quantization table.
+     * Note that emit_dqt() suppresses any duplicate tables.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+    }
+    /* now prec is nonzero iff there are any 16-bit quant tables. */
   }
-  /* now prec is nonzero iff there are any 16-bit quant tables. */
 
   /* Check for a non-baseline specification.
    * Note we assume that Huffman table numbers won't be changed later.
    */
   if (cinfo->arith_code || cinfo->progressive_mode ||
-      cinfo->data_precision != 8) {
+      cinfo->master->lossless || cinfo->data_precision != 8) {
     is_baseline = FALSE;
   } else {
     is_baseline = TRUE;
@@ -540,6 +543,8 @@ write_frame_header(j_compress_ptr cinfo)
   } else {
     if (cinfo->progressive_mode)
       emit_sof(cinfo, M_SOF2);  /* SOF code for progressive Huffman */
+    else if (cinfo->master->lossless)
+      emit_sof(cinfo, M_SOF3);  /* SOF code for lossless Huffman */
     else if (is_baseline)
       emit_sof(cinfo, M_SOF0);  /* SOF code for baseline implementation */
     else
@@ -574,10 +579,11 @@ write_scan_header(j_compress_ptr cinfo)
     for (i = 0; i < cinfo->comps_in_scan; i++) {
       compptr = cinfo->cur_comp_info[i];
       /* DC needs no table for refinement scan */
-      if (cinfo->Ss == 0 && cinfo->Ah == 0)
+      if ((cinfo->Ss == 0 && cinfo->Ah == 0) || cinfo->master->lossless)
         emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-      /* AC needs no table when not present */
-      if (cinfo->Se)
+      /* AC needs no table when not present, and lossless mode uses only DC
+         tables. */
+      if (cinfo->Se && !cinfo->master->lossless)
         emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
     }
   }
diff --git a/3rdparty/libjpeg-turbo/src/jcmaster.c b/3rdparty/libjpeg-turbo/src/jcmaster.c
index c2b2600031..161019763d 100644
--- a/3rdparty/libjpeg-turbo/src/jcmaster.c
+++ b/3rdparty/libjpeg-turbo/src/jcmaster.c
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2003-2010 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2016, 2018, D. R. Commander.
+ * Copyright (C) 2010, 2016, 2018, 2022-2024, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -18,40 +20,8 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
-#include "jconfigint.h"
-
-
-/* Private state */
-
-typedef enum {
-  main_pass,                    /* input data, also do first output step */
-  huff_opt_pass,                /* Huffman code optimization pass */
-  output_pass                   /* data output pass */
-} c_pass_type;
-
-typedef struct {
-  struct jpeg_comp_master pub;  /* public fields */
-
-  c_pass_type pass_type;        /* the type of the current pass */
-
-  int pass_number;              /* # of passes completed */
-  int total_passes;             /* total # of passes needed */
-
-  int scan_number;              /* current index in scan_info[] */
-
-  /*
-   * This is here so we can add libjpeg-turbo version/build information to the
-   * global string table without introducing a new global symbol.  Adding this
-   * information to the global string table allows one to examine a binary
-   * object and determine which version of libjpeg-turbo it was built from or
-   * linked against.
-   */
-  const char *jpeg_version;
-
-} my_comp_master;
-
-typedef my_comp_master *my_master_ptr;
+#include "jpegapicomp.h"
+#include "jcmaster.h"
 
 
 /*
@@ -69,15 +39,124 @@ GLOBAL(void)
 jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo)
 /* Do computations that are needed before master selection phase */
 {
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
   /* Hardwire it to "no scaling" */
   cinfo->jpeg_width = cinfo->image_width;
   cinfo->jpeg_height = cinfo->image_height;
-  cinfo->min_DCT_h_scaled_size = DCTSIZE;
-  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+  cinfo->min_DCT_h_scaled_size = data_unit;
+  cinfo->min_DCT_v_scaled_size = data_unit;
 }
 #endif
 
 
+LOCAL(boolean)
+using_std_huff_tables(j_compress_ptr cinfo)
+{
+  int i;
+
+  static const UINT8 bits_dc_luminance[17] = {
+    /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_luminance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_dc_chrominance[17] = {
+    /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_chrominance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_ac_luminance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
+  };
+  static const UINT8 val_ac_luminance[] = {
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  static const UINT8 bits_ac_chrominance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
+  };
+  static const UINT8 val_ac_chrominance[] = {
+    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  if (cinfo->dc_huff_tbl_ptrs[0] == NULL ||
+      cinfo->ac_huff_tbl_ptrs[0] == NULL ||
+      cinfo->dc_huff_tbl_ptrs[1] == NULL ||
+      cinfo->ac_huff_tbl_ptrs[1] == NULL)
+    return FALSE;
+
+  for (i = 2; i < NUM_HUFF_TBLS; i++) {
+    if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
+        cinfo->ac_huff_tbl_ptrs[i] != NULL)
+      return FALSE;
+  }
+
+  if (memcmp(cinfo->dc_huff_tbl_ptrs[0]->bits, bits_dc_luminance,
+             sizeof(bits_dc_luminance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[0]->huffval, val_dc_luminance,
+             sizeof(val_dc_luminance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[0]->bits, bits_ac_luminance,
+             sizeof(bits_ac_luminance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[0]->huffval, val_ac_luminance,
+             sizeof(val_ac_luminance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[1]->bits, bits_dc_chrominance,
+             sizeof(bits_dc_chrominance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[1]->huffval, val_dc_chrominance,
+             sizeof(val_dc_chrominance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[1]->bits, bits_ac_chrominance,
+             sizeof(bits_ac_chrominance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[1]->huffval, val_ac_chrominance,
+             sizeof(val_ac_chrominance)))
+    return FALSE;
+
+  return TRUE;
+}
+
+
 LOCAL(void)
 initial_setup(j_compress_ptr cinfo, boolean transcode_only)
 /* Do computations that are needed before master selection phase */
@@ -86,6 +165,7 @@ initial_setup(j_compress_ptr cinfo, boolean transcode_only)
   jpeg_component_info *compptr;
   long samplesperrow;
   JDIMENSION jd_samplesperrow;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
 #if JPEG_LIB_VERSION >= 70
 #if JPEG_LIB_VERSION >= 80
@@ -110,8 +190,12 @@ initial_setup(j_compress_ptr cinfo, boolean transcode_only)
   if ((long)jd_samplesperrow != samplesperrow)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
 
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->data_precision != 8 && cinfo->data_precision != 12 &&
+      cinfo->data_precision != 16)
+#else
+  if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
+#endif
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -142,17 +226,17 @@ initial_setup(j_compress_ptr cinfo, boolean transcode_only)
     compptr->component_index = ci;
     /* For compression, we never do DCT scaling. */
 #if JPEG_LIB_VERSION >= 70
-    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
 #else
-    compptr->DCT_scaled_size = DCTSIZE;
+    compptr->DCT_scaled_size = data_unit;
 #endif
-    /* Size in DCT blocks */
+    /* Size in data units */
     compptr->width_in_blocks = (JDIMENSION)
       jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
-                    (long)(cinfo->max_h_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_h_samp_factor * data_unit));
     compptr->height_in_blocks = (JDIMENSION)
       jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor,
-                    (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_v_samp_factor * data_unit));
     /* Size in samples */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
@@ -165,15 +249,19 @@ initial_setup(j_compress_ptr cinfo, boolean transcode_only)
   }
 
   /* Compute number of fully interleaved MCU rows (number of times that
-   * main controller will call coefficient controller).
+   * main controller will call coefficient or difference controller).
    */
   cinfo->total_iMCU_rows = (JDIMENSION)
     jdiv_round_up((long)cinfo->_jpeg_height,
-                  (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                  (long)(cinfo->max_v_samp_factor * data_unit));
 }
 
 
-#ifdef C_MULTISCAN_FILES_SUPPORTED
+#if defined(C_MULTISCAN_FILES_SUPPORTED) || defined(C_LOSSLESS_SUPPORTED)
+#define NEED_SCAN_SCRIPT
+#endif
+
+#ifdef NEED_SCAN_SCRIPT
 
 LOCAL(void)
 validate_script(j_compress_ptr cinfo)
@@ -194,13 +282,29 @@ validate_script(j_compress_ptr cinfo)
   if (cinfo->num_scans <= 0)
     ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
 
+#ifndef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->num_scans > 1)
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 && scanptr->Se == 0) {
+#ifdef C_LOSSLESS_SUPPORTED
+    cinfo->master->lossless = TRUE;
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      component_sent[ci] = FALSE;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
   /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
    * for progressive JPEG, no scan can have this.
    */
-  scanptr = cinfo->scan_info;
-  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) {
+  else if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) {
 #ifdef C_PROGRESSIVE_SUPPORTED
     cinfo->progressive_mode = TRUE;
+    cinfo->master->lossless = FALSE;
     last_bitpos_ptr = &last_bitpos[0][0];
     for (ci = 0; ci < cinfo->num_components; ci++)
       for (coefi = 0; coefi < DCTSIZE2; coefi++)
@@ -209,7 +313,7 @@ validate_script(j_compress_ptr cinfo)
     ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
   } else {
-    cinfo->progressive_mode = FALSE;
+    cinfo->progressive_mode = cinfo->master->lossless = FALSE;
     for (ci = 0; ci < cinfo->num_components; ci++)
       component_sent[ci] = FALSE;
   }
@@ -241,13 +345,10 @@ validate_script(j_compress_ptr cinfo)
        * out-of-range reconstructed DC values during the first DC scan,
        * which might cause problems for some decoders.
        */
-#if BITS_IN_JSAMPLE == 8
-#define MAX_AH_AL  10
-#else
-#define MAX_AH_AL  13
-#endif
+      int max_Ah_Al = cinfo->data_precision == 12 ? 13 : 10;
+
       if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-          Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+          Ah < 0 || Ah > max_Ah_Al || Al < 0 || Al > max_Ah_Al)
         ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       if (Ss == 0) {
         if (Se != 0)            /* DC and AC together not OK */
@@ -275,9 +376,25 @@ validate_script(j_compress_ptr cinfo)
       }
 #endif
     } else {
-      /* For sequential JPEG, all progression parameters must be these: */
-      if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0)
-        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+#ifdef C_LOSSLESS_SUPPORTED
+      if (cinfo->master->lossless) {
+        /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that
+         * seems wrong: the upper bound ought to depend on data precision.
+         * Perhaps they really meant 0..N-1 for N-bit precision, which is what
+         * we allow here.  Values greater than or equal to the data precision
+         * will result in a blank image.
+         */
+        if (Ss < 1 || Ss > 7 ||         /* predictor selection value */
+            Se != 0 || Ah != 0 ||
+            Al < 0 || Al >= cinfo->data_precision) /* point transform */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else
+#endif
+      {
+        /* For sequential JPEG, all progression parameters must be these: */
+        if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0)
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
       /* Make sure components are not sent twice */
       for (ci = 0; ci < ncomps; ci++) {
         thisi = scanptr->component_index[ci];
@@ -309,7 +426,7 @@ validate_script(j_compress_ptr cinfo)
   }
 }
 
-#endif /* C_MULTISCAN_FILES_SUPPORTED */
+#endif /* NEED_SCAN_SCRIPT */
 
 
 LOCAL(void)
@@ -318,7 +435,7 @@ select_scan_parameters(j_compress_ptr cinfo)
 {
   int ci;
 
-#ifdef C_MULTISCAN_FILES_SUPPORTED
+#ifdef NEED_SCAN_SCRIPT
   if (cinfo->scan_info != NULL) {
     /* Prepare for current scan --- the script is already validated */
     my_master_ptr master = (my_master_ptr)cinfo->master;
@@ -344,10 +461,12 @@ select_scan_parameters(j_compress_ptr cinfo)
     for (ci = 0; ci < cinfo->num_components; ci++) {
       cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
     }
-    cinfo->Ss = 0;
-    cinfo->Se = DCTSIZE2 - 1;
-    cinfo->Ah = 0;
-    cinfo->Al = 0;
+    if (!cinfo->master->lossless) {
+      cinfo->Ss = 0;
+      cinfo->Se = DCTSIZE2 - 1;
+      cinfo->Ah = 0;
+      cinfo->Al = 0;
+    }
   }
 }
 
@@ -359,6 +478,7 @@ per_scan_setup(j_compress_ptr cinfo)
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   if (cinfo->comps_in_scan == 1) {
 
@@ -373,7 +493,7 @@ per_scan_setup(j_compress_ptr cinfo)
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
     compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = DCTSIZE;
+    compptr->MCU_sample_width = data_unit;
     compptr->last_col_width = 1;
     /* For noninterleaved scans, it is convenient to define last_row_height
      * as the number of block rows present in the last iMCU row.
@@ -396,10 +516,10 @@ per_scan_setup(j_compress_ptr cinfo)
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
       jdiv_round_up((long)cinfo->_jpeg_width,
-                    (long)(cinfo->max_h_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_h_samp_factor * data_unit));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
       jdiv_round_up((long)cinfo->_jpeg_height,
-                    (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_v_samp_factor * data_unit));
 
     cinfo->blocks_in_MCU = 0;
 
@@ -409,7 +529,7 @@ per_scan_setup(j_compress_ptr cinfo)
       compptr->MCU_width = compptr->h_samp_factor;
       compptr->MCU_height = compptr->v_samp_factor;
       compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
+      compptr->MCU_sample_width = compptr->MCU_width * data_unit;
       /* Figure number of non-dummy blocks in last MCU column & row */
       tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
       if (tmp == 0) tmp = compptr->MCU_width;
@@ -481,7 +601,8 @@ prepare_for_pass(j_compress_ptr cinfo)
     /* Do Huffman optimization for a scan after the first one. */
     select_scan_parameters(cinfo);
     per_scan_setup(cinfo);
-    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
+    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code ||
+        cinfo->master->lossless) {
       (*cinfo->entropy->start_pass) (cinfo, TRUE);
       (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
       master->pub.call_pass_startup = FALSE;
@@ -590,22 +711,17 @@ finish_pass_master(j_compress_ptr cinfo)
 GLOBAL(void)
 jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
 {
-  my_master_ptr master;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  boolean empty_huff_tables = TRUE;
+  int i;
 
-  master = (my_master_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                sizeof(my_comp_master));
-  cinfo->master = (struct jpeg_comp_master *)master;
   master->pub.prepare_for_pass = prepare_for_pass;
   master->pub.pass_startup = pass_startup;
   master->pub.finish_pass = finish_pass_master;
   master->pub.is_last_pass = FALSE;
 
-  /* Validate parameters, determine derived values */
-  initial_setup(cinfo, transcode_only);
-
   if (cinfo->scan_info != NULL) {
-#ifdef C_MULTISCAN_FILES_SUPPORTED
+#ifdef NEED_SCAN_SCRIPT
     validate_script(cinfo);
 #else
     ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -615,8 +731,42 @@ jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
     cinfo->num_scans = 1;
   }
 
-  if (cinfo->progressive_mode && !cinfo->arith_code)  /*  TEMPORARY HACK ??? */
-    cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
+  /* Disable smoothing and subsampling in lossless mode, since those are lossy
+   * algorithms.  Set the JPEG colorspace to the input colorspace.  Disable raw
+   * (downsampled) data input, because it isn't particularly useful without
+   * subsampling and has not been tested in lossless mode.
+   */
+  if (cinfo->master->lossless) {
+    int ci;
+    jpeg_component_info *compptr;
+
+    cinfo->raw_data_in = FALSE;
+    cinfo->smoothing_factor = 0;
+    jpeg_default_colorspace(cinfo);
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++)
+      compptr->h_samp_factor = compptr->v_samp_factor = 1;
+  }
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo, transcode_only);
+
+  if (cinfo->master->lossless ||        /*  TEMPORARY HACK ??? */
+      (cinfo->progressive_mode && !cinfo->arith_code))
+    cinfo->optimize_coding = TRUE; /* assume default tables no good for
+                                      progressive mode or lossless mode */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
+        cinfo->ac_huff_tbl_ptrs[i] != NULL) {
+      empty_huff_tables = FALSE;
+      break;
+    }
+  }
+  if (cinfo->data_precision == 12 && !cinfo->arith_code &&
+      !cinfo->optimize_coding &&
+      (empty_huff_tables || using_std_huff_tables(cinfo)))
+    cinfo->optimize_coding = TRUE; /* assume default tables no good for 12-bit
+                                      data precision */
 
   /* Initialize my private state */
   if (transcode_only) {
diff --git a/3rdparty/libjpeg-turbo/src/jcmaster.h b/3rdparty/libjpeg-turbo/src/jcmaster.h
new file mode 100644
index 0000000000..3b13289b69
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jcmaster.h
@@ -0,0 +1,43 @@
+/*
+ * jcmaster.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1995, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2016, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains master control structure for the JPEG compressor.
+ */
+
+/* Private state */
+
+typedef enum {
+  main_pass,                    /* input data, also do first output step */
+  huff_opt_pass,                /* Huffman code optimization pass */
+  output_pass                   /* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;  /* public fields */
+
+  c_pass_type pass_type;        /* the type of the current pass */
+
+  int pass_number;              /* # of passes completed */
+  int total_passes;             /* total # of passes needed */
+
+  int scan_number;              /* current index in scan_info[] */
+
+  /*
+   * This is here so we can add libjpeg-turbo version/build information to the
+   * global string table without introducing a new global symbol.  Adding this
+   * information to the global string table allows one to examine a binary
+   * object and determine which version of libjpeg-turbo it was built from or
+   * linked against.
+   */
+  const char *jpeg_version;
+
+} my_comp_master;
+
+typedef my_comp_master *my_master_ptr;
diff --git a/3rdparty/libjpeg-turbo/src/jcparam.c b/3rdparty/libjpeg-turbo/src/jcparam.c
index 5bc7174dcb..d1dee4da3d 100644
--- a/3rdparty/libjpeg-turbo/src/jcparam.c
+++ b/3rdparty/libjpeg-turbo/src/jcparam.c
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2003-2008 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2018, D. R. Commander.
+ * Copyright (C) 2009-2011, 2018, 2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -202,7 +204,6 @@ jpeg_set_defaults(j_compress_ptr cinfo)
   cinfo->scale_num = 1;         /* 1:1 scaling */
   cinfo->scale_denom = 1;
 #endif
-  cinfo->data_precision = BITS_IN_JSAMPLE;
   /* Set up two quantization tables using default quality of 75 */
   jpeg_set_quality(cinfo, 75, TRUE);
   /* Set up two Huffman tables */
@@ -232,7 +233,7 @@ jpeg_set_defaults(j_compress_ptr cinfo)
    * tables will be computed.  This test can be removed if default tables
    * are supplied that are valid for the desired precision.
    */
-  if (cinfo->data_precision > 8)
+  if (cinfo->data_precision == 12 && !cinfo->arith_code)
     cinfo->optimize_coding = TRUE;
 
   /* By default, use the simpler non-cosited sampling alignment */
@@ -296,7 +297,10 @@ jpeg_default_colorspace(j_compress_ptr cinfo)
   case JCS_EXT_BGRA:
   case JCS_EXT_ABGR:
   case JCS_EXT_ARGB:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    if (cinfo->master->lossless)
+      jpeg_set_colorspace(cinfo, JCS_RGB);
+    else
+      jpeg_set_colorspace(cinfo, JCS_YCbCr);
     break;
   case JCS_YCbCr:
     jpeg_set_colorspace(cinfo, JCS_YCbCr);
@@ -475,6 +479,11 @@ jpeg_simple_progression(j_compress_ptr cinfo)
   if (cinfo->global_state != CSTATE_START)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
+  if (cinfo->master->lossless) {
+    cinfo->master->lossless = FALSE;
+    jpeg_default_colorspace(cinfo);
+  }
+
   /* Figure space needed for script.  Calculation must match code below! */
   if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
     /* Custom script for YCbCr color images. */
@@ -539,3 +548,38 @@ jpeg_simple_progression(j_compress_ptr cinfo)
 }
 
 #endif /* C_PROGRESSIVE_SUPPORTED */
+
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+/*
+ * Enable lossless mode.
+ */
+
+GLOBAL(void)
+jpeg_enable_lossless(j_compress_ptr cinfo, int predictor_selection_value,
+                     int point_transform)
+{
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  cinfo->master->lossless = TRUE;
+  cinfo->Ss = predictor_selection_value;
+  cinfo->Se = 0;
+  cinfo->Ah = 0;
+  cinfo->Al = point_transform;
+
+  /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that seems
+   * wrong: the upper bound ought to depend on data precision.  Perhaps they
+   * really meant 0..N-1 for N-bit precision, which is what we allow here.
+   * Values greater than or equal to the data precision will result in a blank
+   * image.
+   */
+  if (cinfo->Ss < 1 || cinfo->Ss > 7 ||
+      cinfo->Al < 0 || cinfo->Al >= cinfo->data_precision)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+}
+
+#endif /* C_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jcphuff.c b/3rdparty/libjpeg-turbo/src/jcphuff.c
index 872e570bff..484e2d857f 100644
--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@@ -3,9 +3,11 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
- * Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2011, 2015, 2018, 2021-2022, 2024, D. R. Commander.
+ * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  * Copyright (C) 2021, Alex Richardson.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -21,8 +23,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#ifdef WITH_SIMD
 #include "jsimd.h"
-#include "jconfigint.h"
+#else
+#include "jchuff.h"             /* Declarations shared with jc*huff.c */
+#endif
 #include <limits.h>
 
 #ifdef HAVE_INTRIN_H
@@ -39,40 +44,7 @@
 
 #ifdef C_PROGRESSIVE_SUPPORTED
 
-/*
- * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
- * used for bit counting rather than the lookup table.  This will reduce the
- * memory footprint by 64k, which is important for some mobile applications
- * that create many isolated instances of libjpeg-turbo (web browsers, for
- * instance.)  This may improve performance on some mobile platforms as well.
- * This feature is enabled by default only on Arm processors, because some x86
- * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
- * shown to have a significant performance impact even on the x86 chips that
- * have a fast implementation of it.  When building for Armv6, you can
- * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
- * flags (this defines __thumb__).
- */
-
-/* NOTE: Both GCC and Clang define __GNUC__ */
-#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
-    defined(_M_ARM) || defined(_M_ARM64)
-#if !defined(__thumb__) || defined(__thumb2__)
-#define USE_CLZ_INTRINSIC
-#endif
-#endif
-
-#ifdef USE_CLZ_INTRINSIC
-#if defined(_MSC_VER) && !defined(__clang__)
-#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
-#else
-#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
-#endif
-#define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
-#else
-#include "jpeg_nbits_table.h"
-#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
-#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
-#endif
+#include "jpeg_nbits.h"
 
 
 /* Expanded entropy encoder object for progressive Huffman encoding. */
@@ -83,11 +55,11 @@ typedef struct {
   /* Pointer to routine to prepare data for encode_mcu_AC_first() */
   void (*AC_first_prepare) (const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *values, size_t *zerobits);
+                            int Al, UJCOEF *values, size_t *zerobits);
   /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
   int (*AC_refine_prepare) (const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *absvalues, size_t *bits);
+                            int Al, UJCOEF *absvalues, size_t *bits);
 
   /* Mode flag: TRUE for optimization, FALSE for actual data output */
   boolean gather_statistics;
@@ -157,14 +129,14 @@ METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data);
 METHODDEF(void) encode_mcu_AC_first_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data);
 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
                                         JBLOCKROW *MCU_data);
 METHODDEF(int) encode_mcu_AC_refine_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
                                         JBLOCKROW *MCU_data);
 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
@@ -224,18 +196,22 @@ start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
       entropy->pub.encode_mcu = encode_mcu_DC_first;
     else
       entropy->pub.encode_mcu = encode_mcu_AC_first;
+#ifdef WITH_SIMD
     if (jsimd_can_encode_mcu_AC_first_prepare())
       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
     else
+#endif
       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
   } else {
     if (is_DC_band)
       entropy->pub.encode_mcu = encode_mcu_DC_refine;
     else {
       entropy->pub.encode_mcu = encode_mcu_AC_refine;
+#ifdef WITH_SIMD
       if (jsimd_can_encode_mcu_AC_refine_prepare())
         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
       else
+#endif
         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
       /* AC refinement needs a correction bit buffer */
       if (entropy->bit_buffer == NULL)
@@ -490,6 +466,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   JBLOCKROW block;
   jpeg_component_info *compptr;
   ISHIFT_TEMPS
+  int max_coef_bits = cinfo->data_precision + 2;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -532,7 +509,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
     /* Check for out-of-range coefficient values.
      * Since we're encoding a difference, the range limit is twice as much.
      */
-    if (nbits > MAX_COEF_BITS + 1)
+    if (nbits > max_coef_bits + 1)
       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
 
     /* Count/emit the Huffman-coded symbol for the number of bits */
@@ -584,8 +561,8 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
       continue; \
     /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
     temp2 ^= temp; \
-    values[k] = (JCOEF)temp; \
-    values[k + DCTSIZE2] = (JCOEF)temp2; \
+    values[k] = (UJCOEF)temp; \
+    values[k + DCTSIZE2] = (UJCOEF)temp2; \
     zerobits |= ((size_t)1U) << k; \
   } \
 }
@@ -593,7 +570,7 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 METHODDEF(void)
 encode_mcu_AC_first_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
-                            int Al, JCOEF *values, size_t *bits)
+                            int Al, UJCOEF *values, size_t *bits)
 {
   register int k, temp, temp2;
   size_t zerobits = 0U;
@@ -643,7 +620,7 @@ label \
     /* Find the number of bits needed for the magnitude of the coefficient */ \
     nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
     /* Check for out-of-range coefficient values */ \
-    if (nbits > MAX_COEF_BITS) \
+    if (nbits > max_coef_bits) \
       ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
     \
     /* Count/emit Huffman symbol for run length / number of bits */ \
@@ -666,11 +643,12 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   register int nbits, r;
   int Sl = cinfo->Se - cinfo->Ss + 1;
   int Al = cinfo->Al;
-  JCOEF values_unaligned[2 * DCTSIZE2 + 15];
-  JCOEF *values;
-  const JCOEF *cvalue;
+  UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
+  UJCOEF *values;
+  const UJCOEF *cvalue;
   size_t zerobits;
   size_t bits[8 / SIZEOF_SIZE_T];
+  int max_coef_bits = cinfo->data_precision + 2;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -681,7 +659,7 @@ encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
       emit_restart(entropy, entropy->next_restart_num);
 
 #ifdef WITH_SIMD
-  cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16);
+  cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
 #else
   /* Not using SIMD, so alignment is not needed */
   cvalue = values = values_unaligned;
@@ -815,7 +793,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
       zerobits |= ((size_t)1U) << k; \
       signbits |= ((size_t)(temp2 + 1)) << k; \
     } \
-    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
+    absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \
     if (temp == 1) \
       EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
   } \
@@ -824,7 +802,7 @@ encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 METHODDEF(int)
 encode_mcu_AC_refine_prepare(const JCOEF *block,
                              const int *jpeg_natural_order_start, int Sl,
-                             int Al, JCOEF *absvalues, size_t *bits)
+                             int Al, UJCOEF *absvalues, size_t *bits)
 {
   register int k, temp, temp2;
   int EOB = 0;
@@ -931,9 +909,9 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   unsigned int BR;
   int Sl = cinfo->Se - cinfo->Ss + 1;
   int Al = cinfo->Al;
-  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
-  JCOEF *absvalues;
-  const JCOEF *cabsvalue, *EOBPTR;
+  UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
+  UJCOEF *absvalues;
+  const UJCOEF *cabsvalue, *EOBPTR;
   size_t zerobits, signbits;
   size_t bits[16 / SIZEOF_SIZE_T];
 
@@ -946,7 +924,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
       emit_restart(entropy, entropy->next_restart_num);
 
 #ifdef WITH_SIMD
-  cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
+  cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
 #else
   /* Not using SIMD, so alignment is not needed */
   cabsvalue = absvalues = absvalues_unaligned;
diff --git a/3rdparty/libjpeg-turbo/src/jcprepct.c b/3rdparty/libjpeg-turbo/src/jcprepct.c
index f27cc34507..ac2311c138 100644
--- a/3rdparty/libjpeg-turbo/src/jcprepct.c
+++ b/3rdparty/libjpeg-turbo/src/jcprepct.c
@@ -1,8 +1,10 @@
 /*
  * jcprepct.c
  *
- * This file is part of the Independent JPEG Group's software:
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -20,8 +22,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
 /* At present, jcsample.c can request context rows only for smoothing.
  * In the future, we might also need context rows for CCIR601 sampling
  * or other more-complex downsampling procedures.  The code to support
@@ -59,7 +64,7 @@ typedef struct {
   /* Downsampling input buffer.  This buffer holds color-converted data
    * until we have enough to do a downsample step.
    */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
+  _JSAMPARRAY color_buf[MAX_COMPONENTS];
 
   JDIMENSION rows_to_go;        /* counts rows remaining in source image */
   int next_buf_row;             /* index of next row to store in color_buf */
@@ -106,14 +111,14 @@ start_pass_prep(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
  */
 
 LOCAL(void)
-expand_bottom_edge(JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
+expand_bottom_edge(_JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
                    int output_rows)
 {
   register int row;
 
   for (row = input_rows; row < output_rows; row++) {
-    jcopy_sample_rows(image_data, input_rows - 1, image_data, row, 1,
-                      num_cols);
+    _jcopy_sample_rows(image_data, input_rows - 1, image_data, row, 1,
+                       num_cols);
   }
 }
 
@@ -128,15 +133,16 @@ expand_bottom_edge(JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
  */
 
 METHODDEF(void)
-pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+pre_process_data(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
                  JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
-                 JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                 _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
                  JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
   int numrows, ci;
   JDIMENSION inrows;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   while (*in_row_ctr < in_rows_avail &&
          *out_row_group_ctr < out_row_groups_avail) {
@@ -144,10 +150,10 @@ pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     inrows = in_rows_avail - *in_row_ctr;
     numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
     numrows = (int)MIN((JDIMENSION)numrows, inrows);
-    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-                                       prep->color_buf,
-                                       (JDIMENSION)prep->next_buf_row,
-                                       numrows);
+    (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
+                                        prep->color_buf,
+                                        (JDIMENSION)prep->next_buf_row,
+                                        numrows);
     *in_row_ctr += numrows;
     prep->next_buf_row += numrows;
     prep->rows_to_go -= numrows;
@@ -162,9 +168,9 @@ pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     }
     /* If we've filled the conversion buffer, empty it. */
     if (prep->next_buf_row == cinfo->max_v_samp_factor) {
-      (*cinfo->downsample->downsample) (cinfo,
-                                        prep->color_buf, (JDIMENSION)0,
-                                        output_buf, *out_row_group_ctr);
+      (*cinfo->downsample->_downsample) (cinfo,
+                                         prep->color_buf, (JDIMENSION)0,
+                                         output_buf, *out_row_group_ctr);
       prep->next_buf_row = 0;
       (*out_row_group_ctr)++;
     }
@@ -174,7 +180,8 @@ pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     if (prep->rows_to_go == 0 && *out_row_group_ctr < out_row_groups_avail) {
       for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
            ci++, compptr++) {
-        expand_bottom_edge(output_buf[ci], compptr->width_in_blocks * DCTSIZE,
+        expand_bottom_edge(output_buf[ci],
+                           compptr->width_in_blocks * data_unit,
                            (int)(*out_row_group_ctr * compptr->v_samp_factor),
                            (int)(out_row_groups_avail * compptr->v_samp_factor));
       }
@@ -192,9 +199,9 @@ pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
  */
 
 METHODDEF(void)
-pre_process_context(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+pre_process_context(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
                     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
-                    JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                    _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
                     JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
@@ -208,17 +215,17 @@ pre_process_context(j_compress_ptr cinfo, JSAMPARRAY input_buf,
       inrows = in_rows_avail - *in_row_ctr;
       numrows = prep->next_buf_stop - prep->next_buf_row;
       numrows = (int)MIN((JDIMENSION)numrows, inrows);
-      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-                                         prep->color_buf,
-                                         (JDIMENSION)prep->next_buf_row,
-                                         numrows);
+      (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
+                                          prep->color_buf,
+                                          (JDIMENSION)prep->next_buf_row,
+                                          numrows);
       /* Pad at top of image, if first time through */
       if (prep->rows_to_go == cinfo->image_height) {
         for (ci = 0; ci < cinfo->num_components; ci++) {
           int row;
           for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-            jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci],
-                              -row, 1, cinfo->image_width);
+            _jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci],
+                               -row, 1, cinfo->image_width);
           }
         }
       }
@@ -240,9 +247,9 @@ pre_process_context(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     }
     /* If we've gotten enough data, downsample a row group. */
     if (prep->next_buf_row == prep->next_buf_stop) {
-      (*cinfo->downsample->downsample) (cinfo, prep->color_buf,
-                                        (JDIMENSION)prep->this_row_group,
-                                        output_buf, *out_row_group_ctr);
+      (*cinfo->downsample->_downsample) (cinfo, prep->color_buf,
+                                         (JDIMENSION)prep->this_row_group,
+                                         output_buf, *out_row_group_ctr);
       (*out_row_group_ctr)++;
       /* Advance pointers with wraparound as necessary. */
       prep->this_row_group += cinfo->max_v_samp_factor;
@@ -267,15 +274,16 @@ create_context_buffer(j_compress_ptr cinfo)
   int rgroup_height = cinfo->max_v_samp_factor;
   int ci, i;
   jpeg_component_info *compptr;
-  JSAMPARRAY true_buffer, fake_buffer;
+  _JSAMPARRAY true_buffer, fake_buffer;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   /* Grab enough space for fake row pointers for all the components;
    * we need five row groups' worth of pointers for each component.
    */
-  fake_buffer = (JSAMPARRAY)
+  fake_buffer = (_JSAMPARRAY)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 (cinfo->num_components * 5 * rgroup_height) *
-                                sizeof(JSAMPROW));
+                                sizeof(_JSAMPROW));
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -283,14 +291,14 @@ create_context_buffer(j_compress_ptr cinfo)
      * We make the buffer wide enough to allow the downsampler to edge-expand
      * horizontally within the buffer, if it so chooses.
      */
-    true_buffer = (*cinfo->mem->alloc_sarray)
+    true_buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
       ((j_common_ptr)cinfo, JPOOL_IMAGE,
-       (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE *
+       (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
                      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
        (JDIMENSION)(3 * rgroup_height));
     /* Copy true buffer row pointers into the middle of the fake row array */
     memcpy(fake_buffer + rgroup_height, true_buffer,
-           3 * rgroup_height * sizeof(JSAMPROW));
+           3 * rgroup_height * sizeof(_JSAMPROW));
     /* Fill in the above and below wraparound pointers */
     for (i = 0; i < rgroup_height; i++) {
       fake_buffer[i] = true_buffer[2 * rgroup_height + i];
@@ -309,11 +317,15 @@ create_context_buffer(j_compress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+_jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
 {
   my_prep_ptr prep;
   int ci;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   if (need_full_buffer)         /* safety check */
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@@ -331,21 +343,23 @@ jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
   if (cinfo->downsample->need_context_rows) {
     /* Set up to provide context rows */
 #ifdef CONTEXT_ROWS_SUPPORTED
-    prep->pub.pre_process_data = pre_process_context;
+    prep->pub._pre_process_data = pre_process_context;
     create_context_buffer(cinfo);
 #else
     ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
   } else {
     /* No context, just make it tall enough for one row group */
-    prep->pub.pre_process_data = pre_process_data;
+    prep->pub._pre_process_data = pre_process_data;
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
          ci++, compptr++) {
-      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+      prep->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
         ((j_common_ptr)cinfo, JPOOL_IMAGE,
-         (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE *
+         (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
                        cinfo->max_h_samp_factor) / compptr->h_samp_factor),
          (JDIMENSION)cinfo->max_v_samp_factor);
     }
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jcsample.c b/3rdparty/libjpeg-turbo/src/jcsample.c
index e8515ebf0f..30e6e54b40 100644
--- a/3rdparty/libjpeg-turbo/src/jcsample.c
+++ b/3rdparty/libjpeg-turbo/src/jcsample.c
@@ -3,10 +3,12 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015, 2019, D. R. Commander.
+ * Copyright (C) 2015, 2019, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -54,13 +56,16 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
 /* Pointer to routine to downsample a single component */
 typedef void (*downsample1_ptr) (j_compress_ptr cinfo,
                                  jpeg_component_info *compptr,
-                                 JSAMPARRAY input_data,
-                                 JSAMPARRAY output_data);
+                                 _JSAMPARRAY input_data,
+                                 _JSAMPARRAY output_data);
 
 /* Private subobject */
 
@@ -91,11 +96,11 @@ start_pass_downsample(j_compress_ptr cinfo)
  */
 
 LOCAL(void)
-expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
+expand_right_edge(_JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
                   JDIMENSION output_cols)
 {
-  register JSAMPROW ptr;
-  register JSAMPLE pixval;
+  register _JSAMPROW ptr;
+  register _JSAMPLE pixval;
   register int count;
   int row;
   int numcols = (int)(output_cols - input_cols);
@@ -118,14 +123,14 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
  */
 
 METHODDEF(void)
-sep_downsample(j_compress_ptr cinfo, JSAMPIMAGE input_buf,
-               JDIMENSION in_row_index, JSAMPIMAGE output_buf,
+sep_downsample(j_compress_ptr cinfo, _JSAMPIMAGE input_buf,
+               JDIMENSION in_row_index, _JSAMPIMAGE output_buf,
                JDIMENSION out_row_group_index)
 {
   my_downsample_ptr downsample = (my_downsample_ptr)cinfo->downsample;
   int ci;
   jpeg_component_info *compptr;
-  JSAMPARRAY in_ptr, out_ptr;
+  _JSAMPARRAY in_ptr, out_ptr;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -145,12 +150,13 @@ sep_downsample(j_compress_ptr cinfo, JSAMPIMAGE input_buf,
 
 METHODDEF(void)
 int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-               JSAMPARRAY input_data, JSAMPARRAY output_data)
+               _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
   int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
   JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
-  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
-  JSAMPROW inptr, outptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  _JSAMPROW inptr, outptr;
   JLONG outvalue;
 
   h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
@@ -177,7 +183,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
           outvalue += (JLONG)(*inptr++);
         }
       }
-      *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
+      *outptr++ = (_JSAMPLE)((outvalue + numpix2) / numpix);
     }
     inrow += v_expand;
   }
@@ -192,14 +198,16 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 fullsize_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY output_data)
+                    _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
   /* Copy the data */
-  jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor,
-                    cinfo->image_width);
+  _jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor,
+                     cinfo->image_width);
   /* Edge-expand */
   expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
-                    compptr->width_in_blocks * DCTSIZE);
+                    compptr->width_in_blocks * data_unit);
 }
 
 
@@ -217,12 +225,13 @@ fullsize_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                JSAMPARRAY input_data, JSAMPARRAY output_data)
+                _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
-  register JSAMPROW inptr, outptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr, outptr;
   register int bias;
 
   /* Expand input data enough to let all the output samples be generated
@@ -237,7 +246,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     inptr = input_data[outrow];
     bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
+      *outptr++ = (_JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
       bias ^= 1;                /* 0=>1, 1=>0 */
       inptr += 2;
     }
@@ -253,12 +262,13 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                JSAMPARRAY input_data, JSAMPARRAY output_data)
+                _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
-  register JSAMPROW inptr0, inptr1, outptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr0, inptr1, outptr;
   register int bias;
 
   /* Expand input data enough to let all the output samples be generated
@@ -275,8 +285,8 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     inptr1 = input_data[inrow + 1];
     bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ =
-        (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
+      *outptr++ = (_JSAMPLE)
+        ((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
       bias ^= 3;                /* 1=>2, 2=>1 */
       inptr0 += 2;  inptr1 += 2;
     }
@@ -295,12 +305,13 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+                       _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
-  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
   JLONG membersum, neighsum, memberscale, neighscale;
 
   /* Expand input data enough to let all the output samples be generated
@@ -341,7 +352,7 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     neighsum += neighsum;
     neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
     membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
+    *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
     inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
 
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
@@ -357,7 +368,7 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
       /* form final output scaled up by 2^16 */
       membersum = membersum * memberscale + neighsum * neighscale;
       /* round, descale and output it */
-      *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
+      *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
       inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
     }
 
@@ -368,7 +379,7 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     neighsum += neighsum;
     neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
     membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE)((membersum + 32768) >> 16);
+    *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
 
     inrow += 2;
   }
@@ -383,12 +394,13 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                           JSAMPARRAY input_data, JSAMPARRAY output_data)
+                           _JSAMPARRAY input_data, _JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
-  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr, above_ptr, below_ptr, outptr;
   JLONG membersum, neighsum, memberscale, neighscale;
   int colsum, lastcolsum, nextcolsum;
 
@@ -420,7 +432,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
     neighsum = colsum + (colsum - membersum) + nextcolsum;
     membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
+    *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
     lastcolsum = colsum;  colsum = nextcolsum;
 
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
@@ -429,7 +441,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
       nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
       neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
       membersum = membersum * memberscale + neighsum * neighscale;
-      *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
+      *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
       lastcolsum = colsum;  colsum = nextcolsum;
     }
 
@@ -437,7 +449,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     membersum = *inptr;
     neighsum = lastcolsum + (colsum - membersum) + colsum;
     membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE)((membersum + 32768) >> 16);
+    *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
 
   }
 }
@@ -451,19 +463,22 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jinit_downsampler(j_compress_ptr cinfo)
+_jinit_downsampler(j_compress_ptr cinfo)
 {
   my_downsample_ptr downsample;
   int ci;
   jpeg_component_info *compptr;
   boolean smoothok = TRUE;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   downsample = (my_downsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_downsampler));
   cinfo->downsample = (struct jpeg_downsampler *)downsample;
   downsample->pub.start_pass = start_pass_downsample;
-  downsample->pub.downsample = sep_downsample;
+  downsample->pub._downsample = sep_downsample;
   downsample->pub.need_context_rows = FALSE;
 
   if (cinfo->CCIR601_sampling)
@@ -484,15 +499,17 @@ jinit_downsampler(j_compress_ptr cinfo)
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
                compptr->v_samp_factor == cinfo->max_v_samp_factor) {
       smoothok = FALSE;
+#ifdef WITH_SIMD
       if (jsimd_can_h2v1_downsample())
         downsample->methods[ci] = jsimd_h2v1_downsample;
       else
+#endif
         downsample->methods[ci] = h2v1_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
                compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
       if (cinfo->smoothing_factor) {
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
         if (jsimd_can_h2v2_smooth_downsample())
           downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
         else
@@ -502,9 +519,11 @@ jinit_downsampler(j_compress_ptr cinfo)
       } else
 #endif
       {
+#ifdef WITH_SIMD
         if (jsimd_can_h2v2_downsample())
           downsample->methods[ci] = jsimd_h2v2_downsample;
         else
+#endif
           downsample->methods[ci] = h2v2_downsample;
       }
     } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
@@ -520,3 +539,5 @@ jinit_downsampler(j_compress_ptr cinfo)
     TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
 #endif
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jcstest.c b/3rdparty/libjpeg-turbo/src/jcstest.c
new file mode 100644
index 0000000000..8b1fe38082
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jcstest.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This program demonstrates how to check for the colorspace extension
+   capabilities of libjpeg-turbo at both compile time and run time. */
+
+#include <stdio.h>
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+
+#ifndef JCS_EXTENSIONS
+#define JCS_EXT_RGB  6
+#endif
+#if !defined(JCS_EXTENSIONS) || !defined(JCS_ALPHA_EXTENSIONS)
+#define JCS_EXT_RGBA  12
+#endif
+
+static char lasterror[JMSG_LENGTH_MAX] = "No error";
+
+typedef struct _error_mgr {
+  struct jpeg_error_mgr pub;
+  jmp_buf jb;
+} error_mgr;
+
+static void my_error_exit(j_common_ptr cinfo)
+{
+  error_mgr *myerr = (error_mgr *)cinfo->err;
+  (*cinfo->err->output_message) (cinfo);
+  longjmp(myerr->jb, 1);
+}
+
+static void my_output_message(j_common_ptr cinfo)
+{
+  (*cinfo->err->format_message) (cinfo, lasterror);
+}
+
+int main(void)
+{
+  int jcs_valid = -1, jcs_alpha_valid = -1;
+  struct jpeg_compress_struct cinfo;
+  error_mgr jerr;
+
+  printf("libjpeg-turbo colorspace extensions:\n");
+#if JCS_EXTENSIONS
+  printf("  Present at compile time\n");
+#else
+  printf("  Not present at compile time\n");
+#endif
+
+  cinfo.err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = my_error_exit;
+  jerr.pub.output_message = my_output_message;
+
+  if (setjmp(jerr.jb)) {
+    /* this will execute if libjpeg has an error */
+    jcs_valid = 0;
+    goto done;
+  }
+
+  jpeg_create_compress(&cinfo);
+  cinfo.input_components = 3;
+  jpeg_set_defaults(&cinfo);
+  cinfo.in_color_space = JCS_EXT_RGB;
+  jpeg_default_colorspace(&cinfo);
+  jcs_valid = 1;
+
+done:
+  if (jcs_valid)
+    printf("  Working properly\n");
+  else
+    printf("  Not working properly.  Error returned was:\n    %s\n",
+           lasterror);
+
+  printf("libjpeg-turbo alpha colorspace extensions:\n");
+#if JCS_ALPHA_EXTENSIONS
+  printf("  Present at compile time\n");
+#else
+  printf("  Not present at compile time\n");
+#endif
+
+  if (setjmp(jerr.jb)) {
+    /* this will execute if libjpeg has an error */
+    jcs_alpha_valid = 0;
+    goto done2;
+  }
+
+  cinfo.in_color_space = JCS_EXT_RGBA;
+  jpeg_default_colorspace(&cinfo);
+  jcs_alpha_valid = 1;
+
+done2:
+  if (jcs_alpha_valid)
+    printf("  Working properly\n");
+  else
+    printf("  Not working properly.  Error returned was:\n    %s\n",
+           lasterror);
+
+  jpeg_destroy_compress(&cinfo);
+  return 0;
+}
diff --git a/3rdparty/libjpeg-turbo/src/jctrans.c b/3rdparty/libjpeg-turbo/src/jctrans.c
index e121028ec7..ae52e3989e 100644
--- a/3rdparty/libjpeg-turbo/src/jctrans.c
+++ b/3rdparty/libjpeg-turbo/src/jctrans.c
@@ -17,7 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 /* Forward declarations */
@@ -42,6 +42,9 @@ LOCAL(void) transencode_coef_controller(j_compress_ptr cinfo,
 GLOBAL(void)
 jpeg_write_coefficients(j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays)
 {
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   if (cinfo->global_state != CSTATE_START)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   /* Mark all tables to be written */
@@ -72,6 +75,9 @@ jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
   JQUANT_TBL *c_quant, *slot_quant;
   int tblno, ci, coefi;
 
+  if (srcinfo->master->lossless)
+    ERREXIT(dstinfo, JERR_NOTIMPL);
+
   /* Safety check to ensure start_compress not called yet. */
   if (dstinfo->global_state != CSTATE_START)
     ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
@@ -364,6 +370,13 @@ compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 }
 
 
+METHODDEF(boolean)
+compress_output_12(j_compress_ptr cinfo, J12SAMPIMAGE input_buf)
+{
+  return compress_output(cinfo, (JSAMPIMAGE)input_buf);
+}
+
+
 /*
  * Initialize coefficient buffer controller.
  *
@@ -386,6 +399,7 @@ transencode_coef_controller(j_compress_ptr cinfo,
   cinfo->coef = (struct jpeg_c_coef_controller *)coef;
   coef->pub.start_pass = start_pass_coef;
   coef->pub.compress_data = compress_output;
+  coef->pub.compress_data_12 = compress_output_12;
 
   /* Save pointer to virtual arrays */
   coef->whole_image = coef_arrays;
diff --git a/3rdparty/libjpeg-turbo/src/jdapimin.c b/3rdparty/libjpeg-turbo/src/jdapimin.c
index f50c27edc3..30d92841a8 100644
--- a/3rdparty/libjpeg-turbo/src/jdapimin.c
+++ b/3rdparty/libjpeg-turbo/src/jdapimin.c
@@ -3,8 +3,10 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2016, 2022, D. R. Commander.
+ * Copyright (C) 2016, 2022, 2024, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -23,7 +25,6 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jdmaster.h"
-#include "jconfigint.h"
 
 
 /*
@@ -83,6 +84,8 @@ jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, size_t structsize)
   /* And initialize the overall input controller. */
   jinit_input_controller(cinfo);
 
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+
   /* OK, I'm ready */
   cinfo->global_state = DSTATE_START;
 
@@ -157,13 +160,19 @@ default_decompress_parms(j_decompress_ptr cinfo)
       int cid1 = cinfo->comp_info[1].component_id;
       int cid2 = cinfo->comp_info[2].component_id;
 
-      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-        cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
-      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+      if (cid0 == 1 && cid1 == 2 && cid2 == 3) {
+        if (cinfo->master->lossless)
+          cinfo->jpeg_color_space = JCS_RGB; /* assume RGB w/out marker */
+        else
+          cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+      } else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
         cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
       else {
         TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        if (cinfo->master->lossless)
+          cinfo->jpeg_color_space = JCS_RGB; /* assume it's RGB */
+        else
+          cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
       }
     }
     /* Always guess RGB is proper output colorspace. */
diff --git a/3rdparty/libjpeg-turbo/src/jdapistd.c b/3rdparty/libjpeg-turbo/src/jdapistd.c
index 8827d8abf5..1f44927236 100644
--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2020, 2022, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, 2022-2023, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -19,13 +19,20 @@
  */
 
 #include "jinclude.h"
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
 #include "jdmainct.h"
 #include "jdcoefct.h"
+#else
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#endif
 #include "jdmaster.h"
 #include "jdmerge.h"
 #include "jdsample.h"
 #include "jmemsys.h"
 
+#if BITS_IN_JSAMPLE == 8
+
 /* Forward declarations */
 LOCAL(boolean) output_pass_setup(j_decompress_ptr cinfo);
 
@@ -121,8 +128,20 @@ output_pass_setup(j_decompress_ptr cinfo)
       }
       /* Process some data */
       last_scanline = cinfo->output_scanline;
-      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL,
-                                    &cinfo->output_scanline, (JDIMENSION)0);
+#ifdef D_LOSSLESS_SUPPORTED
+      if (cinfo->data_precision == 16)
+        (*cinfo->main->process_data_16) (cinfo, (J16SAMPARRAY)NULL,
+                                         &cinfo->output_scanline,
+                                         (JDIMENSION)0);
+      else
+#endif
+      if (cinfo->data_precision == 12)
+        (*cinfo->main->process_data_12) (cinfo, (J12SAMPARRAY)NULL,
+                                         &cinfo->output_scanline,
+                                         (JDIMENSION)0);
+      else
+        (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL,
+                                      &cinfo->output_scanline, (JDIMENSION)0);
       if (cinfo->output_scanline == last_scanline)
         return FALSE;           /* No progress made, must suspend */
     }
@@ -135,33 +154,46 @@ output_pass_setup(j_decompress_ptr cinfo)
 #endif /* QUANT_2PASS_SUPPORTED */
   }
   /* Ready for application to drive output pass through
-   * jpeg_read_scanlines or jpeg_read_raw_data.
+   * _jpeg_read_scanlines or _jpeg_read_raw_data.
    */
   cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
   return TRUE;
 }
 
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE != 16
 
 /*
  * Enable partial scanline decompression
  *
  * Must be called after jpeg_start_decompress() and before any calls to
- * jpeg_read_scanlines() or jpeg_skip_scanlines().
+ * _jpeg_read_scanlines() or _jpeg_skip_scanlines().
  *
  * Refer to libjpeg.txt for more information.
  */
 
 GLOBAL(void)
-jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
-                   JDIMENSION *width)
+_jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                    JDIMENSION *width)
 {
   int ci, align, orig_downsampled_width;
   JDIMENSION input_xoffset;
   boolean reinit_upsampler = FALSE;
   jpeg_component_info *compptr;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   my_master_ptr master = (my_master_ptr)cinfo->master;
+#endif
 
-  if (cinfo->global_state != DSTATE_SCANNING || cinfo->output_scanline != 0)
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if ((cinfo->global_state != DSTATE_SCANNING &&
+       cinfo->global_state != DSTATE_BUFIMAGE) || cinfo->output_scanline != 0)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   if (!xoffset || !width)
@@ -209,11 +241,13 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
    */
   *width = *width + input_xoffset - *xoffset;
   cinfo->output_width = *width;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
     my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
     upsample->out_row_width =
       cinfo->output_width * cinfo->out_color_components;
   }
+#endif
 
   /* Set the first and last iMCU columns that we must decompress.  These values
    * will be used in single-scan decompressions.
@@ -231,9 +265,11 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
     /* Set downsampled_width to the new output width. */
     orig_downsampled_width = compptr->downsampled_width;
     compptr->downsampled_width =
-      (JDIMENSION)jdiv_round_up((long)(cinfo->output_width *
-                                       compptr->h_samp_factor),
-                                (long)cinfo->max_h_samp_factor);
+      (JDIMENSION)jdiv_round_up((long)cinfo->output_width *
+                                (long)(compptr->h_samp_factor *
+                                       compptr->_DCT_scaled_size),
+                                (long)(cinfo->max_h_samp_factor *
+                                       cinfo->_min_DCT_scaled_size));
     if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2)
       reinit_upsampler = TRUE;
 
@@ -249,11 +285,13 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
 
   if (reinit_upsampler) {
     cinfo->master->jinit_upsampler_no_alloc = TRUE;
-    jinit_upsampler(cinfo);
+    _jinit_upsampler(cinfo);
     cinfo->master->jinit_upsampler_no_alloc = FALSE;
   }
 }
 
+#endif /* BITS_IN_JSAMPLE != 16 */
+
 
 /*
  * Read some scanlines of data from the JPEG decompressor.
@@ -263,17 +301,21 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
  * including bottom of image, data source suspension, and operating
  * modes that emit multiple scanlines at a time.
  *
- * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * Note: we warn about excess calls to _jpeg_read_scanlines() since
  * this likely signals an application programmer error.  However,
  * an oversize buffer (max_lines > scanlines remaining) is not an error.
  */
 
 GLOBAL(JDIMENSION)
-jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-                    JDIMENSION max_lines)
+_jpeg_read_scanlines(j_decompress_ptr cinfo, _JSAMPARRAY scanlines,
+                     JDIMENSION max_lines)
 {
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
   JDIMENSION row_ctr;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   if (cinfo->global_state != DSTATE_SCANNING)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   if (cinfo->output_scanline >= cinfo->output_height) {
@@ -290,30 +332,36 @@ jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
 
   /* Process some data */
   row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, max_lines);
   cinfo->output_scanline += row_ctr;
   return row_ctr;
+#else
+  ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  return 0;
+#endif
 }
 
 
-/* Dummy color convert function used by jpeg_skip_scanlines() */
+#if BITS_IN_JSAMPLE != 16
+
+/* Dummy color convert function used by _jpeg_skip_scanlines() */
 LOCAL(void)
-noop_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-             JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+noop_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+             JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
 }
 
 
-/* Dummy quantize function used by jpeg_skip_scanlines() */
+/* Dummy quantize function used by _jpeg_skip_scanlines() */
 LOCAL(void)
-noop_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-              JSAMPARRAY output_buf, int num_rows)
+noop_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+              _JSAMPARRAY output_buf, int num_rows)
 {
 }
 
 
 /*
- * In some cases, it is best to call jpeg_read_scanlines() and discard the
+ * In some cases, it is best to call _jpeg_read_scanlines() and discard the
  * output, rather than skipping the scanlines, because this allows us to
  * maintain the internal state of the context-based upsampler.  In these cases,
  * we set up and tear down a dummy color converter in order to avoid valgrind
@@ -324,49 +372,53 @@ LOCAL(void)
 read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   JDIMENSION n;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   my_master_ptr master = (my_master_ptr)cinfo->master;
-  JSAMPLE dummy_sample[1] = { 0 };
-  JSAMPROW dummy_row = dummy_sample;
-  JSAMPARRAY scanlines = NULL;
-  void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION input_row, JSAMPARRAY output_buf,
+#endif
+  _JSAMPLE dummy_sample[1] = { 0 };
+  _JSAMPROW dummy_row = dummy_sample;
+  _JSAMPARRAY scanlines = NULL;
+  void (*color_convert) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
                          int num_rows) = NULL;
-  void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                          JSAMPARRAY output_buf, int num_rows) = NULL;
+  void (*color_quantize) (j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                          _JSAMPARRAY output_buf, int num_rows) = NULL;
 
-  if (cinfo->cconvert && cinfo->cconvert->color_convert) {
-    color_convert = cinfo->cconvert->color_convert;
-    cinfo->cconvert->color_convert = noop_convert;
+  if (cinfo->cconvert && cinfo->cconvert->_color_convert) {
+    color_convert = cinfo->cconvert->_color_convert;
+    cinfo->cconvert->_color_convert = noop_convert;
     /* This just prevents UBSan from complaining about adding 0 to a NULL
      * pointer.  The pointer isn't actually used.
      */
     scanlines = &dummy_row;
   }
 
-  if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
-    color_quantize = cinfo->cquantize->color_quantize;
-    cinfo->cquantize->color_quantize = noop_quantize;
+  if (cinfo->cquantize && cinfo->cquantize->_color_quantize) {
+    color_quantize = cinfo->cquantize->_color_quantize;
+    cinfo->cquantize->_color_quantize = noop_quantize;
   }
 
+#ifdef UPSAMPLE_MERGING_SUPPORTED
   if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
     my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
     scanlines = &upsample->spare_row;
   }
+#endif
 
   for (n = 0; n < num_lines; n++)
-    jpeg_read_scanlines(cinfo, scanlines, 1);
+    _jpeg_read_scanlines(cinfo, scanlines, 1);
 
   if (color_convert)
-    cinfo->cconvert->color_convert = color_convert;
+    cinfo->cconvert->_color_convert = color_convert;
 
   if (color_quantize)
-    cinfo->cquantize->color_quantize = color_quantize;
+    cinfo->cquantize->_color_quantize = color_quantize;
 }
 
 
 /*
- * Called by jpeg_skip_scanlines().  This partially skips a decompress block by
- * incrementing the rowgroup counter.
+ * Called by _jpeg_skip_scanlines().  This partially skips a decompress block
+ * by incrementing the rowgroup counter.
  */
 
 LOCAL(void)
@@ -405,7 +457,7 @@ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
  */
 
 GLOBAL(JDIMENSION)
-jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
+_jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
@@ -416,6 +468,12 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
   JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
   JDIMENSION lines_to_skip, lines_to_read;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   /* Two-pass color quantization is not supported. */
   if (cinfo->quantize_colors && cinfo->two_pass_quantize)
     ERREXIT(cinfo, JERR_NOTIMPL);
@@ -517,7 +575,7 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
    * all of the entropy decoding occurs in jpeg_start_decompress(), assuming
    * that the input data source is non-suspending.  This makes skipping easy.
    */
-  if (cinfo->inputctl->has_multiple_scans) {
+  if (cinfo->inputctl->has_multiple_scans || cinfo->buffered_image) {
     if (cinfo->upsample->need_context_rows) {
       cinfo->output_scanline += lines_to_skip;
       cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
@@ -588,11 +646,17 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
  */
 
 GLOBAL(JDIMENSION)
-jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
-                   JDIMENSION max_lines)
+_jpeg_read_raw_data(j_decompress_ptr cinfo, _JSAMPIMAGE data,
+                    JDIMENSION max_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   if (cinfo->global_state != DSTATE_RAW_OK)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
   if (cinfo->output_scanline >= cinfo->output_height) {
@@ -613,7 +677,7 @@ jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
   /* Decompress directly into user's buffer. */
-  if (!(*cinfo->coef->decompress_data) (cinfo, data))
+  if (!(*cinfo->coef->_decompress_data) (cinfo, data))
     return 0;                   /* suspension forced, can do nothing more */
 
   /* OK, we processed one iMCU row. */
@@ -621,6 +685,10 @@ jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
   return lines_per_iMCU_row;
 }
 
+#endif /* BITS_IN_JSAMPLE != 16 */
+
+
+#if BITS_IN_JSAMPLE == 8
 
 /* Additional entry points for buffered-image mode. */
 
@@ -678,3 +746,5 @@ jpeg_finish_output(j_decompress_ptr cinfo)
 }
 
 #endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+#endif /* BITS_IN_JSAMPLE == 8 */
diff --git a/3rdparty/libjpeg-turbo/src/jdatadst-tj.c b/3rdparty/libjpeg-turbo/src/jdatadst-tj.c
new file mode 100644
index 0000000000..cce263af74
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jdatadst-tj.c
@@ -0,0 +1,198 @@
+/*
+ * jdatadst-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2014, 2016, 2019, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
+                      size_t *outsize, boolean alloc);
+
+
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char **outbuffer;    /* target buffer */
+  size_t *outsize;
+  unsigned char *newbuffer;     /* newly allocated buffer */
+  JOCTET *buffer;               /* start of buffer */
+  size_t bufsize;
+  boolean alloc;
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr *my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_mem_destination(j_compress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_mem_output_buffer(j_compress_ptr cinfo)
+{
+  size_t nextsize;
+  JOCTET *nextbuffer;
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  if (!dest->alloc) ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Try to allocate new buffer with double size */
+  nextsize = dest->bufsize * 2;
+  nextbuffer = (JOCTET *)malloc(nextsize);
+
+  if (nextbuffer == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);
+
+  free(dest->newbuffer);
+
+  dest->newbuffer = nextbuffer;
+
+  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+
+  dest->buffer = nextbuffer;
+  dest->bufsize = nextsize;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_mem_destination(j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  if (dest->alloc) *dest->outbuffer = dest->buffer;
+  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply an own initial buffer with appropriate size.
+ * Otherwise, or when the actual data output exceeds the given size,
+ * the library adapts the buffer size as necessary.
+ * The standard library functions malloc/free are used for allocating
+ * larger memory, so the buffer is available to the application after
+ * finishing compression, and then the application is responsible for
+ * freeing the requested memory.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
+                 size_t *outsize, boolean alloc)
+{
+  boolean reused = FALSE;
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same buffer without re-executing jpeg_mem_dest.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_mem_destination_mgr));
+    dest = (my_mem_dest_ptr)cinfo->dest;
+    dest->newbuffer = NULL;
+    dest->buffer = NULL;
+  } else if (cinfo->dest->init_destination != init_mem_destination) {
+    /* It is unsafe to reuse the existing destination manager unless it was
+     * created by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest = (my_mem_dest_ptr)cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  if (dest->buffer == *outbuffer && *outbuffer != NULL && alloc)
+    reused = TRUE;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->alloc = alloc;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    if (alloc) {
+      /* Allocate initial buffer */
+      dest->newbuffer = *outbuffer = (unsigned char *)malloc(OUTPUT_BUF_SIZE);
+      if (dest->newbuffer == NULL)
+        ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+      *outsize = OUTPUT_BUF_SIZE;
+    } else
+      ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  if (!reused)
+    dest->bufsize = *outsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+}
diff --git a/3rdparty/libjpeg-turbo/src/jdatadst.c b/3rdparty/libjpeg-turbo/src/jdatadst.c
index 6b4fed2339..529f93b490 100644
--- a/3rdparty/libjpeg-turbo/src/jdatadst.c
+++ b/3rdparty/libjpeg-turbo/src/jdatadst.c
@@ -38,7 +38,6 @@ typedef my_destination_mgr *my_dest_ptr;
 #define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
 
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /* Expanded data destination object for memory output */
 
 typedef struct {
@@ -52,7 +51,6 @@ typedef struct {
 } my_mem_destination_mgr;
 
 typedef my_mem_destination_mgr *my_mem_dest_ptr;
-#endif
 
 
 /*
@@ -74,13 +72,11 @@ init_destination(j_compress_ptr cinfo)
   dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
 }
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 init_mem_destination(j_compress_ptr cinfo)
 {
   /* no work necessary here */
 }
-#endif
 
 
 /*
@@ -121,7 +117,6 @@ empty_output_buffer(j_compress_ptr cinfo)
   return TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(boolean)
 empty_mem_output_buffer(j_compress_ptr cinfo)
 {
@@ -150,7 +145,6 @@ empty_mem_output_buffer(j_compress_ptr cinfo)
 
   return TRUE;
 }
-#endif
 
 
 /*
@@ -179,7 +173,6 @@ term_destination(j_compress_ptr cinfo)
     ERREXIT(cinfo, JERR_FILE_WRITE);
 }
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 term_mem_destination(j_compress_ptr cinfo)
 {
@@ -188,7 +181,6 @@ term_mem_destination(j_compress_ptr cinfo)
   *dest->outbuffer = dest->buffer;
   *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer);
 }
-#endif
 
 
 /*
@@ -227,7 +219,6 @@ jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile)
 }
 
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /*
  * Prepare for output to a memory buffer.
  * The caller may supply an own initial buffer with appropriate size.
@@ -284,4 +275,3 @@ jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
   dest->pub.next_output_byte = dest->buffer = *outbuffer;
   dest->pub.free_in_buffer = dest->bufsize = *outsize;
 }
-#endif
diff --git a/3rdparty/libjpeg-turbo/src/jdatasrc-tj.c b/3rdparty/libjpeg-turbo/src/jdatasrc-tj.c
new file mode 100644
index 0000000000..a5970b53fe
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jdatasrc-tj.c
@@ -0,0 +1,194 @@
+/*
+ * jdatasrc-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2016, 2019, 2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+void jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                     size_t insize);
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_mem_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_mem_input_buffer(j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * is treated as an error.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data(j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr *src = cinfo->src;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long)src->bytes_in_buffer) {
+      num_bytes -= (long)src->bytes_in_buffer;
+      (void)(*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += (size_t)num_bytes;
+    src->bytes_in_buffer -= (size_t)num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                size_t insize)
+{
+  struct jpeg_source_mgr *src;
+
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.
+   */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(struct jpeg_source_mgr));
+  } else if (cinfo->src->init_source != init_mem_source) {
+    /* It is unsafe to reuse the existing source manager unless it was created
+     * by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = insize;
+  src->next_input_byte = (const JOCTET *)inbuffer;
+}
diff --git a/3rdparty/libjpeg-turbo/src/jdatasrc.c b/3rdparty/libjpeg-turbo/src/jdatasrc.c
index e36a30d894..dc135f43a4 100644
--- a/3rdparty/libjpeg-turbo/src/jdatasrc.c
+++ b/3rdparty/libjpeg-turbo/src/jdatasrc.c
@@ -56,13 +56,11 @@ init_source(j_decompress_ptr cinfo)
   src->start_of_file = TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(void)
 init_mem_source(j_decompress_ptr cinfo)
 {
   /* no work necessary here */
 }
-#endif
 
 
 /*
@@ -123,7 +121,6 @@ fill_input_buffer(j_decompress_ptr cinfo)
   return TRUE;
 }
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 METHODDEF(boolean)
 fill_mem_input_buffer(j_decompress_ptr cinfo)
 {
@@ -144,7 +141,6 @@ fill_mem_input_buffer(j_decompress_ptr cinfo)
 
   return TRUE;
 }
-#endif
 
 
 /*
@@ -253,7 +249,6 @@ jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile)
 }
 
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /*
  * Prepare for input from a supplied memory buffer.
  * The buffer must contain the whole JPEG data.
@@ -292,4 +287,3 @@ jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
   src->bytes_in_buffer = (size_t)insize;
   src->next_input_byte = (const JOCTET *)inbuffer;
 }
-#endif
diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.c b/3rdparty/libjpeg-turbo/src/jdcoefct.c
index 15e6cded62..40ce27259b 100644
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.c
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.c
@@ -5,13 +5,13 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2019-2020, 2022-2023, D. R. Commander.
  * Copyright (C) 2015, 2020, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
  * This file contains the coefficient buffer controller for decompression.
- * This controller is the top level of the JPEG decompressor proper.
+ * This controller is the top level of the lossy JPEG decompressor proper.
  * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
  *
  * In buffered-image mode, this controller is the interface between
@@ -21,19 +21,20 @@
 
 #include "jinclude.h"
 #include "jdcoefct.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
+#include "jsamplecomp.h"
 
 
 /* Forward declarations */
 METHODDEF(int) decompress_onepass(j_decompress_ptr cinfo,
-                                  JSAMPIMAGE output_buf);
+                                  _JSAMPIMAGE output_buf);
 #ifdef D_MULTISCAN_FILES_SUPPORTED
-METHODDEF(int) decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
+METHODDEF(int) decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
 #endif
 #ifdef BLOCK_SMOOTHING_SUPPORTED
 LOCAL(boolean) smoothing_ok(j_decompress_ptr cinfo);
 METHODDEF(int) decompress_smooth_data(j_decompress_ptr cinfo,
-                                      JSAMPIMAGE output_buf);
+                                      _JSAMPIMAGE output_buf);
 #endif
 
 
@@ -62,9 +63,9 @@ start_output_pass(j_decompress_ptr cinfo)
   /* If multipass, check to see whether to use block smoothing on this pass */
   if (coef->pub.coef_arrays != NULL) {
     if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
-      coef->pub.decompress_data = decompress_smooth_data;
+      coef->pub._decompress_data = decompress_smooth_data;
     else
-      coef->pub.decompress_data = decompress_data;
+      coef->pub._decompress_data = decompress_data;
   }
 #endif
   cinfo->output_iMCU_row = 0;
@@ -82,17 +83,17 @@ start_output_pass(j_decompress_ptr cinfo)
  */
 
 METHODDEF(int)
-decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+decompress_onepass(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, ci, xindex, yindex, yoffset, useful_width;
-  JSAMPARRAY output_ptr;
+  _JSAMPARRAY output_ptr;
   JDIMENSION start_col, output_col;
   jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
+  _inverse_DCT_method_ptr inverse_DCT;
 
   /* Loop to process as much as one whole iMCU row */
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
@@ -129,7 +130,7 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
             blkn += compptr->MCU_blocks;
             continue;
           }
-          inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+          inverse_DCT = cinfo->idct->_inverse_DCT[compptr->component_index];
           useful_width = (MCU_col_num < last_MCU_col) ?
                          compptr->MCU_width : compptr->last_col_width;
           output_ptr = output_buf[compptr->component_index] +
@@ -262,7 +263,7 @@ consume_data(j_decompress_ptr cinfo)
  */
 
 METHODDEF(int)
-decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
@@ -270,10 +271,10 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
   int ci, block_row, block_rows;
   JBLOCKARRAY buffer;
   JBLOCKROW buffer_ptr;
-  JSAMPARRAY output_ptr;
+  _JSAMPARRAY output_ptr;
   JDIMENSION output_col;
   jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
+  _inverse_DCT_method_ptr inverse_DCT;
 
   /* Force some input to be done if we are getting ahead of the input. */
   while (cinfo->input_scan_number < cinfo->output_scan_number ||
@@ -302,7 +303,7 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
       block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
       if (block_rows == 0) block_rows = compptr->v_samp_factor;
     }
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    inverse_DCT = cinfo->idct->_inverse_DCT[ci];
     output_ptr = output_buf[ci];
     /* Loop over all DCT blocks to be processed. */
     for (block_row = 0; block_row < block_rows; block_row++) {
@@ -425,19 +426,20 @@ smoothing_ok(j_decompress_ptr cinfo)
  */
 
 METHODDEF(int)
-decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+decompress_smooth_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
 {
   my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   JDIMENSION block_num, last_block_column;
-  int ci, block_row, block_rows, access_rows;
+  int ci, block_row, block_rows, access_rows, image_block_row,
+    image_block_rows;
   JBLOCKARRAY buffer;
   JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
   JBLOCKROW next_block_row, next_next_block_row;
-  JSAMPARRAY output_ptr;
+  _JSAMPARRAY output_ptr;
   JDIMENSION output_col;
   jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
+  _inverse_DCT_method_ptr inverse_DCT;
   boolean change_dc;
   JCOEF *workspace;
   int *coef_bits;
@@ -475,7 +477,7 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
     if (!compptr->component_needed)
       continue;
     /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
+    if (cinfo->output_iMCU_row + 1 < last_iMCU_row) {
       block_rows = compptr->v_samp_factor;
       access_rows = block_rows * 3; /* this and next two iMCU rows */
     } else if (cinfo->output_iMCU_row < last_iMCU_row) {
@@ -496,6 +498,7 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
          (JDIMENSION)access_rows, FALSE);
       buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
     } else if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
       buffer = (*cinfo->mem->access_virt_barray)
         ((j_common_ptr)cinfo, coef->whole_image[ci],
          (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
@@ -535,32 +538,33 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
       Q21 = quanttbl->quantval[Q21_POS];
       Q30 = quanttbl->quantval[Q30_POS];
     }
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    inverse_DCT = cinfo->idct->_inverse_DCT[ci];
     output_ptr = output_buf[ci];
     /* Loop over all DCT blocks to be processed. */
+    image_block_rows = block_rows * cinfo->total_iMCU_rows;
     for (block_row = 0; block_row < block_rows; block_row++) {
+      image_block_row = cinfo->output_iMCU_row * block_rows + block_row;
       buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
 
-      if (block_row > 0 || cinfo->output_iMCU_row > 0)
+      if (image_block_row > 0)
         prev_block_row =
           buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
       else
         prev_block_row = buffer_ptr;
 
-      if (block_row > 1 || cinfo->output_iMCU_row > 1)
+      if (image_block_row > 1)
         prev_prev_block_row =
           buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
       else
         prev_prev_block_row = prev_block_row;
 
-      if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
+      if (image_block_row < image_block_rows - 1)
         next_block_row =
           buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
       else
         next_block_row = buffer_ptr;
 
-      if (block_row < block_rows - 2 ||
-          cinfo->output_iMCU_row < last_iMCU_row - 1)
+      if (image_block_row < image_block_rows - 2)
         next_next_block_row =
           buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
       else
@@ -583,11 +587,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         /* Update DC values */
         if (block_num == cinfo->master->first_MCU_col[ci] &&
             block_num < last_block_column) {
-          DC04 = (int)prev_prev_block_row[1][0];
-          DC09 = (int)prev_block_row[1][0];
-          DC14 = (int)buffer_ptr[1][0];
-          DC19 = (int)next_block_row[1][0];
-          DC24 = (int)next_next_block_row[1][0];
+          DC04 = DC05 = (int)prev_prev_block_row[1][0];
+          DC09 = DC10 = (int)prev_block_row[1][0];
+          DC14 = DC15 = (int)buffer_ptr[1][0];
+          DC19 = DC20 = (int)next_block_row[1][0];
+          DC24 = DC25 = (int)next_next_block_row[1][0];
         }
         if (block_num + 1 < last_block_column) {
           DC05 = (int)prev_prev_block_row[2][0];
@@ -810,10 +814,13 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
  */
 
 GLOBAL(void)
-jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+_jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 {
   my_coef_ptr coef;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_coef_controller));
@@ -850,7 +857,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
          (JDIMENSION)access_rows);
     }
     coef->pub.consume_data = consume_data;
-    coef->pub.decompress_data = decompress_data;
+    coef->pub._decompress_data = decompress_data;
     coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
 #else
     ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -867,7 +874,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
       coef->MCU_buffer[i] = buffer + i;
     }
     coef->pub.consume_data = dummy_consume_data;
-    coef->pub.decompress_data = decompress_onepass;
+    coef->pub._decompress_data = decompress_onepass;
     coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
   }
 
diff --git a/3rdparty/libjpeg-turbo/src/jdcoefct.h b/3rdparty/libjpeg-turbo/src/jdcoefct.h
index 9a0e780663..bbe9e97051 100644
--- a/3rdparty/libjpeg-turbo/src/jdcoefct.h
+++ b/3rdparty/libjpeg-turbo/src/jdcoefct.h
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2020, Google, Inc.
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  */
@@ -14,6 +15,8 @@
 #include "jpeglib.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /* Block smoothing is only applicable for progressive JPEG, so: */
 #ifndef D_PROGRESSIVE_SUPPORTED
 #undef BLOCK_SMOOTHING_SUPPORTED
@@ -81,3 +84,5 @@ start_iMCU_row(j_decompress_ptr cinfo)
   coef->MCU_ctr = 0;
   coef->MCU_vert_offset = 0;
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdcol565.c b/3rdparty/libjpeg-turbo/src/jdcol565.c
index 53c7bd9187..2172d98fda 100644
--- a/3rdparty/libjpeg-turbo/src/jdcol565.c
+++ b/3rdparty/libjpeg-turbo/src/jdcol565.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014-2015, D. R. Commander.
+ * Copyright (C) 2014-2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -17,18 +17,19 @@
 
 INLINE
 LOCAL(void)
-ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                            JDIMENSION input_row, JSAMPARRAY output_buf,
+ycc_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                            JDIMENSION input_row, _JSAMPARRAY output_buf,
                             int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   register int *Crrtab = cconvert->Cr_r_tab;
   register int *Cbbtab = cconvert->Cb_b_tab;
   register JLONG *Crgtab = cconvert->Cr_g_tab;
@@ -91,23 +92,27 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       *(INT16 *)outptr = (INT16)rgb;
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
 INLINE
 LOCAL(void)
-ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                             JDIMENSION input_row, JSAMPARRAY output_buf,
+ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
                              int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   register int *Crrtab = cconvert->Cr_r_tab;
   register int *Cbbtab = cconvert->Cb_b_tab;
   register JLONG *Crgtab = cconvert->Cr_g_tab;
@@ -177,17 +182,20 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       *(INT16 *)outptr = (INT16)rgb;
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
 INLINE
 LOCAL(void)
-rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                            JDIMENSION input_row, JSAMPARRAY output_buf,
+rgb_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                            JDIMENSION input_row, _JSAMPARRAY output_buf,
                             int num_rows)
 {
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   SHIFT_TEMPS
@@ -237,14 +245,14 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                             JDIMENSION input_row, JSAMPARRAY output_buf,
+rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
                              int num_rows)
 {
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   JDIMENSION num_cols = cinfo->output_width;
   JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
   SHIFT_TEMPS
@@ -296,11 +304,11 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-gray_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                             JDIMENSION input_row, JSAMPARRAY output_buf,
+gray_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
                              int num_rows)
 {
-  register JSAMPROW inptr, outptr;
+  register _JSAMPROW inptr, outptr;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
 
@@ -336,13 +344,13 @@ gray_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-gray_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                              JDIMENSION input_row, JSAMPARRAY output_buf,
+gray_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                              JDIMENSION input_row, _JSAMPARRAY output_buf,
                               int num_rows)
 {
-  register JSAMPROW inptr, outptr;
+  register _JSAMPROW inptr, outptr;
   register JDIMENSION col;
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   JDIMENSION num_cols = cinfo->output_width;
   JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
 
diff --git a/3rdparty/libjpeg-turbo/src/jdcolext.c b/3rdparty/libjpeg-turbo/src/jdcolext.c
index 863c7a2fbc..f22e29d722 100644
--- a/3rdparty/libjpeg-turbo/src/jdcolext.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2015, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2015, 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -28,18 +28,19 @@
 
 INLINE
 LOCAL(void)
-ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION input_row, JSAMPARRAY output_buf,
+ycc_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
                          int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   register int *Crrtab = cconvert->Cr_r_tab;
   register int *Cbbtab = cconvert->Cb_b_tab;
   register JLONG *Crgtab = cconvert->Cr_g_tab;
@@ -62,14 +63,17 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                               ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                                 SCALEBITS))];
       outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
-      /* alpha channel value */
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -81,11 +85,11 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                          JDIMENSION input_row, JSAMPARRAY output_buf,
+gray_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION input_row, _JSAMPARRAY output_buf,
                           int num_rows)
 {
-  register JSAMPROW inptr, outptr;
+  register _JSAMPROW inptr, outptr;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
 
@@ -94,10 +98,10 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
       outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
-      /* alpha channel value */
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
@@ -111,12 +115,12 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION input_row, JSAMPARRAY output_buf,
+rgb_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
                          int num_rows)
 {
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
 
@@ -130,10 +134,10 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       outptr[RGB_RED] = inptr0[col];
       outptr[RGB_GREEN] = inptr1[col];
       outptr[RGB_BLUE] = inptr2[col];
-      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
-      /* alpha channel value */
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
 #ifdef RGB_ALPHA
-      outptr[RGB_ALPHA] = 0xFF;
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
       outptr += RGB_PIXELSIZE;
     }
diff --git a/3rdparty/libjpeg-turbo/src/jdcolor.c b/3rdparty/libjpeg-turbo/src/jdcolor.c
index 8da2b4eaf2..e5c7b58ebf 100644
--- a/3rdparty/libjpeg-turbo/src/jdcolor.c
+++ b/3rdparty/libjpeg-turbo/src/jdcolor.c
@@ -6,7 +6,7 @@
  * Modified 2011 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander.
+ * Copyright (C) 2009, 2011-2012, 2014-2015, 2022, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -18,14 +18,17 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
-#include "jconfigint.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /* Private subobject */
 
 typedef struct {
   struct jpeg_color_deconverter pub; /* public fields */
 
+#if BITS_IN_JSAMPLE != 16
   /* Private state for YCC->RGB conversion */
   int *Cr_r_tab;                /* => table for Cr to R conversion */
   int *Cb_b_tab;                /* => table for Cb to B conversion */
@@ -34,6 +37,7 @@ typedef struct {
 
   /* Private state for RGB->Y conversion */
   JLONG *rgb_y_tab;             /* => table for RGB to Y conversion */
+#endif
 } my_color_deconverter;
 
 typedef my_color_deconverter *my_cconvert_ptr;
@@ -44,7 +48,7 @@ typedef my_color_deconverter *my_cconvert_ptr;
 
 /*
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
  *
  *      R = Y                + 1.40200 * Cr
@@ -53,7 +57,7 @@ typedef my_color_deconverter *my_cconvert_ptr;
  *
  *      Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
  *
- * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * where Cb and Cr represent the incoming values less _CENTERJSAMPLE.
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  *
  * To avoid floating-point arithmetic, we represent the fractional constants
@@ -64,7 +68,7 @@ typedef my_color_deconverter *my_cconvert_ptr;
  *
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times Cb and Cr for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * For 8-bit samples this is very reasonable (only 256 entries per table);
  * for 12-bit samples it is still acceptable.  It's not very reasonable for
  * 16-bit samples, but if you want lossless storage you shouldn't be changing
  * colorspace anyway.
@@ -85,9 +89,9 @@ typedef my_color_deconverter *my_cconvert_ptr;
  */
 
 #define R_Y_OFF         0                       /* offset to R => Y section */
-#define G_Y_OFF         (1 * (MAXJSAMPLE + 1))  /* offset to G => Y section */
-#define B_Y_OFF         (2 * (MAXJSAMPLE + 1))  /* etc. */
-#define TABLE_SIZE      (3 * (MAXJSAMPLE + 1))
+#define G_Y_OFF         (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
+#define B_Y_OFF         (2 * (_MAXJSAMPLE + 1)) /* etc. */
+#define TABLE_SIZE      (3 * (_MAXJSAMPLE + 1))
 
 
 /* Include inline routines for colorspace extensions */
@@ -210,6 +214,7 @@ typedef my_color_deconverter *my_cconvert_ptr;
 LOCAL(void)
 build_ycc_rgb_table(j_decompress_ptr cinfo)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   int i;
   JLONG x;
@@ -217,20 +222,20 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
 
   cconvert->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(int));
+                                (_MAXJSAMPLE + 1) * sizeof(int));
   cconvert->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(int));
+                                (_MAXJSAMPLE + 1) * sizeof(int));
   cconvert->Cr_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(JLONG));
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
   cconvert->Cb_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(JLONG));
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
 
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+  for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     cconvert->Cr_r_tab[i] = (int)
                     RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
@@ -243,6 +248,9 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
     /* We also add in ONE_HALF so that need not do it in inner loop */
     cconvert->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF;
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -251,8 +259,8 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
  */
 
 METHODDEF(void)
-ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+ycc_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
@@ -301,6 +309,7 @@ ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 LOCAL(void)
 build_rgb_y_table(j_decompress_ptr cinfo)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   JLONG *rgb_y_tab;
   JLONG i;
@@ -310,11 +319,14 @@ build_rgb_y_table(j_decompress_ptr cinfo)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 (TABLE_SIZE * sizeof(JLONG)));
 
-  for (i = 0; i <= MAXJSAMPLE; i++) {
+  for (i = 0; i <= _MAXJSAMPLE; i++) {
     rgb_y_tab[i + R_Y_OFF] = FIX(0.29900) * i;
     rgb_y_tab[i + G_Y_OFF] = FIX(0.58700) * i;
     rgb_y_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -323,14 +335,15 @@ build_rgb_y_table(j_decompress_ptr cinfo)
  */
 
 METHODDEF(void)
-rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                 JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+rgb_gray_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                 JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int r, g, b;
   register JLONG *ctab = cconvert->rgb_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
 
@@ -345,10 +358,13 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       g = inptr1[col];
       b = inptr2[col];
       /* Y */
-      outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
-                               ctab[b + B_Y_OFF]) >> SCALEBITS);
+      outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                ctab[b + B_Y_OFF]) >> SCALEBITS);
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -358,10 +374,10 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-null_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-             JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+null_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+             JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
-  register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
+  register _JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
   register JDIMENSION col;
   register int num_components = cinfo->num_components;
   JDIMENSION num_cols = cinfo->output_width;
@@ -419,11 +435,11 @@ null_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-grayscale_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                  JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+grayscale_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                  JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
-  jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows,
-                    cinfo->output_width);
+  _jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows,
+                     cinfo->output_width);
 }
 
 
@@ -432,8 +448,8 @@ grayscale_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-gray_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                 JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+gray_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                 JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
@@ -477,8 +493,8 @@ gray_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-rgb_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+rgb_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
@@ -525,17 +541,18 @@ rgb_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                  JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+ycck_cmyk_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                  JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
+#if BITS_IN_JSAMPLE != 16
   my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
   register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2, inptr3;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   register int *Crrtab = cconvert->Cr_r_tab;
   register int *Cbbtab = cconvert->Cb_b_tab;
   register JLONG *Crgtab = cconvert->Cr_g_tab;
@@ -554,16 +571,19 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       cb = inptr1[col];
       cr = inptr2[col];
       /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
+      outptr[0] = range_limit[_MAXJSAMPLE - (y + Crrtab[cr])];  /* red */
+      outptr[1] = range_limit[_MAXJSAMPLE - (y +                /* green */
                               ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                                  SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
+      outptr[2] = range_limit[_MAXJSAMPLE - (y + Cbbtab[cb])];  /* blue */
       /* K passes through unchanged */
       outptr[3] = inptr3[col];
       outptr += 4;
     }
   }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
 }
 
 
@@ -653,8 +673,8 @@ static INLINE boolean is_big_endian(void)
 
 
 METHODDEF(void)
-ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                   JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+ycc_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -664,8 +684,8 @@ ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-ycc_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+ycc_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -675,8 +695,8 @@ ycc_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-rgb_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                   JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+rgb_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -686,8 +706,8 @@ rgb_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-rgb_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+rgb_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -697,8 +717,8 @@ rgb_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-gray_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+gray_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -708,8 +728,8 @@ gray_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-gray_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                     JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+gray_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
 {
   if (is_big_endian())
     gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
@@ -734,11 +754,14 @@ start_pass_dcolor(j_decompress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_color_deconverter(j_decompress_ptr cinfo)
+_jinit_color_deconverter(j_decompress_ptr cinfo)
 {
   my_cconvert_ptr cconvert;
   int ci;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_color_deconverter));
@@ -773,19 +796,24 @@ jinit_color_deconverter(j_decompress_ptr cinfo)
   /* Set out_color_components and conversion method based on requested space.
    * Also clear the component_needed flags for any unused components,
    * so that earlier pipeline stages can avoid useless computation.
+   * NOTE: We do not allow any lossy color conversion algorithms in lossless
+   * mode.
    */
 
   switch (cinfo->out_color_space) {
   case JCS_GRAYSCALE:
+    if (cinfo->master->lossless &&
+        cinfo->jpeg_color_space != cinfo->out_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     cinfo->out_color_components = 1;
     if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
         cinfo->jpeg_color_space == JCS_YCbCr) {
-      cconvert->pub.color_convert = grayscale_convert;
+      cconvert->pub._color_convert = grayscale_convert;
       /* For color->grayscale conversion, only the Y (0) component is needed */
       for (ci = 1; ci < cinfo->num_components; ci++)
         cinfo->comp_info[ci].component_needed = FALSE;
     } else if (cinfo->jpeg_color_space == JCS_RGB) {
-      cconvert->pub.color_convert = rgb_gray_convert;
+      cconvert->pub._color_convert = rgb_gray_convert;
       build_rgb_y_table(cinfo);
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -802,65 +830,78 @@ jinit_color_deconverter(j_decompress_ptr cinfo)
   case JCS_EXT_BGRA:
   case JCS_EXT_ABGR:
   case JCS_EXT_ARGB:
+    if (cinfo->master->lossless && cinfo->jpeg_color_space != JCS_RGB)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
     if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#ifdef WITH_SIMD
       if (jsimd_can_ycc_rgb())
-        cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
-      else {
-        cconvert->pub.color_convert = ycc_rgb_convert;
+        cconvert->pub._color_convert = jsimd_ycc_rgb_convert;
+      else
+#endif
+      {
+        cconvert->pub._color_convert = ycc_rgb_convert;
         build_ycc_rgb_table(cinfo);
       }
     } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-      cconvert->pub.color_convert = gray_rgb_convert;
+      cconvert->pub._color_convert = gray_rgb_convert;
     } else if (cinfo->jpeg_color_space == JCS_RGB) {
       if (rgb_red[cinfo->out_color_space] == 0 &&
           rgb_green[cinfo->out_color_space] == 1 &&
           rgb_blue[cinfo->out_color_space] == 2 &&
           rgb_pixelsize[cinfo->out_color_space] == 3)
-        cconvert->pub.color_convert = null_convert;
+        cconvert->pub._color_convert = null_convert;
       else
-        cconvert->pub.color_convert = rgb_rgb_convert;
+        cconvert->pub._color_convert = rgb_rgb_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_RGB565:
+    if (cinfo->master->lossless)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     cinfo->out_color_components = 3;
     if (cinfo->dither_mode == JDITHER_NONE) {
       if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#ifdef WITH_SIMD
         if (jsimd_can_ycc_rgb565())
-          cconvert->pub.color_convert = jsimd_ycc_rgb565_convert;
-        else {
-          cconvert->pub.color_convert = ycc_rgb565_convert;
+          cconvert->pub._color_convert = jsimd_ycc_rgb565_convert;
+        else
+#endif
+        {
+          cconvert->pub._color_convert = ycc_rgb565_convert;
           build_ycc_rgb_table(cinfo);
         }
       } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-        cconvert->pub.color_convert = gray_rgb565_convert;
+        cconvert->pub._color_convert = gray_rgb565_convert;
       } else if (cinfo->jpeg_color_space == JCS_RGB) {
-        cconvert->pub.color_convert = rgb_rgb565_convert;
+        cconvert->pub._color_convert = rgb_rgb565_convert;
       } else
         ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     } else {
       /* only ordered dithering is supported */
       if (cinfo->jpeg_color_space == JCS_YCbCr) {
-        cconvert->pub.color_convert = ycc_rgb565D_convert;
+        cconvert->pub._color_convert = ycc_rgb565D_convert;
         build_ycc_rgb_table(cinfo);
       } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-        cconvert->pub.color_convert = gray_rgb565D_convert;
+        cconvert->pub._color_convert = gray_rgb565D_convert;
       } else if (cinfo->jpeg_color_space == JCS_RGB) {
-        cconvert->pub.color_convert = rgb_rgb565D_convert;
+        cconvert->pub._color_convert = rgb_rgb565D_convert;
       } else
         ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     }
     break;
 
   case JCS_CMYK:
+    if (cinfo->master->lossless &&
+        cinfo->jpeg_color_space != cinfo->out_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     cinfo->out_color_components = 4;
     if (cinfo->jpeg_color_space == JCS_YCCK) {
-      cconvert->pub.color_convert = ycck_cmyk_convert;
+      cconvert->pub._color_convert = ycck_cmyk_convert;
       build_ycc_rgb_table(cinfo);
     } else if (cinfo->jpeg_color_space == JCS_CMYK) {
-      cconvert->pub.color_convert = null_convert;
+      cconvert->pub._color_convert = null_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
@@ -869,7 +910,7 @@ jinit_color_deconverter(j_decompress_ptr cinfo)
     /* Permit null conversion to same output space */
     if (cinfo->out_color_space == cinfo->jpeg_color_space) {
       cinfo->out_color_components = cinfo->num_components;
-      cconvert->pub.color_convert = null_convert;
+      cconvert->pub._color_convert = null_convert;
     } else                      /* unsupported non-null conversion */
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
@@ -880,3 +921,5 @@ jinit_color_deconverter(j_decompress_ptr cinfo)
   else
     cinfo->output_components = cinfo->out_color_components;
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdct.h b/3rdparty/libjpeg-turbo/src/jdct.h
index 66d1718b77..0411a79bc0 100644
--- a/3rdparty/libjpeg-turbo/src/jdct.h
+++ b/3rdparty/libjpeg-turbo/src/jdct.h
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -15,13 +15,15 @@
  * machine-dependent tuning (e.g., assembly coding).
  */
 
+#include "jsamplecomp.h"
+
 
 /*
  * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
  * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
  * for 8-bit samples, JLONG for 12-bit samples.  (NOTE: Floating-point DCT
  * implementations use an array of type FAST_FLOAT, instead.)
- * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
+ * The DCT inputs are expected to be signed (range +-_CENTERJSAMPLE).
  * The DCT outputs are returned scaled up by a factor of 8; they therefore
  * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
  * convention improves accuracy in integer implementations and saves some
@@ -76,78 +78,89 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE;  /* preferred floating type */
 
 /*
  * Each IDCT routine is responsible for range-limiting its results and
- * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * converting them to unsigned form (0.._MAXJSAMPLE).  The raw outputs could
  * be quite far out of range if the input data is corrupt, so a bulletproof
  * range-limiting step is required.  We use a mask-and-table-lookup method
  * to do the combined operations quickly.  See the comments with
  * prepare_range_limit_table (in jdmaster.c) for more info.
  */
 
-#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
+#define IDCT_range_limit(cinfo) \
+  ((_JSAMPLE *)((cinfo)->sample_range_limit) + _CENTERJSAMPLE)
 
-#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+#define RANGE_MASK  (_MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
 
 
 /* Extern declarations for the forward and inverse DCT routines. */
 
-EXTERN(void) jpeg_fdct_islow(DCTELEM *data);
-EXTERN(void) jpeg_fdct_ifast(DCTELEM *data);
+EXTERN(void) _jpeg_fdct_islow(DCTELEM *data);
+EXTERN(void) _jpeg_fdct_ifast(DCTELEM *data);
 EXTERN(void) jpeg_fdct_float(FAST_FLOAT *data);
 
-EXTERN(void) jpeg_idct_islow(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_ifast(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_float(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_7x7(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_6x6(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_5x5(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_4x4(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_3x3(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_2x2(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_1x1(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_9x9(j_decompress_ptr cinfo,
-                           jpeg_component_info *compptr, JCOEFPTR coef_block,
-                           JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_10x10(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_11x11(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_12x12(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_13x13(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_14x14(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_15x15(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
-EXTERN(void) jpeg_idct_16x16(j_decompress_ptr cinfo,
-                             jpeg_component_info *compptr, JCOEFPTR coef_block,
-                             JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_islow(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_ifast(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_float(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_7x7(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_6x6(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_5x5(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_4x4(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_3x3(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_2x2(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_1x1(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_9x9(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_10x10(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_11x11(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_12x12(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_13x13(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_14x14(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_15x15(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_16x16(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
 
 
 /*
diff --git a/3rdparty/libjpeg-turbo/src/jddctmgr.c b/3rdparty/libjpeg-turbo/src/jddctmgr.c
index e78d7bebe2..0bd8c2b591 100644
--- a/3rdparty/libjpeg-turbo/src/jddctmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jddctmgr.c
@@ -26,7 +26,7 @@
 #include "jpeglib.h"
 #include "jdct.h"               /* Private declarations for DCT subsystem */
 #include "jsimddct.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 /*
@@ -100,7 +100,7 @@ start_pass(j_decompress_ptr cinfo)
   int ci, i;
   jpeg_component_info *compptr;
   int method = 0;
-  inverse_DCT_method_ptr method_ptr = NULL;
+  _inverse_DCT_method_ptr method_ptr = NULL;
   JQUANT_TBL *qtbl;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
@@ -109,42 +109,46 @@ start_pass(j_decompress_ptr cinfo)
     switch (compptr->_DCT_scaled_size) {
 #ifdef IDCT_SCALING_SUPPORTED
     case 1:
-      method_ptr = jpeg_idct_1x1;
+      method_ptr = _jpeg_idct_1x1;
       method = JDCT_ISLOW;      /* jidctred uses islow-style table */
       break;
     case 2:
+#ifdef WITH_SIMD
       if (jsimd_can_idct_2x2())
         method_ptr = jsimd_idct_2x2;
       else
-        method_ptr = jpeg_idct_2x2;
+#endif
+        method_ptr = _jpeg_idct_2x2;
       method = JDCT_ISLOW;      /* jidctred uses islow-style table */
       break;
     case 3:
-      method_ptr = jpeg_idct_3x3;
+      method_ptr = _jpeg_idct_3x3;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 4:
+#ifdef WITH_SIMD
       if (jsimd_can_idct_4x4())
         method_ptr = jsimd_idct_4x4;
       else
-        method_ptr = jpeg_idct_4x4;
+#endif
+        method_ptr = _jpeg_idct_4x4;
       method = JDCT_ISLOW;      /* jidctred uses islow-style table */
       break;
     case 5:
-      method_ptr = jpeg_idct_5x5;
+      method_ptr = _jpeg_idct_5x5;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 6:
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_can_idct_6x6())
         method_ptr = jsimd_idct_6x6;
       else
 #endif
-      method_ptr = jpeg_idct_6x6;
+      method_ptr = _jpeg_idct_6x6;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 7:
-      method_ptr = jpeg_idct_7x7;
+      method_ptr = _jpeg_idct_7x7;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
 #endif
@@ -152,28 +156,34 @@ start_pass(j_decompress_ptr cinfo)
       switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
       case JDCT_ISLOW:
+#ifdef WITH_SIMD
         if (jsimd_can_idct_islow())
           method_ptr = jsimd_idct_islow;
         else
-          method_ptr = jpeg_idct_islow;
+#endif
+          method_ptr = _jpeg_idct_islow;
         method = JDCT_ISLOW;
         break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
       case JDCT_IFAST:
+#ifdef WITH_SIMD
         if (jsimd_can_idct_ifast())
           method_ptr = jsimd_idct_ifast;
         else
-          method_ptr = jpeg_idct_ifast;
+#endif
+          method_ptr = _jpeg_idct_ifast;
         method = JDCT_IFAST;
         break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
       case JDCT_FLOAT:
+#ifdef WITH_SIMD
         if (jsimd_can_idct_float())
           method_ptr = jsimd_idct_float;
         else
-          method_ptr = jpeg_idct_float;
+#endif
+          method_ptr = _jpeg_idct_float;
         method = JDCT_FLOAT;
         break;
 #endif
@@ -184,40 +194,40 @@ start_pass(j_decompress_ptr cinfo)
       break;
 #ifdef IDCT_SCALING_SUPPORTED
     case 9:
-      method_ptr = jpeg_idct_9x9;
+      method_ptr = _jpeg_idct_9x9;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 10:
-      method_ptr = jpeg_idct_10x10;
+      method_ptr = _jpeg_idct_10x10;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 11:
-      method_ptr = jpeg_idct_11x11;
+      method_ptr = _jpeg_idct_11x11;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 12:
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_can_idct_12x12())
         method_ptr = jsimd_idct_12x12;
       else
 #endif
-      method_ptr = jpeg_idct_12x12;
+      method_ptr = _jpeg_idct_12x12;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 13:
-      method_ptr = jpeg_idct_13x13;
+      method_ptr = _jpeg_idct_13x13;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 14:
-      method_ptr = jpeg_idct_14x14;
+      method_ptr = _jpeg_idct_14x14;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 15:
-      method_ptr = jpeg_idct_15x15;
+      method_ptr = _jpeg_idct_15x15;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 16:
-      method_ptr = jpeg_idct_16x16;
+      method_ptr = _jpeg_idct_16x16;
       method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
 #endif
@@ -225,7 +235,7 @@ start_pass(j_decompress_ptr cinfo)
       ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
       break;
     }
-    idct->pub.inverse_DCT[ci] = method_ptr;
+    idct->pub._inverse_DCT[ci] = method_ptr;
     /* Create multiplier table from quant table.
      * However, we can skip this if the component is uninteresting
      * or if we already built the table.  Also, if no quant table
@@ -327,12 +337,15 @@ start_pass(j_decompress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_inverse_dct(j_decompress_ptr cinfo)
+_jinit_inverse_dct(j_decompress_ptr cinfo)
 {
   my_idct_ptr idct;
   int ci;
   jpeg_component_info *compptr;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   idct = (my_idct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_idct_controller));
diff --git a/3rdparty/libjpeg-turbo/src/jddiffct.c b/3rdparty/libjpeg-turbo/src/jddiffct.c
new file mode 100644
index 0000000000..f1d7f61b52
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jddiffct.c
@@ -0,0 +1,403 @@
+/*
+ * jddiffct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the [un]difference buffer controller for decompression.
+ * This controller is the top level of the lossless JPEG decompressor proper.
+ * The difference buffer lies between the entropy decoding and
+ * prediction/undifferencing steps.  The undifference buffer lies between the
+ * prediction/undifferencing and scaling steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"            /* Private declarations for lossless codec */
+
+
+#ifdef D_LOSSLESS_SUPPORTED
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  unsigned int restart_rows_to_go;      /* MCU rows left in this restart
+                                           interval */
+  unsigned int MCU_vert_offset;         /* counts MCU rows within iMCU row */
+  unsigned int MCU_rows_per_iMCU_row;   /* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  JDIFFARRAY diff_buf[MAX_COMPONENTS];  /* iMCU row of differences */
+  JDIFFARRAY undiff_buf[MAX_COMPONENTS]; /* iMCU row of undiff'd samples */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual sample array for each component. */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif
+} my_diff_controller;
+
+typedef my_diff_controller *my_diff_ptr;
+
+/* Forward declarations */
+METHODDEF(int) decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) output_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
+#endif
+
+
+LOCAL(void)
+start_iMCU_row(j_decompress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (input side) */
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    diff->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+      diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  diff->MCU_ctr = 0;
+  diff->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass(j_decompress_ptr cinfo)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+
+  /* Because it is hitching a ride on the jpeg_inverse_dct struct,
+   * start_pass_lossless() will be called at the start of the output pass.
+   * This ensures that it will be called at the start of the input pass as
+   * well.
+   */
+  (*cinfo->idct->start_pass) (cinfo);
+
+  /* Check that the restart interval is an integer multiple of the number
+   * of MCUs in an MCU row.
+   */
+  if (cinfo->restart_interval % cinfo->MCUs_per_row != 0)
+    ERREXIT2(cinfo, JERR_BAD_RESTART,
+             cinfo->restart_interval, cinfo->MCUs_per_row);
+
+  /* Initialize restart counter */
+  diff->restart_rows_to_go = cinfo->restart_interval / cinfo->MCUs_per_row;
+
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder, undifferencer.
+ * Returns FALSE if must suspend.
+ */
+
+METHODDEF(boolean)
+process_restart(j_decompress_ptr cinfo)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+
+  if (!(*cinfo->entropy->process_restart) (cinfo))
+    return FALSE;
+
+  (*cinfo->idct->start_pass) (cinfo);
+
+  /* Reset restart counter */
+  diff->restart_rows_to_go = cinfo->restart_interval / cinfo->MCUs_per_row;
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass(j_decompress_ptr cinfo)
+{
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the supplied buffer.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  JDIMENSION MCU_count;         /* number of MCUs decoded */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int ci, compi, row, prev_row;
+  unsigned int yoffset;
+  jpeg_component_info *compptr;
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = diff->MCU_vert_offset; yoffset < diff->MCU_rows_per_iMCU_row;
+       yoffset++) {
+
+    /* Process restart marker if needed; may have to suspend */
+    if (cinfo->restart_interval) {
+      if (diff->restart_rows_to_go == 0)
+        if (!process_restart(cinfo))
+          return JPEG_SUSPENDED;
+    }
+
+    MCU_col_num = diff->MCU_ctr;
+    /* Try to fetch an MCU row (or remaining portion of suspended MCU row). */
+    MCU_count =
+      (*cinfo->entropy->decode_mcus) (cinfo,
+                                      diff->diff_buf, yoffset, MCU_col_num,
+                                      cinfo->MCUs_per_row - MCU_col_num);
+    if (MCU_count != cinfo->MCUs_per_row - MCU_col_num) {
+      /* Suspension forced; update state counters and exit */
+      diff->MCU_vert_offset = yoffset;
+      diff->MCU_ctr += MCU_count;
+      return JPEG_SUSPENDED;
+    }
+
+    /* Account for restart interval (no-op if not using restarts) */
+    if (cinfo->restart_interval)
+      diff->restart_rows_to_go--;
+
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    diff->MCU_ctr = 0;
+  }
+
+  /*
+   * Undifference and scale each scanline of the disassembled MCU row
+   * separately.  We do not process dummy samples at the end of a scanline
+   * or dummy rows at the end of the image.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    compi = compptr->component_index;
+    for (row = 0, prev_row = compptr->v_samp_factor - 1;
+         row < (cinfo->input_iMCU_row == last_iMCU_row ?
+                compptr->last_row_height : compptr->v_samp_factor);
+         prev_row = row, row++) {
+      (*losslessd->predict_undifference[compi])
+        (cinfo, compi, diff->diff_buf[compi][row],
+          diff->undiff_buf[compi][prev_row], diff->undiff_buf[compi][row],
+          compptr->width_in_blocks);
+      (*losslessd->scaler_scale) (cinfo, diff->undiff_buf[compi][row],
+                                  output_buf[compi][row],
+                                  compptr->width_in_blocks);
+    }
+  }
+
+  /* Completed the iMCU row, advance counters for next one.
+   *
+   * NB: output_data will increment output_iMCU_row.
+   * This counter is not needed for the single-pass case
+   * or the input side of the multi-pass case.
+   */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data(j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;        /* Always indicate nothing was done */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image sample buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data(j_decompress_ptr cinfo)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  int ci, compi;
+  _JSAMPARRAY buffer[MAX_COMPS_IN_SCAN];
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    compi = compptr->component_index;
+    buffer[compi] = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, diff->whole_image[compi],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+  }
+
+  return decompress_data(cinfo, buffer);
+}
+
+
+/*
+ * Output some data from the full-image sample buffer in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+output_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
+{
+  my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int ci, samp_rows, row;
+  _JSAMPARRAY buffer;
+  jpeg_component_info *compptr;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+         (cinfo->input_scan_number == cinfo->output_scan_number &&
+          cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, diff->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, FALSE);
+
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      samp_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      samp_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+      if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
+    }
+
+    for (row = 0; row < samp_rows; row++) {
+      memcpy(output_buf[ci][row], buffer[row],
+             compptr->width_in_blocks * sizeof(_JSAMPLE));
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize difference buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_d_diff_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_diff_ptr diff;
+  int ci;
+  jpeg_component_info *compptr;
+
+  diff = (my_diff_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_diff_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *)diff;
+  diff->pub.start_input_pass = start_input_pass;
+  diff->pub.start_output_pass = start_output_pass;
+
+  /* Create the [un]difference buffers. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    diff->diff_buf[ci] =
+      ALLOC_DARRAY(JPOOL_IMAGE,
+                   (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                                         (long)compptr->h_samp_factor),
+                   (JDIMENSION)compptr->v_samp_factor);
+    diff->undiff_buf[ci] =
+      ALLOC_DARRAY(JPOOL_IMAGE,
+                   (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                                         (long)compptr->h_samp_factor),
+                   (JDIMENSION)compptr->v_samp_factor);
+  }
+
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component. */
+    int access_rows;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+      diff->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+         (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                               (long)compptr->h_samp_factor),
+         (JDIMENSION)jround_up((long)compptr->height_in_blocks,
+                               (long)compptr->v_samp_factor),
+         (JDIMENSION)access_rows);
+    }
+    diff->pub.consume_data = consume_data;
+    diff->pub._decompress_data = output_data;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    diff->pub.consume_data = dummy_consume_data;
+    diff->pub._decompress_data = decompress_data;
+    diff->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+}
+
+#endif /* D_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.c b/3rdparty/libjpeg-turbo/src/jdhuff.c
index 679d221685..cd8c0847a2 100644
--- a/3rdparty/libjpeg-turbo/src/jdhuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.c
@@ -3,8 +3,10 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2018-2019, 2022, D. R. Commander.
  * Copyright (C) 2018, Matthias Räncker.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -24,8 +26,8 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"             /* Declarations shared with jdphuff.c */
-#include "jpegcomp.h"
+#include "jdhuff.h"             /* Declarations shared with jd*huff.c */
+#include "jpegapicomp.h"
 #include "jstdhuff.c"
 
 
@@ -134,7 +136,7 @@ start_pass_huff_decoder(j_decompress_ptr cinfo)
  * Compute the derived values for a Huffman table.
  * This routine also performs some validation checks on the table.
  *
- * Note this is also used by jdphuff.c.
+ * Note this is also used by jdphuff.c and jdlhuff.c.
  */
 
 GLOBAL(void)
@@ -245,14 +247,14 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
 
   /* Validate symbols as being reasonable.
    * For AC tables, we make no check, but accept all byte values 0..255.
-   * For DC tables, we require the symbols to be in range 0..15.
-   * (Tighter bounds could be applied depending on the data depth and mode,
-   * but this is sufficient to ensure safe decoding.)
+   * For DC tables, we require the symbols to be in range 0..15 in lossy mode
+   * and 0..16 in lossless mode.  (Tighter bounds could be applied depending on
+   * the data depth and mode, but this is sufficient to ensure safe decoding.)
    */
   if (isDC) {
     for (i = 0; i < numsymbols; i++) {
       int sym = htbl->huffval[i];
-      if (sym < 0 || sym > 15)
+      if (sym < 0 || sym > (cinfo->master->lossless ? 16 : 15))
         ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     }
   }
@@ -260,7 +262,7 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
 
 
 /*
- * Out-of-line code for bit fetching (shared with jdphuff.c).
+ * Out-of-line code for bit fetching (shared with jdphuff.c and jdlhuff.c).
  * See jdhuff.h for info about usage.
  * Note: current values of get_buffer and bits_left are passed as parameters,
  * but are returned in the corresponding fields of the state struct.
diff --git a/3rdparty/libjpeg-turbo/src/jdhuff.h b/3rdparty/libjpeg-turbo/src/jdhuff.h
index cfa0b7f558..3eee002c02 100644
--- a/3rdparty/libjpeg-turbo/src/jdhuff.h
+++ b/3rdparty/libjpeg-turbo/src/jdhuff.h
@@ -3,6 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
  * Copyright (C) 2018, Matthias Räncker.
@@ -10,8 +12,9 @@
  * file.
  *
  * This file contains declarations for Huffman entropy decoding routines
- * that are shared between the sequential decoder (jdhuff.c) and the
- * progressive decoder (jdphuff.c).  No other modules need to see these.
+ * that are shared between the sequential decoder (jdhuff.c), the progressive
+ * decoder (jdphuff.c), and the lossless decoder (jdlhuff.c).  No other modules
+ * need to see these.
  */
 
 #include "jconfigint.h"
diff --git a/3rdparty/libjpeg-turbo/src/jdinput.c b/3rdparty/libjpeg-turbo/src/jdinput.c
index 1bc5aff1a7..136bef59d7 100644
--- a/3rdparty/libjpeg-turbo/src/jdinput.c
+++ b/3rdparty/libjpeg-turbo/src/jdinput.c
@@ -3,6 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2010, 2016, 2018, 2022, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
@@ -11,14 +13,15 @@
  *
  * This file contains input control logic for the JPEG decompressor.
  * These routines are concerned with controlling the decompressor's input
- * processing (marker reading and coefficient decoding).  The actual input
- * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c.
+ * processing (marker reading and coefficient/difference decoding).
+ * The actual input reading is done in jdmarker.c, jdhuff.c, jdphuff.c,
+ * and jdlhuff.c.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 /* Private state */
@@ -46,6 +49,7 @@ initial_setup(j_decompress_ptr cinfo)
 {
   int ci;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   /* Make sure image isn't bigger than I can handle */
   if ((long)cinfo->image_height > (long)JPEG_MAX_DIMENSION ||
@@ -53,7 +57,12 @@ initial_setup(j_decompress_ptr cinfo)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION);
 
   /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->data_precision != 8 && cinfo->data_precision != 12 &&
+      cinfo->data_precision != 16)
+#else
+  if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
+#endif
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -78,36 +87,36 @@ initial_setup(j_decompress_ptr cinfo)
   }
 
 #if JPEG_LIB_VERSION >= 80
-  cinfo->block_size = DCTSIZE;
+  cinfo->block_size = data_unit;
   cinfo->natural_order = jpeg_natural_order;
   cinfo->lim_Se = DCTSIZE2 - 1;
 #endif
 
-  /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
-   * In the full decompressor, this will be overridden by jdmaster.c;
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE in lossy
+   * mode.  In the full decompressor, this will be overridden by jdmaster.c;
    * but in the transcoder, jdmaster.c is not used, so we must do it here.
    */
 #if JPEG_LIB_VERSION >= 70
-  cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
+  cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = data_unit;
 #else
-  cinfo->min_DCT_scaled_size = DCTSIZE;
+  cinfo->min_DCT_scaled_size = data_unit;
 #endif
 
   /* Compute dimensions of components */
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
 #if JPEG_LIB_VERSION >= 70
-    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
 #else
-    compptr->DCT_scaled_size = DCTSIZE;
+    compptr->DCT_scaled_size = data_unit;
 #endif
-    /* Size in DCT blocks */
+    /* Size in data units */
     compptr->width_in_blocks = (JDIMENSION)
       jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor,
-                    (long)(cinfo->max_h_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_h_samp_factor * data_unit));
     compptr->height_in_blocks = (JDIMENSION)
       jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor,
-                    (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_v_samp_factor * data_unit));
     /* Set the first and last MCU columns to decompress from multi-scan images.
      * By default, decompress all of the MCU columns.
      */
@@ -133,7 +142,7 @@ initial_setup(j_decompress_ptr cinfo)
   /* Compute number of fully interleaved MCU rows. */
   cinfo->total_iMCU_rows = (JDIMENSION)
     jdiv_round_up((long)cinfo->image_height,
-                  (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                  (long)(cinfo->max_v_samp_factor * data_unit));
 
   /* Decide whether file contains multiple scans */
   if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
@@ -150,6 +159,7 @@ per_scan_setup(j_decompress_ptr cinfo)
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
 
   if (cinfo->comps_in_scan == 1) {
 
@@ -160,14 +170,14 @@ per_scan_setup(j_decompress_ptr cinfo)
     cinfo->MCUs_per_row = compptr->width_in_blocks;
     cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
 
-    /* For noninterleaved scan, always one block per MCU */
+    /* For noninterleaved scan, always one data unit per MCU */
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
     compptr->MCU_blocks = 1;
     compptr->MCU_sample_width = compptr->_DCT_scaled_size;
     compptr->last_col_width = 1;
     /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
+     * as the number of data unit rows present in the last iMCU row.
      */
     tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
     if (tmp == 0) tmp = compptr->v_samp_factor;
@@ -187,22 +197,22 @@ per_scan_setup(j_decompress_ptr cinfo)
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
       jdiv_round_up((long)cinfo->image_width,
-                    (long)(cinfo->max_h_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_h_samp_factor * data_unit));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
       jdiv_round_up((long)cinfo->image_height,
-                    (long)(cinfo->max_v_samp_factor * DCTSIZE));
+                    (long)(cinfo->max_v_samp_factor * data_unit));
 
     cinfo->blocks_in_MCU = 0;
 
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
+      /* Sampling factors give # of data units of component in each MCU */
       compptr->MCU_width = compptr->h_samp_factor;
       compptr->MCU_height = compptr->v_samp_factor;
       compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
       compptr->MCU_sample_width = compptr->MCU_width *
                                   compptr->_DCT_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
+      /* Figure number of non-dummy data units in last MCU column & row */
       tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
       if (tmp == 0) tmp = compptr->MCU_width;
       compptr->last_col_width = tmp;
@@ -281,7 +291,8 @@ METHODDEF(void)
 start_input_pass(j_decompress_ptr cinfo)
 {
   per_scan_setup(cinfo);
-  latch_quant_tables(cinfo);
+  if (!cinfo->master->lossless)
+    latch_quant_tables(cinfo);
   (*cinfo->entropy->start_pass) (cinfo);
   (*cinfo->coef->start_input_pass) (cinfo);
   cinfo->inputctl->consume_input = cinfo->coef->consume_data;
@@ -290,8 +301,8 @@ start_input_pass(j_decompress_ptr cinfo)
 
 /*
  * Finish up after inputting a compressed-data scan.
- * This is called by the coefficient controller after it's read all
- * the expected data of the scan.
+ * This is called by the coefficient or difference controller after it's read
+ * all the expected data of the scan.
  */
 
 METHODDEF(void)
@@ -307,8 +318,8 @@ finish_input_pass(j_decompress_ptr cinfo)
  * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
  *
  * The consume_input method pointer points either here or to the
- * coefficient controller's consume_data routine, depending on whether
- * we are reading a compressed data segment or inter-segment markers.
+ * coefficient or difference controller's consume_data routine, depending on
+ * whether we are reading a compressed data segment or inter-segment markers.
  */
 
 METHODDEF(int)
diff --git a/3rdparty/libjpeg-turbo/src/jdlhuff.c b/3rdparty/libjpeg-turbo/src/jdlhuff.c
new file mode 100644
index 0000000000..9964830dba
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jdlhuff.c
@@ -0,0 +1,302 @@
+/*
+ * jdlhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy decoding routines for lossless JPEG.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"            /* Private declarations for lossless codec */
+#include "jdhuff.h"             /* Declarations shared with jd*huff.c */
+
+
+#ifdef D_LOSSLESS_SUPPORTED
+
+typedef struct {
+  int ci, yoffset, MCU_width;
+} lhd_output_ptr_info;
+
+/*
+ * Expanded entropy decoder object for Huffman decoding in lossless mode.
+ */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcus: */
+
+  /* Pointers to derived tables to be used for each data unit within an MCU */
+  d_derived_tbl *cur_tbls[D_MAX_BLOCKS_IN_MCU];
+
+  /* Pointers to the proper output difference row for each group of data units
+   * within an MCU.  For each component, there are Vi groups of Hi data units.
+   */
+  JDIFFROW output_ptr[D_MAX_BLOCKS_IN_MCU];
+
+  /* Number of output pointers in use for the current MCU.  This is the sum
+   * of all Vi in the MCU.
+   */
+  int num_output_ptrs;
+
+  /* Information used for positioning the output pointers within the output
+   * difference rows.
+   */
+  lhd_output_ptr_info output_ptr_info[D_MAX_BLOCKS_IN_MCU];
+
+  /* Index of the proper output pointer for each data unit within an MCU */
+  int output_ptr_index[D_MAX_BLOCKS_IN_MCU];
+
+} lhuff_entropy_decoder;
+
+typedef lhuff_entropy_decoder *lhuff_entropy_ptr;
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_lhuff_decoder(j_decompress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  int ci, dctbl, sampn, ptrn, yoffset, xoffset;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    /* Make sure requested tables are present */
+    if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS ||
+        cinfo->dc_huff_tbl_ptrs[dctbl] == NULL)
+      ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+    /* Compute derived values for Huffman tables */
+    /* We may do this more than once for a table, but it's not expensive */
+    jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
+                            &entropy->derived_tbls[dctbl]);
+  }
+
+  /* Precalculate decoding info for each sample in an MCU of this scan */
+  for (sampn = 0, ptrn = 0; sampn < cinfo->blocks_in_MCU;) {
+    compptr = cinfo->cur_comp_info[cinfo->MCU_membership[sampn]];
+    ci = compptr->component_index;
+    for (yoffset = 0; yoffset < compptr->MCU_height; yoffset++, ptrn++) {
+      /* Precalculate the setup info for each output pointer */
+      entropy->output_ptr_info[ptrn].ci = ci;
+      entropy->output_ptr_info[ptrn].yoffset = yoffset;
+      entropy->output_ptr_info[ptrn].MCU_width = compptr->MCU_width;
+      for (xoffset = 0; xoffset < compptr->MCU_width; xoffset++, sampn++) {
+        /* Precalculate the output pointer index for each sample */
+        entropy->output_ptr_index[sampn] = ptrn;
+        /* Precalculate which table to use for each sample */
+        entropy->cur_tbls[sampn] = entropy->derived_tbls[compptr->dc_tbl_no];
+      }
+    }
+  }
+  entropy->num_output_ptrs = ptrn;
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#define AVOID_TABLES
+#ifdef AVOID_TABLES
+
+#define NEG_1  ((unsigned int)-1)
+#define HUFF_EXTEND(x, s) \
+  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
+
+#else
+
+#define HUFF_EXTEND(x, s) \
+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
+};
+
+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
+};
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart(j_decompress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (!(*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Decode and return nMCU MCUs' worth of Huffman-compressed differences.
+ * Each MCU is also disassembled and placed accordingly in diff_buf.
+ *
+ * MCU_col_num specifies the column of the first MCU being requested within
+ * the MCU row.  This tells us where to position the output row pointers in
+ * diff_buf.
+ *
+ * Returns the number of MCUs decoded.  This may be less than nMCU MCUs if
+ * data source requested suspension.  In that case no changes have been made
+ * to permanent state.  (Exception: some output differences may already have
+ * been assigned.  This is harmless for this module, since we'll just
+ * re-assign them on the next call.)
+ */
+
+METHODDEF(JDIMENSION)
+decode_mcus(j_decompress_ptr cinfo, JDIFFIMAGE diff_buf,
+            JDIMENSION MCU_row_num, JDIMENSION MCU_col_num, JDIMENSION nMCU)
+{
+  lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
+  int sampn, ci, yoffset, MCU_width, ptrn;
+  JDIMENSION mcu_num;
+  BITREAD_STATE_VARS;
+
+  /* Set output pointer locations based on MCU_col_num */
+  for (ptrn = 0; ptrn < entropy->num_output_ptrs; ptrn++) {
+    ci = entropy->output_ptr_info[ptrn].ci;
+    yoffset = entropy->output_ptr_info[ptrn].yoffset;
+    MCU_width = entropy->output_ptr_info[ptrn].MCU_width;
+    entropy->output_ptr[ptrn] =
+      diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
+  }
+
+  /*
+   * If we've run out of data, zero out the buffers and return.
+   * By resetting the undifferencer, the output samples will be CENTERJSAMPLE.
+   *
+   * NB: We should find a way to do this without interacting with the
+   * undifferencer module directly.
+   */
+  if (entropy->pub.insufficient_data) {
+    for (ptrn = 0; ptrn < entropy->num_output_ptrs; ptrn++)
+      jzero_far((void FAR *)entropy->output_ptr[ptrn],
+                nMCU * entropy->output_ptr_info[ptrn].MCU_width *
+                sizeof(JDIFF));
+
+    (*cinfo->idct->start_pass) (cinfo);
+
+  } else {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+
+    /* Outer loop handles the number of MCUs requested */
+
+    for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
+
+      /* Inner loop handles the samples in the MCU */
+      for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
+        d_derived_tbl *dctbl = entropy->cur_tbls[sampn];
+        register int s, r;
+
+        /* Section H.2.2: decode the sample difference */
+        HUFF_DECODE(s, br_state, dctbl, return mcu_num, label1);
+        if (s) {
+          if (s == 16)  /* special case: always output 32768 */
+            s = 32768;
+          else {        /* normal case: fetch subsequent bits */
+            CHECK_BIT_BUFFER(br_state, s, return mcu_num);
+            r = GET_BITS(s);
+            s = HUFF_EXTEND(r, s);
+          }
+        }
+
+        /* Output the sample difference */
+        *entropy->output_ptr[entropy->output_ptr_index[sampn]]++ = (JDIFF)s;
+      }
+
+      /* Completed MCU, so update state */
+      BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    }
+  }
+
+ return nMCU;
+}
+
+
+/*
+ * Module initialization routine for lossless mode Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_lhuff_decoder(j_decompress_ptr cinfo)
+{
+  lhuff_entropy_ptr entropy;
+  int i;
+
+  entropy = (lhuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(lhuff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
+  entropy->pub.start_pass = start_pass_lhuff_decoder;
+  entropy->pub.decode_mcus = decode_mcus;
+  entropy->pub.process_restart = process_restart;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+  }
+}
+
+#endif /* D_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jdlossls.c b/3rdparty/libjpeg-turbo/src/jdlossls.c
new file mode 100644
index 0000000000..4d15e6bbaf
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jdlossls.c
@@ -0,0 +1,289 @@
+/*
+ * jdlossls.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains prediction, sample undifferencing, point transform, and
+ * sample scaling routines for the lossless JPEG decompressor.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jlossls.h"
+
+#ifdef D_LOSSLESS_SUPPORTED
+
+
+/**************** Sample undifferencing (reconstruction) *****************/
+
+/*
+ * In order to avoid a performance penalty for checking which predictor is
+ * being used and which row is being processed for each call of the
+ * undifferencer, and to promote optimization, we have separate undifferencing
+ * functions for each predictor selection value.
+ *
+ * We are able to avoid duplicating source code by implementing the predictors
+ * and undifferencers as macros.  Each of the undifferencing functions is
+ * simply a wrapper around an UNDIFFERENCE macro with the appropriate PREDICTOR
+ * macro passed as an argument.
+ */
+
+/* Predictor for the first column of the first row: 2^(P-Pt-1) */
+#define INITIAL_PREDICTORx  (1 << (cinfo->data_precision - cinfo->Al - 1))
+
+/* Predictor for the first column of the remaining rows: Rb */
+#define INITIAL_PREDICTOR2  prev_row[0]
+
+
+/*
+ * 1-Dimensional undifferencer routine.
+ *
+ * This macro implements the 1-D horizontal predictor (1).  INITIAL_PREDICTOR
+ * is used as the special case predictor for the first column, which must be
+ * either INITIAL_PREDICTOR2 or INITIAL_PREDICTORx.  The remaining samples
+ * use PREDICTOR1.
+ *
+ * The reconstructed sample is supposed to be calculated modulo 2^16, so we
+ * logically AND the result with 0xFFFF.
+ */
+
+#define UNDIFFERENCE_1D(INITIAL_PREDICTOR) \
+  int Ra; \
+  \
+  Ra = (*diff_buf++ + INITIAL_PREDICTOR) & 0xFFFF; \
+  *undiff_buf++ = Ra; \
+  \
+  while (--width) { \
+    Ra = (*diff_buf++ + PREDICTOR1) & 0xFFFF; \
+    *undiff_buf++ = Ra; \
+  }
+
+
+/*
+ * 2-Dimensional undifferencer routine.
+ *
+ * This macro implements the 2-D horizontal predictors (#2-7).  PREDICTOR2 is
+ * used as the special case predictor for the first column.  The remaining
+ * samples use PREDICTOR, which is a function of Ra, Rb, and Rc.
+ *
+ * Because prev_row and output_buf may point to the same storage area (in an
+ * interleaved image with Vi=1, for example), we must take care to buffer Rb/Rc
+ * before writing the current reconstructed sample value into output_buf.
+ *
+ * The reconstructed sample is supposed to be calculated modulo 2^16, so we
+ * logically AND the result with 0xFFFF.
+ */
+
+#define UNDIFFERENCE_2D(PREDICTOR) \
+  int Ra, Rb, Rc; \
+  \
+  Rb = *prev_row++; \
+  Ra = (*diff_buf++ + PREDICTOR2) & 0xFFFF; \
+  *undiff_buf++ = Ra; \
+  \
+  while (--width) { \
+    Rc = Rb; \
+    Rb = *prev_row++; \
+    Ra = (*diff_buf++ + PREDICTOR) & 0xFFFF; \
+    *undiff_buf++ = Ra; \
+  }
+
+
+/*
+ * Undifferencers for the second and subsequent rows in a scan or restart
+ * interval.  The first sample in the row is undifferenced using the vertical
+ * predictor (2).  The rest of the samples are undifferenced using the
+ * predictor specified in the scan header.
+ */
+
+METHODDEF(void)
+jpeg_undifference1(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_1D(INITIAL_PREDICTOR2);
+}
+
+METHODDEF(void)
+jpeg_undifference2(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR2);
+  (void)(Rc);
+}
+
+METHODDEF(void)
+jpeg_undifference3(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR3);
+}
+
+METHODDEF(void)
+jpeg_undifference4(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR4);
+}
+
+METHODDEF(void)
+jpeg_undifference5(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR5);
+}
+
+METHODDEF(void)
+jpeg_undifference6(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR6);
+}
+
+METHODDEF(void)
+jpeg_undifference7(j_decompress_ptr cinfo, int comp_index,
+                   JDIFFROW diff_buf, JDIFFROW prev_row,
+                   JDIFFROW undiff_buf, JDIMENSION width)
+{
+  UNDIFFERENCE_2D(PREDICTOR7);
+  (void)(Rc);
+}
+
+
+/*
+ * Undifferencer for the first row in a scan or restart interval.  The first
+ * sample in the row is undifferenced using the special predictor constant
+ * x=2^(P-Pt-1).  The rest of the samples are undifferenced using the
+ * 1-D horizontal predictor (1).
+ */
+
+METHODDEF(void)
+jpeg_undifference_first_row(j_decompress_ptr cinfo, int comp_index,
+                            JDIFFROW diff_buf, JDIFFROW prev_row,
+                            JDIFFROW undiff_buf, JDIMENSION width)
+{
+  lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
+
+  UNDIFFERENCE_1D(INITIAL_PREDICTORx);
+
+  /*
+   * Now that we have undifferenced the first row, we want to use the
+   * undifferencer that corresponds to the predictor specified in the
+   * scan header.
+   */
+  switch (cinfo->Ss) {
+  case 1:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference1;
+    break;
+  case 2:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference2;
+    break;
+  case 3:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference3;
+    break;
+  case 4:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference4;
+    break;
+  case 5:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference5;
+    break;
+  case 6:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference6;
+    break;
+  case 7:
+    losslessd->predict_undifference[comp_index] = jpeg_undifference7;
+    break;
+  }
+}
+
+
+/*********************** Sample upscaling by 2^Pt ************************/
+
+METHODDEF(void)
+simple_upscale(j_decompress_ptr cinfo,
+               JDIFFROW diff_buf, _JSAMPROW output_buf, JDIMENSION width)
+{
+  do {
+    *output_buf++ = (_JSAMPLE)(*diff_buf++ << cinfo->Al);
+  } while (--width);
+}
+
+METHODDEF(void)
+noscale(j_decompress_ptr cinfo,
+        JDIFFROW diff_buf, _JSAMPROW output_buf, JDIMENSION width)
+{
+  do {
+    *output_buf++ = (_JSAMPLE)(*diff_buf++);
+  } while (--width);
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_pass_lossless(j_decompress_ptr cinfo)
+{
+  lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
+  int ci;
+
+  /* Check that the scan parameters Ss, Se, Ah, Al are OK for lossless JPEG.
+   *
+   * Ss is the predictor selection value (psv).  Legal values for sequential
+   * lossless JPEG are: 1 <= psv <= 7.
+   *
+   * Se and Ah are not used and should be zero.
+   *
+   * Al specifies the point transform (Pt).
+   * Legal values are: 0 <= Pt <= (data precision - 1).
+   */
+  if (cinfo->Ss < 1 || cinfo->Ss > 7 ||
+      cinfo->Se != 0 || cinfo->Ah != 0 ||
+      cinfo->Al < 0 || cinfo->Al >= cinfo->data_precision)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+
+  /* Set undifference functions to first row function */
+  for (ci = 0; ci < cinfo->num_components; ci++)
+    losslessd->predict_undifference[ci] = jpeg_undifference_first_row;
+
+  /* Set scaler function based on Pt */
+  if (cinfo->Al)
+    losslessd->scaler_scale = simple_upscale;
+  else
+    losslessd->scaler_scale = noscale;
+}
+
+
+/*
+ * Initialize the lossless decompressor.
+ */
+
+GLOBAL(void)
+_jinit_lossless_decompressor(j_decompress_ptr cinfo)
+{
+  lossless_decomp_ptr losslessd;
+
+  /* Create subobject in permanent pool */
+  losslessd = (lossless_decomp_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                sizeof(jpeg_lossless_decompressor));
+  cinfo->idct = (struct jpeg_inverse_dct *)losslessd;
+  losslessd->pub.start_pass = start_pass_lossless;
+}
+
+#endif /* D_LOSSLESS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/jdmainct.c b/3rdparty/libjpeg-turbo/src/jdmainct.c
index f466b259f0..c672b4baf5 100644
--- a/3rdparty/libjpeg-turbo/src/jdmainct.c
+++ b/3rdparty/libjpeg-turbo/src/jdmainct.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2016, D. R. Commander.
+ * Copyright (C) 2010, 2016, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -18,15 +18,17 @@
 
 #include "jinclude.h"
 #include "jdmainct.h"
-#include "jconfigint.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /*
  * In the current system design, the main buffer need never be a full-image
- * buffer; any full-height buffers will be found inside the coefficient or
- * postprocessing controllers.  Nonetheless, the main controller is not
- * trivial.  Its responsibility is to provide context rows for upsampling/
- * rescaling, and doing this in an efficient fashion is a bit tricky.
+ * buffer; any full-height buffers will be found inside the coefficient,
+ * difference, or postprocessing controllers.  Nonetheless, the main controller
+ * is not trivial.  Its responsibility is to provide context rows for
+ * upsampling/rescaling, and doing this in an efficient fashion is a bit
+ * tricky.
  *
  * Postprocessor input data is counted in "row groups".  A row group
  * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
@@ -38,20 +40,20 @@
  * row group (times any additional scale factor that the upsampler is
  * applying).
  *
- * The coefficient controller will deliver data to us one iMCU row at a time;
- * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
- * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
- * to one row of MCUs when the image is fully interleaved.)  Note that the
- * number of sample rows varies across components, but the number of row
- * groups does not.  Some garbage sample rows may be included in the last iMCU
- * row at the bottom of the image.
+ * The coefficient or difference controller will deliver data to us one iMCU
+ * row at a time; each iMCU row contains v_samp_factor * DCT_scaled_size sample
+ * rows, or exactly min_DCT_scaled_size row groups.  (This amount of data
+ * corresponds to one row of MCUs when the image is fully interleaved.)  Note
+ * that the number of sample rows varies across components, but the number of
+ * row groups does not.  Some garbage sample rows may be included in the last
+ * iMCU row at the bottom of the image.
  *
  * Depending on the vertical scaling algorithm used, the upsampler may need
  * access to the sample row(s) above and below its current input row group.
  * The upsampler is required to set need_context_rows TRUE at global selection
  * time if so.  When need_context_rows is FALSE, this controller can simply
- * obtain one iMCU row at a time from the coefficient controller and dole it
- * out as row groups to the postprocessor.
+ * obtain one iMCU row at a time from the coefficient or difference controller
+ * and dole it out as row groups to the postprocessor.
  *
  * When need_context_rows is TRUE, this controller guarantees that the buffer
  * passed to postprocessing contains at least one row group's worth of samples
@@ -114,16 +116,16 @@
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main(j_decompress_ptr cinfo,
-                                         JSAMPARRAY output_buf,
+                                         _JSAMPARRAY output_buf,
                                          JDIMENSION *out_row_ctr,
                                          JDIMENSION out_rows_avail);
 METHODDEF(void) process_data_context_main(j_decompress_ptr cinfo,
-                                          JSAMPARRAY output_buf,
+                                          _JSAMPARRAY output_buf,
                                           JDIMENSION *out_row_ctr,
                                           JDIMENSION out_rows_avail);
 #ifdef QUANT_2PASS_SUPPORTED
 METHODDEF(void) process_data_crank_post(j_decompress_ptr cinfo,
-                                        JSAMPARRAY output_buf,
+                                        _JSAMPARRAY output_buf,
                                         JDIMENSION *out_row_ctr,
                                         JDIMENSION out_rows_avail);
 #endif
@@ -139,14 +141,15 @@ alloc_funny_pointers(j_decompress_ptr cinfo)
   int ci, rgroup;
   int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
+  _JSAMPARRAY xbuf;
 
   /* Get top-level space for component array pointers.
    * We alloc both arrays with one call to save a few cycles.
    */
-  main_ptr->xbuffer[0] = (JSAMPIMAGE)
+  main_ptr->xbuffer[0] = (_JSAMPIMAGE)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                cinfo->num_components * 2 * sizeof(JSAMPARRAY));
+                                cinfo->num_components * 2 *
+                                sizeof(_JSAMPARRAY));
   main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
@@ -156,9 +159,9 @@ alloc_funny_pointers(j_decompress_ptr cinfo)
     /* Get space for pointer lists --- M+4 row groups in each list.
      * We alloc both pointer lists with one call to save a few cycles.
      */
-    xbuf = (JSAMPARRAY)
+    xbuf = (_JSAMPARRAY)
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                  2 * (rgroup * (M + 4)) * sizeof(JSAMPROW));
+                                  2 * (rgroup * (M + 4)) * sizeof(_JSAMPROW));
     xbuf += rgroup;             /* want one row group at negative offsets */
     main_ptr->xbuffer[0][ci] = xbuf;
     xbuf += rgroup * (M + 4);
@@ -180,7 +183,7 @@ make_funny_pointers(j_decompress_ptr cinfo)
   int ci, i, rgroup;
   int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
-  JSAMPARRAY buf, xbuf0, xbuf1;
+  _JSAMPARRAY buf, xbuf0, xbuf1;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -220,7 +223,7 @@ set_bottom_pointers(j_decompress_ptr cinfo)
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
   int ci, i, rgroup, iMCUheight, rows_left;
   jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
+  _JSAMPARRAY xbuf;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -259,14 +262,14 @@ start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
   switch (pass_mode) {
   case JBUF_PASS_THRU:
     if (cinfo->upsample->need_context_rows) {
-      main_ptr->pub.process_data = process_data_context_main;
+      main_ptr->pub._process_data = process_data_context_main;
       make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
       main_ptr->whichptr = 0;   /* Read first iMCU row into xbuffer[0] */
       main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
       main_ptr->iMCU_row_ctr = 0;
     } else {
       /* Simple case with no context needed */
-      main_ptr->pub.process_data = process_data_simple_main;
+      main_ptr->pub._process_data = process_data_simple_main;
     }
     main_ptr->buffer_full = FALSE;      /* Mark buffer empty */
     main_ptr->rowgroup_ctr = 0;
@@ -274,7 +277,7 @@ start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
 #ifdef QUANT_2PASS_SUPPORTED
   case JBUF_CRANK_DEST:
     /* For last pass of 2-pass quantization, just crank the postprocessor */
-    main_ptr->pub.process_data = process_data_crank_post;
+    main_ptr->pub._process_data = process_data_crank_post;
     break;
 #endif
   default:
@@ -290,7 +293,7 @@ start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
  */
 
 METHODDEF(void)
-process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+process_data_simple_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
                          JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
@@ -298,7 +301,7 @@ process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
 
   /* Read input data if we haven't filled the main buffer yet */
   if (!main_ptr->buffer_full) {
-    if (!(*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
+    if (!(*cinfo->coef->_decompress_data) (cinfo, main_ptr->buffer))
       return;                   /* suspension forced, can do nothing more */
     main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
   }
@@ -311,9 +314,9 @@ process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
    */
 
   /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
-                                     &main_ptr->rowgroup_ctr, rowgroups_avail,
-                                     output_buf, out_row_ctr, out_rows_avail);
+  (*cinfo->post->_post_process_data) (cinfo, main_ptr->buffer,
+                                      &main_ptr->rowgroup_ctr, rowgroups_avail,
+                                      output_buf, out_row_ctr, out_rows_avail);
 
   /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
   if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
@@ -329,15 +332,15 @@ process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
  */
 
 METHODDEF(void)
-process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+process_data_context_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
                           JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
   my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
 
   /* Read input data if we haven't filled the main buffer yet */
   if (!main_ptr->buffer_full) {
-    if (!(*cinfo->coef->decompress_data) (cinfo,
-                                          main_ptr->xbuffer[main_ptr->whichptr]))
+    if (!(*cinfo->coef->_decompress_data) (cinfo,
+                                           main_ptr->xbuffer[main_ptr->whichptr]))
       return;                   /* suspension forced, can do nothing more */
     main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
     main_ptr->iMCU_row_ctr++;   /* count rows received */
@@ -351,11 +354,11 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
   switch (main_ptr->context_state) {
   case CTX_POSTPONED_ROW:
     /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo,
-                                       main_ptr->xbuffer[main_ptr->whichptr],
-                                       &main_ptr->rowgroup_ctr,
-                                       main_ptr->rowgroups_avail, output_buf,
-                                       out_row_ctr, out_rows_avail);
+    (*cinfo->post->_post_process_data) (cinfo,
+                                        main_ptr->xbuffer[main_ptr->whichptr],
+                                        &main_ptr->rowgroup_ctr,
+                                        main_ptr->rowgroups_avail, output_buf,
+                                        out_row_ctr, out_rows_avail);
     if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
       return;                   /* Need to suspend */
     main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
@@ -375,11 +378,11 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
     FALLTHROUGH                 /*FALLTHROUGH*/
   case CTX_PROCESS_IMCU:
     /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo,
-                                       main_ptr->xbuffer[main_ptr->whichptr],
-                                       &main_ptr->rowgroup_ctr,
-                                       main_ptr->rowgroups_avail, output_buf,
-                                       out_row_ctr, out_rows_avail);
+    (*cinfo->post->_post_process_data) (cinfo,
+                                        main_ptr->xbuffer[main_ptr->whichptr],
+                                        &main_ptr->rowgroup_ctr,
+                                        main_ptr->rowgroups_avail, output_buf,
+                                        out_row_ctr, out_rows_avail);
     if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
       return;                   /* Need to suspend */
     /* After the first iMCU, change wraparound pointers to normal state */
@@ -406,12 +409,12 @@ process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
 #ifdef QUANT_2PASS_SUPPORTED
 
 METHODDEF(void)
-process_data_crank_post(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+process_data_crank_post(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
                         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
-  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE)NULL,
-                                     (JDIMENSION *)NULL, (JDIMENSION)0,
-                                     output_buf, out_row_ctr, out_rows_avail);
+  (*cinfo->post->_post_process_data) (cinfo, (_JSAMPIMAGE)NULL,
+                                      (JDIMENSION *)NULL, (JDIMENSION)0,
+                                      output_buf, out_row_ctr, out_rows_avail);
 }
 
 #endif /* QUANT_2PASS_SUPPORTED */
@@ -422,12 +425,15 @@ process_data_crank_post(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
  */
 
 GLOBAL(void)
-jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+_jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 {
   my_main_ptr main_ptr;
   int ci, rgroup, ngroups;
   jpeg_component_info *compptr;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_main_controller));
@@ -453,9 +459,11 @@ jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
        ci++, compptr++) {
     rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
       cinfo->_min_DCT_scaled_size; /* height of a row group of component */
-    main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
+    main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
                         ((j_common_ptr)cinfo, JPOOL_IMAGE,
                          compptr->width_in_blocks * compptr->_DCT_scaled_size,
                          (JDIMENSION)(rgroup * ngroups));
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdmainct.h b/3rdparty/libjpeg-turbo/src/jdmainct.h
index 37b201ca88..914ad11f69 100644
--- a/3rdparty/libjpeg-turbo/src/jdmainct.h
+++ b/3rdparty/libjpeg-turbo/src/jdmainct.h
@@ -3,22 +3,27 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  */
 
 #define JPEG_INTERNALS
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /* Private buffer controller object */
 
 typedef struct {
   struct jpeg_d_main_controller pub; /* public fields */
 
   /* Pointer to allocated workspace (M or M+2 row groups). */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
+  _JSAMPARRAY buffer[MAX_COMPONENTS];
 
   boolean buffer_full;          /* Have we gotten an iMCU row from decoder? */
   JDIMENSION rowgroup_ctr;      /* counts row groups output to postprocessor */
@@ -26,7 +31,7 @@ typedef struct {
   /* Remaining fields are only used in the context case. */
 
   /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];        /* pointers to weird pointer lists */
+  _JSAMPIMAGE xbuffer[2];       /* pointers to weird pointer lists */
 
   int whichptr;                 /* indicates which pointer set is now in use */
   int context_state;            /* process_data state machine status */
@@ -53,7 +58,7 @@ set_wraparound_pointers(j_decompress_ptr cinfo)
   int ci, i, rgroup;
   int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
-  JSAMPARRAY xbuf0, xbuf1;
+  _JSAMPARRAY xbuf0, xbuf1;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -69,3 +74,5 @@ set_wraparound_pointers(j_decompress_ptr cinfo)
     }
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdmarker.c b/3rdparty/libjpeg-turbo/src/jdmarker.c
index f7eba615fd..acd28ce62c 100644
--- a/3rdparty/libjpeg-turbo/src/jdmarker.c
+++ b/3rdparty/libjpeg-turbo/src/jdmarker.c
@@ -3,6 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2012, 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -237,7 +239,8 @@ get_soi(j_decompress_ptr cinfo)
 
 
 LOCAL(boolean)
-get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
+get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_lossless,
+        boolean is_arith)
 /* Process a SOFn marker */
 {
   JLONG length;
@@ -246,6 +249,7 @@ get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
   INPUT_VARS(cinfo);
 
   cinfo->progressive_mode = is_prog;
+  cinfo->master->lossless = is_lossless;
   cinfo->arith_code = is_arith;
 
   INPUT_2BYTES(cinfo, length, return FALSE);
@@ -990,32 +994,40 @@ read_markers(j_decompress_ptr cinfo)
 
     case M_SOF0:                /* Baseline */
     case M_SOF1:                /* Extended sequential, Huffman */
-      if (!get_sof(cinfo, FALSE, FALSE))
+      if (!get_sof(cinfo, FALSE, FALSE, FALSE))
         return JPEG_SUSPENDED;
       break;
 
     case M_SOF2:                /* Progressive, Huffman */
-      if (!get_sof(cinfo, TRUE, FALSE))
+      if (!get_sof(cinfo, TRUE, FALSE, FALSE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF3:                /* Lossless, Huffman */
+      if (!get_sof(cinfo, FALSE, TRUE, FALSE))
         return JPEG_SUSPENDED;
       break;
 
     case M_SOF9:                /* Extended sequential, arithmetic */
-      if (!get_sof(cinfo, FALSE, TRUE))
+      if (!get_sof(cinfo, FALSE, FALSE, TRUE))
         return JPEG_SUSPENDED;
       break;
 
     case M_SOF10:               /* Progressive, arithmetic */
-      if (!get_sof(cinfo, TRUE, TRUE))
+      if (!get_sof(cinfo, TRUE, FALSE, TRUE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF11:               /* Lossless, arithmetic */
+      if (!get_sof(cinfo, FALSE, TRUE, TRUE))
         return JPEG_SUSPENDED;
       break;
 
     /* Currently unsupported SOFn types */
-    case M_SOF3:                /* Lossless, Huffman */
     case M_SOF5:                /* Differential sequential, Huffman */
     case M_SOF6:                /* Differential progressive, Huffman */
     case M_SOF7:                /* Differential lossless, Huffman */
     case M_JPG:                 /* Reserved for JPEG extensions */
-    case M_SOF11:               /* Lossless, arithmetic */
     case M_SOF13:               /* Differential sequential, arithmetic */
     case M_SOF14:               /* Differential progressive, arithmetic */
     case M_SOF15:               /* Differential lossless, arithmetic */
diff --git a/3rdparty/libjpeg-turbo/src/jdmaster.c b/3rdparty/libjpeg-turbo/src/jdmaster.c
index a3690bf560..80a4842ac1 100644
--- a/3rdparty/libjpeg-turbo/src/jdmaster.c
+++ b/3rdparty/libjpeg-turbo/src/jdmaster.c
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, 2019, 2022, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, 2022-2023, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -20,7 +22,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 #include "jdmaster.h"
 
 
@@ -33,6 +35,9 @@ LOCAL(boolean)
 use_merged_upsample(j_decompress_ptr cinfo)
 {
 #ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Colorspace conversion is not supported with lossless JPEG images */
+  if (cinfo->master->lossless)
+    return FALSE;
   /* Merging is the equivalent of plain box-filter upsampling */
   if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
     return FALSE;
@@ -97,154 +102,154 @@ jpeg_core_output_dimensions(j_decompress_ptr cinfo)
   int ci;
   jpeg_component_info *compptr;
 
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
-    /* Provide 1/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 1;
-    cinfo->_min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
-    /* Provide 2/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 2;
-    cinfo->_min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
-    /* Provide 3/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 3;
-    cinfo->_min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
-    /* Provide 4/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 4;
-    cinfo->_min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
-    /* Provide 5/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 5;
-    cinfo->_min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
-    /* Provide 6/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 6;
-    cinfo->_min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
-    /* Provide 7/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 7;
-    cinfo->_min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
-    /* Provide 8/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 8;
-    cinfo->_min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
-    /* Provide 9/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 9;
-    cinfo->_min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
-    /* Provide 10/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 10;
-    cinfo->_min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
-    /* Provide 11/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 11;
-    cinfo->_min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
-    /* Provide 12/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 12;
-    cinfo->_min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
-    /* Provide 13/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 13;
-    cinfo->_min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
-    /* Provide 14/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 14;
-    cinfo->_min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
-    /* Provide 15/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 15;
-    cinfo->_min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide 16/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE);
-    cinfo->_min_DCT_h_scaled_size = 16;
-    cinfo->_min_DCT_v_scaled_size = 16;
+  if (!cinfo->master->lossless) {
+    /* Compute actual output image dimensions and DCT scaling choices. */
+    if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
+      /* Provide 1/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 1;
+      cinfo->_min_DCT_v_scaled_size = 1;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
+      /* Provide 2/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 2;
+      cinfo->_min_DCT_v_scaled_size = 2;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
+      /* Provide 3/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 3;
+      cinfo->_min_DCT_v_scaled_size = 3;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
+      /* Provide 4/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 4;
+      cinfo->_min_DCT_v_scaled_size = 4;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
+      /* Provide 5/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 5;
+      cinfo->_min_DCT_v_scaled_size = 5;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
+      /* Provide 6/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 6;
+      cinfo->_min_DCT_v_scaled_size = 6;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
+      /* Provide 7/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 7;
+      cinfo->_min_DCT_v_scaled_size = 7;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
+      /* Provide 8/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 8;
+      cinfo->_min_DCT_v_scaled_size = 8;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
+      /* Provide 9/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 9;
+      cinfo->_min_DCT_v_scaled_size = 9;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
+      /* Provide 10/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 10;
+      cinfo->_min_DCT_v_scaled_size = 10;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
+      /* Provide 11/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 11;
+      cinfo->_min_DCT_v_scaled_size = 11;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
+      /* Provide 12/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 12;
+      cinfo->_min_DCT_v_scaled_size = 12;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
+      /* Provide 13/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 13;
+      cinfo->_min_DCT_v_scaled_size = 13;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
+      /* Provide 14/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 14;
+      cinfo->_min_DCT_v_scaled_size = 14;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
+      /* Provide 15/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 15;
+      cinfo->_min_DCT_v_scaled_size = 15;
+    } else {
+      /* Provide 16/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 16;
+      cinfo->_min_DCT_v_scaled_size = 16;
+    }
+
+    /* Recompute dimensions of components */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
+      compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
+    }
+  } else
+#endif /* !IDCT_SCALING_SUPPORTED */
+  {
+    /* Hardwire it to "no scaling" */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    /* jdinput.c has already initialized DCT_scaled_size,
+     * and has computed unscaled downsampled_width and downsampled_height.
+     */
   }
-
-  /* Recompute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
-    compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
-
-#endif /* IDCT_SCALING_SUPPORTED */
 }
 
 
@@ -273,54 +278,56 @@ jpeg_calc_output_dimensions(j_decompress_ptr cinfo)
 
 #ifdef IDCT_SCALING_SUPPORTED
 
-  /* In selecting the actual DCT scaling for each component, we try to
-   * scale up the chroma components via IDCT scaling rather than upsampling.
-   * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code adapts subsampling ratios which are powers of 2.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    int ssize = cinfo->_min_DCT_scaled_size;
-    while (ssize < DCTSIZE &&
-           ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
-            (compptr->h_samp_factor * ssize * 2) == 0) &&
-           ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
-            (compptr->v_samp_factor * ssize * 2) == 0)) {
-      ssize = ssize * 2;
-    }
+  if (!cinfo->master->lossless) {
+    /* In selecting the actual DCT scaling for each component, we try to
+     * scale up the chroma components via IDCT scaling rather than upsampling.
+     * This saves time if the upsampler gets to use 1:1 scaling.
+     * Note this code adapts subsampling ratios which are powers of 2.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      int ssize = cinfo->_min_DCT_scaled_size;
+      while (ssize < DCTSIZE &&
+             ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
+              (compptr->h_samp_factor * ssize * 2) == 0) &&
+             ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
+              (compptr->v_samp_factor * ssize * 2) == 0)) {
+        ssize = ssize * 2;
+      }
 #if JPEG_LIB_VERSION >= 70
-    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
+      compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
 #else
-    compptr->DCT_scaled_size = ssize;
+      compptr->DCT_scaled_size = ssize;
 #endif
-  }
-
-  /* Recompute downsampled dimensions of components;
-   * application needs to know these if using raw downsampled data.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Size in samples, after IDCT scaling */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_width *
-                    (long)(compptr->h_samp_factor * compptr->_DCT_scaled_size),
-                    (long)(cinfo->max_h_samp_factor * DCTSIZE));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long)cinfo->image_height *
-                    (long)(compptr->v_samp_factor * compptr->_DCT_scaled_size),
-                    (long)(cinfo->max_v_samp_factor * DCTSIZE));
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
+    }
 
+    /* Recompute downsampled dimensions of components;
+     * application needs to know these if using raw downsampled data.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      /* Size in samples, after IDCT scaling */
+      compptr->downsampled_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width *
+                      (long)(compptr->h_samp_factor *
+                             compptr->_DCT_scaled_size),
+                      (long)(cinfo->max_h_samp_factor * DCTSIZE));
+      compptr->downsampled_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height *
+                      (long)(compptr->v_samp_factor *
+                             compptr->_DCT_scaled_size),
+                      (long)(cinfo->max_v_samp_factor * DCTSIZE));
+    }
+  } else
 #endif /* IDCT_SCALING_SUPPORTED */
+  {
+    /* Hardwire it to "no scaling" */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
+     * and has computed unscaled downsampled_width and downsampled_height.
+     */
+  }
 
   /* Report number of components in selected colorspace. */
   /* Probably this should be in the color conversion module... */
@@ -409,27 +416,83 @@ prepare_range_limit_table(j_decompress_ptr cinfo)
 /* Allocate and fill in the sample_range_limit table */
 {
   JSAMPLE *table;
+  J12SAMPLE *table12;
+#ifdef D_LOSSLESS_SUPPORTED
+  J16SAMPLE *table16;
+#endif
   int i;
 
-  table = (JSAMPLE *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
-  table += (MAXJSAMPLE + 1);    /* allow negative subscripts of simple table */
-  cinfo->sample_range_limit = table;
-  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
-  memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
-  /* Main part of "simple" table: limit[x] = x */
-  for (i = 0; i <= MAXJSAMPLE; i++)
-    table[i] = (JSAMPLE)i;
-  table += CENTERJSAMPLE;       /* Point to where post-IDCT table starts */
-  /* End of simple table, rest of first half of post-IDCT table */
-  for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
-    table[i] = MAXJSAMPLE;
-  /* Second half of post-IDCT table */
-  memset(table + (2 * (MAXJSAMPLE + 1)), 0,
-         (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
-  memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
-         cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
+  if (cinfo->data_precision == 16) {
+#ifdef D_LOSSLESS_SUPPORTED
+    table16 = (J16SAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJ16SAMPLE + 1) + CENTERJ16SAMPLE) *
+                  sizeof(J16SAMPLE));
+    table16 += (MAXJ16SAMPLE + 1);  /* allow negative subscripts of simple
+                                       table */
+    cinfo->sample_range_limit = (JSAMPLE *)table16;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table16 - (MAXJ16SAMPLE + 1), 0,
+           (MAXJ16SAMPLE + 1) * sizeof(J16SAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJ16SAMPLE; i++)
+      table16[i] = (J16SAMPLE)i;
+    table16 += CENTERJ16SAMPLE; /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJ16SAMPLE; i < 2 * (MAXJ16SAMPLE + 1); i++)
+      table16[i] = MAXJ16SAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table16 + (2 * (MAXJ16SAMPLE + 1)), 0,
+           (2 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE) * sizeof(J16SAMPLE));
+    memcpy(table16 + (4 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE),
+           cinfo->sample_range_limit, CENTERJ16SAMPLE * sizeof(J16SAMPLE));
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+  } else if (cinfo->data_precision == 12) {
+    table12 = (J12SAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJ12SAMPLE + 1) + CENTERJ12SAMPLE) *
+                  sizeof(J12SAMPLE));
+    table12 += (MAXJ12SAMPLE + 1);  /* allow negative subscripts of simple
+                                       table */
+    cinfo->sample_range_limit = (JSAMPLE *)table12;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table12 - (MAXJ12SAMPLE + 1), 0,
+           (MAXJ12SAMPLE + 1) * sizeof(J12SAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJ12SAMPLE; i++)
+      table12[i] = (J12SAMPLE)i;
+    table12 += CENTERJ12SAMPLE; /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJ12SAMPLE; i < 2 * (MAXJ12SAMPLE + 1); i++)
+      table12[i] = MAXJ12SAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table12 + (2 * (MAXJ12SAMPLE + 1)), 0,
+           (2 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE) * sizeof(J12SAMPLE));
+    memcpy(table12 + (4 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE),
+           cinfo->sample_range_limit, CENTERJ12SAMPLE * sizeof(J12SAMPLE));
+  } else {
+    table = (JSAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
+    table += (MAXJSAMPLE + 1);  /* allow negative subscripts of simple table */
+    cinfo->sample_range_limit = table;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJSAMPLE; i++)
+      table[i] = (JSAMPLE)i;
+    table += CENTERJSAMPLE;     /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
+      table[i] = MAXJSAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table + (2 * (MAXJSAMPLE + 1)), 0,
+           (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
+    memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
+           cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
+  }
 }
 
 
@@ -452,6 +515,17 @@ master_selection(j_decompress_ptr cinfo)
   long samplesperrow;
   JDIMENSION jd_samplesperrow;
 
+  /* Disable IDCT scaling and raw (downsampled) data output in lossless mode.
+   * IDCT scaling is not useful in lossless mode, and it must be disabled in
+   * order to properly calculate the output dimensions.  Raw data output isn't
+   * particularly useful without subsampling and has not been tested in
+   * lossless mode.
+   */
+  if (cinfo->master->lossless) {
+    cinfo->raw_data_out = FALSE;
+    cinfo->scale_num = cinfo->scale_denom = 1;
+  }
+
   /* Initialize dimensions and other stuff */
   jpeg_calc_output_dimensions(cinfo);
   prepare_range_limit_table(cinfo);
@@ -480,7 +554,8 @@ master_selection(j_decompress_ptr cinfo)
     if (cinfo->raw_data_out)
       ERREXIT(cinfo, JERR_NOTIMPL);
     /* 2-pass quantizer only works in 3-component color space. */
-    if (cinfo->out_color_components != 3) {
+    if (cinfo->out_color_components != 3 ||
+        cinfo->out_color_space == JCS_RGB565) {
       cinfo->enable_1pass_quant = TRUE;
       cinfo->enable_external_quant = FALSE;
       cinfo->enable_2pass_quant = FALSE;
@@ -495,7 +570,12 @@ master_selection(j_decompress_ptr cinfo)
 
     if (cinfo->enable_1pass_quant) {
 #ifdef QUANT_1PASS_SUPPORTED
-      jinit_1pass_quantizer(cinfo);
+      if (cinfo->data_precision == 16)
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      else if (cinfo->data_precision == 12)
+        j12init_1pass_quantizer(cinfo);
+      else
+        jinit_1pass_quantizer(cinfo);
       master->quantizer_1pass = cinfo->cquantize;
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -505,7 +585,12 @@ master_selection(j_decompress_ptr cinfo)
     /* We use the 2-pass code to map to external colormaps. */
     if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
 #ifdef QUANT_2PASS_SUPPORTED
-      jinit_2pass_quantizer(cinfo);
+      if (cinfo->data_precision == 16)
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      else if (cinfo->data_precision == 12)
+        j12init_2pass_quantizer(cinfo);
+      else
+        jinit_2pass_quantizer(cinfo);
       master->quantizer_2pass = cinfo->cquantize;
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -520,42 +605,122 @@ master_selection(j_decompress_ptr cinfo)
   if (!cinfo->raw_data_out) {
     if (master->using_merged_upsample) {
 #ifdef UPSAMPLE_MERGING_SUPPORTED
-      jinit_merged_upsampler(cinfo); /* does color conversion too */
+      if (cinfo->data_precision == 16)
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      else if (cinfo->data_precision == 12)
+        j12init_merged_upsampler(cinfo); /* does color conversion too */
+      else
+        jinit_merged_upsampler(cinfo); /* does color conversion too */
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
     } else {
-      jinit_color_deconverter(cinfo);
-      jinit_upsampler(cinfo);
-    }
-    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
-  }
-  /* Inverse DCT */
-  jinit_inverse_dct(cinfo);
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code) {
-#ifdef D_ARITH_CODING_SUPPORTED
-    jinit_arith_decoder(cinfo);
+      if (cinfo->data_precision == 16) {
+#ifdef D_LOSSLESS_SUPPORTED
+        j16init_color_deconverter(cinfo);
+        j16init_upsampler(cinfo);
 #else
-    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+      } else if (cinfo->data_precision == 12) {
+        j12init_color_deconverter(cinfo);
+        j12init_upsampler(cinfo);
+      } else {
+        jinit_color_deconverter(cinfo);
+        jinit_upsampler(cinfo);
+      }
+    }
+    if (cinfo->data_precision == 16)
+#ifdef D_LOSSLESS_SUPPORTED
+      j16init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+    else if (cinfo->data_precision == 12)
+      j12init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+    else
+      jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+
+  if (cinfo->master->lossless) {
+#ifdef D_LOSSLESS_SUPPORTED
+    /* Prediction, sample undifferencing, point transform, and sample size
+     * scaling
+     */
+    if (cinfo->data_precision == 16)
+      j16init_lossless_decompressor(cinfo);
+    else if (cinfo->data_precision == 12)
+      j12init_lossless_decompressor(cinfo);
+    else
+      jinit_lossless_decompressor(cinfo);
+    /* Entropy decoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+    } else {
+      jinit_lhuff_decoder(cinfo);
+    }
+
+    /* Initialize principal buffer controllers. */
+    use_c_buffer = cinfo->inputctl->has_multiple_scans ||
+                   cinfo->buffered_image;
+    if (cinfo->data_precision == 16)
+      j16init_d_diff_controller(cinfo, use_c_buffer);
+    else if (cinfo->data_precision == 12)
+      j12init_d_diff_controller(cinfo, use_c_buffer);
+    else
+      jinit_d_diff_controller(cinfo, use_c_buffer);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
   } else {
-    if (cinfo->progressive_mode) {
-#ifdef D_PROGRESSIVE_SUPPORTED
-      jinit_phuff_decoder(cinfo);
+    if (cinfo->data_precision == 16)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+    /* Inverse DCT */
+    if (cinfo->data_precision == 12)
+      j12init_inverse_dct(cinfo);
+    else
+      jinit_inverse_dct(cinfo);
+    /* Entropy decoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+#ifdef D_ARITH_CODING_SUPPORTED
+      jinit_arith_decoder(cinfo);
 #else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
 #endif
-    } else
-      jinit_huff_decoder(cinfo);
+    } else {
+      if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+        jinit_phuff_decoder(cinfo);
+#else
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      } else
+        jinit_huff_decoder(cinfo);
+    }
+
+    /* Initialize principal buffer controllers. */
+    use_c_buffer = cinfo->inputctl->has_multiple_scans ||
+                   cinfo->buffered_image;
+    if (cinfo->data_precision == 12)
+      j12init_d_coef_controller(cinfo, use_c_buffer);
+    else
+      jinit_d_coef_controller(cinfo, use_c_buffer);
   }
 
-  /* Initialize principal buffer controllers. */
-  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
-  jinit_d_coef_controller(cinfo, use_c_buffer);
-
-  if (!cinfo->raw_data_out)
-    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+  if (!cinfo->raw_data_out) {
+    if (cinfo->data_precision == 16)
+#ifdef D_LOSSLESS_SUPPORTED
+      j16init_d_main_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+    else if (cinfo->data_precision == 12)
+      j12init_d_main_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+    else
+      jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+  }
 
   /* We can now tell the memory manager to allocate virtual arrays. */
   (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.c b/3rdparty/libjpeg-turbo/src/jdmerge.c
index 3a456d6581..49f2006fc0 100644
--- a/3rdparty/libjpeg-turbo/src/jdmerge.c
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2020, 2022, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -42,7 +42,6 @@
 #include "jpeglib.h"
 #include "jdmerge.h"
 #include "jsimd.h"
-#include "jconfigint.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
@@ -168,20 +167,20 @@ build_ycc_rgb_table(j_decompress_ptr cinfo)
 
   upsample->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(int));
+                                (_MAXJSAMPLE + 1) * sizeof(int));
   upsample->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(int));
+                                (_MAXJSAMPLE + 1) * sizeof(int));
   upsample->Cr_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(JLONG));
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
   upsample->Cb_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE + 1) * sizeof(JLONG));
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
 
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+  for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     upsample->Cr_r_tab[i] = (int)
                     RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
@@ -220,14 +219,14 @@ start_pass_merged_upsample(j_decompress_ptr cinfo)
  */
 
 METHODDEF(void)
-merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+merged_2v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                    JDIMENSION *in_row_group_ctr,
-                   JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 2:1 vertical sampling case: may need a spare row. */
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
-  JSAMPROW work_ptrs[2];
+  _JSAMPROW work_ptrs[2];
   JDIMENSION num_rows;          /* number of rows returned to caller */
 
   if (upsample->spare_full) {
@@ -235,8 +234,8 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     JDIMENSION size = upsample->out_row_width;
     if (cinfo->out_color_space == JCS_RGB565)
       size = cinfo->output_width * 2;
-    jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0, 1,
-                      size);
+    _jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+                       1, size);
     num_rows = 1;
     upsample->spare_full = FALSE;
   } else {
@@ -271,9 +270,9 @@ merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+merged_1v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                    JDIMENSION *in_row_group_ctr,
-                   JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 /* 1:1 vertical sampling case: much easier, never need a spare row. */
 {
@@ -303,8 +302,8 @@ merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                     JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v1_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
@@ -348,8 +347,8 @@ h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                     JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v2_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
@@ -475,8 +474,8 @@ static INLINE boolean is_big_endian(void)
 
 
 METHODDEF(void)
-h2v1_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v1_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   if (is_big_endian())
     h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
@@ -488,8 +487,8 @@ h2v1_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-h2v1_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                          JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v1_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   if (is_big_endian())
     h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
@@ -501,8 +500,8 @@ h2v1_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-h2v2_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v2_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   if (is_big_endian())
     h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
@@ -514,8 +513,8 @@ h2v2_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 
 METHODDEF(void)
-h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                          JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
+h2v2_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
 {
   if (is_big_endian())
     h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
@@ -535,10 +534,13 @@ h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 GLOBAL(void)
-jinit_merged_upsampler(j_decompress_ptr cinfo)
+_jinit_merged_upsampler(j_decompress_ptr cinfo)
 {
   my_merged_upsample_ptr upsample;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   upsample = (my_merged_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_merged_upsampler));
@@ -549,10 +551,12 @@ jinit_merged_upsampler(j_decompress_ptr cinfo)
   upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
 
   if (cinfo->max_v_samp_factor == 2) {
-    upsample->pub.upsample = merged_2v_upsample;
+    upsample->pub._upsample = merged_2v_upsample;
+#ifdef WITH_SIMD
     if (jsimd_can_h2v2_merged_upsample())
       upsample->upmethod = jsimd_h2v2_merged_upsample;
     else
+#endif
       upsample->upmethod = h2v2_merged_upsample;
     if (cinfo->out_color_space == JCS_RGB565) {
       if (cinfo->dither_mode != JDITHER_NONE) {
@@ -562,14 +566,16 @@ jinit_merged_upsampler(j_decompress_ptr cinfo)
       }
     }
     /* Allocate a spare row buffer */
-    upsample->spare_row = (JSAMPROW)
+    upsample->spare_row = (_JSAMPROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                (size_t)(upsample->out_row_width * sizeof(JSAMPLE)));
+                (size_t)(upsample->out_row_width * sizeof(_JSAMPLE)));
   } else {
-    upsample->pub.upsample = merged_1v_upsample;
+    upsample->pub._upsample = merged_1v_upsample;
+#ifdef WITH_SIMD
     if (jsimd_can_h2v1_merged_upsample())
       upsample->upmethod = jsimd_h2v1_merged_upsample;
     else
+#endif
       upsample->upmethod = h2v1_merged_upsample;
     if (cinfo->out_color_space == JCS_RGB565) {
       if (cinfo->dither_mode != JDITHER_NONE) {
diff --git a/3rdparty/libjpeg-turbo/src/jdmerge.h b/3rdparty/libjpeg-turbo/src/jdmerge.h
index b583396b10..73cbd60549 100644
--- a/3rdparty/libjpeg-turbo/src/jdmerge.h
+++ b/3rdparty/libjpeg-turbo/src/jdmerge.h
@@ -4,13 +4,14 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2020, D. R. Commander.
+ * Copyright (C) 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  */
 
 #define JPEG_INTERNALS
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
@@ -21,8 +22,8 @@ typedef struct {
   struct jpeg_upsampler pub;    /* public fields */
 
   /* Pointer to routine to do actual upsampling/conversion of one row group */
-  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+  void (*upmethod) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf);
 
   /* Private state for YCC->RGB conversion */
   int *Cr_r_tab;                /* => table for Cr to R conversion */
@@ -35,7 +36,7 @@ typedef struct {
    * application provides just a one-row buffer; we also use the spare
    * to discard the dummy last row if the image height is odd.
    */
-  JSAMPROW spare_row;
+  _JSAMPROW spare_row;
   boolean spare_full;           /* T if spare buffer is occupied */
 
   JDIMENSION out_row_width;     /* samples per output row */
diff --git a/3rdparty/libjpeg-turbo/src/jdmrg565.c b/3rdparty/libjpeg-turbo/src/jdmrg565.c
index 980a4e216e..0c719b912c 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrg565.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrg565.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014-2015, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2014-2015, 2018, 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -15,18 +15,19 @@
 
 INLINE
 LOCAL(void)
-h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo,
+                                  _JSAMPIMAGE input_buf,
                                   JDIMENSION in_row_group_ctr,
-                                  JSAMPARRAY output_buf)
+                                  _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
@@ -86,18 +87,18 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 INLINE
 LOCAL(void)
 h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
-                                   JSAMPIMAGE input_buf,
+                                   _JSAMPIMAGE input_buf,
                                    JDIMENSION in_row_group_ctr,
-                                   JSAMPARRAY output_buf)
+                                   _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
@@ -159,18 +160,18 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
 
 INLINE
 LOCAL(void)
-h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                                   JDIMENSION in_row_group_ctr,
-                                  JSAMPARRAY output_buf)
+                                  _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
@@ -255,18 +256,18 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 INLINE
 LOCAL(void)
 h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
-                                   JSAMPIMAGE input_buf,
+                                   _JSAMPIMAGE input_buf,
                                    JDIMENSION in_row_group_ctr,
-                                   JSAMPARRAY output_buf)
+                                   _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
diff --git a/3rdparty/libjpeg-turbo/src/jdmrgext.c b/3rdparty/libjpeg-turbo/src/jdmrgext.c
index 9bf4f1a307..8139e0a3ed 100644
--- a/3rdparty/libjpeg-turbo/src/jdmrgext.c
+++ b/3rdparty/libjpeg-turbo/src/jdmrgext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2020, 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -21,18 +21,18 @@
 
 INLINE
 LOCAL(void)
-h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+h2v1_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                               JDIMENSION in_row_group_ctr,
-                              JSAMPARRAY output_buf)
+                              _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
@@ -57,7 +57,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr += RGB_PIXELSIZE;
     y  = *inptr0++;
@@ -65,7 +65,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr += RGB_PIXELSIZE;
   }
@@ -81,7 +81,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr[RGB_ALPHA] = 0xFF;
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
   }
 }
@@ -93,18 +93,18 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 INLINE
 LOCAL(void)
-h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+h2v2_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                               JDIMENSION in_row_group_ctr,
-                              JSAMPARRAY output_buf)
+                              _JSAMPARRAY output_buf)
 {
   my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
   register int y, cred, cgreen, cblue;
   int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
   JDIMENSION col;
   /* copy these pointers into registers if possible */
-  register JSAMPLE *range_limit = cinfo->sample_range_limit;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *Crrtab = upsample->Cr_r_tab;
   int *Cbbtab = upsample->Cb_b_tab;
   JLONG *Crgtab = upsample->Cr_g_tab;
@@ -131,7 +131,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr0 += RGB_PIXELSIZE;
     y  = *inptr00++;
@@ -139,7 +139,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr0 += RGB_PIXELSIZE;
     y  = *inptr01++;
@@ -147,7 +147,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr1 += RGB_PIXELSIZE;
     y  = *inptr01++;
@@ -155,7 +155,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     outptr1 += RGB_PIXELSIZE;
   }
@@ -171,14 +171,14 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr0[RGB_ALPHA] = 0xFF;
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
     y  = *inptr01;
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
-    outptr1[RGB_ALPHA] = 0xFF;
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
 #endif
   }
 }
diff --git a/3rdparty/libjpeg-turbo/src/jdphuff.c b/3rdparty/libjpeg-turbo/src/jdphuff.c
index 9680ebcbd0..bf97333a34 100644
--- a/3rdparty/libjpeg-turbo/src/jdphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdphuff.c
@@ -3,6 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2015-2016, 2018-2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -23,7 +25,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"             /* Declarations shared with jdhuff.c */
+#include "jdhuff.h"             /* Declarations shared with jd*huff.c */
 #include <limits.h>
 
 
diff --git a/3rdparty/libjpeg-turbo/src/jdpostct.c b/3rdparty/libjpeg-turbo/src/jdpostct.c
index 6a2cf5c1b3..d38495f5f3 100644
--- a/3rdparty/libjpeg-turbo/src/jdpostct.c
+++ b/3rdparty/libjpeg-turbo/src/jdpostct.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -22,8 +22,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /* Private buffer controller object */
 
 typedef struct {
@@ -35,7 +38,7 @@ typedef struct {
    * for one-pass operation, a strip buffer is sufficient.
    */
   jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;            /* strip buffer, or current strip of virtual */
+  _JSAMPARRAY buffer;           /* strip buffer, or current strip of virtual */
   JDIMENSION strip_height;      /* buffer size in rows */
   /* for two-pass mode only: */
   JDIMENSION starting_row;      /* row # of first row in current strip */
@@ -46,26 +49,28 @@ typedef my_post_controller *my_post_ptr;
 
 
 /* Forward declarations */
+#if BITS_IN_JSAMPLE != 16
 METHODDEF(void) post_process_1pass(j_decompress_ptr cinfo,
-                                   JSAMPIMAGE input_buf,
+                                   _JSAMPIMAGE input_buf,
                                    JDIMENSION *in_row_group_ctr,
                                    JDIMENSION in_row_groups_avail,
-                                   JSAMPARRAY output_buf,
+                                   _JSAMPARRAY output_buf,
                                    JDIMENSION *out_row_ctr,
                                    JDIMENSION out_rows_avail);
-#ifdef QUANT_2PASS_SUPPORTED
+#endif
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
 METHODDEF(void) post_process_prepass(j_decompress_ptr cinfo,
-                                     JSAMPIMAGE input_buf,
+                                     _JSAMPIMAGE input_buf,
                                      JDIMENSION *in_row_group_ctr,
                                      JDIMENSION in_row_groups_avail,
-                                     JSAMPARRAY output_buf,
+                                     _JSAMPARRAY output_buf,
                                      JDIMENSION *out_row_ctr,
                                      JDIMENSION out_rows_avail);
 METHODDEF(void) post_process_2pass(j_decompress_ptr cinfo,
-                                   JSAMPIMAGE input_buf,
+                                   _JSAMPIMAGE input_buf,
                                    JDIMENSION *in_row_group_ctr,
                                    JDIMENSION in_row_groups_avail,
-                                   JSAMPARRAY output_buf,
+                                   _JSAMPARRAY output_buf,
                                    JDIMENSION *out_row_ctr,
                                    JDIMENSION out_rows_avail);
 #endif
@@ -82,39 +87,42 @@ start_pass_dpost(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
 
   switch (pass_mode) {
   case JBUF_PASS_THRU:
+#if BITS_IN_JSAMPLE != 16
     if (cinfo->quantize_colors) {
       /* Single-pass processing with color quantization. */
-      post->pub.post_process_data = post_process_1pass;
+      post->pub._post_process_data = post_process_1pass;
       /* We could be doing buffered-image output before starting a 2-pass
        * color quantization; in that case, jinit_d_post_controller did not
        * allocate a strip buffer.  Use the virtual-array buffer as workspace.
        */
       if (post->buffer == NULL) {
-        post->buffer = (*cinfo->mem->access_virt_sarray)
+        post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
           ((j_common_ptr)cinfo, post->whole_image,
            (JDIMENSION)0, post->strip_height, TRUE);
       }
-    } else {
+    } else
+#endif
+    {
       /* For single-pass processing without color quantization,
        * I have no work to do; just call the upsampler directly.
        */
-      post->pub.post_process_data = cinfo->upsample->upsample;
+      post->pub._post_process_data = cinfo->upsample->_upsample;
     }
     break;
-#ifdef QUANT_2PASS_SUPPORTED
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
   case JBUF_SAVE_AND_PASS:
     /* First pass of 2-pass quantization */
     if (post->whole_image == NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_prepass;
+    post->pub._post_process_data = post_process_prepass;
     break;
   case JBUF_CRANK_DEST:
     /* Second pass of 2-pass quantization */
     if (post->whole_image == NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_2pass;
+    post->pub._post_process_data = post_process_2pass;
     break;
-#endif /* QUANT_2PASS_SUPPORTED */
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
   default:
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
     break;
@@ -128,10 +136,12 @@ start_pass_dpost(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
  * This is used for color precision reduction as well as one-pass quantization.
  */
 
+#if BITS_IN_JSAMPLE != 16
+
 METHODDEF(void)
-post_process_1pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+post_process_1pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                    JDIMENSION *in_row_group_ctr,
-                   JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr)cinfo->post;
@@ -143,27 +153,29 @@ post_process_1pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   if (max_rows > post->strip_height)
     max_rows = post->strip_height;
   num_rows = 0;
-  (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr,
-                                in_row_groups_avail, post->buffer, &num_rows,
-                                max_rows);
+  (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
+                                 in_row_groups_avail, post->buffer, &num_rows,
+                                 max_rows);
   /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo, post->buffer,
-                                       output_buf + *out_row_ctr,
-                                       (int)num_rows);
+  (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer,
+                                        output_buf + *out_row_ctr,
+                                        (int)num_rows);
   *out_row_ctr += num_rows;
 }
 
+#endif
 
-#ifdef QUANT_2PASS_SUPPORTED
+
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
 
 /*
  * Process some data in the first pass of 2-pass quantization.
  */
 
 METHODDEF(void)
-post_process_prepass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+post_process_prepass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                      JDIMENSION *in_row_group_ctr,
-                     JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                     JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
                      JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr)cinfo->post;
@@ -171,23 +183,23 @@ post_process_prepass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
+    post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
         ((j_common_ptr)cinfo, post->whole_image,
          post->starting_row, post->strip_height, TRUE);
   }
 
   /* Upsample some data (up to a strip height's worth). */
   old_next_row = post->next_row;
-  (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr,
-                                in_row_groups_avail, post->buffer,
-                                &post->next_row, post->strip_height);
+  (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
+                                 in_row_groups_avail, post->buffer,
+                                 &post->next_row, post->strip_height);
 
   /* Allow quantizer to scan new data.  No data is emitted, */
   /* but we advance out_row_ctr so outer loop can tell when we're done. */
   if (post->next_row > old_next_row) {
     num_rows = post->next_row - old_next_row;
-    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-                                         (JSAMPARRAY)NULL, (int)num_rows);
+    (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + old_next_row,
+                                          (_JSAMPARRAY)NULL, (int)num_rows);
     *out_row_ctr += num_rows;
   }
 
@@ -204,9 +216,9 @@ post_process_prepass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 METHODDEF(void)
-post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+post_process_2pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
                    JDIMENSION *in_row_group_ctr,
-                   JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr)cinfo->post;
@@ -214,7 +226,7 @@ post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
+    post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
         ((j_common_ptr)cinfo, post->whole_image,
          post->starting_row, post->strip_height, FALSE);
   }
@@ -230,9 +242,9 @@ post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     num_rows = max_rows;
 
   /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + post->next_row,
-                                       output_buf + *out_row_ctr,
-                                       (int)num_rows);
+  (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + post->next_row,
+                                        output_buf + *out_row_ctr,
+                                        (int)num_rows);
   *out_row_ctr += num_rows;
 
   /* Advance if we filled the strip. */
@@ -243,7 +255,7 @@ post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   }
 }
 
-#endif /* QUANT_2PASS_SUPPORTED */
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
 
 
 /*
@@ -251,10 +263,13 @@ post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
  */
 
 GLOBAL(void)
-jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+_jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 {
   my_post_ptr post;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   post = (my_post_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_post_controller));
@@ -265,6 +280,7 @@ jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 
   /* Create the quantization buffer, if needed */
   if (cinfo->quantize_colors) {
+#if BITS_IN_JSAMPLE != 16
     /* The buffer strip height is max_v_samp_factor, which is typically
      * an efficient number of rows for upsampling to return.
      * (In the presence of output rescaling, we might want to be smarter?)
@@ -285,10 +301,15 @@ jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 #endif /* QUANT_2PASS_SUPPORTED */
     } else {
       /* One-pass color quantization: just make a strip buffer. */
-      post->buffer = (*cinfo->mem->alloc_sarray)
+      post->buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
         ((j_common_ptr)cinfo, JPOOL_IMAGE,
          cinfo->output_width * cinfo->out_color_components,
          post->strip_height);
     }
+#else
+    ERREXIT(cinfo, JERR_NOTIMPL);
+#endif
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdsample.c b/3rdparty/libjpeg-turbo/src/jdsample.c
index eaad72a030..cc8015c97d 100644
--- a/3rdparty/libjpeg-turbo/src/jdsample.c
+++ b/3rdparty/libjpeg-turbo/src/jdsample.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2022, D. R. Commander.
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
  * Copyright (C) 2015, Google, Inc.
  * Copyright (C) 2019-2020, Arm Limited.
@@ -28,10 +28,12 @@
 #include "jinclude.h"
 #include "jdsample.h"
 #include "jsimd.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
 /*
  * Initialize for an upsampling pass.
  */
@@ -57,9 +59,9 @@ start_pass_upsample(j_decompress_ptr cinfo)
  */
 
 METHODDEF(void)
-sep_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+sep_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
              JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
-             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+             _JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
              JDIMENSION out_rows_avail)
 {
   my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
@@ -95,9 +97,10 @@ sep_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   if (num_rows > out_rows_avail)
     num_rows = out_rows_avail;
 
-  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-                                     (JDIMENSION)upsample->next_row_out,
-                                     output_buf + *out_row_ctr, (int)num_rows);
+  (*cinfo->cconvert->_color_convert) (cinfo, upsample->color_buf,
+                                      (JDIMENSION)upsample->next_row_out,
+                                      output_buf + *out_row_ctr,
+                                      (int)num_rows);
 
   /* Adjust counts */
   *out_row_ctr += num_rows;
@@ -124,7 +127,7 @@ sep_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
 METHODDEF(void)
 fullsize_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                  JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+                  _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
   *output_data_ptr = input_data;
 }
@@ -137,7 +140,7 @@ fullsize_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 noop_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
   *output_data_ptr = NULL;      /* safety check */
 }
@@ -156,14 +159,14 @@ noop_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-             JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+             _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
   my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
   register int h;
-  JSAMPROW outend;
+  _JSAMPROW outend;
   int h_expand, v_expand;
   int inrow, outrow;
 
@@ -184,8 +187,8 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     }
     /* Generate any additional output rows by duplicating the first one */
     if (v_expand > 1) {
-      jcopy_sample_rows(output_data, outrow, output_data, outrow + 1,
-                        v_expand - 1, cinfo->output_width);
+      _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1,
+                         v_expand - 1, cinfo->output_width);
     }
     inrow++;
     outrow += v_expand;
@@ -200,12 +203,12 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
+  _JSAMPROW outend;
   int inrow;
 
   for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
@@ -228,12 +231,12 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
+  _JSAMPROW outend;
   int inrow, outrow;
 
   inrow = outrow = 0;
@@ -246,8 +249,8 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
-    jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1,
-                      cinfo->output_width);
+    _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1,
+                       cinfo->output_width);
     inrow++;
     outrow += 2;
   }
@@ -271,10 +274,10 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
   register int invalue;
   register JDIMENSION colctr;
   int inrow;
@@ -284,20 +287,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     outptr = output_data[inrow];
     /* Special case for first column */
     invalue = *inptr++;
-    *outptr++ = (JSAMPLE)invalue;
-    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
+    *outptr++ = (_JSAMPLE)invalue;
+    *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
 
     for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
       /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
       invalue = (*inptr++) * 3;
-      *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
-      *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
+      *outptr++ = (_JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
+      *outptr++ = (_JSAMPLE)((invalue + inptr[0] + 2) >> 2);
     }
 
     /* Special case for last column */
     invalue = *inptr;
-    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
-    *outptr++ = (JSAMPLE)invalue;
+    *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
+    *outptr++ = (_JSAMPLE)invalue;
   }
 }
 
@@ -311,10 +314,10 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
-  JSAMPARRAY output_data = *output_data_ptr;
-  JSAMPROW inptr0, inptr1, outptr;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  _JSAMPROW inptr0, inptr1, outptr;
 #if BITS_IN_JSAMPLE == 8
   int thiscolsum, bias;
 #else
@@ -339,7 +342,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
       for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
         thiscolsum = (*inptr0++) * 3 + (*inptr1++);
-        *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
+        *outptr++ = (_JSAMPLE)((thiscolsum + bias) >> 2);
       }
     }
     inrow++;
@@ -357,10 +360,10 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 METHODDEF(void)
 h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
 {
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr0, inptr1, outptr;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr0, inptr1, outptr;
 #if BITS_IN_JSAMPLE == 8
   register int thiscolsum, lastcolsum, nextcolsum;
 #else
@@ -383,22 +386,22 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
       /* Special case for first column */
       thiscolsum = (*inptr0++) * 3 + (*inptr1++);
       nextcolsum = (*inptr0++) * 3 + (*inptr1++);
-      *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
-      *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 8) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
       lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
 
       for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
         /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
         /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
         nextcolsum = (*inptr0++) * 3 + (*inptr1++);
-        *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
-        *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
+        *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
+        *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
         lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
       }
 
       /* Special case for last column */
-      *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
-      *outptr++ = (JSAMPLE)((thiscolsum * 4 + 7) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 7) >> 4);
     }
     inrow++;
   }
@@ -410,7 +413,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jinit_upsampler(j_decompress_ptr cinfo)
+_jinit_upsampler(j_decompress_ptr cinfo)
 {
   my_upsample_ptr upsample;
   int ci;
@@ -418,13 +421,16 @@ jinit_upsampler(j_decompress_ptr cinfo)
   boolean need_buffer, do_fancy;
   int h_in_group, v_in_group, h_out_group, v_out_group;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   if (!cinfo->master->jinit_upsampler_no_alloc) {
     upsample = (my_upsample_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                   sizeof(my_upsampler));
     cinfo->upsample = (struct jpeg_upsampler *)upsample;
     upsample->pub.start_pass = start_pass_upsample;
-    upsample->pub.upsample = sep_upsample;
+    upsample->pub._upsample = sep_upsample;
     upsample->pub.need_context_rows = FALSE; /* until we find out differently */
   } else
     upsample = (my_upsample_ptr)cinfo->upsample;
@@ -464,21 +470,25 @@ jinit_upsampler(j_decompress_ptr cinfo)
     } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
       /* Special cases for 2h1v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
+#ifdef WITH_SIMD
         if (jsimd_can_h2v1_fancy_upsample())
           upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
         else
+#endif
           upsample->methods[ci] = h2v1_fancy_upsample;
       } else {
+#ifdef WITH_SIMD
         if (jsimd_can_h2v1_upsample())
           upsample->methods[ci] = jsimd_h2v1_upsample;
         else
+#endif
           upsample->methods[ci] = h2v1_upsample;
       }
     } else if (h_in_group == h_out_group &&
                v_in_group * 2 == v_out_group && do_fancy) {
       /* Non-fancy upsampling is handled by the generic method */
-#if defined(__arm__) || defined(__aarch64__) || \
-    defined(_M_ARM) || defined(_M_ARM64)
+#if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \
+                           defined(_M_ARM) || defined(_M_ARM64))
       if (jsimd_can_h1v2_fancy_upsample())
         upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
       else
@@ -489,21 +499,25 @@ jinit_upsampler(j_decompress_ptr cinfo)
                v_in_group * 2 == v_out_group) {
       /* Special cases for 2h2v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
+#ifdef WITH_SIMD
         if (jsimd_can_h2v2_fancy_upsample())
           upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
         else
+#endif
           upsample->methods[ci] = h2v2_fancy_upsample;
         upsample->pub.need_context_rows = TRUE;
       } else {
+#ifdef WITH_SIMD
         if (jsimd_can_h2v2_upsample())
           upsample->methods[ci] = jsimd_h2v2_upsample;
         else
+#endif
           upsample->methods[ci] = h2v2_upsample;
       }
     } else if ((h_out_group % h_in_group) == 0 &&
                (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
-#if defined(__mips__)
+#if defined(WITH_SIMD) && defined(__mips__)
       if (jsimd_can_int_upsample())
         upsample->methods[ci] = jsimd_int_upsample;
       else
@@ -514,7 +528,7 @@ jinit_upsampler(j_decompress_ptr cinfo)
     } else
       ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
     if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) {
-      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+      upsample->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
         ((j_common_ptr)cinfo, JPOOL_IMAGE,
          (JDIMENSION)jround_up((long)cinfo->output_width,
                                (long)cinfo->max_h_samp_factor),
@@ -522,3 +536,5 @@ jinit_upsampler(j_decompress_ptr cinfo)
     }
   }
 }
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/3rdparty/libjpeg-turbo/src/jdsample.h b/3rdparty/libjpeg-turbo/src/jdsample.h
index a6bf08a032..a8a9298094 100644
--- a/3rdparty/libjpeg-turbo/src/jdsample.h
+++ b/3rdparty/libjpeg-turbo/src/jdsample.h
@@ -3,19 +3,22 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  */
 
 #define JPEG_INTERNALS
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
 /* Pointer to routine to upsample a single component */
 typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
                                jpeg_component_info *compptr,
-                               JSAMPARRAY input_data,
-                               JSAMPARRAY *output_data_ptr);
+                               _JSAMPARRAY input_data,
+                               _JSAMPARRAY *output_data_ptr);
 
 /* Private subobject */
 
@@ -29,7 +32,7 @@ typedef struct {
    * ie do not need rescaling.  The corresponding entry of color_buf[] is
    * simply set to point to the input data array, thereby avoiding copying.
    */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
+  _JSAMPARRAY color_buf[MAX_COMPONENTS];
 
   /* Per-component upsampling method pointers */
   upsample1_ptr methods[MAX_COMPONENTS];
diff --git a/3rdparty/libjpeg-turbo/src/jdtrans.c b/3rdparty/libjpeg-turbo/src/jdtrans.c
index d7ec4b83b3..719813f676 100644
--- a/3rdparty/libjpeg-turbo/src/jdtrans.c
+++ b/3rdparty/libjpeg-turbo/src/jdtrans.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2020, D. R. Commander.
+ * Copyright (C) 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -16,7 +16,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jpegcomp.h"
+#include "jpegapicomp.h"
 
 
 /* Forward declarations */
@@ -48,6 +48,9 @@ LOCAL(void) transdecode_master_selection(j_decompress_ptr cinfo);
 GLOBAL(jvirt_barray_ptr *)
 jpeg_read_coefficients(j_decompress_ptr cinfo)
 {
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   if (cinfo->global_state == DSTATE_READY) {
     /* First call: initialize active modules */
     transdecode_master_selection(cinfo);
@@ -127,7 +130,10 @@ transdecode_master_selection(j_decompress_ptr cinfo)
   }
 
   /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, TRUE);
+  if (cinfo->data_precision == 12)
+    j12init_d_coef_controller(cinfo, TRUE);
+  else
+    jinit_d_coef_controller(cinfo, TRUE);
 
   /* We can now tell the memory manager to allocate virtual arrays. */
   (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
diff --git a/3rdparty/libjpeg-turbo/src/jerror.c b/3rdparty/libjpeg-turbo/src/jerror.c
index d544702937..3a75fec02c 100644
--- a/3rdparty/libjpeg-turbo/src/jerror.c
+++ b/3rdparty/libjpeg-turbo/src/jerror.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2022, D. R. Commander.
+ * Copyright (C) 2022, 2024, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -46,7 +46,7 @@
 
 #define JMESSAGE(code, string)  string,
 
-const char * const jpeg_std_message_table[] = {
+static const char * const jpeg_std_message_table[] = {
 #include "jerror.h"
   NULL
 };
@@ -189,9 +189,9 @@ format_message(j_common_ptr cinfo, char *buffer)
 
   /* Format the message into the passed buffer */
   if (isstring)
-    snprintf(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
   else
-    snprintf(buffer, JMSG_LENGTH_MAX, msgtext,
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext,
              err->msg_parm.i[0], err->msg_parm.i[1],
              err->msg_parm.i[2], err->msg_parm.i[3],
              err->msg_parm.i[4], err->msg_parm.i[5],
@@ -229,23 +229,17 @@ reset_error_mgr(j_common_ptr cinfo)
 GLOBAL(struct jpeg_error_mgr *)
 jpeg_std_error(struct jpeg_error_mgr *err)
 {
+  memset(err, 0, sizeof(struct jpeg_error_mgr));
+
   err->error_exit = error_exit;
   err->emit_message = emit_message;
   err->output_message = output_message;
   err->format_message = format_message;
   err->reset_error_mgr = reset_error_mgr;
 
-  err->trace_level = 0;         /* default = no tracing */
-  err->num_warnings = 0;        /* no warnings emitted yet */
-  err->msg_code = 0;            /* may be useful as a flag for "no error" */
-
   /* Initialize message table pointers */
   err->jpeg_message_table = jpeg_std_message_table;
   err->last_jpeg_message = (int)JMSG_LASTMSGCODE - 1;
 
-  err->addon_message_table = NULL;
-  err->first_addon_message = 0; /* for safety */
-  err->last_addon_message = 0;
-
   return err;
 }
diff --git a/3rdparty/libjpeg-turbo/src/jerror.h b/3rdparty/libjpeg-turbo/src/jerror.h
index eb44a1140a..71ba03e2a3 100644
--- a/3rdparty/libjpeg-turbo/src/jerror.h
+++ b/3rdparty/libjpeg-turbo/src/jerror.h
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2014, 2017, 2021-2022, D. R. Commander.
+ * Copyright (C) 2014, 2017, 2021-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -53,7 +55,8 @@ JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
 #endif
-JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCT_COEF,
+         "DCT coefficient (lossy) or spatial difference (lossless) out of range")
 JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_BAD_DROP_SAMPLING,
@@ -69,9 +72,9 @@ JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
 JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
 JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
 JMESSAGE(JERR_BAD_PROGRESSION,
-         "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+         "Invalid progressive/lossless parameters Ss=%d Se=%d Ah=%d Al=%d")
 JMESSAGE(JERR_BAD_PROG_SCRIPT,
-         "Invalid progressive parameters at scan script entry %d")
+         "Invalid progressive/lossless parameters at scan script entry %d")
 JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
 JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
 JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
@@ -108,7 +111,7 @@ JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
 #endif
-JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_BACKING_STORE, "Memory limit exceeded")
 JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
 JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
 JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
@@ -180,7 +183,7 @@ JMESSAGE(JTRC_THUMB_PALETTE,
 JMESSAGE(JTRC_THUMB_RGB,
          "JFIF extension marker: RGB thumbnail image, length %u")
 JMESSAGE(JTRC_UNKNOWN_IDS,
-         "Unrecognized component IDs %d %d %d, assuming YCbCr")
+         "Unrecognized component IDs %d %d %d, assuming YCbCr (lossy) or RGB (lossless)")
 JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
 JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
 JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
@@ -211,6 +214,8 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
 JMESSAGE(JERR_BAD_DROP_SAMPLING,
          "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
 #endif
+JMESSAGE(JERR_BAD_RESTART,
+         "Invalid restart interval %d; must be an integer multiple of the number of MCUs in an MCU row (%d)")
 
 #ifdef JMAKE_ENUM_LIST
 
diff --git a/3rdparty/libjpeg-turbo/src/jfdctfst.c b/3rdparty/libjpeg-turbo/src/jfdctfst.c
index 4c9ce0de8f..26070d19a6 100644
--- a/3rdparty/libjpeg-turbo/src/jfdctfst.c
+++ b/3rdparty/libjpeg-turbo/src/jfdctfst.c
@@ -114,7 +114,7 @@
  */
 
 GLOBAL(void)
-jpeg_fdct_ifast(DCTELEM *data)
+_jpeg_fdct_ifast(DCTELEM *data)
 {
   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   DCTELEM tmp10, tmp11, tmp12, tmp13;
diff --git a/3rdparty/libjpeg-turbo/src/jfdctint.c b/3rdparty/libjpeg-turbo/src/jfdctint.c
index c95a3a7fb8..974013fa40 100644
--- a/3rdparty/libjpeg-turbo/src/jfdctint.c
+++ b/3rdparty/libjpeg-turbo/src/jfdctint.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, 2020, D. R. Commander.
+ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -140,7 +140,7 @@
  */
 
 GLOBAL(void)
-jpeg_fdct_islow(DCTELEM *data)
+_jpeg_fdct_islow(DCTELEM *data)
 {
   JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   JLONG tmp10, tmp11, tmp12, tmp13;
diff --git a/3rdparty/libjpeg-turbo/src/jidctflt.c b/3rdparty/libjpeg-turbo/src/jidctflt.c
index 5aee74e232..ee3a31a616 100644
--- a/3rdparty/libjpeg-turbo/src/jidctflt.c
+++ b/3rdparty/libjpeg-turbo/src/jidctflt.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1998, Thomas G. Lane.
  * Modified 2010 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -69,9 +69,9 @@
  */
 
 GLOBAL(void)
-jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
@@ -79,8 +79,8 @@ jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   FLOAT_MULT_TYPE *quantptr;
   FAST_FLOAT *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int ctr;
   FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
 #define _0_125  ((FLOAT_MULT_TYPE)0.125)
@@ -192,7 +192,7 @@ jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     /* Even part */
 
     /* Apply signed->unsigned and prepare float->int conversion */
-    z5 = wsptr[0] + ((FAST_FLOAT)CENTERJSAMPLE + (FAST_FLOAT)0.5);
+    z5 = wsptr[0] + ((FAST_FLOAT)_CENTERJSAMPLE + (FAST_FLOAT)0.5);
     tmp10 = z5 + wsptr[4];
     tmp11 = z5 - wsptr[4];
 
diff --git a/3rdparty/libjpeg-turbo/src/jidctfst.c b/3rdparty/libjpeg-turbo/src/jidctfst.c
index 89a20c937b..68119b9942 100644
--- a/3rdparty/libjpeg-turbo/src/jidctfst.c
+++ b/3rdparty/libjpeg-turbo/src/jidctfst.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -64,10 +64,10 @@
  * The dequantized coefficients are not integers because the AA&N scaling
  * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
  * so that the first and second IDCT rounds have the same input scaling.
- * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * For 8-bit samples, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
  * avoid a descaling shift; this compromises accuracy rather drastically
  * for small quantization table entries, but it saves a lot of shifts.
- * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * For 12-bit samples, there's no hope of using 16x16 multiplies anyway,
  * so we use a much larger scaling factor to preserve accuracy.
  *
  * A final compromise is to represent the multiplicative constants to only
@@ -168,9 +168,9 @@
  */
 
 GLOBAL(void)
-jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   DCTELEM tmp10, tmp11, tmp12, tmp13;
@@ -178,8 +178,8 @@ jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   IFAST_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[DCTSIZE2];      /* buffers data between passes */
   SHIFT_TEMPS                   /* for DESCALE */
@@ -296,7 +296,7 @@ jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval =
+      _JSAMPLE dcval =
         range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK];
 
       outptr[0] = dcval;
diff --git a/3rdparty/libjpeg-turbo/src/jidctint.c b/3rdparty/libjpeg-turbo/src/jidctint.c
index bb08748019..c58592d626 100644
--- a/3rdparty/libjpeg-turbo/src/jidctint.c
+++ b/3rdparty/libjpeg-turbo/src/jidctint.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modification developed 2002-2018 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, 2020, D. R. Commander.
+ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -170,9 +170,9 @@
  */
 
 GLOBAL(void)
-jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp2, tmp3;
   JLONG tmp10, tmp11, tmp12, tmp13;
@@ -180,8 +180,8 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[DCTSIZE2];      /* buffers data between passes */
   SHIFT_TEMPS
@@ -314,8 +314,8 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
-                                               PASS1_BITS + 3) & RANGE_MASK];
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
 
       outptr[0] = dcval;
       outptr[1] = dcval;
@@ -424,17 +424,17 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
   JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[7 * 7];         /* buffers data between passes */
   SHIFT_TEMPS
@@ -573,17 +573,17 @@ jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
   JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[6 * 6];         /* buffers data between passes */
   SHIFT_TEMPS
@@ -694,17 +694,17 @@ jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
   JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[5 * 5];         /* buffers data between passes */
   SHIFT_TEMPS
@@ -809,16 +809,16 @@ jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp2, tmp10, tmp12;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[3 * 3];         /* buffers data between passes */
   SHIFT_TEMPS
@@ -899,17 +899,17 @@ jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
   JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 9];         /* buffers data between passes */
   SHIFT_TEMPS
@@ -1070,9 +1070,9 @@ jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
@@ -1080,8 +1080,8 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 10];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -1265,9 +1265,9 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
@@ -1275,8 +1275,8 @@ jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 11];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -1459,9 +1459,9 @@ jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
@@ -1469,8 +1469,8 @@ jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 12];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -1675,9 +1675,9 @@ jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
@@ -1685,8 +1685,8 @@ jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 13];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -1903,9 +1903,9 @@ jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
@@ -1913,8 +1913,8 @@ jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 14];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -2129,9 +2129,9 @@ jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
@@ -2139,8 +2139,8 @@ jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 15];        /* buffers data between passes */
   SHIFT_TEMPS
@@ -2371,9 +2371,9 @@ jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                JDIMENSION output_col)
+_jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
 {
   JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
   JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
@@ -2381,8 +2381,8 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[8 * 16];        /* buffers data between passes */
   SHIFT_TEMPS
diff --git a/3rdparty/libjpeg-turbo/src/jidctred.c b/3rdparty/libjpeg-turbo/src/jidctred.c
index 1dd65a94d9..6521e3ebbf 100644
--- a/3rdparty/libjpeg-turbo/src/jidctred.c
+++ b/3rdparty/libjpeg-turbo/src/jidctred.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -118,17 +118,17 @@
  */
 
 GLOBAL(void)
-jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp2, tmp10, tmp12;
   JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[DCTSIZE * 4];   /* buffers data between passes */
   SHIFT_TEMPS
@@ -210,8 +210,8 @@ jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
-                                               PASS1_BITS + 3) & RANGE_MASK];
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
 
       outptr[0] = dcval;
       outptr[1] = dcval;
@@ -276,16 +276,16 @@ jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   JLONG tmp0, tmp10, z1;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE *quantptr;
   int *wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
   int workspace[DCTSIZE * 2];   /* buffers data between passes */
   SHIFT_TEMPS
@@ -345,8 +345,8 @@ jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
-                                               PASS1_BITS + 3) & RANGE_MASK];
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
 
       outptr[0] = dcval;
       outptr[1] = dcval;
@@ -387,13 +387,13 @@ jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
  */
 
 GLOBAL(void)
-jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-              JCOEFPTR coef_block, JSAMPARRAY output_buf,
-              JDIMENSION output_col)
+_jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
 {
   int dcval;
   ISLOW_MULT_TYPE *quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   SHIFT_TEMPS
 
   /* We hardly need an inverse DCT routine for this: just take the
diff --git a/3rdparty/libjpeg-turbo/src/jinclude.h b/3rdparty/libjpeg-turbo/src/jinclude.h
index 120614b25c..56e7a4b296 100644
--- a/3rdparty/libjpeg-turbo/src/jinclude.h
+++ b/3rdparty/libjpeg-turbo/src/jinclude.h
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1994, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2022, D. R. Commander.
+ * Copyright (C) 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -45,6 +45,18 @@
  */
 
 
+#ifdef _MSC_VER
+
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
+
+#else
+
+#define SNPRINTF  snprintf
+
+#endif
+
+
 #ifndef NO_GETENV
 
 #ifdef _MSC_VER
@@ -111,6 +123,8 @@ static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
 
 #else
 
+#include <errno.h>
+
 /* This provides a similar interface to the Microsoft _putenv_s() function, but
  * other than parameter validation, it has no advantages over setenv().
  */
diff --git a/3rdparty/libjpeg-turbo/src/jlossls.h b/3rdparty/libjpeg-turbo/src/jlossls.h
new file mode 100644
index 0000000000..ce41704134
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jlossls.h
@@ -0,0 +1,101 @@
+/*
+ * jlossls.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This include file contains common declarations for the lossless JPEG
+ * codec modules.
+ */
+
+#ifndef JLOSSLS_H
+#define JLOSSLS_H
+
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#define ALLOC_DARRAY(pool_id, diffsperrow, numrows) \
+  (JDIFFARRAY)(*cinfo->mem->alloc_sarray) \
+    ((j_common_ptr)cinfo, pool_id, \
+     (diffsperrow) * sizeof(JDIFF) / sizeof(_JSAMPLE), numrows)
+
+
+/*
+ * Table H.1: Predictors for lossless coding.
+ */
+
+#define PREDICTOR1  Ra
+#define PREDICTOR2  Rb
+#define PREDICTOR3  Rc
+#define PREDICTOR4  (int)((JLONG)Ra + (JLONG)Rb - (JLONG)Rc)
+#define PREDICTOR5  (int)((JLONG)Ra + RIGHT_SHIFT((JLONG)Rb - (JLONG)Rc, 1))
+#define PREDICTOR6  (int)((JLONG)Rb + RIGHT_SHIFT((JLONG)Ra - (JLONG)Rc, 1))
+#define PREDICTOR7  (int)RIGHT_SHIFT((JLONG)Ra + (JLONG)Rb, 1)
+
+#endif
+
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+typedef void (*predict_difference_method_ptr) (j_compress_ptr cinfo, int ci,
+                                               _JSAMPROW input_buf,
+                                               _JSAMPROW prev_row,
+                                               JDIFFROW diff_buf,
+                                               JDIMENSION width);
+
+/* Lossless compressor */
+typedef struct {
+  struct jpeg_forward_dct pub;  /* public fields */
+
+  /* It is useful to allow each component to have a separate diff method. */
+  predict_difference_method_ptr predict_difference[MAX_COMPONENTS];
+
+  /* MCU rows left in the restart interval for each component */
+  unsigned int restart_rows_to_go[MAX_COMPONENTS];
+
+  /* Sample scaling */
+  void (*scaler_scale) (j_compress_ptr cinfo, _JSAMPROW input_buf,
+                        _JSAMPROW output_buf, JDIMENSION width);
+} jpeg_lossless_compressor;
+
+typedef jpeg_lossless_compressor *lossless_comp_ptr;
+
+#endif /* C_LOSSLESS_SUPPORTED */
+
+
+#ifdef D_LOSSLESS_SUPPORTED
+
+typedef void (*predict_undifference_method_ptr) (j_decompress_ptr cinfo,
+                                                 int comp_index,
+                                                 JDIFFROW diff_buf,
+                                                 JDIFFROW prev_row,
+                                                 JDIFFROW undiff_buf,
+                                                 JDIMENSION width);
+
+/* Lossless decompressor */
+typedef struct {
+  struct jpeg_inverse_dct pub;  /* public fields */
+
+  /* It is useful to allow each component to have a separate undiff method. */
+  predict_undifference_method_ptr predict_undifference[MAX_COMPONENTS];
+
+  /* Sample scaling */
+  void (*scaler_scale) (j_decompress_ptr cinfo, JDIFFROW diff_buf,
+                        _JSAMPROW output_buf, JDIMENSION width);
+} jpeg_lossless_decompressor;
+
+typedef jpeg_lossless_decompressor *lossless_decomp_ptr;
+
+#endif /* D_LOSSLESS_SUPPORTED */
+
+#endif /* JLOSSLS_H */
diff --git a/3rdparty/libjpeg-turbo/src/jmemmgr.c b/3rdparty/libjpeg-turbo/src/jmemmgr.c
index 8f5a4ab1c7..dca8f5c22c 100644
--- a/3rdparty/libjpeg-turbo/src/jmemmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jmemmgr.c
@@ -68,10 +68,13 @@ round_up_pow2(size_t a, size_t b)
  * There isn't any really portable way to determine the worst-case alignment
  * requirement.  This module assumes that the alignment requirement is
  * multiples of ALIGN_SIZE.
- * By default, we define ALIGN_SIZE as sizeof(double).  This is necessary on
- * some workstations (where doubles really do need 8-byte alignment) and will
- * work fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_SIZE smaller.
+ * By default, we define ALIGN_SIZE as the maximum of sizeof(double) and
+ * sizeof(void *).  This is necessary on some workstations (where doubles
+ * really do need 8-byte alignment) and will work fine on nearly everything.
+ * We use the maximum of sizeof(double) and sizeof(void *) since sizeof(double)
+ * may be insufficient, for example, on CHERI-enabled platforms with 16-byte
+ * pointers and a 16-byte alignment requirement.  If your machine has lesser
+ * alignment needs, you can save a few bytes by making ALIGN_SIZE smaller.
  * The only place I know of where this will NOT work is certain Macintosh
  * 680x0 compilers that define double as a 10-byte IEEE extended float.
  * Doing 10-byte alignment is counterproductive because longwords won't be
@@ -81,7 +84,7 @@ round_up_pow2(size_t a, size_t b)
 
 #ifndef ALIGN_SIZE              /* so can override from jconfig.h */
 #ifndef WITH_SIMD
-#define ALIGN_SIZE  sizeof(double)
+#define ALIGN_SIZE  MAX(sizeof(void *), sizeof(double))
 #else
 #define ALIGN_SIZE  32 /* Most of the SIMD instructions we support require
                           16-byte (128-bit) alignment, but AVX2 requires
@@ -152,7 +155,9 @@ typedef my_memory_mgr *my_mem_ptr;
  */
 
 struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;        /* => the in-memory buffer */
+  JSAMPARRAY mem_buffer;        /* => the in-memory buffer (if
+                                   cinfo->data_precision is 12, then this is
+                                   actually a J12SAMPARRAY) */
   JDIMENSION rows_in_array;     /* total virtual array height */
   JDIMENSION samplesperrow;     /* width of array (and of memory buffer) */
   JDIMENSION maxaccess;         /* max rows accessed by access_virt_sarray */
@@ -348,9 +353,10 @@ alloc_small(j_common_ptr cinfo, int pool_id, size_t sizeofobject)
  * request is large enough that it may as well be passed directly to
  * jpeg_get_large; the pool management just links everything together
  * so that we can free it all on demand.
- * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
- * structures.  The routines that create these structures (see below)
- * deliberately bunch rows together to ensure a large request size.
+ * Note: the major use of "large" objects is in
+ * JSAMPARRAY/J12SAMPARRAY/J16SAMPARRAY and JBLOCKARRAY structures.  The
+ * routines that create these structures (see below) deliberately bunch rows
+ * together to ensure a large request size.
  */
 
 METHODDEF(void *)
@@ -434,9 +440,22 @@ alloc_sarray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
   JSAMPROW workspace;
   JDIMENSION rowsperchunk, currow, i;
   long ltemp;
+  J12SAMPARRAY result12;
+  J12SAMPROW workspace12;
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+  J16SAMPARRAY result16;
+  J16SAMPROW workspace16;
+#endif
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision == 16 ?
+                       sizeof(J16SAMPLE) : (data_precision == 12 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
 
   /* Make sure each row is properly aligned */
-  if ((ALIGN_SIZE % sizeof(JSAMPLE)) != 0)
+  if ((ALIGN_SIZE % sample_size) != 0)
     out_of_memory(cinfo, 5);    /* safety check */
 
   if (samplesperrow > MAX_ALLOC_CHUNK) {
@@ -445,11 +464,11 @@ alloc_sarray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
     out_of_memory(cinfo, 9);
   }
   samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) /
-                                                           sizeof(JSAMPLE));
+                                                           sample_size);
 
   /* Calculate max # of rows allowed in one allocation chunk */
   ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) /
-          ((long)samplesperrow * sizeof(JSAMPLE));
+          ((long)samplesperrow * (long)sample_size);
   if (ltemp <= 0)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
   if (ltemp < (long)numrows)
@@ -458,24 +477,68 @@ alloc_sarray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
     rowsperchunk = numrows;
   mem->last_rowsperchunk = rowsperchunk;
 
-  /* Get space for row pointers (small object) */
-  result = (JSAMPARRAY)alloc_small(cinfo, pool_id,
-                                   (size_t)(numrows * sizeof(JSAMPROW)));
+  if (data_precision == 16) {
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+    /* Get space for row pointers (small object) */
+    result16 = (J16SAMPARRAY)alloc_small(cinfo, pool_id,
+                                         (size_t)(numrows *
+                                                  sizeof(J16SAMPROW)));
 
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JSAMPROW)alloc_large(cinfo, pool_id,
-      (size_t)((size_t)rowsperchunk * (size_t)samplesperrow *
-               sizeof(JSAMPLE)));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += samplesperrow;
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace16 = (J16SAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result16[currow++] = workspace16;
+        workspace16 += samplesperrow;
+      }
     }
-  }
 
-  return result;
+    return (JSAMPARRAY)result16;
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
+    return NULL;
+#endif
+  } else if (data_precision == 12) {
+    /* Get space for row pointers (small object) */
+    result12 = (J12SAMPARRAY)alloc_small(cinfo, pool_id,
+                                         (size_t)(numrows *
+                                                  sizeof(J12SAMPROW)));
+
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace12 = (J12SAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result12[currow++] = workspace12;
+        workspace12 += samplesperrow;
+      }
+    }
+
+    return (JSAMPARRAY)result12;
+  } else {
+    /* Get space for row pointers (small object) */
+    result = (JSAMPARRAY)alloc_small(cinfo, pool_id,
+                                     (size_t)(numrows * sizeof(JSAMPROW)));
+
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace = (JSAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result[currow++] = workspace;
+        workspace += samplesperrow;
+      }
+    }
+
+    return result;
+  }
 }
 
 
@@ -637,6 +700,13 @@ realize_virt_arrays(j_common_ptr cinfo)
   size_t minheights, max_minheights;
   jvirt_sarray_ptr sptr;
   jvirt_barray_ptr bptr;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision == 16 ?
+                       sizeof(J16SAMPLE) : (data_precision == 12 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
 
   /* Compute the minimum space needed (maxaccess rows in each buffer)
    * and the maximum space needed (full image height in each buffer).
@@ -647,10 +717,10 @@ realize_virt_arrays(j_common_ptr cinfo)
   for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
     if (sptr->mem_buffer == NULL) { /* if not realized yet */
       size_t new_space = (long)sptr->rows_in_array *
-                         (long)sptr->samplesperrow * sizeof(JSAMPLE);
+                         (long)sptr->samplesperrow * sample_size;
 
       space_per_minheight += (long)sptr->maxaccess *
-                             (long)sptr->samplesperrow * sizeof(JSAMPLE);
+                             (long)sptr->samplesperrow * sample_size;
       if (SIZE_MAX - maximum_space < new_space)
         out_of_memory(cinfo, 10);
       maximum_space += new_space;
@@ -705,7 +775,7 @@ realize_virt_arrays(j_common_ptr cinfo)
         jpeg_open_backing_store(cinfo, &sptr->b_s_info,
                                 (long)sptr->rows_in_array *
                                 (long)sptr->samplesperrow *
-                                (long)sizeof(JSAMPLE));
+                                (long)sample_size);
         sptr->b_s_open = TRUE;
       }
       sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
@@ -748,8 +818,15 @@ do_sarray_io(j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
 /* Do backing store read or write of a virtual sample array */
 {
   long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision == 16 ?
+                       sizeof(J16SAMPLE) : (data_precision == 12 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
 
-  bytesperrow = (long)ptr->samplesperrow * sizeof(JSAMPLE);
+  bytesperrow = (long)ptr->samplesperrow * (long)sample_size;
   file_offset = ptr->cur_start_row * bytesperrow;
   /* Loop to read or write each allocation chunk in mem_buffer */
   for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) {
@@ -763,14 +840,42 @@ do_sarray_io(j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
     if (rows <= 0)              /* this chunk might be past end of file! */
       break;
     byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
-                                            (void *)ptr->mem_buffer[i],
-                                            file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
-                                           (void *)ptr->mem_buffer[i],
-                                           file_offset, byte_count);
+    if (data_precision == 16) {
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+      J16SAMPARRAY mem_buffer16 = (J16SAMPARRAY)ptr->mem_buffer;
+
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)mem_buffer16[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)mem_buffer16[i],
+                                             file_offset, byte_count);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
+#endif
+    } else if (data_precision == 12) {
+      J12SAMPARRAY mem_buffer12 = (J12SAMPARRAY)ptr->mem_buffer;
+
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)mem_buffer12[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)mem_buffer12[i],
+                                             file_offset, byte_count);
+    } else {
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)ptr->mem_buffer[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)ptr->mem_buffer[i],
+                                             file_offset, byte_count);
+    }
     file_offset += byte_count;
   }
 }
@@ -818,6 +923,13 @@ access_virt_sarray(j_common_ptr cinfo, jvirt_sarray_ptr ptr,
 {
   JDIMENSION end_row = start_row + num_rows;
   JDIMENSION undef_row;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision == 16 ?
+                       sizeof(J16SAMPLE) : (data_precision == 12 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
 
   /* debugging check */
   if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
@@ -873,7 +985,7 @@ access_virt_sarray(j_common_ptr cinfo, jvirt_sarray_ptr ptr,
     if (writable)
       ptr->first_undef_row = end_row;
     if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t)ptr->samplesperrow * sizeof(JSAMPLE);
+      size_t bytesperrow = (size_t)ptr->samplesperrow * sample_size;
       undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
       end_row -= ptr->cur_start_row;
       while (undef_row < end_row) {
diff --git a/3rdparty/libjpeg-turbo/src/jmemsys.h b/3rdparty/libjpeg-turbo/src/jmemsys.h
index 9229550afd..ac09ef4c36 100644
--- a/3rdparty/libjpeg-turbo/src/jmemsys.h
+++ b/3rdparty/libjpeg-turbo/src/jmemsys.h
@@ -99,24 +99,6 @@ EXTERN(size_t) jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed,
 #define TEMP_NAME_LENGTH   64   /* max length of a temporary file's name */
 
 
-#ifdef USE_MSDOS_MEMMGR         /* DOS-specific junk */
-
-typedef unsigned short XMSH;    /* type of extended-memory handles */
-typedef unsigned short EMSH;    /* type of expanded-memory handles */
-
-typedef union {
-  short file_handle;            /* DOS file handle if it's a temp file */
-  XMSH xms_handle;              /* handle if it's a chunk of XMS */
-  EMSH ems_handle;              /* handle if it's a chunk of EMS */
-} handle_union;
-
-#endif /* USE_MSDOS_MEMMGR */
-
-#ifdef USE_MAC_MEMMGR           /* Mac-specific junk */
-#include <Files.h>
-#endif /* USE_MAC_MEMMGR */
-
-
 typedef struct backing_store_struct *backing_store_ptr;
 
 typedef struct backing_store_struct {
@@ -130,22 +112,9 @@ typedef struct backing_store_struct {
   void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info);
 
   /* Private fields for system-dependent backing-store management */
-#ifdef USE_MSDOS_MEMMGR
-  /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;          /* reference to backing-store storage object */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-#ifdef USE_MAC_MEMMGR
-  /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;              /* file reference number to temp file */
-  FSSpec tempSpec;              /* the FSSpec for the temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
   /* For a typical implementation with temp files, we need: */
   FILE *temp_file;              /* stdio reference to temp file */
   char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
-#endif
-#endif
 } backing_store_info;
 
 
diff --git a/3rdparty/libjpeg-turbo/src/jmorecfg.h b/3rdparty/libjpeg-turbo/src/jmorecfg.h
index b33a991914..89c7842c87 100644
--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, 2022, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -41,31 +43,29 @@
  * arrays is very slow on your hardware, you might want to change these.
  */
 
-#if BITS_IN_JSAMPLE == 8
-/* JSAMPLE should be the smallest type that will hold the values 0..255.
- */
+/* JSAMPLE should be the smallest type that will hold the values 0..255. */
 
 typedef unsigned char JSAMPLE;
 #define GETJSAMPLE(value)  ((int)(value))
 
-#define MAXJSAMPLE      255
-#define CENTERJSAMPLE   128
-
-#endif /* BITS_IN_JSAMPLE == 8 */
+#define MAXJSAMPLE       255
+#define CENTERJSAMPLE    128
 
 
-#if BITS_IN_JSAMPLE == 12
-/* JSAMPLE should be the smallest type that will hold the values 0..4095.
- * On nearly all machines "short" will do nicely.
- */
+/* J12SAMPLE should be the smallest type that will hold the values 0..4095. */
 
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int)(value))
+typedef short J12SAMPLE;
 
-#define MAXJSAMPLE      4095
-#define CENTERJSAMPLE   2048
+#define MAXJ12SAMPLE     4095
+#define CENTERJ12SAMPLE  2048
 
-#endif /* BITS_IN_JSAMPLE == 12 */
+
+/* J16SAMPLE should be the smallest type that will hold the values 0..65535. */
+
+typedef unsigned short J16SAMPLE;
+
+#define MAXJ16SAMPLE     65535
+#define CENTERJ16SAMPLE  32768
 
 
 /* Representation of a DCT frequency coefficient.
@@ -242,14 +242,16 @@ typedef int boolean;
 
 #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define C_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define C_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
 #define ENTROPY_OPT_SUPPORTED       /* Optimization of entropy coding parms? */
 /* Note: if you selected 12-bit data precision, it is dangerous to turn off
  * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
  * precision, so jchuff.c normally uses entropy optimization to compute
  * usable tables for higher precision.  If you don't want to do optimization,
  * you'll have to supply different default Huffman tables.
- * The exact same statements apply for progressive JPEG: the default tables
- * don't work for progressive mode.  (This may get fixed, however.)
+ * The exact same statements apply for progressive and lossless JPEG:
+ * the default tables don't work for progressive mode or lossless mode.
+ * (This may get fixed, however.)
  */
 #define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
 
@@ -257,6 +259,7 @@ typedef int boolean;
 
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define D_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define D_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
 #define SAVE_MARKERS_SUPPORTED      /* jpeg_save_markers() needed? */
 #define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
 #define IDCT_SCALING_SUPPORTED      /* Output rescaling via IDCT? */
diff --git a/3rdparty/libjpeg-turbo/src/jpeg_nbits_table.h b/3rdparty/libjpeg-turbo/src/jpeg_nbits.c
similarity index 99%
rename from 3rdparty/libjpeg-turbo/src/jpeg_nbits_table.h
rename to 3rdparty/libjpeg-turbo/src/jpeg_nbits.c
index fcf73878c3..c8ee6b056c 100644
--- a/3rdparty/libjpeg-turbo/src/jpeg_nbits_table.h
+++ b/3rdparty/libjpeg-turbo/src/jpeg_nbits.c
@@ -1,4 +1,32 @@
-static const unsigned char jpeg_nbits_table[65536] = {
+/*
+ * Copyright (C) 2024, D. R. Commander.
+ *
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#include "jpeg_nbits.h"
+#include "jconfigint.h"
+
+
+#ifndef USE_CLZ_INTRINSIC
+
+#define INCLUDE_JPEG_NBITS_TABLE
+
+/* When building for x86[-64] with the SIMD extensions enabled, the C Huffman
+ * encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
+ */
+#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
+     defined(_M_X64)) && defined(WITH_SIMD)
+#undef INCLUDE_JPEG_NBITS_TABLE
+#endif
+
+#endif
+
+
+#ifdef INCLUDE_JPEG_NBITS_TABLE
+
+const unsigned char HIDDEN jpeg_nbits_table[65536] = {
    0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
    5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
    6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
@@ -4096,3 +4124,11 @@ static const unsigned char jpeg_nbits_table[65536] = {
   16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
   16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
 };
+
+#else
+
+/* Suppress compiler warnings about empty translation unit. */
+
+typedef int dummy_jpeg_nbits_table;
+
+#endif
diff --git a/3rdparty/libjpeg-turbo/src/jpeg_nbits.h b/3rdparty/libjpeg-turbo/src/jpeg_nbits.h
new file mode 100644
index 0000000000..6481a1228d
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jpeg_nbits.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014, 2021, 2024, D. R. Commander.
+ * Copyright (C) 2014, Olle Liljenzin.
+ * Copyright (C) 2020, Arm Limited.
+ *
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+/*
+ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
+ * used for bit counting rather than the lookup table.  This will reduce the
+ * memory footprint by 64k, which is important for some mobile applications
+ * that create many isolated instances of libjpeg-turbo (web browsers, for
+ * instance.)  This may improve performance on some mobile platforms as well.
+ * This feature is enabled by default only on Arm processors, because some x86
+ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
+ * shown to have a significant performance impact even on the x86 chips that
+ * have a fast implementation of it.  When building for Armv6, you can
+ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
+ * flags (this defines __thumb__).
+ */
+
+/* NOTE: Both GCC and Clang define __GNUC__ */
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+    defined(_M_ARM) || defined(_M_ARM64)
+#if !defined(__thumb__) || defined(__thumb2__)
+#define USE_CLZ_INTRINSIC
+#endif
+#endif
+
+#ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
+#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
+#define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
+#else
+extern const unsigned char jpeg_nbits_table[65536];
+#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
+#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
+#endif
diff --git a/3rdparty/libjpeg-turbo/src/jpegcomp.h b/3rdparty/libjpeg-turbo/src/jpegapicomp.h
similarity index 98%
rename from 3rdparty/libjpeg-turbo/src/jpegcomp.h
rename to 3rdparty/libjpeg-turbo/src/jpegapicomp.h
index c4834ac0df..bb3912eb2f 100644
--- a/3rdparty/libjpeg-turbo/src/jpegcomp.h
+++ b/3rdparty/libjpeg-turbo/src/jpegapicomp.h
@@ -1,5 +1,5 @@
 /*
- * jpegcomp.h
+ * jpegapicomp.h
  *
  * Copyright (C) 2010, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
diff --git a/3rdparty/libjpeg-turbo/src/jpegint.h b/3rdparty/libjpeg-turbo/src/jpegint.h
index 6af9e2a179..6541420143 100644
--- a/3rdparty/libjpeg-turbo/src/jpegint.h
+++ b/3rdparty/libjpeg-turbo/src/jpegint.h
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2019, 2021, D. R. Commander.
+ * Copyright (C) 2015-2017, 2019, 2021-2022, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * Copyright (C) 2021, Alex Richardson.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -17,6 +19,17 @@
  */
 
 
+/* Representation of a spatial difference value.
+ * This should be a signed value of at least 16 bits; int is usually OK.
+ */
+
+typedef int JDIFF;
+
+typedef JDIFF FAR *JDIFFROW;    /* pointer to one row of difference values */
+typedef JDIFFROW *JDIFFARRAY;   /* ptr to some rows (a 2-D diff array) */
+typedef JDIFFARRAY *JDIFFIMAGE; /* a 3-D diff array: top index is color */
+
+
 /* Declarations for both compression & decompression */
 
 typedef enum {            /* Operating modes for buffer controllers */
@@ -61,6 +74,9 @@ typedef __UINTPTR_TYPE__ JUINTPTR;
 typedef size_t JUINTPTR;
 #endif
 
+#define IsExtRGB(cs) \
+  (cs == JCS_RGB || (cs >= JCS_EXT_RGB && cs <= JCS_EXT_ARGB))
+
 /*
  * Left shift macro that handles a negative operand without causing any
  * sanitizer warnings
@@ -80,6 +96,7 @@ struct jpeg_comp_master {
   /* State variables made visible to other modules */
   boolean call_pass_startup;    /* True if pass_startup must be called */
   boolean is_last_pass;         /* True during last pass */
+  boolean lossless;             /* True if lossless mode is enabled */
 };
 
 /* Main buffer control (downsampled-data buffer) */
@@ -87,6 +104,12 @@ struct jpeg_c_main_controller {
   void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
   void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
                         JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+  void (*process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                           JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                           JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+#endif
 };
 
 /* Compression preprocessing (downsampling input buffer control) */
@@ -97,12 +120,32 @@ struct jpeg_c_prep_controller {
                             JSAMPIMAGE output_buf,
                             JDIMENSION *out_row_group_ctr,
                             JDIMENSION out_row_groups_avail);
+  void (*pre_process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                               JDIMENSION *in_row_ctr,
+                               JDIMENSION in_rows_avail,
+                               J12SAMPIMAGE output_buf,
+                               JDIMENSION *out_row_group_ctr,
+                               JDIMENSION out_row_groups_avail);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*pre_process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                               JDIMENSION *in_row_ctr,
+                               JDIMENSION in_rows_avail,
+                               J16SAMPIMAGE output_buf,
+                               JDIMENSION *out_row_group_ctr,
+                               JDIMENSION out_row_groups_avail);
+#endif
 };
 
-/* Coefficient buffer control */
+/* Lossy mode: Coefficient buffer control
+ * Lossless mode: Difference buffer control
+ */
 struct jpeg_c_coef_controller {
   void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
   boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
+  boolean (*compress_data_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf);
+#ifdef C_LOSSLESS_SUPPORTED
+  boolean (*compress_data_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf);
+#endif
 };
 
 /* Colorspace conversion */
@@ -111,6 +154,14 @@ struct jpeg_color_converter {
   void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
                          JSAMPIMAGE output_buf, JDIMENSION output_row,
                          int num_rows);
+  void (*color_convert_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                            J12SAMPIMAGE output_buf, JDIMENSION output_row,
+                            int num_rows);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*color_convert_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                            J16SAMPIMAGE output_buf, JDIMENSION output_row,
+                            int num_rows);
+#endif
 };
 
 /* Downsampling */
@@ -119,24 +170,47 @@ struct jpeg_downsampler {
   void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf,
                       JDIMENSION in_row_index, JSAMPIMAGE output_buf,
                       JDIMENSION out_row_group_index);
+  void (*downsample_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf,
+                         JDIMENSION in_row_index, J12SAMPIMAGE output_buf,
+                         JDIMENSION out_row_group_index);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*downsample_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf,
+                         JDIMENSION in_row_index, J16SAMPIMAGE output_buf,
+                         JDIMENSION out_row_group_index);
+#endif
 
   boolean need_context_rows;    /* TRUE if need rows above & below */
 };
 
-/* Forward DCT (also controls coefficient quantization) */
+/* Lossy mode: Forward DCT (also controls coefficient quantization)
+ * Lossless mode: Prediction, sample differencing, and point transform
+ */
 struct jpeg_forward_dct {
   void (*start_pass) (j_compress_ptr cinfo);
+
+  /* Lossy mode */
   /* perhaps this should be an array??? */
   void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info *compptr,
                        JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
                        JDIMENSION start_row, JDIMENSION start_col,
                        JDIMENSION num_blocks);
+  void (*forward_DCT_12) (j_compress_ptr cinfo, jpeg_component_info *compptr,
+                          J12SAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                          JDIMENSION start_row, JDIMENSION start_col,
+                          JDIMENSION num_blocks);
 };
 
 /* Entropy encoding */
 struct jpeg_entropy_encoder {
   void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics);
+
+  /* Lossy mode */
   boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data);
+  /* Lossless mode */
+  JDIMENSION (*encode_mcus) (j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
+                             JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                             JDIMENSION nMCU);
+
   void (*finish_pass) (j_compress_ptr cinfo);
 };
 
@@ -164,6 +238,7 @@ struct jpeg_decomp_master {
 
   /* State variables made visible to other modules */
   boolean is_dummy_pass;        /* True during 1st pass for 2-pass quant */
+  boolean lossless;             /* True if decompressing a lossless image */
 
   /* Partial decompression variables */
   JDIMENSION first_iMCU_col;
@@ -193,14 +268,36 @@ struct jpeg_d_main_controller {
   void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
   void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
                         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+  void (*process_data_12) (j_decompress_ptr cinfo, J12SAMPARRAY output_buf,
+                           JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*process_data_16) (j_decompress_ptr cinfo, J16SAMPARRAY output_buf,
+                           JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#endif
 };
 
-/* Coefficient buffer control */
+/* Lossy mode: Coefficient buffer control
+ * Lossless mode: Difference buffer control
+ */
 struct jpeg_d_coef_controller {
   void (*start_input_pass) (j_decompress_ptr cinfo);
   int (*consume_data) (j_decompress_ptr cinfo);
   void (*start_output_pass) (j_decompress_ptr cinfo);
   int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
+  int (*decompress_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE output_buf);
+#ifdef D_LOSSLESS_SUPPORTED
+  int (*decompress_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE output_buf);
+#endif
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* Lossy mode */
   /* Pointer to array of coefficient virtual arrays, or NULL if none */
   jvirt_barray_ptr *coef_arrays;
 };
@@ -213,6 +310,20 @@ struct jpeg_d_post_controller {
                              JDIMENSION in_row_groups_avail,
                              JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
                              JDIMENSION out_rows_avail);
+  void (*post_process_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                                JDIMENSION *in_row_group_ctr,
+                                JDIMENSION in_row_groups_avail,
+                                J12SAMPARRAY output_buf,
+                                JDIMENSION *out_row_ctr,
+                                JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*post_process_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                                JDIMENSION *in_row_group_ctr,
+                                JDIMENSION in_row_groups_avail,
+                                J16SAMPARRAY output_buf,
+                                JDIMENSION *out_row_ctr,
+                                JDIMENSION out_rows_avail);
+#endif
 };
 
 /* Marker reading & parsing */
@@ -238,24 +349,42 @@ struct jpeg_marker_reader {
 /* Entropy decoding */
 struct jpeg_entropy_decoder {
   void (*start_pass) (j_decompress_ptr cinfo);
+
+  /* Lossy mode */
   boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data);
+  /* Lossless mode */
+  JDIMENSION (*decode_mcus) (j_decompress_ptr cinfo, JDIFFIMAGE diff_buf,
+                             JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                             JDIMENSION nMCU);
+  boolean (*process_restart) (j_decompress_ptr cinfo);
 
   /* This is here to share code between baseline and progressive decoders; */
   /* other modules probably should not use it */
   boolean insufficient_data;    /* set TRUE after emitting warning */
 };
 
-/* Inverse DCT (also performs dequantization) */
+/* Lossy mode: Inverse DCT (also performs dequantization)
+ * Lossless mode: Prediction, sample undifferencing, point transform, and
+ * sample size scaling
+ */
 typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo,
                                         jpeg_component_info *compptr,
                                         JCOEFPTR coef_block,
                                         JSAMPARRAY output_buf,
                                         JDIMENSION output_col);
+typedef void (*inverse_DCT_12_method_ptr) (j_decompress_ptr cinfo,
+                                           jpeg_component_info *compptr,
+                                           JCOEFPTR coef_block,
+                                           J12SAMPARRAY output_buf,
+                                           JDIMENSION output_col);
 
 struct jpeg_inverse_dct {
   void (*start_pass) (j_decompress_ptr cinfo);
+
+  /* Lossy mode */
   /* It is useful to allow each component to have a separate IDCT method. */
   inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+  inverse_DCT_12_method_ptr inverse_DCT_12[MAX_COMPONENTS];
 };
 
 /* Upsampling (note that upsampler must also call color converter) */
@@ -265,6 +394,16 @@ struct jpeg_upsampler {
                     JDIMENSION *in_row_group_ctr,
                     JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
                     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+  void (*upsample_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                       JDIMENSION *in_row_group_ctr,
+                       JDIMENSION in_row_groups_avail, J12SAMPARRAY output_buf,
+                       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*upsample_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                       JDIMENSION *in_row_group_ctr,
+                       JDIMENSION in_row_groups_avail, J16SAMPARRAY output_buf,
+                       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#endif
 
   boolean need_context_rows;    /* TRUE if need rows above & below */
 };
@@ -275,6 +414,14 @@ struct jpeg_color_deconverter {
   void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                          JDIMENSION input_row, JSAMPARRAY output_buf,
                          int num_rows);
+  void (*color_convert_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                            JDIMENSION input_row, J12SAMPARRAY output_buf,
+                            int num_rows);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*color_convert_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                            JDIMENSION input_row, J16SAMPARRAY output_buf,
+                            int num_rows);
+#endif
 };
 
 /* Color quantization or color precision reduction */
@@ -282,6 +429,8 @@ struct jpeg_color_quantizer {
   void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan);
   void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
                           JSAMPARRAY output_buf, int num_rows);
+  void (*color_quantize_12) (j_decompress_ptr cinfo, J12SAMPARRAY input_buf,
+                             J12SAMPARRAY output_buf, int num_rows);
   void (*finish_pass) (j_decompress_ptr cinfo);
   void (*new_color_map) (j_decompress_ptr cinfo);
 };
@@ -323,36 +472,95 @@ EXTERN(void) jinit_c_master_control(j_compress_ptr cinfo,
                                     boolean transcode_only);
 EXTERN(void) jinit_c_main_controller(j_compress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_c_main_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_c_prep_controller(j_compress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_c_prep_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_c_coef_controller(j_compress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_c_coef_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_color_converter(j_compress_ptr cinfo);
+EXTERN(void) j12init_color_converter(j_compress_ptr cinfo);
 EXTERN(void) jinit_downsampler(j_compress_ptr cinfo);
+EXTERN(void) j12init_downsampler(j_compress_ptr cinfo);
 EXTERN(void) jinit_forward_dct(j_compress_ptr cinfo);
+EXTERN(void) j12init_forward_dct(j_compress_ptr cinfo);
 EXTERN(void) jinit_huff_encoder(j_compress_ptr cinfo);
 EXTERN(void) jinit_phuff_encoder(j_compress_ptr cinfo);
 EXTERN(void) jinit_arith_encoder(j_compress_ptr cinfo);
 EXTERN(void) jinit_marker_writer(j_compress_ptr cinfo);
+#ifdef C_LOSSLESS_SUPPORTED
+EXTERN(void) j16init_c_main_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_c_prep_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_color_converter(j_compress_ptr cinfo);
+EXTERN(void) j16init_downsampler(j_compress_ptr cinfo);
+EXTERN(void) jinit_c_diff_controller(j_compress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_c_diff_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_c_diff_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_lhuff_encoder(j_compress_ptr cinfo);
+EXTERN(void) jinit_lossless_compressor(j_compress_ptr cinfo);
+EXTERN(void) j12init_lossless_compressor(j_compress_ptr cinfo);
+EXTERN(void) j16init_lossless_compressor(j_compress_ptr cinfo);
+#endif
+
 /* Decompression module initialization routines */
 EXTERN(void) jinit_master_decompress(j_decompress_ptr cinfo);
 EXTERN(void) jinit_d_main_controller(j_decompress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_d_main_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_d_coef_controller(j_decompress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_d_coef_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_d_post_controller(j_decompress_ptr cinfo,
                                      boolean need_full_buffer);
+EXTERN(void) j12init_d_post_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
 EXTERN(void) jinit_input_controller(j_decompress_ptr cinfo);
 EXTERN(void) jinit_marker_reader(j_decompress_ptr cinfo);
 EXTERN(void) jinit_huff_decoder(j_decompress_ptr cinfo);
 EXTERN(void) jinit_phuff_decoder(j_decompress_ptr cinfo);
 EXTERN(void) jinit_arith_decoder(j_decompress_ptr cinfo);
 EXTERN(void) jinit_inverse_dct(j_decompress_ptr cinfo);
+EXTERN(void) j12init_inverse_dct(j_decompress_ptr cinfo);
 EXTERN(void) jinit_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j12init_upsampler(j_decompress_ptr cinfo);
 EXTERN(void) jinit_color_deconverter(j_decompress_ptr cinfo);
+EXTERN(void) j12init_color_deconverter(j_decompress_ptr cinfo);
 EXTERN(void) jinit_1pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) j12init_1pass_quantizer(j_decompress_ptr cinfo);
 EXTERN(void) jinit_2pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) j12init_2pass_quantizer(j_decompress_ptr cinfo);
 EXTERN(void) jinit_merged_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j12init_merged_upsampler(j_decompress_ptr cinfo);
+#ifdef D_LOSSLESS_SUPPORTED
+EXTERN(void) j16init_d_main_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_d_post_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j16init_color_deconverter(j_decompress_ptr cinfo);
+EXTERN(void) jinit_d_diff_controller(j_decompress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_d_diff_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_d_diff_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_lhuff_decoder(j_decompress_ptr cinfo);
+EXTERN(void) jinit_lossless_decompressor(j_decompress_ptr cinfo);
+EXTERN(void) j12init_lossless_decompressor(j_decompress_ptr cinfo);
+EXTERN(void) j16init_lossless_decompressor(j_decompress_ptr cinfo);
+#endif
+
 /* Memory manager initialization */
 EXTERN(void) jinit_memory_mgr(j_common_ptr cinfo);
 
@@ -362,6 +570,14 @@ EXTERN(long) jround_up(long a, long b);
 EXTERN(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
                                JSAMPARRAY output_array, int dest_row,
                                int num_rows, JDIMENSION num_cols);
+EXTERN(void) j12copy_sample_rows(J12SAMPARRAY input_array, int source_row,
+                                 J12SAMPARRAY output_array, int dest_row,
+                                 int num_rows, JDIMENSION num_cols);
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+EXTERN(void) j16copy_sample_rows(J16SAMPARRAY input_array, int source_row,
+                                 J16SAMPARRAY output_array, int dest_row,
+                                 int num_rows, JDIMENSION num_cols);
+#endif
 EXTERN(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
                              JDIMENSION num_blocks);
 EXTERN(void) jzero_far(void *target, size_t bytestozero);
diff --git a/3rdparty/libjpeg-turbo/src/jpeglib.h b/3rdparty/libjpeg-turbo/src/jpeglib.h
index d7664f0630..a59e98c25e 100644
--- a/3rdparty/libjpeg-turbo/src/jpeglib.h
+++ b/3rdparty/libjpeg-turbo/src/jpeglib.h
@@ -4,8 +4,11 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander.
+ * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, 2022-2023,
+             D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -43,6 +46,13 @@ extern "C" {
  * if you want to be compatible.
  */
 
+/* NOTE: In lossless mode, an MCU contains one or more samples rather than one
+ * or more 8x8 DCT blocks, so the term "data unit" is used to generically
+ * describe a sample in lossless mode or an 8x8 DCT block in lossy mode.  To
+ * preserve backward API/ABI compatibility, the field and macro names retain
+ * the "block" terminology.
+ */
+
 #define DCTSIZE             8   /* The basic DCT block is 8x8 samples */
 #define DCTSIZE2            64  /* DCTSIZE squared; # of elements in a block */
 #define NUM_QUANT_TBLS      4   /* Quantization tables are numbered 0..3 */
@@ -57,9 +67,9 @@ extern "C" {
  * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
  * sometimes emits noncompliant files doesn't mean you should too.
  */
-#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on data units/MCU */
 #ifndef D_MAX_BLOCKS_IN_MCU
-#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on data units/MCU */
 #endif
 
 
@@ -70,6 +80,20 @@ typedef JSAMPLE *JSAMPROW;      /* ptr to one image row of pixel samples. */
 typedef JSAMPROW *JSAMPARRAY;   /* ptr to some rows (a 2-D sample array) */
 typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */
 
+typedef J12SAMPLE *J12SAMPROW;      /* ptr to one image row of 12-bit pixel
+                                       samples. */
+typedef J12SAMPROW *J12SAMPARRAY;   /* ptr to some 12-bit sample rows (a 2-D
+                                       12-bit sample array) */
+typedef J12SAMPARRAY *J12SAMPIMAGE; /* a 3-D 12-bit sample array: top index is
+                                       color */
+
+typedef J16SAMPLE *J16SAMPROW;      /* ptr to one image row of 16-bit pixel
+                                       samples. */
+typedef J16SAMPROW *J16SAMPARRAY;   /* ptr to some 16-bit sample rows (a 2-D
+                                       16-bit sample array) */
+typedef J16SAMPARRAY *J16SAMPIMAGE; /* a 3-D 16-bit sample array: top index is
+                                       color */
+
 typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */
 typedef JBLOCK *JBLOCKROW;      /* pointer to one row of coefficient blocks */
 typedef JBLOCKROW *JBLOCKARRAY;         /* a 2-D array of coefficient blocks */
@@ -135,17 +159,20 @@ typedef struct {
   /* Remaining fields should be treated as private by applications. */
 
   /* These values are computed during compression or decompression startup: */
-  /* Component's size in DCT blocks.
-   * Any dummy blocks added to complete an MCU are not counted; therefore
-   * these values do not depend on whether a scan is interleaved or not.
+  /* Component's size in data units.
+   * In lossy mode, any dummy blocks added to complete an MCU are not counted;
+   * therefore these values do not depend on whether a scan is interleaved or
+   * not.  In lossless mode, these are always equal to the image width and
+   * height.
    */
   JDIMENSION width_in_blocks;
   JDIMENSION height_in_blocks;
-  /* Size of a DCT block in samples.  Always DCTSIZE for compression.
-   * For decompression this is the size of the output from one DCT block,
+  /* Size of a data unit in samples.  Always DCTSIZE for lossy compression.
+   * For lossy decompression this is the size of the output from one DCT block,
    * reflecting any scaling we choose to apply during the IDCT step.
-   * Values from 1 to 16 are supported.
-   * Note that different components may receive different IDCT scalings.
+   * Values from 1 to 16 are supported.  Note that different components may
+   * receive different IDCT scalings.  In lossless mode, this is always equal
+   * to 1.
    */
 #if JPEG_LIB_VERSION >= 70
   int DCT_h_scaled_size;
@@ -156,8 +183,10 @@ typedef struct {
   /* The downsampled dimensions are the component's actual, unpadded number
    * of samples at the main buffer (preprocessing/compression interface), thus
    * downsampled_width = ceil(image_width * Hi/Hmax)
-   * and similarly for height.  For decompression, IDCT scaling is included, so
+   * and similarly for height.  For lossy decompression, IDCT scaling is
+   * included, so
    * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
+   * In lossless mode, these are always equal to the image width and height.
    */
   JDIMENSION downsampled_width;  /* actual width in samples */
   JDIMENSION downsampled_height; /* actual height in samples */
@@ -169,12 +198,12 @@ typedef struct {
 
   /* These values are computed before starting a scan of the component. */
   /* The decompressor output side may not use these variables. */
-  int MCU_width;                /* number of blocks per MCU, horizontally */
-  int MCU_height;               /* number of blocks per MCU, vertically */
+  int MCU_width;                /* number of data units per MCU, horizontally */
+  int MCU_height;               /* number of data units per MCU, vertically */
   int MCU_blocks;               /* MCU_width * MCU_height */
   int MCU_sample_width;         /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
-  int last_col_width;           /* # of non-dummy blocks across in last MCU */
-  int last_row_height;          /* # of non-dummy blocks down in last MCU */
+  int last_col_width;           /* # of non-dummy data units across in last MCU */
+  int last_row_height;          /* # of non-dummy data units down in last MCU */
 
   /* Saved quantization table for component; NULL if none yet saved.
    * See jdinput.c comments about the need for this information.
@@ -192,8 +221,12 @@ typedef struct {
 typedef struct {
   int comps_in_scan;            /* number of components encoded in this scan */
   int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;                   /* progressive JPEG spectral selection parms */
-  int Ah, Al;                   /* progressive JPEG successive approx. parms */
+  int Ss, Se;                   /* progressive JPEG spectral selection parms
+                                   (Ss is the predictor selection value in
+                                   lossless mode) */
+  int Ah, Al;                   /* progressive JPEG successive approx. parms
+                                   (Al is the point transform value in lossless
+                                   mode) */
 } jpeg_scan_info;
 
 /* The decompressor can save APPn and COM markers in a list of these: */
@@ -238,7 +271,8 @@ typedef enum {
   JCS_EXT_BGRA,           /* blue/green/red/alpha */
   JCS_EXT_ABGR,           /* alpha/blue/green/red */
   JCS_EXT_ARGB,           /* alpha/red/green/blue */
-  JCS_RGB565              /* 5-bit red/6-bit green/5-bit blue */
+  JCS_RGB565              /* 5-bit red/6-bit green/5-bit blue
+                             [decompression only] */
 } J_COLOR_SPACE;
 
 /* DCT/IDCT algorithm options. */
@@ -419,11 +453,13 @@ struct jpeg_compress_struct {
   int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
 #endif
 
-  JDIMENSION total_iMCU_rows;   /* # of iMCU rows to be input to coef ctlr */
-  /* The coefficient controller receives data in units of MCU rows as defined
-   * for fully interleaved scans (whether the JPEG file is interleaved or not).
-   * There are v_samp_factor * DCTSIZE sample rows of each component in an
-   * "iMCU" (interleaved MCU) row.
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows to be input to coefficient or
+                                   difference controller */
+  /* The coefficient or difference controller receives data in units of MCU
+   * rows as defined for fully interleaved scans (whether the JPEG file is
+   * interleaved or not).  In lossy mode, there are v_samp_factor * DCTSIZE
+   * sample rows of each component in an "iMCU" (interleaved MCU) row.  In
+   * lossless mode, total_iMCU_rows is always equal to the image height.
    */
 
   /*
@@ -437,12 +473,13 @@ struct jpeg_compress_struct {
   JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
   JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
 
-  int blocks_in_MCU;            /* # of DCT blocks per MCU */
+  int blocks_in_MCU;            /* # of data units per MCU */
   int MCU_membership[C_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
+  /* i'th data unit in an MCU */
 
-  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive/lossless JPEG parameters for
+                                   scan */
 
 #if JPEG_LIB_VERSION >= 80
   int block_size;               /* the basic DCT block size: 1..16 */
@@ -537,7 +574,12 @@ struct jpeg_decompress_struct {
    * The map has out_color_components rows and actual_number_of_colors columns.
    */
   int actual_number_of_colors;  /* number of entries in use */
-  JSAMPARRAY colormap;          /* The color map as a 2-D pixel array */
+  JSAMPARRAY colormap;          /* The color map as a 2-D pixel array
+                                   If data_precision is 12 or 16, then this is
+                                   actually a J12SAMPARRAY or a J16SAMPARRAY,
+                                   so callers must type-cast it in order to
+                                   read/write 12-bit or 16-bit samples from/to
+                                   the array. */
 
   /* State variables: these variables indicate the progress of decompression.
    * The application may examine these but must not modify them.
@@ -647,15 +689,21 @@ struct jpeg_decompress_struct {
 #endif
 
   JDIMENSION total_iMCU_rows;   /* # of iMCU rows in image */
-  /* The coefficient controller's input and output progress is measured in
-   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
-   * in fully interleaved JPEG scans, but are used whether the scan is
-   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
-   * rows of each component.  Therefore, the IDCT output contains
+  /* The coefficient or difference controller's input and output progress is
+   * measured in units of "iMCU" (interleaved MCU) rows.  These are the same as
+   * MCU rows in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  In lossy mode, we define an iMCU row as v_samp_factor
+   * DCT block rows of each component.  Therefore, the IDCT output contains
    * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
+   * In lossless mode, total_iMCU_rows is always equal to the image height.
    */
 
-  JSAMPLE *sample_range_limit;  /* table for fast range-limiting */
+  JSAMPLE *sample_range_limit;  /* table for fast range-limiting
+                                   If data_precision is 12 or 16, then this is
+                                   actually a J12SAMPLE pointer or a J16SAMPLE
+                                   pointer, so callers must type-cast it in
+                                   order to read 12-bit or 16-bit samples from
+                                   the array. */
 
   /*
    * These fields are valid during any one scan.
@@ -669,12 +717,13 @@ struct jpeg_decompress_struct {
   JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
   JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
 
-  int blocks_in_MCU;            /* # of DCT blocks per MCU */
+  int blocks_in_MCU;            /* # of data units per MCU */
   int MCU_membership[D_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
+  /* i'th data unit in an MCU */
 
-  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive/lossless JPEG parameters for
+                                   scan */
 
 #if JPEG_LIB_VERSION >= 80
   /* These fields are derived from Se of first SOS marker.
@@ -835,6 +884,11 @@ struct jpeg_memory_mgr {
   void *(*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject);
   void *(*alloc_large) (j_common_ptr cinfo, int pool_id,
                         size_t sizeofobject);
+  /* If cinfo->data_precision is 12 or 16, then this method and the
+   * access_virt_sarray method actually return a J12SAMPARRAY or a
+   * J16SAMPARRAY, so callers must type-cast the return value in order to
+   * read/write 12-bit or 16-bit samples from/to the array.
+   */
   JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id,
                               JDIMENSION samplesperrow, JDIMENSION numrows);
   JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id,
@@ -916,13 +970,11 @@ EXTERN(void) jpeg_destroy_decompress(j_decompress_ptr cinfo);
 EXTERN(void) jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile);
 EXTERN(void) jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile);
 
-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
 /* Data source and destination managers: memory buffers. */
 EXTERN(void) jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
                            unsigned long *outsize);
 EXTERN(void) jpeg_mem_src(j_decompress_ptr cinfo,
                           const unsigned char *inbuffer, unsigned long insize);
-#endif
 
 /* Default parameter setup for compression */
 EXTERN(void) jpeg_set_defaults(j_compress_ptr cinfo);
@@ -942,6 +994,9 @@ EXTERN(void) jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
                                   const unsigned int *basic_table,
                                   int scale_factor, boolean force_baseline);
 EXTERN(int) jpeg_quality_scaling(int quality);
+EXTERN(void) jpeg_enable_lossless(j_compress_ptr cinfo,
+                                  int predictor_selection_value,
+                                  int point_transform);
 EXTERN(void) jpeg_simple_progression(j_compress_ptr cinfo);
 EXTERN(void) jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress);
 EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table(j_common_ptr cinfo);
@@ -953,6 +1008,12 @@ EXTERN(void) jpeg_start_compress(j_compress_ptr cinfo,
 EXTERN(JDIMENSION) jpeg_write_scanlines(j_compress_ptr cinfo,
                                         JSAMPARRAY scanlines,
                                         JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_write_scanlines(j_compress_ptr cinfo,
+                                          J12SAMPARRAY scanlines,
+                                          JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg16_write_scanlines(j_compress_ptr cinfo,
+                                          J16SAMPARRAY scanlines,
+                                          JDIMENSION num_lines);
 EXTERN(void) jpeg_finish_compress(j_compress_ptr cinfo);
 
 #if JPEG_LIB_VERSION >= 70
@@ -963,6 +1024,9 @@ EXTERN(void) jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo);
 /* Replaces jpeg_write_scanlines when writing raw downsampled data. */
 EXTERN(JDIMENSION) jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
                                        JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_write_raw_data(j_compress_ptr cinfo,
+                                         J12SAMPIMAGE data,
+                                         JDIMENSION num_lines);
 
 /* Write a special marker.  See libjpeg.txt concerning safe usage. */
 EXTERN(void) jpeg_write_marker(j_compress_ptr cinfo, int marker,
@@ -998,15 +1062,28 @@ EXTERN(boolean) jpeg_start_decompress(j_decompress_ptr cinfo);
 EXTERN(JDIMENSION) jpeg_read_scanlines(j_decompress_ptr cinfo,
                                        JSAMPARRAY scanlines,
                                        JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg12_read_scanlines(j_decompress_ptr cinfo,
+                                         J12SAMPARRAY scanlines,
+                                         JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg16_read_scanlines(j_decompress_ptr cinfo,
+                                         J16SAMPARRAY scanlines,
+                                         JDIMENSION max_lines);
 EXTERN(JDIMENSION) jpeg_skip_scanlines(j_decompress_ptr cinfo,
                                        JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_skip_scanlines(j_decompress_ptr cinfo,
+                                         JDIMENSION num_lines);
 EXTERN(void) jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
                                 JDIMENSION *width);
+EXTERN(void) jpeg12_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                                  JDIMENSION *width);
 EXTERN(boolean) jpeg_finish_decompress(j_decompress_ptr cinfo);
 
 /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
 EXTERN(JDIMENSION) jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
                                       JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg12_read_raw_data(j_decompress_ptr cinfo,
+                                        J12SAMPIMAGE data,
+                                        JDIMENSION max_lines);
 
 /* Additional entry points for buffered-image mode. */
 EXTERN(boolean) jpeg_has_multiple_scans(j_decompress_ptr cinfo);
diff --git a/3rdparty/libjpeg-turbo/src/jquant1.c b/3rdparty/libjpeg-turbo/src/jquant1.c
index 73b83e16e5..2e914b919c 100644
--- a/3rdparty/libjpeg-turbo/src/jquant1.c
+++ b/3rdparty/libjpeg-turbo/src/jquant1.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2015, D. R. Commander.
+ * Copyright (C) 2009, 2015, 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -16,8 +16,9 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
-#ifdef QUANT_1PASS_SUPPORTED
+#if defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
 
 
 /*
@@ -66,7 +67,7 @@
  * worse, since the dither may be too much or too little at a given point.
  *
  * The normal calculation would be to form pixel value + dither, range-limit
- * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * this to 0.._MAXJSAMPLE, and then index into the colorindex table as usual.
  * We can skip the separate range-limiting step by extending the colorindex
  * table in both directions.
  */
@@ -144,13 +145,13 @@ typedef struct {
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;       /* The color map as a 2-D pixel array */
+  _JSAMPARRAY sv_colormap;      /* The color map as a 2-D pixel array */
   int sv_actual;                /* number of entries in use */
 
-  JSAMPARRAY colorindex;        /* Precomputed mapping for speed */
+  _JSAMPARRAY colorindex;       /* Precomputed mapping for speed */
   /* colorindex[i][j] = index of color closest to pixel value j in component i,
    * premultiplied as described above.  Since colormap indexes must fit into
-   * JSAMPLEs, the entries of this array will too.
+   * _JSAMPLEs, the entries of this array will too.
    */
   boolean is_padded;            /* is the colorindex padded for odither? */
 
@@ -248,24 +249,24 @@ select_ncolors(j_decompress_ptr cinfo, int Ncolors[])
 LOCAL(int)
 output_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
 /* Return j'th output value, where j will range from 0 to maxj */
-/* The output values must fall in 0..MAXJSAMPLE in increasing order */
+/* The output values must fall in 0.._MAXJSAMPLE in increasing order */
 {
-  /* We always provide values 0 and MAXJSAMPLE for each component;
+  /* We always provide values 0 and _MAXJSAMPLE for each component;
    * any additional values are equally spaced between these limits.
    * (Forcing the upper and lower values to the limits ensures that
    * dithering can't produce a color outside the selected gamut.)
    */
-  return (int)(((JLONG)j * MAXJSAMPLE + maxj / 2) / maxj);
+  return (int)(((JLONG)j * _MAXJSAMPLE + maxj / 2) / maxj);
 }
 
 
 LOCAL(int)
 largest_input_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
 /* Return largest input value that should map to j'th output value */
-/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
+/* Must have largest(j=0) >= 0, and largest(j=maxj) >= _MAXJSAMPLE */
 {
   /* Breakpoints are halfway between values returned by output_value */
-  return (int)(((JLONG)(2 * j + 1) * MAXJSAMPLE + maxj) / (2 * maxj));
+  return (int)(((JLONG)(2 * j + 1) * _MAXJSAMPLE + maxj) / (2 * maxj));
 }
 
 
@@ -277,7 +278,7 @@ LOCAL(void)
 create_colormap(j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
-  JSAMPARRAY colormap;          /* Created colormap */
+  _JSAMPARRAY colormap;         /* Created colormap */
   int total_colors;             /* Number of distinct output colors */
   int i, j, k, nci, blksize, blkdist, ptr, val;
 
@@ -296,7 +297,7 @@ create_colormap(j_decompress_ptr cinfo)
   /* The colors are ordered in the map in standard row-major order, */
   /* i.e. rightmost (highest-indexed) color changes most rapidly. */
 
-  colormap = (*cinfo->mem->alloc_sarray)
+  colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
     ((j_common_ptr)cinfo, JPOOL_IMAGE,
      (JDIMENSION)total_colors, (JDIMENSION)cinfo->out_color_components);
 
@@ -315,7 +316,7 @@ create_colormap(j_decompress_ptr cinfo)
       for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
         /* fill in blksize entries beginning at ptr */
         for (k = 0; k < blksize; k++)
-          colormap[i][ptr + k] = (JSAMPLE)val;
+          colormap[i][ptr + k] = (_JSAMPLE)val;
       }
     }
     blkdist = blksize;          /* blksize of this color is blkdist of next */
@@ -337,25 +338,25 @@ LOCAL(void)
 create_colorindex(j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
-  JSAMPROW indexptr;
+  _JSAMPROW indexptr;
   int i, j, k, nci, blksize, val, pad;
 
-  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
-   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
+  /* For ordered dither, we pad the color index tables by _MAXJSAMPLE in
+   * each direction (input index values can be -_MAXJSAMPLE .. 2*_MAXJSAMPLE).
    * This is not necessary in the other dithering modes.  However, we
    * flag whether it was done in case user changes dithering mode.
    */
   if (cinfo->dither_mode == JDITHER_ORDERED) {
-    pad = MAXJSAMPLE * 2;
+    pad = _MAXJSAMPLE * 2;
     cquantize->is_padded = TRUE;
   } else {
     pad = 0;
     cquantize->is_padded = FALSE;
   }
 
-  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+  cquantize->colorindex = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
     ((j_common_ptr)cinfo, JPOOL_IMAGE,
-     (JDIMENSION)(MAXJSAMPLE + 1 + pad),
+     (JDIMENSION)(_MAXJSAMPLE + 1 + pad),
      (JDIMENSION)cinfo->out_color_components);
 
   /* blksize is number of adjacent repeated entries for a component */
@@ -368,24 +369,24 @@ create_colorindex(j_decompress_ptr cinfo)
 
     /* adjust colorindex pointers to provide padding at negative indexes. */
     if (pad)
-      cquantize->colorindex[i] += MAXJSAMPLE;
+      cquantize->colorindex[i] += _MAXJSAMPLE;
 
     /* in loop, val = index of current output value, */
     /* and k = largest j that maps to current val */
     indexptr = cquantize->colorindex[i];
     val = 0;
     k = largest_input_value(cinfo, i, 0, nci - 1);
-    for (j = 0; j <= MAXJSAMPLE; j++) {
+    for (j = 0; j <= _MAXJSAMPLE; j++) {
       while (j > k)             /* advance val if past boundary */
         k = largest_input_value(cinfo, i, ++val, nci - 1);
       /* premultiply so that no multiplication needed in main processing */
-      indexptr[j] = (JSAMPLE)(val * blksize);
+      indexptr[j] = (_JSAMPLE)(val * blksize);
     }
     /* Pad at both ends if necessary */
     if (pad)
-      for (j = 1; j <= MAXJSAMPLE; j++) {
+      for (j = 1; j <= _MAXJSAMPLE; j++) {
         indexptr[-j] = indexptr[0];
-        indexptr[MAXJSAMPLE + j] = indexptr[MAXJSAMPLE];
+        indexptr[_MAXJSAMPLE + j] = indexptr[_MAXJSAMPLE];
       }
   }
 }
@@ -406,16 +407,16 @@ make_odither_array(j_decompress_ptr cinfo, int ncolors)
   odither = (ODITHER_MATRIX_PTR)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(ODITHER_MATRIX));
-  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
+  /* The inter-value distance for this color is _MAXJSAMPLE/(ncolors-1).
    * Hence the dither value for the matrix cell with fill order f
-   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * _MAXJSAMPLE/(ncolors-1).
    * On 16-bit-int machine, be careful to avoid overflow.
    */
   den = 2 * ODITHER_CELLS * ((JLONG)(ncolors - 1));
   for (j = 0; j < ODITHER_SIZE; j++) {
     for (k = 0; k < ODITHER_SIZE; k++) {
       num = ((JLONG)(ODITHER_CELLS - 1 -
-                     2 * ((int)base_dither_matrix[j][k]))) * MAXJSAMPLE;
+                     2 * ((int)base_dither_matrix[j][k]))) * _MAXJSAMPLE;
       /* Ensure round towards zero despite C's lack of consistency
        * about rounding negative values in integer division...
        */
@@ -460,14 +461,14 @@ create_odither_tables(j_decompress_ptr cinfo)
  */
 
 METHODDEF(void)
-color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-               JSAMPARRAY output_buf, int num_rows)
+color_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+               _JSAMPARRAY output_buf, int num_rows)
 /* General case, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
-  JSAMPARRAY colorindex = cquantize->colorindex;
+  _JSAMPARRAY colorindex = cquantize->colorindex;
   register int pixcode, ci;
-  register JSAMPROW ptrin, ptrout;
+  register _JSAMPROW ptrin, ptrout;
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -481,23 +482,23 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       for (ci = 0; ci < nc; ci++) {
         pixcode += colorindex[ci][*ptrin++];
       }
-      *ptrout++ = (JSAMPLE)pixcode;
+      *ptrout++ = (_JSAMPLE)pixcode;
     }
   }
 }
 
 
 METHODDEF(void)
-color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                JSAMPARRAY output_buf, int num_rows)
+color_quantize3(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
   register int pixcode;
-  register JSAMPROW ptrin, ptrout;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  register _JSAMPROW ptrin, ptrout;
+  _JSAMPROW colorindex0 = cquantize->colorindex[0];
+  _JSAMPROW colorindex1 = cquantize->colorindex[1];
+  _JSAMPROW colorindex2 = cquantize->colorindex[2];
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -509,21 +510,21 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       pixcode  = colorindex0[*ptrin++];
       pixcode += colorindex1[*ptrin++];
       pixcode += colorindex2[*ptrin++];
-      *ptrout++ = (JSAMPLE)pixcode;
+      *ptrout++ = (_JSAMPLE)pixcode;
     }
   }
 }
 
 
 METHODDEF(void)
-quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                    JSAMPARRAY output_buf, int num_rows)
+quantize_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                    _JSAMPARRAY output_buf, int num_rows)
 /* General case, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex_ci;
   int *dither;                  /* points to active row of dither matrix */
   int row_index, col_index;     /* current indexes into dither matrix */
   int nc = cinfo->out_color_components;
@@ -534,7 +535,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
 
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
-    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE)));
+    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
     row_index = cquantize->row_index;
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
@@ -544,11 +545,11 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       col_index = 0;
 
       for (col = width; col > 0; col--) {
-        /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+        /* Form pixel value + dither, range-limit to 0.._MAXJSAMPLE,
          * select output value, accumulate into output code for this pixel.
          * Range-limiting need not be done explicitly, as we have extended
          * the colorindex table to produce the right answers for out-of-range
-         * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+         * inputs.  The maximum dither is +- _MAXJSAMPLE; this sets the
          * required amount of padding.
          */
         *output_ptr +=
@@ -566,17 +567,17 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
 
 
 METHODDEF(void)
-quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                     JSAMPARRAY output_buf, int num_rows)
+quantize3_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                     _JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
   register int pixcode;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex0 = cquantize->colorindex[0];
+  _JSAMPROW colorindex1 = cquantize->colorindex[1];
+  _JSAMPROW colorindex2 = cquantize->colorindex[2];
   int *dither0;                 /* points to active row of dither matrix */
   int *dither1;
   int *dither2;
@@ -598,7 +599,7 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       pixcode  = colorindex0[(*input_ptr++) + dither0[col_index]];
       pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
       pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
-      *output_ptr++ = (JSAMPLE)pixcode;
+      *output_ptr++ = (_JSAMPLE)pixcode;
       col_index = (col_index + 1) & ODITHER_MASK;
     }
     row_index = (row_index + 1) & ODITHER_MASK;
@@ -608,8 +609,8 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
 
 
 METHODDEF(void)
-quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                   JSAMPARRAY output_buf, int num_rows)
+quantize_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                   _JSAMPARRAY output_buf, int num_rows)
 /* General case, with Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
@@ -619,10 +620,10 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
   LOCFSERROR bnexterr;          /* error for below/next col */
   LOCFSERROR delta;
   register FSERRPTR errorptr;   /* => fserrors[] at column before current */
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  JSAMPROW colormap_ci;
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex_ci;
+  _JSAMPROW colormap_ci;
   int pixcode;
   int nc = cinfo->out_color_components;
   int dir;                      /* 1 for left-to-right, -1 for right-to-left */
@@ -631,12 +632,12 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   SHIFT_TEMPS
 
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
-    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE)));
+    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
       output_ptr = output_buf[row];
@@ -670,15 +671,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
          * Note: errorptr points to *previous* column's array entry.
          */
         cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-        /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-         * The maximum error is +- MAXJSAMPLE; this sets the required size
+        /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
+         * The maximum error is +- _MAXJSAMPLE; this sets the required size
          * of the range_limit array.
          */
         cur += *input_ptr;
         cur = range_limit[cur];
         /* Select output value, accumulate into output code for this pixel */
         pixcode = colorindex_ci[cur];
-        *output_ptr += (JSAMPLE)pixcode;
+        *output_ptr += (_JSAMPLE)pixcode;
         /* Compute actual representation error at this pixel */
         /* Note: we can do this even though we don't have the final */
         /* pixel code, because the colormap is orthogonal. */
@@ -745,22 +746,22 @@ start_pass_1_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
   int i;
 
   /* Install my colormap. */
-  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
   cinfo->actual_number_of_colors = cquantize->sv_actual;
 
   /* Initialize for desired dithering mode. */
   switch (cinfo->dither_mode) {
   case JDITHER_NONE:
     if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = color_quantize3;
+      cquantize->pub._color_quantize = color_quantize3;
     else
-      cquantize->pub.color_quantize = color_quantize;
+      cquantize->pub._color_quantize = color_quantize;
     break;
   case JDITHER_ORDERED:
     if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = quantize3_ord_dither;
+      cquantize->pub._color_quantize = quantize3_ord_dither;
     else
-      cquantize->pub.color_quantize = quantize_ord_dither;
+      cquantize->pub._color_quantize = quantize_ord_dither;
     cquantize->row_index = 0;   /* initialize state for ordered dither */
     /* If user changed to ordered dither from another mode,
      * we must recreate the color index table with padding.
@@ -773,7 +774,7 @@ start_pass_1_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
       create_odither_tables(cinfo);
     break;
   case JDITHER_FS:
-    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->pub._color_quantize = quantize_fs_dither;
     cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
     /* Allocate Floyd-Steinberg workspace if didn't already. */
     if (cquantize->fserrors[0] == NULL)
@@ -818,10 +819,17 @@ new_color_map_1_quant(j_decompress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_1pass_quantizer(j_decompress_ptr cinfo)
+_jinit_1pass_quantizer(j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Color quantization is not supported with lossless JPEG images */
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_cquantizer));
@@ -835,9 +843,9 @@ jinit_1pass_quantizer(j_decompress_ptr cinfo)
   /* Make sure my internal arrays won't overflow */
   if (cinfo->out_color_components > MAX_Q_COMPS)
     ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
-  /* Make sure colormap indexes can be represented by JSAMPLEs */
-  if (cinfo->desired_number_of_colors > (MAXJSAMPLE + 1))
-    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE + 1);
+  /* Make sure colormap indexes can be represented by _JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (_MAXJSAMPLE + 1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, _MAXJSAMPLE + 1);
 
   /* Create the colormap and color index table. */
   create_colormap(cinfo);
@@ -853,4 +861,4 @@ jinit_1pass_quantizer(j_decompress_ptr cinfo)
     alloc_fs_workspace(cinfo);
 }
 
-#endif /* QUANT_1PASS_SUPPORTED */
+#endif /* defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
diff --git a/3rdparty/libjpeg-turbo/src/jquant2.c b/3rdparty/libjpeg-turbo/src/jquant2.c
index 44efb18cad..9ba51fa887 100644
--- a/3rdparty/libjpeg-turbo/src/jquant2.c
+++ b/3rdparty/libjpeg-turbo/src/jquant2.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2014-2015, 2020, D. R. Commander.
+ * Copyright (C) 2009, 2014-2015, 2020, 2022-2023, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -23,8 +23,9 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
-#ifdef QUANT_2PASS_SUPPORTED
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
 
 
 /*
@@ -106,7 +107,7 @@ static const int c_scales[3] = { R_SCALE, G_SCALE, B_SCALE };
  * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.
  */
 
-#define MAXNUMCOLORS  (MAXJSAMPLE + 1) /* maximum size of colormap */
+#define MAXNUMCOLORS  (_MAXJSAMPLE + 1) /* maximum size of colormap */
 
 /* These will do the right thing for either R,G,B or B,G,R color order,
  * but you may not like the results for other color orders.
@@ -173,7 +174,7 @@ typedef struct {
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;       /* colormap allocated at init time */
+  _JSAMPARRAY sv_colormap;      /* colormap allocated at init time */
   int desired;                  /* desired # of colors = size of colormap */
 
   /* Variables for accumulating image statistics */
@@ -200,11 +201,11 @@ typedef my_cquantizer *my_cquantize_ptr;
  */
 
 METHODDEF(void)
-prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                 JSAMPARRAY output_buf, int num_rows)
+prescan_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                 _JSAMPARRAY output_buf, int num_rows)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
-  register JSAMPROW ptr;
+  register _JSAMPROW ptr;
   register histptr histp;
   register hist3d histogram = cquantize->histogram;
   int row;
@@ -377,7 +378,7 @@ have_c2max:
    * against making long narrow boxes, and it has the side benefit that
    * a box is splittable iff norm > 0.
    * Since the differences are expressed in histogram-cell units,
-   * we have to shift back to JSAMPLE units to get consistent distances;
+   * we have to shift back to _JSAMPLE units to get consistent distances;
    * after which, we scale according to the selected distance scale factors.
    */
   dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
@@ -508,9 +509,12 @@ compute_color(j_decompress_ptr cinfo, boxptr boxp, int icolor)
       }
     }
 
-  cinfo->colormap[0][icolor] = (JSAMPLE)((c0total + (total >> 1)) / total);
-  cinfo->colormap[1][icolor] = (JSAMPLE)((c1total + (total >> 1)) / total);
-  cinfo->colormap[2][icolor] = (JSAMPLE)((c2total + (total >> 1)) / total);
+  ((_JSAMPARRAY)cinfo->colormap)[0][icolor] =
+    (_JSAMPLE)((c0total + (total >> 1)) / total);
+  ((_JSAMPARRAY)cinfo->colormap)[1][icolor] =
+    (_JSAMPLE)((c1total + (total >> 1)) / total);
+  ((_JSAMPARRAY)cinfo->colormap)[2][icolor] =
+    (_JSAMPLE)((c2total + (total >> 1)) / total);
 }
 
 
@@ -528,11 +532,11 @@ select_colors(j_decompress_ptr cinfo, int desired_colors)
   /* Initialize one box containing whole space */
   numboxes = 1;
   boxlist[0].c0min = 0;
-  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
+  boxlist[0].c0max = _MAXJSAMPLE >> C0_SHIFT;
   boxlist[0].c1min = 0;
-  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
+  boxlist[0].c1max = _MAXJSAMPLE >> C1_SHIFT;
   boxlist[0].c2min = 0;
-  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
+  boxlist[0].c2max = _MAXJSAMPLE >> C2_SHIFT;
   /* Shrink it to actually-used volume and set its statistics */
   update_box(cinfo, &boxlist[0]);
   /* Perform median-cut to produce final box list */
@@ -623,7 +627,7 @@ select_colors(j_decompress_ptr cinfo, int desired_colors)
 
 LOCAL(int)
 find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-                   JSAMPLE colorlist[])
+                   _JSAMPLE colorlist[])
 /* Locate the colormap entries close enough to an update box to be candidates
  * for the nearest entry to some cell(s) in the update box.  The update box
  * is specified by the center coordinates of its first cell.  The number of
@@ -665,7 +669,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 
   for (i = 0; i < numcolors; i++) {
     /* We compute the squared-c0-distance term, then add in the other two. */
-    x = cinfo->colormap[0][i];
+    x = ((_JSAMPARRAY)cinfo->colormap)[0][i];
     if (x < minc0) {
       tdist = (x - minc0) * C0_SCALE;
       min_dist = tdist * tdist;
@@ -688,7 +692,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
       }
     }
 
-    x = cinfo->colormap[1][i];
+    x = ((_JSAMPARRAY)cinfo->colormap)[1][i];
     if (x < minc1) {
       tdist = (x - minc1) * C1_SCALE;
       min_dist += tdist * tdist;
@@ -710,7 +714,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
       }
     }
 
-    x = cinfo->colormap[2][i];
+    x = ((_JSAMPARRAY)cinfo->colormap)[2][i];
     if (x < minc2) {
       tdist = (x - minc2) * C2_SCALE;
       min_dist += tdist * tdist;
@@ -744,7 +748,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
   ncolors = 0;
   for (i = 0; i < numcolors; i++) {
     if (mindist[i] <= minmaxdist)
-      colorlist[ncolors++] = (JSAMPLE)i;
+      colorlist[ncolors++] = (_JSAMPLE)i;
   }
   return ncolors;
 }
@@ -752,7 +756,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 
 LOCAL(void)
 find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-                 int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+                 int numcolors, _JSAMPLE colorlist[], _JSAMPLE bestcolor[])
 /* Find the closest colormap entry for each cell in the update box,
  * given the list of candidate colors prepared by find_nearby_colors.
  * Return the indexes of the closest entries in the bestcolor[] array.
@@ -763,7 +767,7 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
   int ic0, ic1, ic2;
   int i, icolor;
   register JLONG *bptr;         /* pointer into bestdist[] array */
-  JSAMPLE *cptr;                /* pointer into bestcolor[] array */
+  _JSAMPLE *cptr;               /* pointer into bestcolor[] array */
   JLONG dist0, dist1;           /* initial distance values */
   register JLONG dist2;         /* current distance in inner loop */
   JLONG xx0, xx1;               /* distance increments */
@@ -790,11 +794,11 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
   for (i = 0; i < numcolors; i++) {
     icolor = colorlist[i];
     /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
+    inc0 = (minc0 - ((_JSAMPARRAY)cinfo->colormap)[0][icolor]) * C0_SCALE;
     dist0 = inc0 * inc0;
-    inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
+    inc1 = (minc1 - ((_JSAMPARRAY)cinfo->colormap)[1][icolor]) * C1_SCALE;
     dist0 += inc1 * inc1;
-    inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
+    inc2 = (minc2 - ((_JSAMPARRAY)cinfo->colormap)[2][icolor]) * C2_SCALE;
     dist0 += inc2 * inc2;
     /* Form the initial difference increments */
     inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
@@ -813,7 +817,7 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
         for (ic2 = BOX_C2_ELEMS - 1; ic2 >= 0; ic2--) {
           if (dist2 < *bptr) {
             *bptr = dist2;
-            *cptr = (JSAMPLE)icolor;
+            *cptr = (_JSAMPLE)icolor;
           }
           dist2 += xx2;
           xx2 += 2 * STEP_C2 * STEP_C2;
@@ -840,13 +844,13 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
   hist3d histogram = cquantize->histogram;
   int minc0, minc1, minc2;      /* lower left corner of update box */
   int ic0, ic1, ic2;
-  register JSAMPLE *cptr;       /* pointer into bestcolor[] array */
+  register _JSAMPLE *cptr;      /* pointer into bestcolor[] array */
   register histptr cachep;      /* pointer into main cache array */
   /* This array lists the candidate colormap indexes. */
-  JSAMPLE colorlist[MAXNUMCOLORS];
+  _JSAMPLE colorlist[MAXNUMCOLORS];
   int numcolors;                /* number of candidate colors */
   /* This array holds the actually closest colormap index for each cell. */
-  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+  _JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
   /* Convert cell coordinates to update box ID */
   c0 >>= BOX_C0_LOG;
@@ -891,13 +895,13 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
  */
 
 METHODDEF(void)
-pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                JSAMPARRAY output_buf, int num_rows)
+pass2_no_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
 /* This version performs no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
   hist3d histogram = cquantize->histogram;
-  register JSAMPROW inptr, outptr;
+  register _JSAMPROW inptr, outptr;
   register histptr cachep;
   register int c0, c1, c2;
   int row;
@@ -918,15 +922,15 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       if (*cachep == 0)
         fill_inverse_cmap(cinfo, c0, c1, c2);
       /* Now emit the colormap index for this cell */
-      *outptr++ = (JSAMPLE)(*cachep - 1);
+      *outptr++ = (_JSAMPLE)(*cachep - 1);
     }
   }
 }
 
 
 METHODDEF(void)
-pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-                JSAMPARRAY output_buf, int num_rows)
+pass2_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
 /* This version performs Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
@@ -935,19 +939,19 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
   LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
   LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
   register FSERRPTR errorptr;   /* => fserrors[] at column before current */
-  JSAMPROW inptr;               /* => current input pixel */
-  JSAMPROW outptr;              /* => current output pixel */
+  _JSAMPROW inptr;              /* => current input pixel */
+  _JSAMPROW outptr;             /* => current output pixel */
   histptr cachep;
   int dir;                      /* +1 or -1 depending on direction */
   int dir3;                     /* 3*dir, for advancing inptr & errorptr */
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
   int *error_limit = cquantize->error_limiter;
-  JSAMPROW colormap0 = cinfo->colormap[0];
-  JSAMPROW colormap1 = cinfo->colormap[1];
-  JSAMPROW colormap2 = cinfo->colormap[2];
+  _JSAMPROW colormap0 = ((_JSAMPARRAY)cinfo->colormap)[0];
+  _JSAMPROW colormap1 = ((_JSAMPARRAY)cinfo->colormap)[1];
+  _JSAMPROW colormap2 = ((_JSAMPARRAY)cinfo->colormap)[2];
   SHIFT_TEMPS
 
   for (row = 0; row < num_rows; row++) {
@@ -992,8 +996,8 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       cur0 = error_limit[cur0];
       cur1 = error_limit[cur1];
       cur2 = error_limit[cur2];
-      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+      /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
+       * The maximum error is +- _MAXJSAMPLE (or less with error limiting);
        * this sets the required size of the range_limit array.
        */
       cur0 += inptr[0];
@@ -1013,7 +1017,7 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
       /* Now emit the colormap index for this cell */
       {
         register int pixcode = *cachep - 1;
-        *outptr = (JSAMPLE)pixcode;
+        *outptr = (_JSAMPLE)pixcode;
         /* Compute representation error for this pixel */
         cur0 -= colormap0[pixcode];
         cur1 -= colormap1[pixcode];
@@ -1064,7 +1068,7 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
 /*
  * Initialize the error-limiting transfer function (lookup table).
  * The raw F-S error computation can potentially compute error values of up to
- * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * +- _MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
  * much less, otherwise obviously wrong pixels will be created.  (Typical
  * effects include weird fringes at color-area boundaries, isolated bright
  * pixels in a dark area, etc.)  The standard advice for avoiding this problem
@@ -1073,7 +1077,7 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
  * error buildup.  However, that only prevents the error from getting
  * completely out of hand; Aaron Giles reports that error limiting improves
  * the results even with corner colors allocated.
- * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * A simple clamping of the error values to about +- _MAXJSAMPLE/8 works pretty
  * well, but the smoother transfer function used below is even better.  Thanks
  * to Aaron Giles for this idea.
  */
@@ -1087,22 +1091,22 @@ init_error_limit(j_decompress_ptr cinfo)
   int in, out;
 
   table = (int *)(*cinfo->mem->alloc_small)
-    ((j_common_ptr)cinfo, JPOOL_IMAGE, (MAXJSAMPLE * 2 + 1) * sizeof(int));
-  table += MAXJSAMPLE;          /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+    ((j_common_ptr)cinfo, JPOOL_IMAGE, (_MAXJSAMPLE * 2 + 1) * sizeof(int));
+  table += _MAXJSAMPLE;         /* so can index -_MAXJSAMPLE .. +_MAXJSAMPLE */
   cquantize->error_limiter = table;
 
-#define STEPSIZE  ((MAXJSAMPLE + 1) / 16)
-  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
+#define STEPSIZE  ((_MAXJSAMPLE + 1) / 16)
+  /* Map errors 1:1 up to +- _MAXJSAMPLE/16 */
   out = 0;
   for (in = 0; in < STEPSIZE; in++, out++) {
     table[in] = out;  table[-in] = -out;
   }
-  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
+  /* Map errors 1:2 up to +- 3*_MAXJSAMPLE/16 */
   for (; in < STEPSIZE * 3; in++, out += (in & 1) ? 0 : 1) {
     table[in] = out;  table[-in] = -out;
   }
-  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
-  for (; in <= MAXJSAMPLE; in++) {
+  /* Clamp the rest to final out value (which is (_MAXJSAMPLE+1)/8) */
+  for (; in <= _MAXJSAMPLE; in++) {
     table[in] = out;  table[-in] = -out;
   }
 #undef STEPSIZE
@@ -1119,7 +1123,7 @@ finish_pass1(j_decompress_ptr cinfo)
   my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
 
   /* Select the representative colors and fill in cinfo->colormap */
-  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
   select_colors(cinfo, cquantize->desired);
   /* Force next pass to zero the color index table */
   cquantize->needs_zeroed = TRUE;
@@ -1151,15 +1155,15 @@ start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
 
   if (is_pre_scan) {
     /* Set up method pointers */
-    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub._color_quantize = prescan_quantize;
     cquantize->pub.finish_pass = finish_pass1;
     cquantize->needs_zeroed = TRUE; /* Always zero histogram */
   } else {
     /* Set up method pointers */
     if (cinfo->dither_mode == JDITHER_FS)
-      cquantize->pub.color_quantize = pass2_fs_dither;
+      cquantize->pub._color_quantize = pass2_fs_dither;
     else
-      cquantize->pub.color_quantize = pass2_no_dither;
+      cquantize->pub._color_quantize = pass2_no_dither;
     cquantize->pub.finish_pass = finish_pass2;
 
     /* Make sure color count is acceptable */
@@ -1215,11 +1219,14 @@ new_color_map_2_quant(j_decompress_ptr cinfo)
  */
 
 GLOBAL(void)
-jinit_2pass_quantizer(j_decompress_ptr cinfo)
+_jinit_2pass_quantizer(j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize;
   int i;
 
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(my_cquantizer));
@@ -1230,7 +1237,8 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
   cquantize->error_limiter = NULL;
 
   /* Make sure jdmaster didn't give me a case I can't handle */
-  if (cinfo->out_color_components != 3)
+  if (cinfo->out_color_components != 3 ||
+      cinfo->out_color_space == JCS_RGB565 || cinfo->master->lossless)
     ERREXIT(cinfo, JERR_NOTIMPL);
 
   /* Allocate the histogram/inverse colormap storage */
@@ -1253,10 +1261,10 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
     /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
     if (desired < 8)
       ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
-    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    /* Make sure colormap indexes can be represented by _JSAMPLEs */
     if (desired > MAXNUMCOLORS)
       ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+    cquantize->sv_colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
       ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)desired, (JDIMENSION)3);
     cquantize->desired = desired;
   } else
@@ -1282,4 +1290,4 @@ jinit_2pass_quantizer(j_decompress_ptr cinfo)
   }
 }
 
-#endif /* QUANT_2PASS_SUPPORTED */
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
diff --git a/3rdparty/libjpeg-turbo/src/jsamplecomp.h b/3rdparty/libjpeg-turbo/src/jsamplecomp.h
new file mode 100644
index 0000000000..f3f275e6e2
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/jsamplecomp.h
@@ -0,0 +1,336 @@
+/*
+ * jsamplecomp.h
+ *
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+/* In source files that must be compiled for multiple data precisions, we
+ * prefix all precision-dependent data types, macros, methods, fields, and
+ * function names with an underscore.  Including this file replaces those
+ * precision-independent tokens with their precision-dependent equivalents,
+ * based on the value of BITS_IN_JSAMPLE.
+ */
+
+#ifndef JSAMPLECOMP_H
+#define JSAMPLECOMP_H
+
+#if BITS_IN_JSAMPLE == 16
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  J16SAMPLE
+
+#define _MAXJSAMPLE  MAXJ16SAMPLE
+#define _CENTERJSAMPLE   CENTERJ16SAMPLE
+
+#define _JSAMPROW  J16SAMPROW
+#define _JSAMPARRAY  J16SAMPARRAY
+#define _JSAMPIMAGE  J16SAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg16_write_scanlines
+#define _jpeg_read_scanlines  jpeg16_read_scanlines
+
+/* Internal methods (jpegint.h) */
+
+#ifdef C_LOSSLESS_SUPPORTED
+/* Use the 16-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data_16
+/* Use the 16-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data_16
+/* Use the 16-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data_16
+/* Use the 16-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_16
+/* Use the 16-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample_16
+#endif
+#ifdef D_LOSSLESS_SUPPORTED
+/* Use the 16-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data_16
+/* Use the 16-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data_16
+/* Use the 16-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data_16
+/* Use the 16-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample_16
+/* Use the 16-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_16
+#endif
+
+/* Global internal functions (jpegint.h) */
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_main_controller  j16init_c_main_controller
+#define _jinit_c_prep_controller  j16init_c_prep_controller
+#define _jinit_color_converter  j16init_color_converter
+#define _jinit_downsampler  j16init_downsampler
+#define _jinit_c_diff_controller  j16init_c_diff_controller
+#define _jinit_lossless_compressor  j16init_lossless_compressor
+#endif
+
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_main_controller  j16init_d_main_controller
+#define _jinit_d_post_controller  j16init_d_post_controller
+#define _jinit_upsampler  j16init_upsampler
+#define _jinit_color_deconverter  j16init_color_deconverter
+#define _jinit_merged_upsampler  j16init_merged_upsampler
+#define _jinit_d_diff_controller  j16init_d_diff_controller
+#define _jinit_lossless_decompressor  j16init_lossless_decompressor
+#endif
+
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+#define _jcopy_sample_rows  j16copy_sample_rows
+#endif
+
+/* Internal fields (cdjpeg.h) */
+
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+/* Use the 16-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer16
+#endif
+
+/* Image I/O functions (cdjpeg.h) */
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_read_gif  j16init_read_gif
+#define _jinit_read_ppm  j16init_read_ppm
+#endif
+
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_write_ppm  j16init_write_ppm
+#endif
+
+#elif BITS_IN_JSAMPLE == 12
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  J12SAMPLE
+
+#define _MAXJSAMPLE  MAXJ12SAMPLE
+#define _CENTERJSAMPLE   CENTERJ12SAMPLE
+
+#define _JSAMPROW  J12SAMPROW
+#define _JSAMPARRAY  J12SAMPARRAY
+#define _JSAMPIMAGE  J12SAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg12_write_scanlines
+#define _jpeg_write_raw_data  jpeg12_write_raw_data
+#define _jpeg_read_scanlines  jpeg12_read_scanlines
+#define _jpeg_skip_scanlines  jpeg12_skip_scanlines
+#define _jpeg_crop_scanline  jpeg12_crop_scanline
+#define _jpeg_read_raw_data  jpeg12_read_raw_data
+
+/* Internal methods (jpegint.h) */
+
+/* Use the 12-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data_12
+/* Use the 12-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data_12
+/* Use the 12-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data_12
+/* Use the 12-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_12
+/* Use the 12-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample_12
+/* Use the 12-bit method in the jpeg_forward_dct structure. */
+#define _forward_DCT  forward_DCT_12
+/* Use the 12-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data_12
+/* Use the 12-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data_12
+/* Use the 12-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data_12
+/* Use the 12-bit method in the jpeg_inverse_dct structure. */
+#define _inverse_DCT_method_ptr  inverse_DCT_12_method_ptr
+#define _inverse_DCT  inverse_DCT_12
+/* Use the 12-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample_12
+/* Use the 12-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_12
+/* Use the 12-bit method in the jpeg_color_quantizer structure. */
+#define _color_quantize  color_quantize_12
+
+/* Global internal functions (jpegint.h) */
+#define _jinit_c_main_controller  j12init_c_main_controller
+#define _jinit_c_prep_controller  j12init_c_prep_controller
+#define _jinit_c_coef_controller  j12init_c_coef_controller
+#define _jinit_color_converter  j12init_color_converter
+#define _jinit_downsampler  j12init_downsampler
+#define _jinit_forward_dct  j12init_forward_dct
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_diff_controller  j12init_c_diff_controller
+#define _jinit_lossless_compressor  j12init_lossless_compressor
+#endif
+
+#define _jinit_d_main_controller  j12init_d_main_controller
+#define _jinit_d_coef_controller  j12init_d_coef_controller
+#define _jinit_d_post_controller  j12init_d_post_controller
+#define _jinit_inverse_dct  j12init_inverse_dct
+#define _jinit_upsampler  j12init_upsampler
+#define _jinit_color_deconverter  j12init_color_deconverter
+#define _jinit_1pass_quantizer  j12init_1pass_quantizer
+#define _jinit_2pass_quantizer  j12init_2pass_quantizer
+#define _jinit_merged_upsampler  j12init_merged_upsampler
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_diff_controller  j12init_d_diff_controller
+#define _jinit_lossless_decompressor  j12init_lossless_decompressor
+#endif
+
+#define _jcopy_sample_rows  j12copy_sample_rows
+
+/* Global internal functions (jdct.h) */
+#define _jpeg_fdct_islow  jpeg12_fdct_islow
+#define _jpeg_fdct_ifast  jpeg12_fdct_ifast
+
+#define _jpeg_idct_islow  jpeg12_idct_islow
+#define _jpeg_idct_ifast  jpeg12_idct_ifast
+#define _jpeg_idct_float  jpeg12_idct_float
+#define _jpeg_idct_7x7  jpeg12_idct_7x7
+#define _jpeg_idct_6x6  jpeg12_idct_6x6
+#define _jpeg_idct_5x5  jpeg12_idct_5x5
+#define _jpeg_idct_4x4  jpeg12_idct_4x4
+#define _jpeg_idct_3x3  jpeg12_idct_3x3
+#define _jpeg_idct_2x2  jpeg12_idct_2x2
+#define _jpeg_idct_1x1  jpeg12_idct_1x1
+#define _jpeg_idct_9x9  jpeg12_idct_9x9
+#define _jpeg_idct_10x10  jpeg12_idct_10x10
+#define _jpeg_idct_11x11  jpeg12_idct_11x11
+#define _jpeg_idct_12x12  jpeg12_idct_12x12
+#define _jpeg_idct_13x13  jpeg12_idct_13x13
+#define _jpeg_idct_14x14  jpeg12_idct_14x14
+#define _jpeg_idct_15x15  jpeg12_idct_15x15
+#define _jpeg_idct_16x16  jpeg12_idct_16x16
+
+/* Internal fields (cdjpeg.h) */
+
+/* Use the 12-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer12
+
+/* Image I/O functions (cdjpeg.h) */
+#define _jinit_read_gif  j12init_read_gif
+#define _jinit_write_gif  j12init_write_gif
+#define _jinit_read_ppm  j12init_read_ppm
+#define _jinit_write_ppm  j12init_write_ppm
+
+#define _read_color_map  read_color_map_12
+
+#else /* BITS_IN_JSAMPLE */
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  JSAMPLE
+
+#define _MAXJSAMPLE  MAXJSAMPLE
+#define _CENTERJSAMPLE   CENTERJSAMPLE
+
+#define _JSAMPROW  JSAMPROW
+#define _JSAMPARRAY  JSAMPARRAY
+#define _JSAMPIMAGE  JSAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg_write_scanlines
+#define _jpeg_write_raw_data  jpeg_write_raw_data
+#define _jpeg_read_scanlines  jpeg_read_scanlines
+#define _jpeg_skip_scanlines  jpeg_skip_scanlines
+#define _jpeg_crop_scanline  jpeg_crop_scanline
+#define _jpeg_read_raw_data  jpeg_read_raw_data
+
+/* Internal methods (jpegint.h) */
+
+/* Use the 8-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data
+/* Use the 8-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data
+/* Use the 8-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data
+/* Use the 8-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert
+/* Use the 8-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample
+/* Use the 8-bit method in the jpeg_forward_dct structure. */
+#define _forward_DCT  forward_DCT
+/* Use the 8-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data
+/* Use the 8-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data
+/* Use the 8-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data
+/* Use the 8-bit method in the jpeg_inverse_dct structure. */
+#define _inverse_DCT_method_ptr  inverse_DCT_method_ptr
+#define _inverse_DCT  inverse_DCT
+/* Use the 8-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample
+/* Use the 8-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert
+/* Use the 8-bit method in the jpeg_color_quantizer structure. */
+#define _color_quantize  color_quantize
+
+/* Global internal functions (jpegint.h) */
+#define _jinit_c_main_controller  jinit_c_main_controller
+#define _jinit_c_prep_controller  jinit_c_prep_controller
+#define _jinit_c_coef_controller  jinit_c_coef_controller
+#define _jinit_color_converter  jinit_color_converter
+#define _jinit_downsampler  jinit_downsampler
+#define _jinit_forward_dct  jinit_forward_dct
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_diff_controller  jinit_c_diff_controller
+#define _jinit_lossless_compressor  jinit_lossless_compressor
+#endif
+
+#define _jinit_d_main_controller  jinit_d_main_controller
+#define _jinit_d_coef_controller  jinit_d_coef_controller
+#define _jinit_d_post_controller  jinit_d_post_controller
+#define _jinit_inverse_dct  jinit_inverse_dct
+#define _jinit_upsampler  jinit_upsampler
+#define _jinit_color_deconverter  jinit_color_deconverter
+#define _jinit_1pass_quantizer  jinit_1pass_quantizer
+#define _jinit_2pass_quantizer  jinit_2pass_quantizer
+#define _jinit_merged_upsampler  jinit_merged_upsampler
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_diff_controller  jinit_d_diff_controller
+#define _jinit_lossless_decompressor  jinit_lossless_decompressor
+#endif
+
+#define _jcopy_sample_rows  jcopy_sample_rows
+
+/* Global internal functions (jdct.h) */
+#define _jpeg_fdct_islow  jpeg_fdct_islow
+#define _jpeg_fdct_ifast  jpeg_fdct_ifast
+
+#define _jpeg_idct_islow  jpeg_idct_islow
+#define _jpeg_idct_ifast  jpeg_idct_ifast
+#define _jpeg_idct_float  jpeg_idct_float
+#define _jpeg_idct_7x7  jpeg_idct_7x7
+#define _jpeg_idct_6x6  jpeg_idct_6x6
+#define _jpeg_idct_5x5  jpeg_idct_5x5
+#define _jpeg_idct_4x4  jpeg_idct_4x4
+#define _jpeg_idct_3x3  jpeg_idct_3x3
+#define _jpeg_idct_2x2  jpeg_idct_2x2
+#define _jpeg_idct_1x1  jpeg_idct_1x1
+#define _jpeg_idct_9x9  jpeg_idct_9x9
+#define _jpeg_idct_10x10  jpeg_idct_10x10
+#define _jpeg_idct_11x11  jpeg_idct_11x11
+#define _jpeg_idct_12x12  jpeg_idct_12x12
+#define _jpeg_idct_13x13  jpeg_idct_13x13
+#define _jpeg_idct_14x14  jpeg_idct_14x14
+#define _jpeg_idct_15x15  jpeg_idct_15x15
+#define _jpeg_idct_16x16  jpeg_idct_16x16
+
+/* Internal fields (cdjpeg.h) */
+
+/* Use the 8-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer
+
+/* Image I/O functions (cdjpeg.h) */
+#define _jinit_read_gif  jinit_read_gif
+#define _jinit_write_gif  jinit_write_gif
+#define _jinit_read_ppm  jinit_read_ppm
+#define _jinit_write_ppm  jinit_write_ppm
+
+#define _read_color_map  read_color_map
+
+#endif /* BITS_IN_JSAMPLE */
+
+#endif /* JSAMPLECOMP_H */
diff --git a/3rdparty/libjpeg-turbo/src/jsimd.h b/3rdparty/libjpeg-turbo/src/jsimd.h
index 6c203655ef..6ae021a651 100644
--- a/3rdparty/libjpeg-turbo/src/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/jsimd.h
@@ -2,8 +2,8 @@
  * jsimd.h
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2011, 2014, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2011, 2014, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -12,6 +12,8 @@
  *
  */
 
+#ifdef WITH_SIMD
+
 #include "jchuff.h"             /* Declarations shared with jcphuff.c */
 
 EXTERN(int) jsimd_can_rgb_ycc(void);
@@ -114,10 +116,12 @@ EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
 
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
+
+#endif /* WITH_SIMD */
diff --git a/3rdparty/libjpeg-turbo/src/jsimd_none.c b/3rdparty/libjpeg-turbo/src/jsimd_none.c
deleted file mode 100644
index 5b38a9fb5c..0000000000
--- a/3rdparty/libjpeg-turbo/src/jsimd_none.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * jsimd_none.c
- *
- * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
- * Copyright (C) 2020, Arm Limited.
- *
- * Based on the x86 SIMD extension for IJG JPEG library,
- * Copyright (C) 1999-2006, MIYASAKA Masaru.
- * For conditions of distribution and use, see copyright notice in jsimdext.inc
- *
- * This file contains stubs for when there is no SIMD support available.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jsimd.h"
-#include "jdct.h"
-#include "jsimddct.h"
-
-GLOBAL(int)
-jsimd_can_rgb_ycc(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_rgb_gray(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_ycc_rgb(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_ycc_rgb565(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_c_can_null_convert(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                      JSAMPIMAGE output_buf, JDIMENSION output_row,
-                      int num_rows)
-{
-}
-
-GLOBAL(void)
-jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                       JSAMPIMAGE output_buf, JDIMENSION output_row,
-                       int num_rows)
-{
-}
-
-GLOBAL(void)
-jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                      JDIMENSION input_row, JSAMPARRAY output_buf,
-                      int num_rows)
-{
-}
-
-GLOBAL(void)
-jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                         JDIMENSION input_row, JSAMPARRAY output_buf,
-                         int num_rows)
-{
-}
-
-GLOBAL(void)
-jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
-                     JSAMPIMAGE output_buf, JDIMENSION output_row,
-                     int num_rows)
-{
-}
-
-GLOBAL(int)
-jsimd_can_h2v2_downsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h2v1_downsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h2v2_smooth_downsample(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                      JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
-                             jpeg_component_info *compptr,
-                             JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
-                      JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-}
-
-GLOBAL(int)
-jsimd_can_h2v2_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h2v1_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_int_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                   JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                    JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(int)
-jsimd_can_h2v2_fancy_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h2v1_fancy_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h1v2_fancy_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(void)
-jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
-{
-}
-
-GLOBAL(int)
-jsimd_can_h2v2_merged_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_h2v1_merged_upsample(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
-{
-}
-
-GLOBAL(void)
-jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
-                           JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
-{
-}
-
-GLOBAL(int)
-jsimd_can_convsamp(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_convsamp_float(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
-               DCTELEM *workspace)
-{
-}
-
-GLOBAL(void)
-jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
-                     FAST_FLOAT *workspace)
-{
-}
-
-GLOBAL(int)
-jsimd_can_fdct_islow(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_fdct_ifast(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_fdct_float(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_fdct_islow(DCTELEM *data)
-{
-}
-
-GLOBAL(void)
-jsimd_fdct_ifast(DCTELEM *data)
-{
-}
-
-GLOBAL(void)
-jsimd_fdct_float(FAST_FLOAT *data)
-{
-}
-
-GLOBAL(int)
-jsimd_can_quantize(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_quantize_float(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
-{
-}
-
-GLOBAL(void)
-jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
-                     FAST_FLOAT *workspace)
-{
-}
-
-GLOBAL(int)
-jsimd_can_idct_2x2(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_idct_4x4(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_idct_6x6(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_idct_12x12(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-               JCOEFPTR coef_block, JSAMPARRAY output_buf,
-               JDIMENSION output_col)
-{
-}
-
-GLOBAL(void)
-jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-               JCOEFPTR coef_block, JSAMPARRAY output_buf,
-               JDIMENSION output_col)
-{
-}
-
-GLOBAL(void)
-jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-               JCOEFPTR coef_block, JSAMPARRAY output_buf,
-               JDIMENSION output_col)
-{
-}
-
-GLOBAL(void)
-jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                 JDIMENSION output_col)
-{
-}
-
-GLOBAL(int)
-jsimd_can_idct_islow(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_idct_ifast(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_can_idct_float(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                 JDIMENSION output_col)
-{
-}
-
-GLOBAL(void)
-jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                 JDIMENSION output_col)
-{
-}
-
-GLOBAL(void)
-jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
-                 JCOEFPTR coef_block, JSAMPARRAY output_buf,
-                 JDIMENSION output_col)
-{
-}
-
-GLOBAL(int)
-jsimd_can_huff_encode_one_block(void)
-{
-  return 0;
-}
-
-GLOBAL(JOCTET *)
-jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
-                            int last_dc_val, c_derived_tbl *dctbl,
-                            c_derived_tbl *actbl)
-{
-  return NULL;
-}
-
-GLOBAL(int)
-jsimd_can_encode_mcu_AC_first_prepare(void)
-{
-  return 0;
-}
-
-GLOBAL(void)
-jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
-                                  const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
-{
-}
-
-GLOBAL(int)
-jsimd_can_encode_mcu_AC_refine_prepare(void)
-{
-  return 0;
-}
-
-GLOBAL(int)
-jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
-                                   const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
-{
-  return 0;
-}
diff --git a/3rdparty/libjpeg-turbo/src/jutils.c b/3rdparty/libjpeg-turbo/src/jutils.c
index d86271624a..24caac1902 100644
--- a/3rdparty/libjpeg-turbo/src/jutils.c
+++ b/3rdparty/libjpeg-turbo/src/jutils.c
@@ -17,8 +17,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsamplecomp.h"
 
 
+#if BITS_IN_JSAMPLE == 8
+
 /*
  * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
  * of a DCT block read in natural order (left to right, top to bottom).
@@ -89,19 +92,24 @@ jround_up(long a, long b)
   return a - (a % b);
 }
 
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE != 16 || \
+    defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
 
 GLOBAL(void)
-jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
-                  JSAMPARRAY output_array, int dest_row, int num_rows,
-                  JDIMENSION num_cols)
+_jcopy_sample_rows(_JSAMPARRAY input_array, int source_row,
+                   _JSAMPARRAY output_array, int dest_row, int num_rows,
+                   JDIMENSION num_cols)
 /* Copy some rows of samples from one place to another.
  * num_rows rows are copied from input_array[source_row++]
  * to output_array[dest_row++]; these areas may overlap for duplication.
  * The source and destination arrays must be at least as wide as num_cols.
  */
 {
-  register JSAMPROW inptr, outptr;
-  register size_t count = (size_t)(num_cols * sizeof(JSAMPLE));
+  register _JSAMPROW inptr, outptr;
+  register size_t count = (size_t)(num_cols * sizeof(_JSAMPLE));
   register int row;
 
   input_array += source_row;
@@ -114,6 +122,11 @@ jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
   }
 }
 
+#endif /* BITS_IN_JSAMPLE != 16 ||
+          defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED) */
+
+
+#if BITS_IN_JSAMPLE == 8
 
 GLOBAL(void)
 jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
@@ -131,3 +144,5 @@ jzero_far(void *target, size_t bytestozero)
 {
   memset(target, 0, bytestozero);
 }
+
+#endif /* BITS_IN_JSAMPLE == 8 */
diff --git a/3rdparty/libjpeg-turbo/src/jversion.h.in b/3rdparty/libjpeg-turbo/src/jversion.h.in
index dca4f08fdb..fc0ce3e09e 100644
--- a/3rdparty/libjpeg-turbo/src/jversion.h.in
+++ b/3rdparty/libjpeg-turbo/src/jversion.h.in
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2022, D. R. Commander.
+ * Copyright (C) 2010, 2012-2024, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -36,19 +36,21 @@
  *   their code
  */
 
-#define JCOPYRIGHT \
-  "Copyright (C) 2009-2022 D. R. Commander\n" \
+#define JCOPYRIGHT1 \
+  "Copyright (C) 2009-2024 D. R. Commander\n" \
   "Copyright (C) 2015, 2020 Google, Inc.\n" \
   "Copyright (C) 2019-2020 Arm Limited\n" \
   "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
   "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
-  "Copyright (C) 2015 Intel Corporation\n" \
+  "Copyright (C) 2015 Intel Corporation\n"
+#define JCOPYRIGHT2 \
   "Copyright (C) 2013-2014 Linaro Limited\n" \
   "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
   "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
   "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
   "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1999 Ken Murchison\n" \
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding\n"
 
 #define JCOPYRIGHT_SHORT \
   "Copyright (C) @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
diff --git a/3rdparty/libjpeg-turbo/src/libjpeg.map.in b/3rdparty/libjpeg-turbo/src/libjpeg.map.in
new file mode 100644
index 0000000000..b4480d8347
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/libjpeg.map.in
@@ -0,0 +1,11 @@
+LIBJPEGTURBO_@JPEG_LIB_VERSION_DECIMAL@ {
+  @MEM_SRCDST_FUNCTIONS@
+  local:
+    jsimd_*;
+    jconst_*;
+};
+
+LIBJPEG_@JPEG_LIB_VERSION_DECIMAL@ {
+  global:
+    *;
+};
diff --git a/3rdparty/libjpeg-turbo/src/libjpeg.txt b/3rdparty/libjpeg-turbo/src/libjpeg.txt
new file mode 100644
index 0000000000..0fe95bb63c
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/libjpeg.txt
@@ -0,0 +1,3282 @@
+USING THE IJG JPEG LIBRARY
+
+This file was part of the Independent JPEG Group's software:
+Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding.
+Lossless JPEG Modifications:
+Copyright (C) 1999, Ken Murchison.
+libjpeg-turbo Modifications:
+Copyright (C) 2010, 2014-2018, 2020, 2022-2023, D. R. Commander.
+Copyright (C) 2015, Google, Inc.
+For conditions of distribution and use, see the accompanying README.ijg file.
+
+
+This file describes how to use the IJG JPEG library within an application
+program.  Read it if you want to write a program that uses the library.
+
+The file example.c provides heavily commented code for calling the JPEG
+library.  Also see jpeglib.h (the include file to be used by application
+programs) for full details about data structures and function parameter lists.
+The library source code, of course, is the ultimate reference.
+
+Note that there have been *major* changes from the application interface
+presented by IJG version 4 and earlier versions.  The old design had several
+inherent limitations, and it had accumulated a lot of cruft as we added
+features while trying to minimize application-interface changes.  We have
+sacrificed backward compatibility in the version 5 rewrite, but we think the
+improvements justify this.
+
+
+TABLE OF CONTENTS
+-----------------
+
+Overview:
+        Functions provided by the library
+        12-bit and 16-bit Data Precision
+        Outline of typical usage
+Basic library usage:
+        Data formats
+        Compression details
+        Decompression details
+        Partial image decompression
+        Mechanics of usage: include files, linking, etc
+Advanced features:
+        Compression parameter selection
+        Decompression parameter selection
+        Special color spaces
+        Error handling
+        Compressed data handling (source and destination managers)
+        I/O suspension
+        Progressive JPEG support
+        Buffered-image mode
+        Abbreviated datastreams and multiple images
+        Special markers
+        ICC profiles
+        Raw (downsampled) image data
+        Really raw data: DCT coefficients
+        Progress monitoring
+        Memory management
+        Memory usage
+        Library compile-time options
+        Portability considerations
+
+You should read at least the overview and basic usage sections before trying
+to program with the library.  The sections on advanced features can be read
+if and when you need them.
+
+
+OVERVIEW
+========
+
+Functions provided by the library
+---------------------------------
+
+The IJG JPEG library provides C code to read and write JPEG-compressed image
+files.  The surrounding application program receives or supplies image data a
+scanline at a time, using a straightforward uncompressed image format.  All
+details of color conversion and other preprocessing/postprocessing can be
+handled by the library.
+
+The library includes a substantial amount of code that is not covered by the
+JPEG standard but is necessary for typical applications of JPEG.  These
+functions preprocess the image before JPEG compression or postprocess it after
+decompression.  They include colorspace conversion, downsampling/upsampling,
+and color quantization.  The application indirectly selects use of this code
+by specifying the format in which it wishes to supply or receive image data.
+For example, if colormapped output is requested, then the decompression
+library automatically invokes color quantization.
+
+A wide range of quality vs. speed tradeoffs are possible in JPEG processing,
+and even more so in decompression postprocessing.  The decompression library
+provides multiple implementations that cover most of the useful tradeoffs,
+ranging from very-high-quality down to fast-preview operation.  On the
+compression side we have generally not provided low-quality choices, since
+compression is normally less time-critical.  It should be understood that the
+low-quality modes may not meet the JPEG standard's accuracy requirements;
+nonetheless, they are useful for viewers.
+
+A word about functions *not* provided by the library.  We handle a subset of
+the ISO JPEG standard; most baseline, extended-sequential, and progressive
+JPEG processes are supported.  (Our subset includes all features now in common
+use.)  Unsupported ISO options include:
+        * Hierarchical storage
+        * DNL marker
+        * Nonintegral subsampling ratios
+We support 8-bit (lossy and lossless), 12-bit (lossy and lossless), and 16-bit
+(lossless) data precision.
+
+By itself, the library handles only interchange JPEG datastreams --- in
+particular the widely used JFIF file format.  The library can be used by
+surrounding code to process interchange or abbreviated JPEG datastreams that
+are embedded in more complex file formats.  (For example, this library is
+used by the free LIBTIFF library to support JPEG compression in TIFF.)
+
+
+12-bit and 16-bit Data Precision
+--------------------------------
+
+The JPEG standard provides for baseline 8-bit and 12-bit DCT processes as well
+as 8-bit, 12-bit, and 16-bit lossless (predictive) processes.  This code
+supports 12-bit-per-component lossy or lossless JPEG if you set
+cinfo->data_precision to 12 and 16-bit-per-component lossless JPEG if you set
+cinfo->data_precision to 16.  Note that this causes the sample size to be
+larger than a char, so it affects the surrounding application's image data.
+The sample applications cjpeg and djpeg can support 12-bit mode only for PPM,
+PGM, and GIF file formats and 16-bit mode only for PPM and PGM file formats.
+
+Note that, when 12-bit data precision is enabled, the library always compresses
+in Huffman optimization mode, in order to generate valid Huffman tables.  This
+is necessary because our default Huffman tables only cover 8-bit data.  If you
+need to output 12-bit files in one pass, you'll have to supply suitable default
+Huffman tables.  You may also want to supply your own DCT quantization tables;
+the existing quality-scaling code has been developed for 8-bit use, and
+probably doesn't generate especially good tables for 12-bit.
+
+Functions that are specific to 12-bit data precision have a prefix of "jpeg12_"
+instead of "jpeg_" and use the following data types and macros:
+
+  * J12SAMPLE instead of JSAMPLE
+  * J12SAMPROW instead of JSAMPROW
+  * J12SAMPARRAY instead of JSAMPARRAY
+  * J12SAMPIMAGE instead of JSAMPIMAGE
+  * MAXJ12SAMPLE instead of MAXJSAMPLE
+  * CENTERJ12SAMPLE instead of CENTERJSAMPLE
+
+Functions that are specific to 16-bit data precision have a prefix of "jpeg16_"
+instead of "jpeg_" and use the following data types and macros:
+
+  * J16SAMPLE instead of JSAMPLE
+  * J16SAMPROW instead of JSAMPROW
+  * J16SAMPARRAY instead of JSAMPARRAY
+  * J16SAMPIMAGE instead of JSAMPIMAGE
+  * MAXJ16SAMPLE instead of MAXJSAMPLE
+  * CENTERJ16SAMPLE instead of CENTERJSAMPLE
+
+This allows 8-bit, 12-bit, and 16-bit data precision to be used in a single
+application.  (Refer to example.c).  Arithmetic coding and SIMD acceleration
+are not currently implemented for 12-bit data precision, nor are they
+implemented for lossless mode with any data precision.
+
+Refer to the descriptions of the data_precision compression and decompression
+parameters below for further information.
+
+This documentation uses "J*SAMPLE", "J*SAMPROW", "J*SAMPARRAY", and
+"J*SAMPIMAGE" to generically refer to the 8-bit, 12-bit, or 16-bit data types.
+
+
+Outline of typical usage
+------------------------
+
+The rough outline of a JPEG compression operation is:
+
+        Allocate and initialize a JPEG compression object
+        Specify the destination for the compressed data (eg, a file)
+        Set parameters for compression, including image size & colorspace
+        jpeg_start_compress(...);
+        while (scan lines remain to be written)
+                jpeg_write_scanlines(...);  /* Use jpeg12_write_scanlines() for
+                                               12-bit data precision and
+                                               jpeg16_write_scanlines() for
+                                               16-bit data precision. */
+        jpeg_finish_compress(...);
+        Release the JPEG compression object
+
+A JPEG compression object holds parameters and working state for the JPEG
+library.  We make creation/destruction of the object separate from starting
+or finishing compression of an image; the same object can be re-used for a
+series of image compression operations.  This makes it easy to re-use the
+same parameter settings for a sequence of images.  Re-use of a JPEG object
+also has important implications for processing abbreviated JPEG datastreams,
+as discussed later.
+
+The image data to be compressed is supplied to jpeg*_write_scanlines() from
+in-memory buffers.  If the application is doing file-to-file compression,
+reading image data from the source file is the application's responsibility.
+The library emits compressed data by calling a "data destination manager",
+which typically will write the data into a file; but the application can
+provide its own destination manager to do something else.
+
+Similarly, the rough outline of a JPEG decompression operation is:
+
+        Allocate and initialize a JPEG decompression object
+        Specify the source of the compressed data (eg, a file)
+        Call jpeg_read_header() to obtain image info
+        Set parameters for decompression
+        jpeg_start_decompress(...);
+        while (scan lines remain to be read)
+                jpeg_read_scanlines(...);  /* Use jpeg12_read_scanlines() for
+                                              12-bit data precision and
+                                              jpeg16_read_scanlines() for
+                                              16-bit data precision. */
+        jpeg_finish_decompress(...);
+        Release the JPEG decompression object
+
+This is comparable to the compression outline except that reading the
+datastream header is a separate step.  This is helpful because information
+about the image's size, colorspace, etc is available when the application
+selects decompression parameters.  For example, the application can choose an
+output scaling ratio that will fit the image into the available screen size.
+
+The decompression library obtains compressed data by calling a data source
+manager, which typically will read the data from a file; but other behaviors
+can be obtained with a custom source manager.  Decompressed data is delivered
+into in-memory buffers passed to jpeg*_read_scanlines().
+
+It is possible to abort an incomplete compression or decompression operation
+by calling jpeg_abort(); or, if you do not need to retain the JPEG object,
+simply release it by calling jpeg_destroy().
+
+JPEG compression and decompression objects are two separate struct types.
+However, they share some common fields, and certain routines such as
+jpeg_destroy() can work on either type of object.
+
+The JPEG library has no static variables: all state is in the compression
+or decompression object.  Therefore it is possible to process multiple
+compression and decompression operations concurrently, using multiple JPEG
+objects.
+
+Both compression and decompression can be done in an incremental memory-to-
+memory fashion, if suitable source/destination managers are used.  See the
+section on "I/O suspension" for more details.
+
+
+BASIC LIBRARY USAGE
+===================
+
+Data formats
+------------
+
+Before diving into procedural details, it is helpful to understand the
+image data format that the JPEG library expects or returns.
+
+The standard input image format is a rectangular array of pixels, with each
+pixel having the same number of "component" or "sample" values (color
+channels).  You must specify how many components there are and the colorspace
+interpretation of the components.  Most applications will use RGB data
+(three components per pixel) or grayscale data (one component per pixel).
+PLEASE NOTE THAT RGB DATA IS THREE SAMPLES PER PIXEL, GRAYSCALE ONLY ONE.
+A remarkable number of people manage to miss this, only to find that their
+programs don't work with grayscale JPEG files.
+
+There is no provision for colormapped input.  JPEG files are always full-color
+or full grayscale (or sometimes another colorspace such as CMYK).  You can
+feed in a colormapped image by expanding it to full-color format.  However
+JPEG often doesn't work very well with source data that has been colormapped,
+because of dithering noise.  This is discussed in more detail in the JPEG FAQ
+and the other references mentioned in the README.ijg file.
+
+Pixels are stored by scanlines, with each scanline running from left to
+right.  The component values for each pixel are adjacent in the row; for
+example, R,G,B,R,G,B,R,G,B,... for 24-bit RGB color.  Each scanline is an
+array of data type JSAMPLE or J12SAMPLE --- which is typically "unsigned char"
+or "short" (respectively), unless you've changed jmorecfg.h.  (You can also
+change the RGB pixel layout, say to B,G,R order, by modifying jmorecfg.h.  But
+see the restrictions listed in that file before doing so.)
+
+A 2-D array of pixels is formed by making a list of pointers to the starts of
+scanlines; so the scanlines need not be physically adjacent in memory.  Even
+if you process just one scanline at a time, you must make a one-element
+pointer array to conform to this structure.  Pointers to J*SAMPLE rows are of
+type J*SAMPROW, and the pointer to the pointer array is of type J*SAMPARRAY.
+
+The library accepts or supplies one or more complete scanlines per call.
+It is not possible to process part of a row at a time.  Scanlines are always
+processed top-to-bottom.  You can process an entire image in one call if you
+have it all in memory, but usually it's simplest to process one scanline at
+a time.
+
+For best results, source data values should have the precision specified by
+cinfo->data_precision (normally 8 bits).  For instance, if you choose to
+compress data that's only 6 bits/channel, you should left-justify each value in
+a byte before passing it to the compressor.  If you need to compress data
+that has more than 8 bits/channel, set cinfo->data_precision = 12 or 16.
+
+
+The data format returned by the decompressor is the same in all details,
+except that colormapped output is supported.  (Again, a JPEG file is never
+colormapped.  But you can ask the decompressor to perform on-the-fly color
+quantization to deliver colormapped output.)  If you request colormapped
+output then the returned data array contains a single J*SAMPLE per pixel;
+its value is an index into a color map.  The color map is represented as
+a 2-D J*SAMPARRAY in which each row holds the values of one color component,
+that is, colormap[i][j] is the value of the i'th color component for pixel
+value (map index) j.  Note that since the colormap indexes are stored in
+J*SAMPLEs, the maximum number of colors is limited by the size of J*SAMPLE
+(ie, at most 256 colors for 8-bit data precision, 4096 colors for 12-bit data
+precision, and 65536 colors for 16-bit data precision).
+
+
+Compression details
+-------------------
+
+Here we revisit the JPEG compression outline given in the overview.
+
+1. Allocate and initialize a JPEG compression object.
+
+A JPEG compression object is a "struct jpeg_compress_struct".  (It also has
+a bunch of subsidiary structures which are allocated via malloc(), but the
+application doesn't control those directly.)  This struct can be just a local
+variable in the calling routine, if a single routine is going to execute the
+whole JPEG compression sequence.  Otherwise it can be static or allocated
+from malloc().
+
+You will also need a structure representing a JPEG error handler.  The part
+of this that the library cares about is a "struct jpeg_error_mgr".  If you
+are providing your own error handler, you'll typically want to embed the
+jpeg_error_mgr struct in a larger structure; this is discussed later under
+"Error handling".  For now we'll assume you are just using the default error
+handler.  The default error handler will print JPEG error/warning messages
+on stderr, and it will call exit() if a fatal error occurs.
+
+You must initialize the error handler structure, store a pointer to it into
+the JPEG object's "err" field, and then call jpeg_create_compress() to
+initialize the rest of the JPEG object.
+
+Typical code for this step, if you are using the default error handler, is
+
+        struct jpeg_compress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_compress(&cinfo);
+
+jpeg_create_compress allocates a small amount of memory, so it could fail
+if you are out of memory.  In that case it will exit via the error handler;
+that's why the error handler must be initialized first.
+
+
+2. Specify the destination for the compressed data (eg, a file).
+
+As previously mentioned, the JPEG library delivers compressed data to a
+"data destination" module.  The library includes one data destination
+module which knows how to write to a stdio stream.  You can use your own
+destination module if you want to do something else, as discussed later.
+
+If you use the standard destination module, you must open the target stdio
+stream beforehand.  Typical code for this step looks like:
+
+        FILE *outfile;
+        ...
+        if ((outfile = fopen(filename, "wb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_dest(&cinfo, outfile);
+
+where the last line invokes the standard destination module.
+
+WARNING: it is critical that the binary compressed data be delivered to the
+output file unchanged.  On non-Unix systems the stdio library may perform
+newline translation or otherwise corrupt binary data.  To suppress this
+behavior, you may need to use a "b" option to fopen (as shown above), or use
+setmode() or another routine to put the stdio stream in binary mode.  See
+cjpeg.c and djpeg.c for code that has been found to work on many systems.
+
+You can select the data destination after setting other parameters (step 3),
+if that's more convenient.  You may not change the destination between
+calling jpeg_start_compress() and jpeg_finish_compress().
+
+
+3. Set parameters for compression, including image size & colorspace.
+
+You must supply information about the source image by setting the following
+fields in the JPEG object (cinfo structure):
+
+        image_width             Width of image, in pixels
+        image_height            Height of image, in pixels
+        input_components        Number of color channels (samples per pixel)
+        in_color_space          Color space of source image
+
+The image dimensions are, hopefully, obvious.  JPEG supports image dimensions
+of 1 to 64K pixels in either direction.  The input color space is typically
+RGB or grayscale, and input_components is 3 or 1 accordingly.  (See "Special
+color spaces", later, for more info.)  The in_color_space field must be
+assigned one of the J_COLOR_SPACE enum constants, typically JCS_RGB or
+JCS_GRAYSCALE.
+
+JPEG has a large number of compression parameters that determine how the
+image is encoded.  Most applications don't need or want to know about all
+these parameters.  You can set all the parameters to reasonable defaults by
+calling jpeg_set_defaults(); then, if there are particular values you want
+to change, you can do so after that.  The "Compression parameter selection"
+section tells about all the parameters.
+
+You must set in_color_space correctly before calling jpeg_set_defaults(),
+because the defaults depend on the source image colorspace.  However the
+other three source image parameters need not be valid until you call
+jpeg_start_compress().  There's no harm in calling jpeg_set_defaults() more
+than once, if that happens to be convenient.
+
+Typical code for a 24-bit RGB source image is
+
+        cinfo.image_width = Width;      /* image width and height, in pixels */
+        cinfo.image_height = Height;
+        cinfo.input_components = 3;     /* # of color components per pixel */
+        cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
+
+        jpeg_set_defaults(&cinfo);
+        /* Make optional parameter settings here */
+
+
+4. jpeg_start_compress(...);
+
+After you have established the data destination and set all the necessary
+source image info and other parameters, call jpeg_start_compress() to begin
+a compression cycle.  This will initialize internal state, allocate working
+storage, and emit the first few bytes of the JPEG datastream header.
+
+Typical code:
+
+        jpeg_start_compress(&cinfo, TRUE);
+
+The "TRUE" parameter ensures that a complete JPEG interchange datastream
+will be written.  This is appropriate in most cases.  If you think you might
+want to use an abbreviated datastream, read the section on abbreviated
+datastreams, below.
+
+Once you have called jpeg_start_compress(), you may not alter any JPEG
+parameters or other fields of the JPEG object until you have completed
+the compression cycle.
+
+
+5. while (scan lines remain to be written)
+        jpeg_write_scanlines(...);  /* Use jpeg12_write_scanlines() for 12-bit
+                                       data precision and
+                                       jpeg16_write_scanlines() for 16-bit data
+                                       precision. */
+
+Now write all the required image data by calling jpeg*_write_scanlines()
+one or more times.  You can pass one or more scanlines in each call, up
+to the total image height.  In most applications it is convenient to pass
+just one or a few scanlines at a time.  The expected format for the passed
+data is discussed under "Data formats", above.
+
+Image data should be written in top-to-bottom scanline order.
+Rec. ITU-T T.81 | ISO/IEC 10918-1 says, "Applications determine which edges of
+a source image are defined as top, bottom, left, and right."  However, if you
+want your files to be compatible with everyone else's, then top-to-bottom order
+must be used.  If the source data must be read in bottom-to-top order, then you
+can use the JPEG library's virtual array mechanism to invert the data
+efficiently.  Examples of this can be found in the sample application cjpeg.
+
+The library maintains a count of the number of scanlines written so far
+in the next_scanline field of the JPEG object.  Usually you can just use
+this variable as the loop counter, so that the loop test looks like
+"while (cinfo.next_scanline < cinfo.image_height)".
+
+Code for this step depends heavily on the way that you store the source data.
+example.c shows the following code for the case of a full-size 2-D source
+array containing 3-byte RGB pixels:
+
+        JSAMPROW row_pointer[1];        /* pointer to a single row
+                                           Use J12SAMPROW for 12-bit data
+                                           precision and J16SAMPROW for 16-bit
+                                           data precision. */
+
+        while (cinfo.next_scanline < cinfo.image_height) {
+            row_pointer[0] = image_buffer[cinfo.next_scanline];
+            jpeg_write_scanlines(&cinfo, row_pointer, 1);
+                                        /* Use jpeg12_write_scanlines() for
+                                           12-bit data precision and
+                                           jpeg16_write_scanlines() for 16-bit
+                                           data precision. */
+        }
+
+jpeg*_write_scanlines() returns the number of scanlines actually written.
+This will normally be equal to the number passed in, so you can usually
+ignore the return value.  It is different in just two cases:
+  * If you try to write more scanlines than the declared image height,
+    the additional scanlines are ignored.
+  * If you use a suspending data destination manager, output buffer overrun
+    will cause the compressor to return before accepting all the passed lines.
+    This feature is discussed under "I/O suspension", below.  The normal
+    stdio destination manager will NOT cause this to happen.
+In any case, the return value is the same as the change in the value of
+next_scanline.
+
+
+6. jpeg_finish_compress(...);
+
+After all the image data has been written, call jpeg_finish_compress() to
+complete the compression cycle.  This step is ESSENTIAL to ensure that the
+last bufferload of data is written to the data destination.
+jpeg_finish_compress() also releases working memory associated with the JPEG
+object.
+
+Typical code:
+
+        jpeg_finish_compress(&cinfo);
+
+If using the stdio destination manager, don't forget to close the output
+stdio stream (if necessary) afterwards.
+
+If you have requested a multi-pass operating mode, such as Huffman code
+optimization, jpeg_finish_compress() will perform the additional passes using
+data buffered by the first pass.  In this case jpeg_finish_compress() may take
+quite a while to complete.  With the default compression parameters, this will
+not happen.
+
+It is an error to call jpeg_finish_compress() before writing the necessary
+total number of scanlines.  If you wish to abort compression, call
+jpeg_abort() as discussed below.
+
+After completing a compression cycle, you may dispose of the JPEG object
+as discussed next, or you may use it to compress another image.  In that case
+return to step 2, 3, or 4 as appropriate.  If you do not change the
+destination manager, the new datastream will be written to the same target.
+If you do not change any JPEG parameters, the new datastream will be written
+with the same parameters as before.  Note that you can change the input image
+dimensions freely between cycles, but if you change the input colorspace, you
+should call jpeg_set_defaults() to adjust for the new colorspace; and then
+you'll need to repeat all of step 3.
+
+
+7. Release the JPEG compression object.
+
+When you are done with a JPEG compression object, destroy it by calling
+jpeg_destroy_compress().  This will free all subsidiary memory (regardless of
+the previous state of the object).  Or you can call jpeg_destroy(), which
+works for either compression or decompression objects --- this may be more
+convenient if you are sharing code between compression and decompression
+cases.  (Actually, these routines are equivalent except for the declared type
+of the passed pointer.  To avoid gripes from ANSI C compilers, jpeg_destroy()
+should be passed a j_common_ptr.)
+
+If you allocated the jpeg_compress_struct structure from malloc(), freeing
+it is your responsibility --- jpeg_destroy() won't.  Ditto for the error
+handler structure.
+
+Typical code:
+
+        jpeg_destroy_compress(&cinfo);
+
+
+8. Aborting.
+
+If you decide to abort a compression cycle before finishing, you can clean up
+in either of two ways:
+
+* If you don't need the JPEG object any more, just call
+  jpeg_destroy_compress() or jpeg_destroy() to release memory.  This is
+  legitimate at any point after calling jpeg_create_compress() --- in fact,
+  it's safe even if jpeg_create_compress() fails.
+
+* If you want to re-use the JPEG object, call jpeg_abort_compress(), or call
+  jpeg_abort() which works on both compression and decompression objects.
+  This will return the object to an idle state, releasing any working memory.
+  jpeg_abort() is allowed at any time after successful object creation.
+
+Note that cleaning up the data destination, if required, is your
+responsibility; neither of these routines will call term_destination().
+(See "Compressed data handling", below, for more about that.)
+
+jpeg_destroy() and jpeg_abort() are the only safe calls to make on a JPEG
+object that has reported an error by calling error_exit (see "Error handling"
+for more info).  The internal state of such an object is likely to be out of
+whack.  Either of these two routines will return the object to a known state.
+
+
+Decompression details
+---------------------
+
+Here we revisit the JPEG decompression outline given in the overview.
+
+1. Allocate and initialize a JPEG decompression object.
+
+This is just like initialization for compression, as discussed above,
+except that the object is a "struct jpeg_decompress_struct" and you
+call jpeg_create_decompress().  Error handling is exactly the same.
+
+Typical code:
+
+        struct jpeg_decompress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_decompress(&cinfo);
+
+(Both here and in the IJG code, we usually use variable name "cinfo" for
+both compression and decompression objects.)
+
+
+2. Specify the source of the compressed data (eg, a file).
+
+As previously mentioned, the JPEG library reads compressed data from a "data
+source" module.  The library includes one data source module which knows how
+to read from a stdio stream.  You can use your own source module if you want
+to do something else, as discussed later.
+
+If you use the standard source module, you must open the source stdio stream
+beforehand.  Typical code for this step looks like:
+
+        FILE *infile;
+        ...
+        if ((infile = fopen(filename, "rb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_src(&cinfo, infile);
+
+where the last line invokes the standard source module.
+
+WARNING: it is critical that the binary compressed data be read unchanged.
+On non-Unix systems the stdio library may perform newline translation or
+otherwise corrupt binary data.  To suppress this behavior, you may need to use
+a "b" option to fopen (as shown above), or use setmode() or another routine to
+put the stdio stream in binary mode.  See cjpeg.c and djpeg.c for code that
+has been found to work on many systems.
+
+You may not change the data source between calling jpeg_read_header() and
+jpeg_finish_decompress().  If you wish to read a series of JPEG images from
+a single source file, you should repeat the jpeg_read_header() to
+jpeg_finish_decompress() sequence without reinitializing either the JPEG
+object or the data source module; this prevents buffered input data from
+being discarded.
+
+
+3. Call jpeg_read_header() to obtain image info.
+
+Typical code for this step is just
+
+        jpeg_read_header(&cinfo, TRUE);
+
+This will read the source datastream header markers, up to the beginning
+of the compressed data proper.  On return, the image dimensions and other
+info have been stored in the JPEG object.  The application may wish to
+consult this information before selecting decompression parameters.
+
+More complex code is necessary if
+  * A suspending data source is used --- in that case jpeg_read_header()
+    may return before it has read all the header data.  See "I/O suspension",
+    below.  The normal stdio source manager will NOT cause this to happen.
+  * Abbreviated JPEG files are to be processed --- see the section on
+    abbreviated datastreams.  Standard applications that deal only in
+    interchange JPEG files need not be concerned with this case either.
+
+It is permissible to stop at this point if you just wanted to find out the
+image dimensions and other header info for a JPEG file.  In that case,
+call jpeg_destroy() when you are done with the JPEG object, or call
+jpeg_abort() to return it to an idle state before selecting a new data
+source and reading another header.
+
+
+4. Set parameters for decompression.
+
+jpeg_read_header() sets appropriate default decompression parameters based on
+the properties of the image (in particular, its colorspace).  However, you
+may well want to alter these defaults before beginning the decompression.
+For example, the default is to produce full color output from a color file.
+If you want colormapped output you must ask for it.  Other options allow the
+returned image to be scaled and allow various speed/quality tradeoffs to be
+selected.  "Decompression parameter selection", below, gives details.
+
+If the defaults are appropriate, nothing need be done at this step.
+
+Note that all default values are set by each call to jpeg_read_header().
+If you reuse a decompression object, you cannot expect your parameter
+settings to be preserved across cycles, as you can for compression.
+You must set desired parameter values each time.
+
+
+5. jpeg_start_decompress(...);
+
+Once the parameter values are satisfactory, call jpeg_start_decompress() to
+begin decompression.  This will initialize internal state, allocate working
+memory, and prepare for returning data.
+
+Typical code is just
+
+        jpeg_start_decompress(&cinfo);
+
+If you have requested a multi-pass operating mode, such as 2-pass color
+quantization, jpeg_start_decompress() will do everything needed before data
+output can begin.  In this case jpeg_start_decompress() may take quite a while
+to complete.  With a single-scan (non progressive) JPEG file and default
+decompression parameters, this will not happen; jpeg_start_decompress() will
+return quickly.
+
+After this call, the final output image dimensions, including any requested
+scaling, are available in the JPEG object; so is the selected colormap, if
+colormapped output has been requested.  Useful fields include
+
+        output_width            image width and height, as scaled
+        output_height
+        out_color_components    # of color components in out_color_space
+        output_components       # of color components returned per pixel
+        colormap                the selected colormap, if any
+        actual_number_of_colors         number of entries in colormap
+
+output_components is 1 (a colormap index) when quantizing colors; otherwise it
+equals out_color_components.  It is the number of J*SAMPLE values that will be
+emitted per pixel in the output arrays.
+
+Typically you will need to allocate data buffers to hold the incoming image.
+You will need output_width * output_components J*SAMPLEs per scanline in your
+output buffer, and a total of output_height scanlines will be returned.
+
+Note: if you are using the JPEG library's internal memory manager to allocate
+data buffers (as djpeg does), then the manager's protocol requires that you
+request large buffers *before* calling jpeg_start_decompress().  This is a
+little tricky since the output_XXX fields are not normally valid then.  You
+can make them valid by calling jpeg_calc_output_dimensions() after setting the
+relevant parameters (scaling, output color space, and quantization flag).
+
+
+6. while (scan lines remain to be read)
+        jpeg_read_scanlines(...);  /* Use jpeg12_read_scanlines() for 12-bit
+                                      data precision and
+                                      jpeg16_read_scanlines() for 16-bit data
+                                      precision. */
+
+Now you can read the decompressed image data by calling jpeg*_read_scanlines()
+one or more times.  At each call, you pass in the maximum number of scanlines
+to be read (ie, the height of your working buffer); jpeg*_read_scanlines()
+will return up to that many lines.  The return value is the number of lines
+actually read.  The format of the returned data is discussed under "Data
+formats", above.  Don't forget that grayscale and color JPEGs will return
+different data formats!
+
+Image data is returned in top-to-bottom scanline order.  If you must write
+out the image in bottom-to-top order, you can use the JPEG library's virtual
+array mechanism to invert the data efficiently.  Examples of this can be
+found in the sample application djpeg.
+
+The library maintains a count of the number of scanlines returned so far
+in the output_scanline field of the JPEG object.  Usually you can just use
+this variable as the loop counter, so that the loop test looks like
+"while (cinfo.output_scanline < cinfo.output_height)".  (Note that the test
+should NOT be against image_height, unless you never use scaling.  The
+image_height field is the height of the original unscaled image.)
+The return value always equals the change in the value of output_scanline.
+
+If you don't use a suspending data source, it is safe to assume that
+jpeg*_read_scanlines() reads at least one scanline per call, until the
+bottom of the image has been reached.
+
+If you use a buffer larger than one scanline, it is NOT safe to assume that
+jpeg*_read_scanlines() fills it.  (The current implementation returns only a
+few scanlines per call, no matter how large a buffer you pass.)  So you must
+always provide a loop that calls jpeg*_read_scanlines() repeatedly until the
+whole image has been read.
+
+
+7. jpeg_finish_decompress(...);
+
+After all the image data has been read, call jpeg_finish_decompress() to
+complete the decompression cycle.  This causes working memory associated
+with the JPEG object to be released.
+
+Typical code:
+
+        jpeg_finish_decompress(&cinfo);
+
+If using the stdio source manager, don't forget to close the source stdio
+stream if necessary.
+
+It is an error to call jpeg_finish_decompress() before reading the correct
+total number of scanlines.  If you wish to abort decompression, call
+jpeg_abort() as discussed below.
+
+After completing a decompression cycle, you may dispose of the JPEG object as
+discussed next, or you may use it to decompress another image.  In that case
+return to step 2 or 3 as appropriate.  If you do not change the source
+manager, the next image will be read from the same source.
+
+
+8. Release the JPEG decompression object.
+
+When you are done with a JPEG decompression object, destroy it by calling
+jpeg_destroy_decompress() or jpeg_destroy().  The previous discussion of
+destroying compression objects applies here too.
+
+Typical code:
+
+        jpeg_destroy_decompress(&cinfo);
+
+
+9. Aborting.
+
+You can abort a decompression cycle by calling jpeg_destroy_decompress() or
+jpeg_destroy() if you don't need the JPEG object any more, or
+jpeg_abort_decompress() or jpeg_abort() if you want to reuse the object.
+The previous discussion of aborting compression cycles applies here too.
+
+
+Partial image decompression
+---------------------------
+
+Partial image decompression is convenient for performance-critical applications
+that wish to view only a portion of a large JPEG image without decompressing
+the whole thing.  It it also useful in memory-constrained environments (such as
+on mobile devices.)  This library provides the following functions to support
+partial image decompression:
+
+1. Skipping rows when decompressing
+
+        jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines);
+                /* Use jpeg12_skip_scanlines() for 12-bit data precision. */
+
+This function provides application programmers with the ability to skip over
+multiple rows in the JPEG image.
+
+Suspending data sources are not supported by this function.  Calling
+jpeg*_skip_scanlines() with a suspending data source will result in undefined
+behavior.  Two-pass color quantization is also not supported by this function.
+Calling jpeg*_skip_scanlines() with two-pass color quantization enabled will
+result in an error.
+
+jpeg*_skip_scanlines() will not allow skipping past the bottom of the image.
+If the value of num_lines is large enough to skip past the bottom of the image,
+then the function will skip to the end of the image instead.
+
+If the value of num_lines is valid, then jpeg*_skip_scanlines() will always
+skip all of the input rows requested.  There is no need to inspect the return
+value of the function in that case.
+
+Best results will be achieved by calling jpeg*_skip_scanlines() for large
+chunks of rows.  The function should be viewed as a way to quickly jump to a
+particular vertical offset in the JPEG image in order to decode a subset of the
+image.  Used in this manner, it will provide significant performance
+improvements.
+
+Calling jpeg*_skip_scanlines() for small values of num_lines has several
+potential drawbacks:
+    1) JPEG decompression occurs in blocks, so if jpeg*_skip_scanlines() is
+       called from the middle of a decompression block, then it is likely that
+       much of the decompression work has already been done for the first
+       couple of rows that need to be skipped.
+    2) When this function returns, it must leave the decompressor in a state
+       such that it is ready to read the next line.  This may involve
+       decompressing a block that must be partially skipped.
+These issues are especially tricky for cases in which upsampling requires
+context rows.  In the worst case, jpeg*_skip_scanlines() will perform similarly
+to jpeg*_read_scanlines() (since it will actually call jpeg*_read_scanlines().)
+
+2. Decompressing partial scanlines
+
+        jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                            JDIMENSION *width)
+                /* Use jpeg12_crop_scanline() for 12-bit data precision. */
+
+This function provides application programmers with the ability to decompress
+only a portion of each row in the JPEG image.  It must be called after
+jpeg_start_decompress() and before any calls to jpeg*_read_scanlines() or
+jpeg*_skip_scanlines().
+
+If xoffset and width do not form a valid subset of the image row, then this
+function will generate an error.  Note that if the output image is scaled, then
+xoffset and width are relative to the scaled image dimensions.
+
+xoffset and width are passed by reference because xoffset must fall on an iMCU
+boundary.  If it doesn't, then it will be moved left to the nearest iMCU
+boundary, and width will be increased accordingly.  If the calling program does
+not like the adjusted values of xoffset and width, then it can call
+jpeg*_crop_scanline() again with new values (for instance, if it wants to move
+xoffset to the nearest iMCU boundary to the right instead of to the left.)
+
+After calling this function, cinfo->output_width will be set to the adjusted
+width.  This value should be used when allocating an output buffer to pass to
+jpeg*_read_scanlines().
+
+The output image from a partial-width decompression will be identical to the
+corresponding image region from a full decode, with one exception:  The "fancy"
+(smooth) h2v2 (4:2:0) and h2v1 (4:2:2) upsampling algorithms fill in the
+missing chroma components by averaging the chroma components from neighboring
+pixels, except on the right and left edges of the image (where there are no
+neighboring pixels.)  When performing a partial-width decompression, these
+"fancy" upsampling algorithms may treat the left and right edges of the partial
+image region as if they are the left and right edges of the image, meaning that
+the upsampling algorithm may be simplified.  The result is that the pixels on
+the left or right edge of the partial image may not be exactly identical to the
+corresponding pixels in the original image.
+
+
+Mechanics of usage: include files, linking, etc
+-----------------------------------------------
+
+Applications using the JPEG library should include the header file jpeglib.h
+to obtain declarations of data types and routines.  Before including
+jpeglib.h, include system headers that define at least the typedefs FILE and
+size_t.  On ANSI-conforming systems, including <stdio.h> is sufficient; on
+older Unix systems, you may need <sys/types.h> to define size_t.
+
+If the application needs to refer to individual JPEG library error codes, also
+include jerror.h to define those symbols.
+
+jpeglib.h indirectly includes the files jconfig.h and jmorecfg.h.  If you are
+installing the JPEG header files in a system directory, you will want to
+install all four files: jpeglib.h, jerror.h, jconfig.h, jmorecfg.h.
+
+The most convenient way to include the JPEG code into your executable program
+is to prepare a library file ("libjpeg.a", or a corresponding name on non-Unix
+machines) and reference it at your link step.  If you use only half of the
+library (only compression or only decompression), only that much code will be
+included from the library, unless your linker is hopelessly brain-damaged.
+The supplied build system builds libjpeg.a automatically.
+
+It may be worth pointing out that the core JPEG library does not actually
+require the stdio library: only the default source/destination managers and
+error handler need it.  You can use the library in a stdio-less environment
+if you replace those modules and use jmemnobs.c (or another memory manager of
+your own devising).  More info about the minimum system library requirements
+may be found in jinclude.h.
+
+
+ADVANCED FEATURES
+=================
+
+Compression parameter selection
+-------------------------------
+
+This section describes all the optional parameters you can set for JPEG
+compression, as well as the "helper" routines provided to assist in this
+task.  Proper setting of some parameters requires detailed understanding
+of the JPEG standard; if you don't know what a parameter is for, it's best
+not to mess with it!  See REFERENCES in the README.ijg file for pointers to
+more info about JPEG.
+
+It's a good idea to call jpeg_set_defaults() first, even if you plan to set
+all the parameters; that way your code is more likely to work with future JPEG
+libraries that have additional parameters.  For the same reason, we recommend
+you use a helper routine where one is provided, in preference to twiddling
+cinfo fields directly.
+
+The helper routines are:
+
+jpeg_set_defaults (j_compress_ptr cinfo)
+        This routine sets all JPEG parameters to reasonable defaults, using
+        only the input image's color space (field in_color_space, which must
+        already be set in cinfo).  Many applications will only need to use
+        this routine and perhaps jpeg_set_quality().
+
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+        Sets the JPEG file's colorspace (field jpeg_color_space) as specified,
+        and sets other color-space-dependent parameters appropriately.  See
+        "Special color spaces", below, before using this.  A large number of
+        parameters, including all per-component parameters, are set by this
+        routine; if you want to twiddle individual parameters you should call
+        jpeg_set_colorspace() before rather than after.
+
+jpeg_default_colorspace (j_compress_ptr cinfo)
+        Selects an appropriate JPEG colorspace based on cinfo->in_color_space,
+        and calls jpeg_set_colorspace().  This is actually a subroutine of
+        jpeg_set_defaults().  It's broken out in case you want to change
+        just the colorspace-dependent JPEG parameters.
+
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+        Constructs JPEG quantization tables appropriate for the indicated
+        quality setting.  The quality value is expressed on the 0..100 scale
+        recommended by IJG (cjpeg's "-quality" switch uses this routine).
+        Note that the exact mapping from quality values to tables may change
+        in future IJG releases as more is learned about DCT quantization.
+        If the force_baseline parameter is TRUE, then the quantization table
+        entries are constrained to the range 1..255 for full JPEG baseline
+        compatibility.  In the current implementation, this only makes a
+        difference for quality settings below 25, and it effectively prevents
+        very small/low quality files from being generated.  The IJG decoder
+        is capable of reading the non-baseline files generated at low quality
+        settings when force_baseline is FALSE, but other decoders may not be.
+
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+                         boolean force_baseline)
+        Same as jpeg_set_quality() except that the generated tables are the
+        sample tables given in Annex K (Clause K.1) of
+        Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994, multiplied by the
+        specified scale factor (which is expressed as a percentage; thus
+        scale_factor = 100 reproduces the spec's tables).  Note that larger
+        scale factors give lower quality.  This entry point is useful for
+        conforming to the Adobe PostScript DCT conventions, but we do not
+        recommend linear scaling as a user-visible quality scale otherwise.
+        force_baseline again constrains the computed table entries to 1..255.
+
+int jpeg_quality_scaling (int quality)
+        Converts a value on the IJG-recommended quality scale to a linear
+        scaling percentage.  Note that this routine may change or go away
+        in future releases --- IJG may choose to adopt a scaling method that
+        can't be expressed as a simple scalar multiplier, in which case the
+        premise of this routine collapses.  Caveat user.
+
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+        [libjpeg v7+ API/ABI emulation only]
+        Set default quantization tables with linear q_scale_factor[] values
+        (see below).
+
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+                      const unsigned int *basic_table,
+                      int scale_factor, boolean force_baseline)
+        Allows an arbitrary quantization table to be created.  which_tbl
+        indicates which table slot to fill.  basic_table points to an array
+        of 64 unsigned ints given in normal array order.  These values are
+        multiplied by scale_factor/100 and then clamped to the range 1..65535
+        (or to 1..255 if force_baseline is TRUE).
+        CAUTION: prior to library version 6a, jpeg_add_quant_table expected
+        the basic table to be given in JPEG zigzag order.  If you need to
+        write code that works with either older or newer versions of this
+        routine, you must check the library version number.  Something like
+        "#if JPEG_LIB_VERSION >= 61" is the right test.
+
+jpeg_simple_progression (j_compress_ptr cinfo)
+        Generates a default scan script for writing a progressive-JPEG file.
+        This is the recommended method of creating a progressive file,
+        unless you want to make a custom scan sequence.  You must ensure that
+        the JPEG color space is set correctly before calling this routine.
+
+jpeg_enable_lossless (j_compress_ptr cinfo, int predictor_selection_value,
+                      int point_transform)
+        Enables lossless mode with the specified predictor selection value
+        (1 - 7) and optional point transform (0 - {precision}-1, where
+        {precision} is the JPEG data precision).  A point transform value of 0
+        is necessary in order to create a fully lossless JPEG image.  (A
+        non-zero point transform value right-shifts the input samples by the
+        specified number of bits, which is effectively a form of lossy color
+        quantization.)  In most cases, lossless mode is considerably slower
+        than, and does not compress as effectively as, lossy mode.  Thus, it is
+        typically used only for applications that require mathematically
+        lossless compression.  Note that the following features will be
+        unavailable when compressing or decompressing lossless JPEG images:
+          * Partial image decompression
+          * Quality/quantization table selection
+          * DCT/IDCT algorithm selection
+          * Smoothing
+          * Downsampling/upsampling
+          * Color space conversion (the JPEG image will use the same color
+            space as the input image)
+          * Color quantization
+          * IDCT scaling
+          * Raw (downsampled) data input/output
+          * Transcoding of DCT coefficients
+        Any parameters used to enable or configure those features will be
+        ignored.
+
+        Lossless mode shares no algorithms with lossy mode.  Instead, it uses
+        differential pulse-code modulation (DPCM), an algorithm whereby each
+        sample is encoded as the difference between the sample's value and a
+        "predictor", which is based on the values of neighboring samples.  If
+        Ra is the sample immediately to the left of the current sample, Rb is
+        the sample immediately above the current sample, and Rc is the sample
+        diagonally to the left and above the current sample, then the
+        relationship between the predictor selection value and the predictor is
+        as follows:
+
+        PSV  Predictor
+        --------------
+        1    Ra
+        2    Rb
+        3    Rc
+        4    Ra + Rb – Rc
+        5    Ra + (Rb – Rc) / 2
+        6    Rb + (Ra – Rc) / 2
+        7    (Ra + Rb) / 2
+
+        Predictors 1-3 are 1-dimensional predictors, whereas Predictors 4-7 are
+        2-dimensional predictors.  The best predictor for a particular image
+        depends on the image.
+
+
+Compression parameters (cinfo fields) include:
+
+boolean arith_code
+        If TRUE, use arithmetic coding.
+        If FALSE, use Huffman coding.
+
+int data_precision
+        To create a 12-bit-per-component JPEG file, set data_precision to 12
+        prior to calling jpeg_start_compress() or using the memory manager,
+        then use jpeg12_write_scanlines() or jpeg12_write_raw_data() instead of
+        jpeg_write_scanlines() or jpeg_write_raw_data().  To create a
+        16-bit-per-component lossless JPEG file, set data_precision to 16 prior
+        to calling jpeg_start_compress() or using the memory manager, then use
+        jpeg16_write_scanlines() instead of jpeg_write_scanlines().  Note that
+        16-bit data precision requires lossless mode.  (See
+        jpeg_enable_lossless().)
+
+J_DCT_METHOD dct_method
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: accurate integer method
+                JDCT_IFAST: less accurate integer method [legacy feature]
+                JDCT_FLOAT: floating-point method [legacy feature]
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        When the Independent JPEG Group's software was first released in 1991,
+        the compression time for a 1-megapixel JPEG image on a mainstream PC
+        was measured in minutes.  Thus, JDCT_IFAST provided noticeable
+        performance benefits.  On modern CPUs running libjpeg-turbo, however,
+        the compression time for a 1-megapixel JPEG image is measured in
+        milliseconds, and thus the performance benefits of JDCT_IFAST are much
+        less noticeable.  On modern x86/x86-64 CPUs that support AVX2
+        instructions, JDCT_IFAST and JDCT_ISLOW have similar performance.  On
+        other types of CPUs, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW.
+
+        For quality levels of 90 and below, there should be little or no
+        perceptible quality difference between the two algorithms.  For quality
+        levels above 90, however, the difference between JDCT_IFAST and
+        JDCT_ISLOW becomes more pronounced.  With quality=97, for instance,
+        JDCT_IFAST incurs generally about a 1-3 dB loss in PSNR relative to
+        JDCT_ISLOW, but this can be larger for some images.  Do not use
+        JDCT_IFAST with quality levels above 97.  The algorithm often
+        degenerates at quality=98 and above and can actually produce a more
+        lossy image than if lower quality levels had been used.  Also, in
+        libjpeg-turbo, JDCT_IFAST is not fully accelerated for quality levels
+        above 97, so it will be slower than JDCT_ISLOW.
+
+        JDCT_FLOAT does not produce significantly more accurate results than
+        JDCT_ISLOW, and it is much slower.  JDCT_FLOAT may also give different
+        results on different machines due to varying roundoff behavior, whereas
+        the integer methods should give the same results on all machines.
+
+J_COLOR_SPACE jpeg_color_space
+int num_components
+        The JPEG color space and corresponding number of components; see
+        "Special color spaces", below, for more info.  We recommend using
+        jpeg_set_color_space() if you want to change these.
+
+boolean optimize_coding
+        TRUE causes the compressor to compute optimal Huffman coding tables
+        for the image.  This requires an extra pass over the data and
+        therefore costs a good deal of space and time.  The default is
+        FALSE, which tells the compressor to use the supplied or default
+        Huffman tables.  In most cases optimal tables save only a few percent
+        of file size compared to the default tables.  Note that when this is
+        TRUE, you need not supply Huffman tables at all, and any you do
+        supply will be overwritten.
+
+unsigned int restart_interval
+int restart_in_rows
+        To emit restart markers in the JPEG file, set one of these nonzero.
+        Set restart_interval to specify the exact interval in MCU blocks
+        (samples in lossless mode).  Set restart_in_rows to specify the
+        interval in MCU rows.  (If restart_in_rows is not 0, then
+        restart_interval is set after the image width in MCUs is computed.)
+        Defaults are zero (no restarts).  One restart marker per MCU row is
+        often a good choice.  NOTE: the overhead of restart markers is higher
+        in grayscale JPEG files than in color files, and MUCH higher in
+        progressive JPEGs.  If you use restarts, you may want to use larger
+        intervals in those cases.
+
+const jpeg_scan_info *scan_info
+int num_scans
+        By default, scan_info is NULL; this causes the compressor to write a
+        single-scan sequential JPEG file.  If not NULL, scan_info points to
+        an array of scan definition records of length num_scans.  The
+        compressor will then write a JPEG file having one scan for each scan
+        definition record.  This is used to generate noninterleaved or
+        progressive JPEG files.  The library checks that the scan array
+        defines a valid JPEG scan sequence.  (jpeg_simple_progression creates
+        a suitable scan definition array for progressive JPEG.)  This is
+        discussed further under "Progressive JPEG support".
+
+int smoothing_factor
+        If non-zero, the input image is smoothed; the value should be 1 for
+        minimal smoothing to 100 for maximum smoothing.  Consult jcsample.c
+        for details of the smoothing algorithm.  The default is zero.
+
+boolean write_JFIF_header
+        If TRUE, a JFIF APP0 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space
+        (ie, YCbCr or grayscale) is selected, otherwise FALSE.
+
+UINT8 JFIF_major_version
+UINT8 JFIF_minor_version
+        The version number to be written into the JFIF marker.
+        jpeg_set_defaults() initializes the version to 1.01 (major=minor=1).
+        You should set it to 1.02 (major=1, minor=2) if you plan to write
+        any JFIF 1.02 extension markers.
+
+UINT8 density_unit
+UINT16 X_density
+UINT16 Y_density
+        The resolution information to be written into the JFIF marker;
+        not used otherwise.  density_unit may be 0 for unknown,
+        1 for dots/inch, or 2 for dots/cm.  The default values are 0,1,1
+        indicating square pixels of unknown size.
+
+boolean write_Adobe_marker
+        If TRUE, an Adobe APP14 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK,
+        or YCCK is selected, otherwise FALSE.  It is generally a bad idea
+        to set both write_JFIF_header and write_Adobe_marker.  In fact,
+        you probably shouldn't change the default settings at all --- the
+        default behavior ensures that the JPEG file's color space can be
+        recognized by the decoder.
+
+JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS]
+        Pointers to coefficient quantization tables, one per table slot,
+        or NULL if no table is defined for a slot.  Usually these should
+        be set via one of the above helper routines; jpeg_add_quant_table()
+        is general enough to define any quantization table.  The other
+        routines will set up table slot 0 for luminance quality and table
+        slot 1 for chrominance.
+
+int q_scale_factor[NUM_QUANT_TBLS]
+        [libjpeg v7+ API/ABI emulation only]
+        Linear quantization scaling factors (0-100, default 100)
+        for use with jpeg_default_qtables().
+        See rdswitch.c and cjpeg.c for an example of usage.
+        Note that the q_scale_factor[] values use "linear" scales, so JPEG
+        quality levels chosen by the user must be converted to these scales
+        using jpeg_quality_scaling().  Here is an example that corresponds to
+        cjpeg -quality 90,70:
+
+                jpeg_set_defaults(cinfo);
+
+                /* Set luminance quality 90. */
+                cinfo->q_scale_factor[0] = jpeg_quality_scaling(90);
+                /* Set chrominance quality 70. */
+                cinfo->q_scale_factor[1] = jpeg_quality_scaling(70);
+
+                jpeg_default_qtables(cinfo, force_baseline);
+
+        CAUTION: Setting separate quality levels for chrominance and luminance
+        is mainly only useful if chrominance subsampling is disabled.  2x2
+        chrominance subsampling (AKA "4:2:0") is the default, but you can
+        explicitly disable subsampling as follows:
+
+                cinfo->comp_info[0].v_samp_factor = 1;
+                cinfo->comp_info[0].h_samp_factor = 1;
+
+JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS]
+JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS]
+        Pointers to Huffman coding tables, one per table slot, or NULL if
+        no table is defined for a slot.  Slots 0 and 1 are filled with the
+        JPEG sample tables by jpeg_set_defaults().  If you need to allocate
+        more table structures, jpeg_alloc_huff_table() may be used.
+        Note that optimal Huffman tables can be computed for an image
+        by setting optimize_coding, as discussed above; there's seldom
+        any need to mess with providing your own Huffman tables.
+
+
+[libjpeg v7+ API/ABI emulation only]
+The actual dimensions of the JPEG image that will be written to the file are
+given by the following fields.  These are computed from the input image
+dimensions and the compression parameters by jpeg_start_compress().  You can
+also call jpeg_calc_jpeg_dimensions() to obtain the values that will result
+from the current parameter settings.  This can be useful if you are trying
+to pick a scaling ratio that will get close to a desired target size.
+
+JDIMENSION jpeg_width           Actual dimensions of output image.
+JDIMENSION jpeg_height
+
+
+Per-component parameters are stored in the struct cinfo.comp_info[i] for
+component number i.  Note that components here refer to components of the
+JPEG color space, *not* the source image color space.  A suitably large
+comp_info[] array is allocated by jpeg_set_defaults(); if you choose not
+to use that routine, it's up to you to allocate the array.
+
+int component_id
+        The one-byte identifier code to be recorded in the JPEG file for
+        this component.  For the standard color spaces, we recommend you
+        leave the default values alone.
+
+int h_samp_factor
+int v_samp_factor
+        Horizontal and vertical sampling factors for the component; must
+        be 1..4 according to the JPEG standard.  Note that larger sampling
+        factors indicate a higher-resolution component; many people find
+        this behavior quite unintuitive.  The default values are 2,2 for
+        luminance components and 1,1 for chrominance components, except
+        for grayscale where 1,1 is used.
+
+int quant_tbl_no
+        Quantization table number for component.  The default value is
+        0 for luminance components and 1 for chrominance components.
+
+int dc_tbl_no
+int ac_tbl_no
+        DC and AC entropy coding table numbers.  The default values are
+        0 for luminance components and 1 for chrominance components.
+
+int component_index
+        Must equal the component's index in comp_info[].  (Beginning in
+        release v6, the compressor library will fill this in automatically;
+        you don't have to.)
+
+
+Decompression parameter selection
+---------------------------------
+
+Decompression parameter selection is somewhat simpler than compression
+parameter selection, since all of the JPEG internal parameters are
+recorded in the source file and need not be supplied by the application.
+(Unless you are working with abbreviated files, in which case see
+"Abbreviated datastreams", below.)  Decompression parameters control
+the postprocessing done on the image to deliver it in a format suitable
+for the application's use.  Many of the parameters control speed/quality
+tradeoffs, in which faster decompression may be obtained at the price of
+a poorer-quality image.  The defaults select the highest quality (slowest)
+processing.
+
+The following fields in the JPEG object are set by jpeg_read_header() and
+may be useful to the application in choosing decompression parameters:
+
+int data_precision                      Data precision (bits per component)
+        If data_precision is 12, then use jpeg12_read_scanlines(),
+        jpeg12_skip_scanlines(), jpeg12_crop_scanline(), and/or
+        jpeg12_read_raw_data() instead of jpeg_read_scanlines(),
+        jpeg_skip_scanlines(), jpeg_crop_scanline(), and/or
+        jpeg_read_raw_data().  If data_precision is 16, then use
+        jpeg16_read_scanlines() instead of jpeg_read_scanlines().
+
+JDIMENSION image_width                  Width and height of image
+JDIMENSION image_height
+int num_components                      Number of color components
+J_COLOR_SPACE jpeg_color_space          Colorspace of image
+boolean saw_JFIF_marker                 TRUE if a JFIF APP0 marker was seen
+  UINT8 JFIF_major_version              Version information from JFIF marker
+  UINT8 JFIF_minor_version
+  UINT8 density_unit                    Resolution data from JFIF marker
+  UINT16 X_density
+  UINT16 Y_density
+boolean saw_Adobe_marker                TRUE if an Adobe APP14 marker was seen
+  UINT8 Adobe_transform                 Color transform code from Adobe marker
+
+The JPEG color space, unfortunately, is something of a guess since the JPEG
+standard proper does not provide a way to record it.  In practice most files
+adhere to the JFIF or Adobe conventions, and the decoder will recognize these
+correctly.  See "Special color spaces", below, for more info.
+
+
+The decompression parameters that determine the basic properties of the
+returned image are:
+
+J_COLOR_SPACE out_color_space
+        Output color space.  jpeg_read_header() sets an appropriate default
+        based on jpeg_color_space; typically it will be RGB or grayscale.
+        The application can change this field to request output in a different
+        colorspace.  For example, set it to JCS_GRAYSCALE to get grayscale
+        output from a color file.  (This is useful for previewing: grayscale
+        output is faster than full color since the color components need not
+        be processed.)  Note that not all possible color space transforms are
+        currently implemented; you may need to extend jdcolor.c if you want an
+        unusual conversion.
+
+unsigned int scale_num, scale_denom
+        Scale the image by the fraction scale_num/scale_denom.  Default is
+        1/1, or no scaling.  Currently, the only supported scaling ratios
+        are M/8 with all M from 1 to 16, or any reduced fraction thereof (such
+        as 1/2, 3/4, etc.)  (The library design allows for arbitrary
+        scaling ratios but this is not likely to be implemented any time soon.)
+        Smaller scaling ratios permit significantly faster decoding since
+        fewer pixels need be processed and a simpler IDCT method can be used.
+
+boolean quantize_colors
+        If set TRUE, colormapped output will be delivered.  Default is FALSE,
+        meaning that full-color output will be delivered.
+
+The next three parameters are relevant only if quantize_colors is TRUE.
+
+int desired_number_of_colors
+        Maximum number of colors to use in generating a library-supplied color
+        map (the actual number of colors is returned in a different field).
+        Default 256.  Ignored when the application supplies its own color map.
+
+boolean two_pass_quantize
+        If TRUE, an extra pass over the image is made to select a custom color
+        map for the image.  This usually looks a lot better than the one-size-
+        fits-all colormap that is used otherwise.  Default is TRUE.  Ignored
+        when the application supplies its own color map.
+
+J_DITHER_MODE dither_mode
+        Selects color dithering method.  Supported values are:
+                JDITHER_NONE    no dithering: fast, very low quality
+                JDITHER_ORDERED ordered dither: moderate speed and quality
+                JDITHER_FS      Floyd-Steinberg dither: slow, high quality
+        Default is JDITHER_FS.  (At present, ordered dither is implemented
+        only in the single-pass, standard-colormap case.  If you ask for
+        ordered dither when two_pass_quantize is TRUE or when you supply
+        an external color map, you'll get F-S dithering.)
+
+When quantize_colors is TRUE, the target color map is described by the next
+two fields.  colormap is set to NULL by jpeg_read_header().  The application
+can supply a color map by setting colormap non-NULL and setting
+actual_number_of_colors to the map size.  Otherwise, jpeg_start_decompress()
+selects a suitable color map and sets these two fields itself.
+[Implementation restriction: at present, an externally supplied colormap is
+only accepted for 3-component output color spaces.]
+
+JSAMPARRAY colormap
+        The color map, represented as a 2-D pixel array of out_color_components
+        rows and actual_number_of_colors columns.  Ignored if not quantizing.
+        CAUTION: if the JPEG library creates its own colormap, the storage
+        pointed to by this field is released by jpeg_finish_decompress().
+        Copy the colormap somewhere else first, if you want to save it.
+        CAUTION: if data_precision is 12 or 16, then this is actually a
+        J12SAMPARRAY or a J16SAMPARRAY, so it must be type-cast in order to
+        read/write 12-bit or 16-bit samples from/to the array.
+
+int actual_number_of_colors
+        The number of colors in the color map.
+
+Additional decompression parameters that the application may set include:
+
+J_DCT_METHOD dct_method
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: accurate integer method
+                JDCT_IFAST: less accurate integer method [legacy feature]
+                JDCT_FLOAT: floating-point method [legacy feature]
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        When the Independent JPEG Group's software was first released in 1991,
+        the decompression time for a 1-megapixel JPEG image on a mainstream PC
+        was measured in minutes.  Thus, JDCT_IFAST provided noticeable
+        performance benefits.  On modern CPUs running libjpeg-turbo, however,
+        the decompression time for a 1-megapixel JPEG image is measured in
+        milliseconds, and thus the performance benefits of JDCT_IFAST are much
+        less noticeable.  On modern x86/x86-64 CPUs that support AVX2
+        instructions, JDCT_IFAST and JDCT_ISLOW have similar performance.  On
+        other types of CPUs, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW.
+
+        If the JPEG image was compressed using a quality level of 85 or below,
+        then there should be little or no perceptible quality difference
+        between the two algorithms.  When decompressing images that were
+        compressed using quality levels above 85, however, the difference
+        between JDCT_IFAST and JDCT_ISLOW becomes more pronounced.  With images
+        compressed using quality=97, for instance, JDCT_IFAST incurs generally
+        about a 4-6 dB loss in PSNR relative to JDCT_ISLOW, but this can be
+        larger for some images.  If you can avoid it, do not use JDCT_IFAST
+        when decompressing images that were compressed using quality levels
+        above 97.  The algorithm often degenerates for such images and can
+        actually produce a more lossy output image than if the JPEG image had
+        been compressed using lower quality levels.
+
+        JDCT_FLOAT does not produce significantly more accurate results than
+        JDCT_ISLOW, and it is much slower.  JDCT_FLOAT may also give different
+        results on different machines due to varying roundoff behavior, whereas
+        the integer methods should give the same results on all machines.
+
+boolean do_fancy_upsampling
+        If TRUE, do careful upsampling of chroma components.  If FALSE,
+        a faster but sloppier method is used.  Default is TRUE.  The visual
+        impact of the sloppier method is often very small.
+
+boolean do_block_smoothing
+        If TRUE, interblock smoothing is applied in early stages of decoding
+        progressive JPEG files; if FALSE, not.  Default is TRUE.  Early
+        progression stages look "fuzzy" with smoothing, "blocky" without.
+        In any case, block smoothing ceases to be applied after the first few
+        AC coefficients are known to full accuracy, so it is relevant only
+        when using buffered-image mode for progressive images.
+
+boolean enable_1pass_quant
+boolean enable_external_quant
+boolean enable_2pass_quant
+        These are significant only in buffered-image mode, which is
+        described in its own section below.
+
+
+The output image dimensions are given by the following fields.  These are
+computed from the source image dimensions and the decompression parameters
+by jpeg_start_decompress().  You can also call jpeg_calc_output_dimensions()
+to obtain the values that will result from the current parameter settings.
+This can be useful if you are trying to pick a scaling ratio that will get
+close to a desired target size.  It's also important if you are using the
+JPEG library's memory manager to allocate output buffer space, because you
+are supposed to request such buffers *before* jpeg_start_decompress().
+
+JDIMENSION output_width         Actual dimensions of output image.
+JDIMENSION output_height
+int out_color_components        Number of color components in out_color_space.
+int output_components           Number of color components returned.
+int rec_outbuf_height           Recommended height of scanline buffer.
+
+When quantizing colors, output_components is 1, indicating a single color map
+index per pixel.  Otherwise it equals out_color_components.  The output arrays
+are required to be output_width * output_components J*SAMPLEs wide.
+
+rec_outbuf_height is the recommended minimum height (in scanlines) of the
+buffer passed to jpeg*_read_scanlines().  If the buffer is smaller, the
+library will still work, but time will be wasted due to unnecessary data
+copying.  In high-quality modes, rec_outbuf_height is always 1, but some
+faster, lower-quality modes set it to larger values (typically 2 to 4).
+If you are going to ask for a high-speed processing mode, you may as well
+go to the trouble of honoring rec_outbuf_height so as to avoid data copying.
+(An output buffer larger than rec_outbuf_height lines is OK, but won't
+provide any material speed improvement over that height.)
+
+
+Special color spaces
+--------------------
+
+The JPEG standard itself is "color blind" and doesn't specify any particular
+color space.  It is customary to convert color data to a luminance/chrominance
+color space before compressing, since this permits greater compression.  The
+existing de-facto JPEG file format standards specify YCbCr or grayscale data
+(JFIF), or grayscale, RGB, YCbCr, CMYK, or YCCK (Adobe).  For special
+applications such as multispectral images, other color spaces can be used,
+but it must be understood that such files will be unportable.
+
+The JPEG library can handle the most common colorspace conversions (namely
+RGB <=> YCbCr and CMYK <=> YCCK).  It can also deal with data of an unknown
+color space, passing it through without conversion.  If you deal extensively
+with an unusual color space, you can easily extend the library to understand
+additional color spaces and perform appropriate conversions.
+
+For compression, the source data's color space is specified by field
+in_color_space.  This is transformed to the JPEG file's color space given
+by jpeg_color_space.  jpeg_set_defaults() chooses a reasonable JPEG color
+space depending on in_color_space, but you can override this by calling
+jpeg_set_colorspace().  Of course you must select a supported transformation.
+jccolor.c currently supports the following transformations:
+        RGB => YCbCr
+        RGB => GRAYSCALE
+        YCbCr => GRAYSCALE
+        CMYK => YCCK
+plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB,
+YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN.
+
+The de-facto file format standards (JFIF and Adobe) specify APPn markers that
+indicate the color space of the JPEG file.  It is important to ensure that
+these are written correctly, or omitted if the JPEG file's color space is not
+one of the ones supported by the de-facto standards.  jpeg_set_colorspace()
+will set the compression parameters to include or omit the APPn markers
+properly, so long as it is told the truth about the JPEG color space.
+For example, if you are writing some random 3-component color space without
+conversion, don't try to fake out the library by setting in_color_space and
+jpeg_color_space to JCS_YCbCr; use JCS_UNKNOWN.  You may want to write an
+APPn marker of your own devising to identify the colorspace --- see "Special
+markers", below.
+
+When told that the color space is UNKNOWN, the library will default to using
+luminance-quality compression parameters for all color components.  You may
+well want to change these parameters.  See the source code for
+jpeg_set_colorspace(), in jcparam.c, for details.
+
+For decompression, the JPEG file's color space is given in jpeg_color_space,
+and this is transformed to the output color space out_color_space.
+jpeg_read_header's setting of jpeg_color_space can be relied on if the file
+conforms to JFIF or Adobe conventions, but otherwise it is no better than a
+guess.  If you know the JPEG file's color space for certain, you can override
+jpeg_read_header's guess by setting jpeg_color_space.  jpeg_read_header also
+selects a default output color space based on (its guess of) jpeg_color_space;
+set out_color_space to override this.  Again, you must select a supported
+transformation.  jdcolor.c currently supports
+        YCbCr => RGB
+        YCbCr => GRAYSCALE
+        RGB => GRAYSCALE
+        GRAYSCALE => RGB
+        YCCK => CMYK
+as well as the null transforms.  (Since GRAYSCALE=>RGB is provided, an
+application can force grayscale JPEGs to look like color JPEGs if it only
+wants to handle one case.)
+
+The two-pass color quantizer, jquant2.c, is specialized to handle RGB data
+(it weights distances appropriately for RGB colors).  You'll need to modify
+the code if you want to use it for non-RGB output color spaces.  Note that
+jquant2.c is used to map to an application-supplied colormap as well as for
+the normal two-pass colormap selection process.
+
+CAUTION: it appears that Adobe Photoshop writes inverted data in CMYK JPEG
+files: 0 represents 100% ink coverage, rather than 0% ink as you'd expect.
+This is arguably a bug in Photoshop, but if you need to work with Photoshop
+CMYK files, you will have to deal with it in your application.  We cannot
+"fix" this in the library by inverting the data during the CMYK<=>YCCK
+transform, because that would break other applications, notably Ghostscript.
+Photoshop versions prior to 3.0 write EPS files containing JPEG-encoded CMYK
+data in the same inverted-YCCK representation used in bare JPEG files, but
+the surrounding PostScript code performs an inversion using the PS image
+operator.  I am told that Photoshop 3.0 will write uninverted YCCK in
+EPS/JPEG files, and will omit the PS-level inversion.  (But the data
+polarity used in bare JPEG files will not change in 3.0.)  In either case,
+the JPEG library must not invert the data itself, or else Ghostscript would
+read these EPS files incorrectly.
+
+
+Error handling
+--------------
+
+When the default error handler is used, any error detected inside the JPEG
+routines will cause a message to be printed on stderr, followed by exit().
+You can supply your own error handling routines to override this behavior
+and to control the treatment of nonfatal warnings and trace/debug messages.
+The file example.c illustrates the most common case, which is to have the
+application regain control after an error rather than exiting.
+
+The JPEG library never writes any message directly; it always goes through
+the error handling routines.  Three classes of messages are recognized:
+  * Fatal errors: the library cannot continue.
+  * Warnings: the library can continue, but the data is corrupt, and a
+    damaged output image is likely to result.
+  * Trace/informational messages.  These come with a trace level indicating
+    the importance of the message; you can control the verbosity of the
+    program by adjusting the maximum trace level that will be displayed.
+
+You may, if you wish, simply replace the entire JPEG error handling module
+(jerror.c) with your own code.  However, you can avoid code duplication by
+only replacing some of the routines depending on the behavior you need.
+This is accomplished by calling jpeg_std_error() as usual, but then overriding
+some of the method pointers in the jpeg_error_mgr struct, as illustrated by
+example.c.
+
+All of the error handling routines will receive a pointer to the JPEG object
+(a j_common_ptr which points to either a jpeg_compress_struct or a
+jpeg_decompress_struct; if you need to tell which, test the is_decompressor
+field).  This struct includes a pointer to the error manager struct in its
+"err" field.  Frequently, custom error handler routines will need to access
+additional data which is not known to the JPEG library or the standard error
+handler.  The most convenient way to do this is to embed either the JPEG
+object or the jpeg_error_mgr struct in a larger structure that contains
+additional fields; then casting the passed pointer provides access to the
+additional fields.  Again, see example.c for one way to do it.  (Beginning
+with IJG version 6b, there is also a void pointer "client_data" in each
+JPEG object, which the application can also use to find related data.
+The library does not touch client_data at all.)
+
+The individual methods that you might wish to override are:
+
+error_exit (j_common_ptr cinfo)
+        Receives control for a fatal error.  Information sufficient to
+        generate the error message has been stored in cinfo->err; call
+        output_message to display it.  Control must NOT return to the caller;
+        generally this routine will exit() or longjmp() somewhere.
+        Typically you would override this routine to get rid of the exit()
+        default behavior.  Note that if you continue processing, you should
+        clean up the JPEG object with jpeg_abort() or jpeg_destroy().
+
+output_message (j_common_ptr cinfo)
+        Actual output of any JPEG message.  Override this to send messages
+        somewhere other than stderr.  Note that this method does not know
+        how to generate a message, only where to send it.
+
+format_message (j_common_ptr cinfo, char *buffer)
+        Constructs a readable error message string based on the error info
+        stored in cinfo->err.  This method is called by output_message.  Few
+        applications should need to override this method.  One possible
+        reason for doing so is to implement dynamic switching of error message
+        language.
+
+emit_message (j_common_ptr cinfo, int msg_level)
+        Decide whether or not to emit a warning or trace message; if so,
+        calls output_message.  The main reason for overriding this method
+        would be to abort on warnings.  msg_level is -1 for warnings,
+        0 and up for trace messages.
+
+Only error_exit() and emit_message() are called from the rest of the JPEG
+library; the other two are internal to the error handler.
+
+The actual message texts are stored in an array of strings which is pointed to
+by the field err->jpeg_message_table.  The messages are numbered from 0 to
+err->last_jpeg_message, and it is these code numbers that are used in the
+JPEG library code.  You could replace the message texts (for instance, with
+messages in French or German) by changing the message table pointer.  See
+jerror.h for the default texts.  CAUTION: this table will almost certainly
+change or grow from one library version to the next.
+
+It may be useful for an application to add its own message texts that are
+handled by the same mechanism.  The error handler supports a second "add-on"
+message table for this purpose.  To define an addon table, set the pointer
+err->addon_message_table and the message numbers err->first_addon_message and
+err->last_addon_message.  If you number the addon messages beginning at 1000
+or so, you won't have to worry about conflicts with the library's built-in
+messages.  See the sample applications cjpeg/djpeg for an example of using
+addon messages (the addon messages are defined in cderror.h).
+
+Actual invocation of the error handler is done via macros defined in jerror.h:
+        ERREXITn(...)   for fatal errors
+        WARNMSn(...)    for corrupt-data warnings
+        TRACEMSn(...)   for trace and informational messages.
+These macros store the message code and any additional parameters into the
+error handler struct, then invoke the error_exit() or emit_message() method.
+The variants of each macro are for varying numbers of additional parameters.
+The additional parameters are inserted into the generated message using
+standard printf() format codes.
+
+See jerror.h and jerror.c for further details.
+
+
+Compressed data handling (source and destination managers)
+----------------------------------------------------------
+
+The JPEG compression library sends its compressed data to a "destination
+manager" module.  The default destination manager just writes the data to a
+memory buffer or to a stdio stream, but you can provide your own manager to
+do something else.  Similarly, the decompression library calls a "source
+manager" to obtain the compressed data; you can provide your own source
+manager if you want the data to come from somewhere other than a memory
+buffer or a stdio stream.
+
+In both cases, compressed data is processed a bufferload at a time: the
+destination or source manager provides a work buffer, and the library invokes
+the manager only when the buffer is filled or emptied.  (You could define a
+one-character buffer to force the manager to be invoked for each byte, but
+that would be rather inefficient.)  The buffer's size and location are
+controlled by the manager, not by the library.  For example, the memory
+source manager just makes the buffer pointer and length point to the original
+data in memory.  In this case the buffer-reload procedure will be invoked
+only if the decompressor ran off the end of the datastream, which would
+indicate an erroneous datastream.
+
+The work buffer is defined as an array of datatype JOCTET, which is generally
+"char" or "unsigned char".  On a machine where char is not exactly 8 bits
+wide, you must define JOCTET as a wider data type and then modify the data
+source and destination modules to transcribe the work arrays into 8-bit units
+on external storage.
+
+A data destination manager struct contains a pointer and count defining the
+next byte to write in the work buffer and the remaining free space:
+
+        JOCTET *next_output_byte;   /* => next byte to write in buffer */
+        size_t free_in_buffer;      /* # of byte spaces remaining in buffer */
+
+The library increments the pointer and decrements the count until the buffer
+is filled.  The manager's empty_output_buffer method must reset the pointer
+and count.  The manager is expected to remember the buffer's starting address
+and total size in private fields not visible to the library.
+
+A data destination manager provides three methods:
+
+init_destination (j_compress_ptr cinfo)
+        Initialize destination.  This is called by jpeg_start_compress()
+        before any data is actually written.  It must initialize
+        next_output_byte and free_in_buffer.  free_in_buffer must be
+        initialized to a positive value.
+
+empty_output_buffer (j_compress_ptr cinfo)
+        This is called whenever the buffer has filled (free_in_buffer
+        reaches zero).  In typical applications, it should write out the
+        *entire* buffer (use the saved start address and buffer length;
+        ignore the current state of next_output_byte and free_in_buffer).
+        Then reset the pointer & count to the start of the buffer, and
+        return TRUE indicating that the buffer has been dumped.
+        free_in_buffer must be set to a positive value when TRUE is
+        returned.  A FALSE return should only be used when I/O suspension is
+        desired (this operating mode is discussed in the next section).
+
+term_destination (j_compress_ptr cinfo)
+        Terminate destination --- called by jpeg_finish_compress() after all
+        data has been written.  In most applications, this must flush any
+        data remaining in the buffer.  Use either next_output_byte or
+        free_in_buffer to determine how much data is in the buffer.
+
+term_destination() is NOT called by jpeg_abort() or jpeg_destroy().  If you
+want the destination manager to be cleaned up during an abort, you must do it
+yourself.
+
+You will also need code to create a jpeg_destination_mgr struct, fill in its
+method pointers, and insert a pointer to the struct into the "dest" field of
+the JPEG compression object.  This can be done in-line in your setup code if
+you like, but it's probably cleaner to provide a separate routine similar to
+the jpeg_stdio_dest() or jpeg_mem_dest() routines of the supplied destination
+managers.
+
+Decompression source managers follow a parallel design, but with some
+additional frammishes.  The source manager struct contains a pointer and count
+defining the next byte to read from the work buffer and the number of bytes
+remaining:
+
+        const JOCTET *next_input_byte;  /* => next byte to read from buffer */
+        size_t bytes_in_buffer;         /* # of bytes remaining in buffer */
+
+The library increments the pointer and decrements the count until the buffer
+is emptied.  The manager's fill_input_buffer method must reset the pointer and
+count.  In most applications, the manager must remember the buffer's starting
+address and total size in private fields not visible to the library.
+
+A data source manager provides five methods:
+
+init_source (j_decompress_ptr cinfo)
+        Initialize source.  This is called by jpeg_read_header() before any
+        data is actually read.  Unlike init_destination(), it may leave
+        bytes_in_buffer set to 0 (in which case a fill_input_buffer() call
+        will occur immediately).
+
+fill_input_buffer (j_decompress_ptr cinfo)
+        This is called whenever bytes_in_buffer has reached zero and more
+        data is wanted.  In typical applications, it should read fresh data
+        into the buffer (ignoring the current state of next_input_byte and
+        bytes_in_buffer), reset the pointer & count to the start of the
+        buffer, and return TRUE indicating that the buffer has been reloaded.
+        It is not necessary to fill the buffer entirely, only to obtain at
+        least one more byte.  bytes_in_buffer MUST be set to a positive value
+        if TRUE is returned.  A FALSE return should only be used when I/O
+        suspension is desired (this mode is discussed in the next section).
+
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+        Skip num_bytes worth of data.  The buffer pointer and count should
+        be advanced over num_bytes input bytes, refilling the buffer as
+        needed.  This is used to skip over a potentially large amount of
+        uninteresting data (such as an APPn marker).  In some applications
+        it may be possible to optimize away the reading of the skipped data,
+        but it's not clear that being smart is worth much trouble; large
+        skips are uncommon.  bytes_in_buffer may be zero on return.
+        A zero or negative skip count should be treated as a no-op.
+
+resync_to_restart (j_decompress_ptr cinfo, int desired)
+        This routine is called only when the decompressor has failed to find
+        a restart (RSTn) marker where one is expected.  Its mission is to
+        find a suitable point for resuming decompression.  For most
+        applications, we recommend that you just use the default resync
+        procedure, jpeg_resync_to_restart().  However, if you are able to back
+        up in the input data stream, or if you have a-priori knowledge about
+        the likely location of restart markers, you may be able to do better.
+        Read the read_restart_marker() and jpeg_resync_to_restart() routines
+        in jdmarker.c if you think you'd like to implement your own resync
+        procedure.
+
+term_source (j_decompress_ptr cinfo)
+        Terminate source --- called by jpeg_finish_decompress() after all
+        data has been read.  Often a no-op.
+
+For both fill_input_buffer() and skip_input_data(), there is no such thing
+as an EOF return.  If the end of the file has been reached, the routine has
+a choice of exiting via ERREXIT() or inserting fake data into the buffer.
+In most cases, generating a warning message and inserting a fake EOI marker
+is the best course of action --- this will allow the decompressor to output
+however much of the image is there.  In pathological cases, the decompressor
+may swallow the EOI and again demand data ... just keep feeding it fake EOIs.
+jdatasrc.c illustrates the recommended error recovery behavior.
+
+term_source() is NOT called by jpeg_abort() or jpeg_destroy().  If you want
+the source manager to be cleaned up during an abort, you must do it yourself.
+
+You will also need code to create a jpeg_source_mgr struct, fill in its method
+pointers, and insert a pointer to the struct into the "src" field of the JPEG
+decompression object.  This can be done in-line in your setup code if you
+like, but it's probably cleaner to provide a separate routine similar to the
+jpeg_stdio_src() or jpeg_mem_src() routines of the supplied source managers.
+
+For more information, consult the memory and stdio source and destination
+managers in jdatasrc.c and jdatadst.c.
+
+
+I/O suspension
+--------------
+
+Some applications need to use the JPEG library as an incremental memory-to-
+memory filter: when the compressed data buffer is filled or emptied, they want
+control to return to the outer loop, rather than expecting that the buffer can
+be emptied or reloaded within the data source/destination manager subroutine.
+The library supports this need by providing an "I/O suspension" mode, which we
+describe in this section.
+
+The I/O suspension mode is not a panacea: nothing is guaranteed about the
+maximum amount of time spent in any one call to the library, so it will not
+eliminate response-time problems in single-threaded applications.  If you
+need guaranteed response time, we suggest you "bite the bullet" and implement
+a real multi-tasking capability.
+
+To use I/O suspension, cooperation is needed between the calling application
+and the data source or destination manager; you will always need a custom
+source/destination manager.  (Please read the previous section if you haven't
+already.)  The basic idea is that the empty_output_buffer() or
+fill_input_buffer() routine is a no-op, merely returning FALSE to indicate
+that it has done nothing.  Upon seeing this, the JPEG library suspends
+operation and returns to its caller.  The surrounding application is
+responsible for emptying or refilling the work buffer before calling the
+JPEG library again.
+
+Compression suspension:
+
+For compression suspension, use an empty_output_buffer() routine that returns
+FALSE; typically it will not do anything else.  This will cause the
+compressor to return to the caller of jpeg*_write_scanlines(), with the return
+value indicating that not all the supplied scanlines have been accepted.
+The application must make more room in the output buffer, adjust the output
+buffer pointer/count appropriately, and then call jpeg*_write_scanlines()
+again, pointing to the first unconsumed scanline.
+
+When forced to suspend, the compressor will backtrack to a convenient stopping
+point (usually the start of the current MCU); it will regenerate some output
+data when restarted.  Therefore, although empty_output_buffer() is only
+called when the buffer is filled, you should NOT write out the entire buffer
+after a suspension.  Write only the data up to the current position of
+next_output_byte/free_in_buffer.  The data beyond that point will be
+regenerated after resumption.
+
+Because of the backtracking behavior, a good-size output buffer is essential
+for efficiency; you don't want the compressor to suspend often.  (In fact, an
+overly small buffer could lead to infinite looping, if a single MCU required
+more data than would fit in the buffer.)  We recommend a buffer of at least
+several Kbytes.  You may want to insert explicit code to ensure that you don't
+call jpeg*_write_scanlines() unless there is a reasonable amount of space in
+the output buffer; in other words, flush the buffer before trying to compress
+more data.
+
+The compressor does not allow suspension while it is trying to write JPEG
+markers at the beginning and end of the file.  This means that:
+  * At the beginning of a compression operation, there must be enough free
+    space in the output buffer to hold the header markers (typically 600 or
+    so bytes).  The recommended buffer size is bigger than this anyway, so
+    this is not a problem as long as you start with an empty buffer.  However,
+    this restriction might catch you if you insert large special markers, such
+    as a JFIF thumbnail image, without flushing the buffer afterwards.
+  * When you call jpeg_finish_compress(), there must be enough space in the
+    output buffer to emit any buffered data and the final EOI marker.  In the
+    current implementation, half a dozen bytes should suffice for this, but
+    for safety's sake we recommend ensuring that at least 100 bytes are free
+    before calling jpeg_finish_compress().
+
+A more significant restriction is that jpeg_finish_compress() cannot suspend.
+This means you cannot use suspension with multi-pass operating modes, namely
+Huffman code optimization and multiple-scan output.  Those modes write the
+whole file during jpeg_finish_compress(), which will certainly result in
+buffer overrun.  (Note that this restriction applies only to compression,
+not decompression.  The decompressor supports input suspension in all of its
+operating modes.)
+
+Decompression suspension:
+
+For decompression suspension, use a fill_input_buffer() routine that simply
+returns FALSE (except perhaps during error recovery, as discussed below).
+This will cause the decompressor to return to its caller with an indication
+that suspension has occurred.  This can happen at four places:
+  * jpeg_read_header(): will return JPEG_SUSPENDED.
+  * jpeg_start_decompress(): will return FALSE, rather than its usual TRUE.
+  * jpeg*_read_scanlines(): will return the number of scanlines already
+        completed (possibly 0).
+  * jpeg_finish_decompress(): will return FALSE, rather than its usual TRUE.
+The surrounding application must recognize these cases, load more data into
+the input buffer, and repeat the call.  In the case of jpeg*_read_scanlines(),
+increment the passed pointers past any scanlines successfully read.
+
+Just as with compression, the decompressor will typically backtrack to a
+convenient restart point before suspending.  When fill_input_buffer() is
+called, next_input_byte/bytes_in_buffer point to the current restart point,
+which is where the decompressor will backtrack to if FALSE is returned.
+The data beyond that position must NOT be discarded if you suspend; it needs
+to be re-read upon resumption.  In most implementations, you'll need to shift
+this data down to the start of your work buffer and then load more data after
+it.  Again, this behavior means that a several-Kbyte work buffer is essential
+for decent performance; furthermore, you should load a reasonable amount of
+new data before resuming decompression.  (If you loaded, say, only one new
+byte each time around, you could waste a LOT of cycles.)
+
+The skip_input_data() source manager routine requires special care in a
+suspension scenario.  This routine is NOT granted the ability to suspend the
+decompressor; it can decrement bytes_in_buffer to zero, but no more.  If the
+requested skip distance exceeds the amount of data currently in the input
+buffer, then skip_input_data() must set bytes_in_buffer to zero and record the
+additional skip distance somewhere else.  The decompressor will immediately
+call fill_input_buffer(), which should return FALSE, which will cause a
+suspension return.  The surrounding application must then arrange to discard
+the recorded number of bytes before it resumes loading the input buffer.
+(Yes, this design is rather baroque, but it avoids complexity in the far more
+common case where a non-suspending source manager is used.)
+
+If the input data has been exhausted, we recommend that you emit a warning
+and insert dummy EOI markers just as a non-suspending data source manager
+would do.  This can be handled either in the surrounding application logic or
+within fill_input_buffer(); the latter is probably more efficient.  If
+fill_input_buffer() knows that no more data is available, it can set the
+pointer/count to point to a dummy EOI marker and then return TRUE just as
+though it had read more data in a non-suspending situation.
+
+The decompressor does not attempt to suspend within standard JPEG markers;
+instead it will backtrack to the start of the marker and reprocess the whole
+marker next time.  Hence the input buffer must be large enough to hold the
+longest standard marker in the file.  Standard JPEG markers should normally
+not exceed a few hundred bytes each (DHT tables are typically the longest).
+We recommend at least a 2K buffer for performance reasons, which is much
+larger than any correct marker is likely to be.  For robustness against
+damaged marker length counts, you may wish to insert a test in your
+application for the case that the input buffer is completely full and yet
+the decoder has suspended without consuming any data --- otherwise, if this
+situation did occur, it would lead to an endless loop.  (The library can't
+provide this test since it has no idea whether "the buffer is full", or
+even whether there is a fixed-size input buffer.)
+
+The input buffer would need to be 64K to allow for arbitrary COM or APPn
+markers, but these are handled specially: they are either saved into allocated
+memory, or skipped over by calling skip_input_data().  In the former case,
+suspension is handled correctly, and in the latter case, the problem of
+buffer overrun is placed on skip_input_data's shoulders, as explained above.
+Note that if you provide your own marker handling routine for large markers,
+you should consider how to deal with buffer overflow.
+
+Multiple-buffer management:
+
+In some applications it is desirable to store the compressed data in a linked
+list of buffer areas, so as to avoid data copying.  This can be handled by
+having empty_output_buffer() or fill_input_buffer() set the pointer and count
+to reference the next available buffer; FALSE is returned only if no more
+buffers are available.  Although seemingly straightforward, there is a
+pitfall in this approach: the backtrack that occurs when FALSE is returned
+could back up into an earlier buffer.  For example, when fill_input_buffer()
+is called, the current pointer & count indicate the backtrack restart point.
+Since fill_input_buffer() will set the pointer and count to refer to a new
+buffer, the restart position must be saved somewhere else.  Suppose a second
+call to fill_input_buffer() occurs in the same library call, and no
+additional input data is available, so fill_input_buffer must return FALSE.
+If the JPEG library has not moved the pointer/count forward in the current
+buffer, then *the correct restart point is the saved position in the prior
+buffer*.  Prior buffers may be discarded only after the library establishes
+a restart point within a later buffer.  Similar remarks apply for output into
+a chain of buffers.
+
+The library will never attempt to backtrack over a skip_input_data() call,
+so any skipped data can be permanently discarded.  You still have to deal
+with the case of skipping not-yet-received data, however.
+
+It's much simpler to use only a single buffer; when fill_input_buffer() is
+called, move any unconsumed data (beyond the current pointer/count) down to
+the beginning of this buffer and then load new data into the remaining buffer
+space.  This approach requires a little more data copying but is far easier
+to get right.
+
+
+Progressive JPEG support
+------------------------
+
+Progressive JPEG rearranges the stored data into a series of scans of
+increasing quality.  In situations where a JPEG file is transmitted across a
+slow communications link, a decoder can generate a low-quality image very
+quickly from the first scan, then gradually improve the displayed quality as
+more scans are received.  The final image after all scans are complete is
+identical to that of a regular (sequential) JPEG file of the same quality
+setting.  Progressive JPEG files are often slightly smaller than equivalent
+sequential JPEG files, but the possibility of incremental display is the main
+reason for using progressive JPEG.
+
+The IJG encoder library generates progressive JPEG files when given a
+suitable "scan script" defining how to divide the data into scans.
+Creation of progressive JPEG files is otherwise transparent to the encoder.
+Progressive JPEG files can also be read transparently by the decoder library.
+If the decoding application simply uses the library as defined above, it
+will receive a final decoded image without any indication that the file was
+progressive.  Of course, this approach does not allow incremental display.
+To perform incremental display, an application needs to use the decoder
+library's "buffered-image" mode, in which it receives a decoded image
+multiple times.
+
+Each displayed scan requires about as much work to decode as a full JPEG
+image of the same size, so the decoder must be fairly fast in relation to the
+data transmission rate in order to make incremental display useful.  However,
+it is possible to skip displaying the image and simply add the incoming bits
+to the decoder's coefficient buffer.  This is fast because only Huffman
+decoding need be done, not IDCT, upsampling, colorspace conversion, etc.
+The IJG decoder library allows the application to switch dynamically between
+displaying the image and simply absorbing the incoming bits.  A properly
+coded application can automatically adapt the number of display passes to
+suit the time available as the image is received.  Also, a final
+higher-quality display cycle can be performed from the buffered data after
+the end of the file is reached.
+
+Progressive compression:
+
+To create a progressive JPEG file (or a multiple-scan sequential JPEG file),
+set the scan_info cinfo field to point to an array of scan descriptors, and
+perform compression as usual.  Instead of constructing your own scan list,
+you can call the jpeg_simple_progression() helper routine to create a
+recommended progression sequence; this method should be used by all
+applications that don't want to get involved in the nitty-gritty of
+progressive scan sequence design.  (If you want to provide user control of
+scan sequences, you may wish to borrow the scan script reading code found
+in rdswitch.c, so that you can read scan script files just like cjpeg's.)
+When scan_info is not NULL, the compression library will store DCT'd data
+into a buffer array as jpeg*_write_scanlines() is called, and will emit all
+the requested scans during jpeg_finish_compress().  This implies that
+multiple-scan output cannot be created with a suspending data destination
+manager, since jpeg_finish_compress() does not support suspension.  We
+should also note that the compressor currently forces Huffman optimization
+mode when creating a progressive JPEG file, because the default Huffman
+tables are unsuitable for progressive files.
+
+Progressive decompression:
+
+When buffered-image mode is not used, the decoder library will read all of
+a multi-scan file during jpeg_start_decompress(), so that it can provide a
+final decoded image.  (Here "multi-scan" means either progressive or
+multi-scan sequential.)  This makes multi-scan files transparent to the
+decoding application.  However, existing applications that used suspending
+input with version 5 of the IJG library will need to be modified to check
+for a suspension return from jpeg_start_decompress().
+
+To perform incremental display, an application must use the library's
+buffered-image mode.  This is described in the next section.
+
+
+Buffered-image mode
+-------------------
+
+In buffered-image mode, the library stores the partially decoded image in a
+coefficient buffer, from which it can be read out as many times as desired.
+This mode is typically used for incremental display of progressive JPEG files,
+but it can be used with any JPEG file.  Each scan of a progressive JPEG file
+adds more data (more detail) to the buffered image.  The application can
+display in lockstep with the source file (one display pass per input scan),
+or it can allow input processing to outrun display processing.  By making
+input and display processing run independently, it is possible for the
+application to adapt progressive display to a wide range of data transmission
+rates.
+
+The basic control flow for buffered-image decoding is
+
+        jpeg_create_decompress()
+        set data source
+        jpeg_read_header()
+        set overall decompression parameters
+        cinfo.buffered_image = TRUE;    /* select buffered-image mode */
+        jpeg_start_decompress()
+        for (each output pass) {
+            adjust output decompression parameters if required
+            jpeg_start_output()         /* start a new output pass */
+            for (all scanlines in image) {
+                jpeg_read_scanlines()   /* Use jpeg12_read_scanlines() for
+                                           12-bit data precision and
+                                           jpeg16_read_scanlines() for 16-bit
+                                           data precision. */
+                display scanlines
+            }
+            jpeg_finish_output()        /* terminate output pass */
+        }
+        jpeg_finish_decompress()
+        jpeg_destroy_decompress()
+
+This differs from ordinary unbuffered decoding in that there is an additional
+level of looping.  The application can choose how many output passes to make
+and how to display each pass.
+
+The simplest approach to displaying progressive images is to do one display
+pass for each scan appearing in the input file.  In this case the outer loop
+condition is typically
+        while (!jpeg_input_complete(&cinfo))
+and the start-output call should read
+        jpeg_start_output(&cinfo, cinfo.input_scan_number);
+The second parameter to jpeg_start_output() indicates which scan of the input
+file is to be displayed; the scans are numbered starting at 1 for this
+purpose.  (You can use a loop counter starting at 1 if you like, but using
+the library's input scan counter is easier.)  The library automatically reads
+data as necessary to complete each requested scan, and jpeg_finish_output()
+advances to the next scan or end-of-image marker (hence input_scan_number
+will be incremented by the time control arrives back at jpeg_start_output()).
+With this technique, data is read from the input file only as needed, and
+input and output processing run in lockstep.
+
+After reading the final scan and reaching the end of the input file, the
+buffered image remains available; it can be read additional times by
+repeating the jpeg_start_output()/jpeg*_read_scanlines()/jpeg_finish_output()
+sequence.  For example, a useful technique is to use fast one-pass color
+quantization for display passes made while the image is arriving, followed by
+a final display pass using two-pass quantization for highest quality.  This
+is done by changing the library parameters before the final output pass.
+Changing parameters between passes is discussed in detail below.
+
+In general the last scan of a progressive file cannot be recognized as such
+until after it is read, so a post-input display pass is the best approach if
+you want special processing in the final pass.
+
+When done with the image, be sure to call jpeg_finish_decompress() to release
+the buffered image (or just use jpeg_destroy_decompress()).
+
+If input data arrives faster than it can be displayed, the application can
+cause the library to decode input data in advance of what's needed to produce
+output.  This is done by calling the routine jpeg_consume_input().
+The return value is one of the following:
+        JPEG_REACHED_SOS:    reached an SOS marker (the start of a new scan)
+        JPEG_REACHED_EOI:    reached the EOI marker (end of image)
+        JPEG_ROW_COMPLETED:  completed reading one MCU row of compressed data
+        JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan
+        JPEG_SUSPENDED:      suspended before completing any of the above
+(JPEG_SUSPENDED can occur only if a suspending data source is used.)  This
+routine can be called at any time after initializing the JPEG object.  It
+reads some additional data and returns when one of the indicated significant
+events occurs.  (If called after the EOI marker is reached, it will
+immediately return JPEG_REACHED_EOI without attempting to read more data.)
+
+The library's output processing will automatically call jpeg_consume_input()
+whenever the output processing overtakes the input; thus, simple lockstep
+display requires no direct calls to jpeg_consume_input().  But by adding
+calls to jpeg_consume_input(), you can absorb data in advance of what is
+being displayed.  This has two benefits:
+  * You can limit buildup of unprocessed data in your input buffer.
+  * You can eliminate extra display passes by paying attention to the
+    state of the library's input processing.
+
+The first of these benefits only requires interspersing calls to
+jpeg_consume_input() with your display operations and any other processing
+you may be doing.  To avoid wasting cycles due to backtracking, it's best to
+call jpeg_consume_input() only after a hundred or so new bytes have arrived.
+This is discussed further under "I/O suspension", above.  (Note: the JPEG
+library currently is not thread-safe.  You must not call jpeg_consume_input()
+from one thread of control if a different library routine is working on the
+same JPEG object in another thread.)
+
+When input arrives fast enough that more than one new scan is available
+before you start a new output pass, you may as well skip the output pass
+corresponding to the completed scan.  This occurs for free if you pass
+cinfo.input_scan_number as the target scan number to jpeg_start_output().
+The input_scan_number field is simply the index of the scan currently being
+consumed by the input processor.  You can ensure that this is up-to-date by
+emptying the input buffer just before calling jpeg_start_output(): call
+jpeg_consume_input() repeatedly until it returns JPEG_SUSPENDED or
+JPEG_REACHED_EOI.
+
+The target scan number passed to jpeg_start_output() is saved in the
+cinfo.output_scan_number field.  The library's output processing calls
+jpeg_consume_input() whenever the current input scan number and row within
+that scan is less than or equal to the current output scan number and row.
+Thus, input processing can "get ahead" of the output processing but is not
+allowed to "fall behind".  You can achieve several different effects by
+manipulating this interlock rule.  For example, if you pass a target scan
+number greater than the current input scan number, the output processor will
+wait until that scan starts to arrive before producing any output.  (To avoid
+an infinite loop, the target scan number is automatically reset to the last
+scan number when the end of image is reached.  Thus, if you specify a large
+target scan number, the library will just absorb the entire input file and
+then perform an output pass.  This is effectively the same as what
+jpeg_start_decompress() does when you don't select buffered-image mode.)
+When you pass a target scan number equal to the current input scan number,
+the image is displayed no faster than the current input scan arrives.  The
+final possibility is to pass a target scan number less than the current input
+scan number; this disables the input/output interlock and causes the output
+processor to simply display whatever it finds in the image buffer, without
+waiting for input.  (However, the library will not accept a target scan
+number less than one, so you can't avoid waiting for the first scan.)
+
+When data is arriving faster than the output display processing can advance
+through the image, jpeg_consume_input() will store data into the buffered
+image beyond the point at which the output processing is reading data out
+again.  If the input arrives fast enough, it may "wrap around" the buffer to
+the point where the input is more than one whole scan ahead of the output.
+If the output processing simply proceeds through its display pass without
+paying attention to the input, the effect seen on-screen is that the lower
+part of the image is one or more scans better in quality than the upper part.
+Then, when the next output scan is started, you have a choice of what target
+scan number to use.  The recommended choice is to use the current input scan
+number at that time, which implies that you've skipped the output scans
+corresponding to the input scans that were completed while you processed the
+previous output scan.  In this way, the decoder automatically adapts its
+speed to the arriving data, by skipping output scans as necessary to keep up
+with the arriving data.
+
+When using this strategy, you'll want to be sure that you perform a final
+output pass after receiving all the data; otherwise your last display may not
+be full quality across the whole screen.  So the right outer loop logic is
+something like this:
+        do {
+            absorb any waiting input by calling jpeg_consume_input()
+            final_pass = jpeg_input_complete(&cinfo);
+            adjust output decompression parameters if required
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+        } while (!final_pass);
+rather than quitting as soon as jpeg_input_complete() returns TRUE.  This
+arrangement makes it simple to use higher-quality decoding parameters
+for the final pass.  But if you don't want to use special parameters for
+the final pass, the right loop logic is like this:
+        for (;;) {
+            absorb any waiting input by calling jpeg_consume_input()
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+            if (jpeg_input_complete(&cinfo) &&
+                cinfo.input_scan_number == cinfo.output_scan_number)
+              break;
+        }
+In this case you don't need to know in advance whether an output pass is to
+be the last one, so it's not necessary to have reached EOF before starting
+the final output pass; rather, what you want to test is whether the output
+pass was performed in sync with the final input scan.  This form of the loop
+will avoid an extra output pass whenever the decoder is able (or nearly able)
+to keep up with the incoming data.
+
+When the data transmission speed is high, you might begin a display pass,
+then find that much or all of the file has arrived before you can complete
+the pass.  (You can detect this by noting the JPEG_REACHED_EOI return code
+from jpeg_consume_input(), or equivalently by testing jpeg_input_complete().)
+In this situation you may wish to abort the current display pass and start a
+new one using the newly arrived information.  To do so, just call
+jpeg_finish_output() and then start a new pass with jpeg_start_output().
+
+A variant strategy is to abort and restart display if more than one complete
+scan arrives during an output pass; this can be detected by noting
+JPEG_REACHED_SOS returns and/or examining cinfo.input_scan_number.  This
+idea should be employed with caution, however, since the display process
+might never get to the bottom of the image before being aborted, resulting
+in the lower part of the screen being several passes worse than the upper.
+In most cases it's probably best to abort an output pass only if the whole
+file has arrived and you want to begin the final output pass immediately.
+
+When receiving data across a communication link, we recommend always using
+the current input scan number for the output target scan number; if a
+higher-quality final pass is to be done, it should be started (aborting any
+incomplete output pass) as soon as the end of file is received.  However,
+many other strategies are possible.  For example, the application can examine
+the parameters of the current input scan and decide whether to display it or
+not.  If the scan contains only chroma data, one might choose not to use it
+as the target scan, expecting that the scan will be small and will arrive
+quickly.  To skip to the next scan, call jpeg_consume_input() until it
+returns JPEG_REACHED_SOS or JPEG_REACHED_EOI.  Or just use the next higher
+number as the target scan for jpeg_start_output(); but that method doesn't
+let you inspect the next scan's parameters before deciding to display it.
+
+
+In buffered-image mode, jpeg_start_decompress() never performs input and
+thus never suspends.  An application that uses input suspension with
+buffered-image mode must be prepared for suspension returns from these
+routines:
+* jpeg_start_output() performs input only if you request 2-pass quantization
+  and the target scan isn't fully read yet.  (This is discussed below.)
+* jpeg*_read_scanlines(), as always, returns the number of scanlines that it
+  was able to produce before suspending.
+* jpeg_finish_output() will read any markers following the target scan,
+  up to the end of the file or the SOS marker that begins another scan.
+  (But it reads no input if jpeg_consume_input() has already reached the
+  end of the file or a SOS marker beyond the target output scan.)
+* jpeg_finish_decompress() will read until the end of file, and thus can
+  suspend if the end hasn't already been reached (as can be tested by
+  calling jpeg_input_complete()).
+jpeg_start_output(), jpeg_finish_output(), and jpeg_finish_decompress()
+all return TRUE if they completed their tasks, FALSE if they had to suspend.
+In the event of a FALSE return, the application must load more input data
+and repeat the call.  Applications that use non-suspending data sources need
+not check the return values of these three routines.
+
+
+It is possible to change decoding parameters between output passes in the
+buffered-image mode.  The decoder library currently supports only very
+limited changes of parameters.  ONLY THE FOLLOWING parameter changes are
+allowed after jpeg_start_decompress() is called:
+* dct_method can be changed before each call to jpeg_start_output().
+  For example, one could use a fast DCT method for early scans, changing
+  to a higher quality method for the final scan.
+* dither_mode can be changed before each call to jpeg_start_output();
+  of course this has no impact if not using color quantization.  Typically
+  one would use ordered dither for initial passes, then switch to
+  Floyd-Steinberg dither for the final pass.  Caution: changing dither mode
+  can cause more memory to be allocated by the library.  Although the amount
+  of memory involved is not large (a scanline or so), it may cause the
+  initial max_memory_to_use specification to be exceeded, which in the worst
+  case would result in an out-of-memory failure.
+* do_block_smoothing can be changed before each call to jpeg_start_output().
+  This setting is relevant only when decoding a progressive JPEG image.
+  During the first DC-only scan, block smoothing provides a very "fuzzy" look
+  instead of the very "blocky" look seen without it; which is better seems a
+  matter of personal taste.  But block smoothing is nearly always a win
+  during later stages, especially when decoding a successive-approximation
+  image: smoothing helps to hide the slight blockiness that otherwise shows
+  up on smooth gradients until the lowest coefficient bits are sent.
+* Color quantization mode can be changed under the rules described below.
+  You *cannot* change between full-color and quantized output (because that
+  would alter the required I/O buffer sizes), but you can change which
+  quantization method is used.
+
+When generating color-quantized output, changing quantization method is a
+very useful way of switching between high-speed and high-quality display.
+The library allows you to change among its three quantization methods:
+1. Single-pass quantization to a fixed color cube.
+   Selected by cinfo.two_pass_quantize = FALSE and cinfo.colormap = NULL.
+2. Single-pass quantization to an application-supplied colormap.
+   Selected by setting cinfo.colormap to point to the colormap (the value of
+   two_pass_quantize is ignored); also set cinfo.actual_number_of_colors.
+3. Two-pass quantization to a colormap chosen specifically for the image.
+   Selected by cinfo.two_pass_quantize = TRUE and cinfo.colormap = NULL.
+   (This is the default setting selected by jpeg_read_header, but it is
+   probably NOT what you want for the first pass of progressive display!)
+These methods offer successively better quality and lesser speed.  However,
+only the first method is available for quantizing in non-RGB color spaces.
+
+IMPORTANT: because the different quantizer methods have very different
+working-storage requirements, the library requires you to indicate which
+one(s) you intend to use before you call jpeg_start_decompress().  (If we did
+not require this, the max_memory_to_use setting would be a complete fiction.)
+You do this by setting one or more of these three cinfo fields to TRUE:
+        enable_1pass_quant              Fixed color cube colormap
+        enable_external_quant           Externally-supplied colormap
+        enable_2pass_quant              Two-pass custom colormap
+All three are initialized FALSE by jpeg_read_header().  But
+jpeg_start_decompress() automatically sets TRUE the one selected by the
+current two_pass_quantize and colormap settings, so you only need to set the
+enable flags for any other quantization methods you plan to change to later.
+
+After setting the enable flags correctly at jpeg_start_decompress() time, you
+can change to any enabled quantization method by setting two_pass_quantize
+and colormap properly just before calling jpeg_start_output().  The following
+special rules apply:
+1. You must explicitly set cinfo.colormap to NULL when switching to 1-pass
+   or 2-pass mode from a different mode, or when you want the 2-pass
+   quantizer to be re-run to generate a new colormap.
+2. To switch to an external colormap, or to change to a different external
+   colormap than was used on the prior pass, you must call
+   jpeg_new_colormap() after setting cinfo.colormap.
+NOTE: if you want to use the same colormap as was used in the prior pass,
+you should not do either of these things.  This will save some nontrivial
+switchover costs.
+(These requirements exist because cinfo.colormap will always be non-NULL
+after completing a prior output pass, since both the 1-pass and 2-pass
+quantizers set it to point to their output colormaps.  Thus you have to
+do one of these two things to notify the library that something has changed.
+Yup, it's a bit klugy, but it's necessary to do it this way for backwards
+compatibility.)
+
+Note that in buffered-image mode, the library generates any requested colormap
+during jpeg_start_output(), not during jpeg_start_decompress().
+
+When using two-pass quantization, jpeg_start_output() makes a pass over the
+buffered image to determine the optimum color map; it therefore may take a
+significant amount of time, whereas ordinarily it does little work.  The
+progress monitor hook is called during this pass, if defined.  It is also
+important to realize that if the specified target scan number is greater than
+or equal to the current input scan number, jpeg_start_output() will attempt
+to consume input as it makes this pass.  If you use a suspending data source,
+you need to check for a FALSE return from jpeg_start_output() under these
+conditions.  The combination of 2-pass quantization and a not-yet-fully-read
+target scan is the only case in which jpeg_start_output() will consume input.
+
+
+Application authors who support buffered-image mode may be tempted to use it
+for all JPEG images, even single-scan ones.  This will work, but it is
+inefficient: there is no need to create an image-sized coefficient buffer for
+single-scan images.  Requesting buffered-image mode for such an image wastes
+memory.  Worse, it can cost time on large images, since the buffered data has
+to be swapped out or written to a temporary file.  If you are concerned about
+maximum performance on baseline JPEG files, you should use buffered-image
+mode only when the incoming file actually has multiple scans.  This can be
+tested by calling jpeg_has_multiple_scans(), which will return a correct
+result at any time after jpeg_read_header() completes.
+
+It is also worth noting that when you use jpeg_consume_input() to let input
+processing get ahead of output processing, the resulting pattern of access to
+the coefficient buffer is quite nonsequential.  It's best to use the memory
+manager jmemnobs.c if you can (ie, if you have enough real or virtual main
+memory).  If not, at least make sure that max_memory_to_use is set as high as
+possible.  If the JPEG memory manager has to use a temporary file, you will
+probably see a lot of disk traffic and poor performance.  (This could be
+improved with additional work on the memory manager, but we haven't gotten
+around to it yet.)
+
+In some applications it may be convenient to use jpeg_consume_input() for all
+input processing, including reading the initial markers; that is, you may
+wish to call jpeg_consume_input() instead of jpeg_read_header() during
+startup.  This works, but note that you must check for JPEG_REACHED_SOS and
+JPEG_REACHED_EOI return codes as the equivalent of jpeg_read_header's codes.
+Once the first SOS marker has been reached, you must call
+jpeg_start_decompress() before jpeg_consume_input() will consume more input;
+it'll just keep returning JPEG_REACHED_SOS until you do.  If you read a
+tables-only file this way, jpeg_consume_input() will return JPEG_REACHED_EOI
+without ever returning JPEG_REACHED_SOS; be sure to check for this case.
+If this happens, the decompressor will not read any more input until you call
+jpeg_abort() to reset it.  It is OK to call jpeg_consume_input() even when not
+using buffered-image mode, but in that case it's basically a no-op after the
+initial markers have been read: it will just return JPEG_SUSPENDED.
+
+
+Abbreviated datastreams and multiple images
+-------------------------------------------
+
+A JPEG compression or decompression object can be reused to process multiple
+images.  This saves a small amount of time per image by eliminating the
+"create" and "destroy" operations, but that isn't the real purpose of the
+feature.  Rather, reuse of an object provides support for abbreviated JPEG
+datastreams.  Object reuse can also simplify processing a series of images in
+a single input or output file.  This section explains these features.
+
+A JPEG file normally contains several hundred bytes worth of quantization
+and Huffman tables.  In a situation where many images will be stored or
+transmitted with identical tables, this may represent an annoying overhead.
+The JPEG standard therefore permits tables to be omitted.  The standard
+defines three classes of JPEG datastreams:
+  * "Interchange" datastreams contain an image and all tables needed to decode
+     the image.  These are the usual kind of JPEG file.
+  * "Abbreviated image" datastreams contain an image, but are missing some or
+    all of the tables needed to decode that image.
+  * "Abbreviated table specification" (henceforth "tables-only") datastreams
+    contain only table specifications.
+To decode an abbreviated image, it is necessary to load the missing table(s)
+into the decoder beforehand.  This can be accomplished by reading a separate
+tables-only file.  A variant scheme uses a series of images in which the first
+image is an interchange (complete) datastream, while subsequent ones are
+abbreviated and rely on the tables loaded by the first image.  It is assumed
+that once the decoder has read a table, it will remember that table until a
+new definition for the same table number is encountered.
+
+It is the application designer's responsibility to figure out how to associate
+the correct tables with an abbreviated image.  While abbreviated datastreams
+can be useful in a closed environment, their use is strongly discouraged in
+any situation where data exchange with other applications might be needed.
+Caveat designer.
+
+The JPEG library provides support for reading and writing any combination of
+tables-only datastreams and abbreviated images.  In both compression and
+decompression objects, a quantization or Huffman table will be retained for
+the lifetime of the object, unless it is overwritten by a new table definition.
+
+
+To create abbreviated image datastreams, it is only necessary to tell the
+compressor not to emit some or all of the tables it is using.  Each
+quantization and Huffman table struct contains a boolean field "sent_table",
+which normally is initialized to FALSE.  For each table used by the image, the
+header-writing process emits the table and sets sent_table = TRUE unless it is
+already TRUE.  (In normal usage, this prevents outputting the same table
+definition multiple times, as would otherwise occur because the chroma
+components typically share tables.)  Thus, setting this field to TRUE before
+calling jpeg_start_compress() will prevent the table from being written at
+all.
+
+If you want to create a "pure" abbreviated image file containing no tables,
+just call "jpeg_suppress_tables(&cinfo, TRUE)" after constructing all the
+tables.  If you want to emit some but not all tables, you'll need to set the
+individual sent_table fields directly.
+
+To create an abbreviated image, you must also call jpeg_start_compress()
+with a second parameter of FALSE, not TRUE.  Otherwise jpeg_start_compress()
+will force all the sent_table fields to FALSE.  (This is a safety feature to
+prevent abbreviated images from being created accidentally.)
+
+To create a tables-only file, perform the same parameter setup that you
+normally would, but instead of calling jpeg_start_compress() and so on, call
+jpeg_write_tables(&cinfo).  This will write an abbreviated datastream
+containing only SOI, DQT and/or DHT markers, and EOI.  All the quantization
+and Huffman tables that are currently defined in the compression object will
+be emitted unless their sent_tables flag is already TRUE, and then all the
+sent_tables flags will be set TRUE.
+
+A sure-fire way to create matching tables-only and abbreviated image files
+is to proceed as follows:
+
+        create JPEG compression object
+        set JPEG parameters
+        set destination to tables-only file
+        jpeg_write_tables(&cinfo);
+        set destination to image file
+        jpeg_start_compress(&cinfo, FALSE);
+        write data...
+        jpeg_finish_compress(&cinfo);
+
+Since the JPEG parameters are not altered between writing the table file and
+the abbreviated image file, the same tables are sure to be used.  Of course,
+you can repeat the jpeg_start_compress() ... jpeg_finish_compress() sequence
+many times to produce many abbreviated image files matching the table file.
+
+You cannot suppress output of the computed Huffman tables when Huffman
+optimization is selected.  (If you could, there'd be no way to decode the
+image...)  Generally, you don't want to set optimize_coding = TRUE when
+you are trying to produce abbreviated files.
+
+In some cases you might want to compress an image using tables which are
+not stored in the application, but are defined in an interchange or
+tables-only file readable by the application.  This can be done by setting up
+a JPEG decompression object to read the specification file, then copying the
+tables into your compression object.  See jpeg_copy_critical_parameters()
+for an example of copying quantization tables.
+
+
+To read abbreviated image files, you simply need to load the proper tables
+into the decompression object before trying to read the abbreviated image.
+If the proper tables are stored in the application program, you can just
+allocate the table structs and fill in their contents directly.  For example,
+to load a fixed quantization table into table slot "n":
+
+    if (cinfo.quant_tbl_ptrs[n] == NULL)
+      cinfo.quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) &cinfo);
+    quant_ptr = cinfo.quant_tbl_ptrs[n];        /* quant_ptr is JQUANT_TBL* */
+    for (i = 0; i < 64; i++) {
+      /* Qtable[] is desired quantization table, in natural array order */
+      quant_ptr->quantval[i] = Qtable[i];
+    }
+
+Code to load a fixed Huffman table is typically (for AC table "n"):
+
+    if (cinfo.ac_huff_tbl_ptrs[n] == NULL)
+      cinfo.ac_huff_tbl_ptrs[n] = jpeg_alloc_huff_table((j_common_ptr) &cinfo);
+    huff_ptr = cinfo.ac_huff_tbl_ptrs[n];       /* huff_ptr is JHUFF_TBL* */
+    for (i = 1; i <= 16; i++) {
+      /* counts[i] is number of Huffman codes of length i bits, i=1..16 */
+      huff_ptr->bits[i] = counts[i];
+    }
+    for (i = 0; i < 256; i++) {
+      /* symbols[] is the list of Huffman symbols, in code-length order */
+      huff_ptr->huffval[i] = symbols[i];
+    }
+
+(Note that trying to set cinfo.quant_tbl_ptrs[n] to point directly at a
+constant JQUANT_TBL object is not safe.  If the incoming file happened to
+contain a quantization table definition, your master table would get
+overwritten!  Instead allocate a working table copy and copy the master table
+into it, as illustrated above.  Ditto for Huffman tables, of course.)
+
+You might want to read the tables from a tables-only file, rather than
+hard-wiring them into your application.  The jpeg_read_header() call is
+sufficient to read a tables-only file.  You must pass a second parameter of
+FALSE to indicate that you do not require an image to be present.  Thus, the
+typical scenario is
+
+        create JPEG decompression object
+        set source to tables-only file
+        jpeg_read_header(&cinfo, FALSE);
+        set source to abbreviated image file
+        jpeg_read_header(&cinfo, TRUE);
+        set decompression parameters
+        jpeg_start_decompress(&cinfo);
+        read data...
+        jpeg_finish_decompress(&cinfo);
+
+In some cases, you may want to read a file without knowing whether it contains
+an image or just tables.  In that case, pass FALSE and check the return value
+from jpeg_read_header(): it will be JPEG_HEADER_OK if an image was found,
+JPEG_HEADER_TABLES_ONLY if only tables were found.  (A third return value,
+JPEG_SUSPENDED, is possible when using a suspending data source manager.)
+Note that jpeg_read_header() will not complain if you read an abbreviated
+image for which you haven't loaded the missing tables; the missing-table check
+occurs later, in jpeg_start_decompress().
+
+
+It is possible to read a series of images from a single source file by
+repeating the jpeg_read_header() ... jpeg_finish_decompress() sequence,
+without releasing/recreating the JPEG object or the data source module.
+(If you did reinitialize, any partial bufferload left in the data source
+buffer at the end of one image would be discarded, causing you to lose the
+start of the next image.)  When you use this method, stored tables are
+automatically carried forward, so some of the images can be abbreviated images
+that depend on tables from earlier images.
+
+If you intend to write a series of images into a single destination file,
+you might want to make a specialized data destination module that doesn't
+flush the output buffer at term_destination() time.  This would speed things
+up by some trifling amount.  Of course, you'd need to remember to flush the
+buffer after the last image.  You can make the later images be abbreviated
+ones by passing FALSE to jpeg_start_compress().
+
+
+Special markers
+---------------
+
+Some applications may need to insert or extract special data in the JPEG
+datastream.  The JPEG standard provides marker types "COM" (comment) and
+"APP0" through "APP15" (application) to hold application-specific data.
+Unfortunately, the use of these markers is not specified by the standard.
+COM markers are fairly widely used to hold user-supplied text.  The JFIF file
+format spec uses APP0 markers with specified initial strings to hold certain
+data.  Adobe applications use APP14 markers beginning with the string "Adobe"
+for miscellaneous data.  Other APPn markers are rarely seen, but might
+contain almost anything.
+
+If you wish to store user-supplied text, we recommend you use COM markers
+and place readable 7-bit ASCII text in them.  Newline conventions are not
+standardized --- expect to find LF (Unix style), CR/LF (DOS style), or CR
+(Mac style).  A robust COM reader should be able to cope with random binary
+garbage, including nulls, since some applications generate COM markers
+containing non-ASCII junk.  (But yours should not be one of them.)
+
+For program-supplied data, use an APPn marker, and be sure to begin it with an
+identifying string so that you can tell whether the marker is actually yours.
+It's probably best to avoid using APP0 or APP14 for any private markers.
+(NOTE: the upcoming SPIFF standard will use APP8 markers; we recommend you
+not use APP8 markers for any private purposes, either.)
+
+Keep in mind that at most 65533 bytes can be put into one marker, but you
+can have as many markers as you like.
+
+By default, the IJG compression library will write a JFIF APP0 marker if the
+selected JPEG colorspace is grayscale or YCbCr, or an Adobe APP14 marker if
+the selected colorspace is RGB, CMYK, or YCCK.  You can disable this, but
+we don't recommend it.  The decompression library will recognize JFIF and
+Adobe markers and will set the JPEG colorspace properly when one is found.
+
+
+You can write special markers immediately following the datastream header by
+calling jpeg_write_marker() after jpeg_start_compress() and before the first
+call to jpeg*_write_scanlines().  When you do this, the markers appear after
+the SOI and the JFIF APP0 and Adobe APP14 markers (if written), but before
+all else.  Specify the marker type parameter as "JPEG_COM" for COM or
+"JPEG_APP0 + n" for APPn.  (Actually, jpeg_write_marker will let you write
+any marker type, but we don't recommend writing any other kinds of marker.)
+For example, to write a user comment string pointed to by comment_text:
+        jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text));
+
+If it's not convenient to store all the marker data in memory at once,
+you can instead call jpeg_write_m_header() followed by multiple calls to
+jpeg_write_m_byte().  If you do it this way, it's your responsibility to
+call jpeg_write_m_byte() exactly the number of times given in the length
+parameter to jpeg_write_m_header().  (This method lets you empty the
+output buffer partway through a marker, which might be important when
+using a suspending data destination module.  In any case, if you are using
+a suspending destination, you should flush its buffer after inserting
+any special markers.  See "I/O suspension".)
+
+Or, if you prefer to synthesize the marker byte sequence yourself,
+you can just cram it straight into the data destination module.
+
+If you are writing JFIF 1.02 extension markers (thumbnail images), don't
+forget to set cinfo.JFIF_minor_version = 2 so that the encoder will write the
+correct JFIF version number in the JFIF header marker.  The library's default
+is to write version 1.01, but that's wrong if you insert any 1.02 extension
+markers.  (We could probably get away with just defaulting to 1.02, but there
+used to be broken decoders that would complain about unknown minor version
+numbers.  To reduce compatibility risks it's safest not to write 1.02 unless
+you are actually using 1.02 extensions.)
+
+
+When reading, two methods of handling special markers are available:
+1. You can ask the library to save the contents of COM and/or APPn markers
+into memory, and then examine them at your leisure afterwards.
+2. You can supply your own routine to process COM and/or APPn markers
+on-the-fly as they are read.
+The first method is simpler to use, especially if you are using a suspending
+data source; writing a marker processor that copes with input suspension is
+not easy (consider what happens if the marker is longer than your available
+input buffer).  However, the second method conserves memory since the marker
+data need not be kept around after it's been processed.
+
+For either method, you'd normally set up marker handling after creating a
+decompression object and before calling jpeg_read_header(), because the
+markers of interest will typically be near the head of the file and so will
+be scanned by jpeg_read_header.  Once you've established a marker handling
+method, it will be used for the life of that decompression object
+(potentially many datastreams), unless you change it.  Marker handling is
+determined separately for COM markers and for each APPn marker code.
+
+
+To save the contents of special markers in memory, call
+        jpeg_save_markers(cinfo, marker_code, length_limit)
+where marker_code is the marker type to save, JPEG_COM or JPEG_APP0+n.
+(To arrange to save all the special marker types, you need to call this
+routine 17 times, for COM and APP0-APP15.)  If the incoming marker is longer
+than length_limit data bytes, only length_limit bytes will be saved; this
+parameter allows you to avoid chewing up memory when you only need to see the
+first few bytes of a potentially large marker.  If you want to save all the
+data, set length_limit to 0xFFFF; that is enough since marker lengths are only
+16 bits.  As a special case, setting length_limit to 0 prevents that marker
+type from being saved at all.  (That is the default behavior, in fact.)
+
+After jpeg_read_header() completes, you can examine the special markers by
+following the cinfo->marker_list pointer chain.  All the special markers in
+the file appear in this list, in order of their occurrence in the file (but
+omitting any markers of types you didn't ask for).  Both the original data
+length and the saved data length are recorded for each list entry; the latter
+will not exceed length_limit for the particular marker type.  Note that these
+lengths exclude the marker length word, whereas the stored representation
+within the JPEG file includes it.  (Hence the maximum data length is really
+only 65533.)
+
+It is possible that additional special markers appear in the file beyond the
+SOS marker at which jpeg_read_header stops; if so, the marker list will be
+extended during reading of the rest of the file.  This is not expected to be
+common, however.  If you are short on memory you may want to reset the length
+limit to zero for all marker types after finishing jpeg_read_header, to
+ensure that the max_memory_to_use setting cannot be exceeded due to addition
+of later markers.
+
+The marker list remains stored until you call jpeg_finish_decompress or
+jpeg_abort, at which point the memory is freed and the list is set to empty.
+(jpeg_destroy also releases the storage, of course.)
+
+Note that the library is internally interested in APP0 and APP14 markers;
+if you try to set a small nonzero length limit on these types, the library
+will silently force the length up to the minimum it wants.  (But you can set
+a zero length limit to prevent them from being saved at all.)  Also, in a
+16-bit environment, the maximum length limit may be constrained to less than
+65533 by malloc() limitations.  It is therefore best not to assume that the
+effective length limit is exactly what you set it to be.
+
+
+If you want to supply your own marker-reading routine, you do it by calling
+jpeg_set_marker_processor().  A marker processor routine must have the
+signature
+        boolean jpeg_marker_parser_method (j_decompress_ptr cinfo)
+Although the marker code is not explicitly passed, the routine can find it
+in cinfo->unread_marker.  At the time of call, the marker proper has been
+read from the data source module.  The processor routine is responsible for
+reading the marker length word and the remaining parameter bytes, if any.
+Return TRUE to indicate success.  (FALSE should be returned only if you are
+using a suspending data source and it tells you to suspend.  See the standard
+marker processors in jdmarker.c for appropriate coding methods if you need to
+use a suspending data source.)
+
+If you override the default APP0 or APP14 processors, it is up to you to
+recognize JFIF and Adobe markers if you want colorspace recognition to occur
+properly.  We recommend copying and extending the default processors if you
+want to do that.  (A better idea is to save these marker types for later
+examination by calling jpeg_save_markers(); that method doesn't interfere
+with the library's own processing of these markers.)
+
+jpeg_set_marker_processor() and jpeg_save_markers() are mutually exclusive
+--- if you call one it overrides any previous call to the other, for the
+particular marker type specified.
+
+A simple example of an external COM processor can be found in djpeg.c.
+Also, see jpegtran.c for an example of using jpeg_save_markers.
+
+
+ICC profiles
+------------
+
+Two functions are provided for writing and reading International Color
+Consortium (ICC) device profiles embedded in JFIF JPEG image files:
+
+        void jpeg_write_icc_profile (j_compress_ptr cinfo,
+                                     const JOCTET *icc_data_ptr,
+                                     unsigned int icc_data_len);
+        boolean jpeg_read_icc_profile (j_decompress_ptr cinfo,
+                                       JOCTET **icc_data_ptr,
+                                       unsigned int *icc_data_len);
+
+The ICC has defined a standard for including such data in JPEG "APP2" markers.
+The aforementioned functions do not know anything about the internal structure
+of the ICC profile data; they just know how to embed the profile data into a
+JPEG file while writing it, or to extract the profile data from a JPEG file
+while reading it.
+
+jpeg_write_icc_profile() must be called after calling jpeg_start_compress() and
+before the first call to jpeg*_write_scanlines() or jpeg*_write_raw_data().
+This ordering ensures that the APP2 marker(s) will appear after the SOI and
+JFIF or Adobe markers, but before all other data.
+
+jpeg_read_icc_profile() returns TRUE if an ICC profile was found and FALSE
+otherwise.  If an ICC profile was found, then the function will allocate a
+memory region containing the profile and will return a pointer to that memory
+region in *icc_data_ptr, as well as the length of the region in *icc_data_len.
+This memory region is allocated by the library using malloc() and must be freed
+by the caller using free() when the memory region is no longer needed.  Callers
+wishing to use jpeg_read_icc_profile() must call
+
+        jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF);
+
+prior to calling jpeg_read_header().  jpeg_read_icc_profile() can be called at
+any point between jpeg_read_header() and jpeg_finish_decompress().
+
+
+Raw (downsampled) image data
+----------------------------
+
+Some applications need to supply already-downsampled image data to the JPEG
+compressor, or to receive raw downsampled data from the decompressor.  The
+library supports this requirement by allowing the application to write or
+read raw data, bypassing the normal preprocessing or postprocessing steps.
+The interface is different from the standard one and is somewhat harder to
+use.  If your interest is merely in bypassing color conversion, we recommend
+that you use the standard interface and simply set jpeg_color_space =
+in_color_space (or jpeg_color_space = out_color_space for decompression).
+The mechanism described in this section is necessary only to supply or
+receive downsampled image data, in which not all components have the same
+dimensions.
+
+
+To compress raw data, you must supply the data in the colorspace to be used
+in the JPEG file (please read the earlier section on Special color spaces)
+and downsampled to the sampling factors specified in the JPEG parameters.
+You must supply the data in the format used internally by the JPEG library,
+namely a J*SAMPIMAGE array.  This is an array of pointers to two-dimensional
+arrays, each of type J*SAMPARRAY.  Each 2-D array holds the values for one
+color component.  This structure is necessary since the components are of
+different sizes.  If the image dimensions are not a multiple of the MCU size,
+you must also pad the data correctly (usually, this is done by replicating
+the last column and/or row).  The data must be padded to a multiple of a DCT
+block in each component: that is, each downsampled row must contain a
+multiple of 8 valid samples, and there must be a multiple of 8 sample rows
+for each component.  (For applications such as conversion of digital TV
+images, the standard image size is usually a multiple of the DCT block size,
+so that no padding need actually be done.)
+
+The procedure for compression of raw data is basically the same as normal
+compression, except that you call jpeg_write_raw_data() or
+jpeg12_write_raw_data() in place of jpeg_write_scanlines() or
+jpeg12_write_scanlines().  Before calling jpeg_start_compress(), you must do
+the following:
+  * Set cinfo->raw_data_in to TRUE.  (It is set FALSE by jpeg_set_defaults().)
+    This notifies the library that you will be supplying raw data.
+  * Ensure jpeg_color_space is correct --- an explicit jpeg_set_colorspace()
+    call is a good idea.  Note that since color conversion is bypassed,
+    in_color_space is ignored, except that jpeg_set_defaults() uses it to
+    choose the default jpeg_color_space setting.
+  * Ensure the sampling factors, cinfo->comp_info[i].h_samp_factor and
+    cinfo->comp_info[i].v_samp_factor, are correct.  Since these indicate the
+    dimensions of the data you are supplying, it's wise to set them
+    explicitly, rather than assuming the library's defaults are what you want.
+
+To pass raw data to the library, call jpeg*_write_raw_data() in place of
+jpeg*_write_scanlines().  The routines work similarly except that
+jpeg*_write_raw_data takes a J*SAMPIMAGE data array rather than J*SAMPARRAY.
+The scanlines count passed to and returned from jpeg*_write_raw_data is
+measured in terms of the component with the largest v_samp_factor.
+
+jpeg*_write_raw_data() processes one MCU row per call, which is to say
+v_samp_factor*DCTSIZE sample rows of each component.  The passed num_lines
+value must be at least max_v_samp_factor*DCTSIZE, and the return value will
+be exactly that amount (or possibly some multiple of that amount, in future
+library versions).  This is true even on the last call at the bottom of the
+image; don't forget to pad your data as necessary.
+
+The required dimensions of the supplied data can be computed for each
+component as
+        cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
+        cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
+after jpeg_start_compress() has initialized those fields.  If the valid data
+is smaller than this, it must be padded appropriately.  For some sampling
+factors and image sizes, additional dummy DCT blocks are inserted to make
+the image a multiple of the MCU dimensions.  The library creates such dummy
+blocks itself; it does not read them from your supplied data.  Therefore you
+need never pad by more than DCTSIZE samples.  An example may help here.
+Assume 2h2v downsampling of YCbCr data, that is
+        cinfo->comp_info[0].h_samp_factor = 2           for Y
+        cinfo->comp_info[0].v_samp_factor = 2
+        cinfo->comp_info[1].h_samp_factor = 1           for Cb
+        cinfo->comp_info[1].v_samp_factor = 1
+        cinfo->comp_info[2].h_samp_factor = 1           for Cr
+        cinfo->comp_info[2].v_samp_factor = 1
+and suppose that the nominal image dimensions (cinfo->image_width and
+cinfo->image_height) are 101x101 pixels.  Then jpeg_start_compress() will
+compute downsampled_width = 101 and width_in_blocks = 13 for Y,
+downsampled_width = 51 and width_in_blocks = 7 for Cb and Cr (and the same
+for the height fields).  You must pad the Y data to at least 13*8 = 104
+columns and rows, the Cb/Cr data to at least 7*8 = 56 columns and rows.  The
+MCU height is max_v_samp_factor = 2 DCT rows so you must pass at least 16
+scanlines on each call to jpeg*_write_raw_data(), which is to say 16 actual
+sample rows of Y and 8 each of Cb and Cr.  A total of 7 MCU rows are needed,
+so you must pass a total of 7*16 = 112 "scanlines".  The last DCT block row
+of Y data is dummy, so it doesn't matter what you pass for it in the data
+arrays, but the scanlines count must total up to 112 so that all of the Cb
+and Cr data gets passed.
+
+Output suspension is supported with raw-data compression: if the data
+destination module suspends, jpeg*_write_raw_data() will return 0.
+In this case the same data rows must be passed again on the next call.
+
+
+Decompression with raw data output implies bypassing all postprocessing:
+you cannot ask for rescaling or color quantization, for instance.  More
+seriously, you must deal with the color space and sampling factors present in
+the incoming file.  If your application only handles, say, 2h1v YCbCr data,
+you must check for and fail on other color spaces or other sampling factors.
+The library will not convert to a different color space for you.
+
+To obtain raw data output, set cinfo->raw_data_out = TRUE before
+jpeg_start_decompress() (it is set FALSE by jpeg_read_header()).  Be sure to
+verify that the color space and sampling factors are ones you can handle.
+Then call jpeg_read_raw_data() or jpeg12_read_raw_data() in place of
+jpeg_read_scanlines() or jpeg12_read_scanlines().  The decompression process is
+otherwise the same as usual.
+
+jpeg*_read_raw_data() returns one MCU row per call, and thus you must pass a
+buffer of at least max_v_samp_factor*DCTSIZE scanlines (scanline counting is
+the same as for raw-data compression).  The buffer you pass must be large
+enough to hold the actual data plus padding to DCT-block boundaries.  As with
+compression, any entirely dummy DCT blocks are not processed so you need not
+allocate space for them, but the total scanline count includes them.  The
+above example of computing buffer dimensions for raw-data compression is
+equally valid for decompression.
+
+Input suspension is supported with raw-data decompression: if the data source
+module suspends, jpeg*_read_raw_data() will return 0.  You can also use
+buffered-image mode to read raw data in multiple passes.
+
+
+Really raw data: DCT coefficients
+---------------------------------
+
+It is possible to read or write the contents of a JPEG file as raw DCT
+coefficients.  This facility is mainly intended for use in lossless
+transcoding between different JPEG file formats.  Other possible applications
+include lossless cropping of a JPEG image, lossless reassembly of a
+multi-strip or multi-tile TIFF/JPEG file into a single JPEG datastream, etc.
+
+To read the contents of a JPEG file as DCT coefficients, open the file and do
+jpeg_read_header() as usual.  But instead of calling jpeg_start_decompress()
+and jpeg*_read_scanlines(), call jpeg_read_coefficients().  This will read the
+entire image into a set of virtual coefficient-block arrays, one array per
+component.  The return value is a pointer to an array of virtual-array
+descriptors.  Each virtual array can be accessed directly using the JPEG
+memory manager's access_virt_barray method (see Memory management, below,
+and also read structure.txt's discussion of virtual array handling).  Or,
+for simple transcoding to a different JPEG file format, the array list can
+just be handed directly to jpeg_write_coefficients().
+
+Each block in the block arrays contains quantized coefficient values in
+normal array order (not JPEG zigzag order).  The block arrays contain only
+DCT blocks containing real data; any entirely-dummy blocks added to fill out
+interleaved MCUs at the right or bottom edges of the image are discarded
+during reading and are not stored in the block arrays.  (The size of each
+block array can be determined from the width_in_blocks and height_in_blocks
+fields of the component's comp_info entry.)  This is also the data format
+expected by jpeg_write_coefficients().
+
+When you are done using the virtual arrays, call jpeg_finish_decompress()
+to release the array storage and return the decompression object to an idle
+state; or just call jpeg_destroy() if you don't need to reuse the object.
+
+If you use a suspending data source, jpeg_read_coefficients() will return
+NULL if it is forced to suspend; a non-NULL return value indicates successful
+completion.  You need not test for a NULL return value when using a
+non-suspending data source.
+
+It is also possible to call jpeg_read_coefficients() to obtain access to the
+decoder's coefficient arrays during a normal decode cycle in buffered-image
+mode.  This frammish might be useful for progressively displaying an incoming
+image and then re-encoding it without loss.  To do this, decode in buffered-
+image mode as discussed previously, then call jpeg_read_coefficients() after
+the last jpeg_finish_output() call.  The arrays will be available for your use
+until you call jpeg_finish_decompress().
+
+
+To write the contents of a JPEG file as DCT coefficients, you must provide
+the DCT coefficients stored in virtual block arrays.  You can either pass
+block arrays read from an input JPEG file by jpeg_read_coefficients(), or
+allocate virtual arrays from the JPEG compression object and fill them
+yourself.  In either case, jpeg_write_coefficients() is substituted for
+jpeg_start_compress() and jpeg*_write_scanlines().  Thus the sequence is
+  * Create compression object
+  * Set all compression parameters as necessary
+  * Request virtual arrays if needed
+  * jpeg_write_coefficients()
+  * jpeg_finish_compress()
+  * Destroy or re-use compression object
+jpeg_write_coefficients() is passed a pointer to an array of virtual block
+array descriptors; the number of arrays is equal to cinfo.num_components.
+
+The virtual arrays need only have been requested, not realized, before
+jpeg_write_coefficients() is called.  A side-effect of
+jpeg_write_coefficients() is to realize any virtual arrays that have been
+requested from the compression object's memory manager.  Thus, when obtaining
+the virtual arrays from the compression object, you should fill the arrays
+after calling jpeg_write_coefficients().  The data is actually written out
+when you call jpeg_finish_compress(); jpeg_write_coefficients() only writes
+the file header.
+
+When writing raw DCT coefficients, it is crucial that the JPEG quantization
+tables and sampling factors match the way the data was encoded, or the
+resulting file will be invalid.  For transcoding from an existing JPEG file,
+we recommend using jpeg_copy_critical_parameters().  This routine initializes
+all the compression parameters to default values (like jpeg_set_defaults()),
+then copies the critical information from a source decompression object.
+The decompression object should have just been used to read the entire
+JPEG input file --- that is, it should be awaiting jpeg_finish_decompress().
+
+jpeg_write_coefficients() marks all tables stored in the compression object
+as needing to be written to the output file (thus, it acts like
+jpeg_start_compress(cinfo, TRUE)).  This is for safety's sake, to avoid
+emitting abbreviated JPEG files by accident.  If you really want to emit an
+abbreviated JPEG file, call jpeg_suppress_tables(), or set the tables'
+individual sent_table flags, between calling jpeg_write_coefficients() and
+jpeg_finish_compress().
+
+
+Progress monitoring
+-------------------
+
+Some applications may need to regain control from the JPEG library every so
+often.  The typical use of this feature is to produce a percent-done bar or
+other progress display.  (For a simple example, see cjpeg.c or djpeg.c.)
+Although you do get control back frequently during the data-transferring pass
+(the jpeg*_read_scanlines or jpeg*_write_scanlines loop), any additional passes
+will occur inside jpeg_finish_compress or jpeg_start_decompress; those
+routines may take a long time to execute, and you don't get control back
+until they are done.
+
+You can define a progress-monitor routine which will be called periodically
+by the library.  No guarantees are made about how often this call will occur,
+so we don't recommend you use it for mouse tracking or anything like that.
+At present, a call will occur once per MCU row, scanline, or sample row
+group, whichever unit is convenient for the current processing mode; so the
+wider the image, the longer the time between calls.  During the data
+transferring pass, only one call occurs per call of jpeg*_read_scanlines or
+jpeg*_write_scanlines, so don't pass a large number of scanlines at once if
+you want fine resolution in the progress count.  (If you really need to use
+the callback mechanism for time-critical tasks like mouse tracking, you could
+insert additional calls inside some of the library's inner loops.)
+
+To establish a progress-monitor callback, create a struct jpeg_progress_mgr,
+fill in its progress_monitor field with a pointer to your callback routine,
+and set cinfo->progress to point to the struct.  The callback will be called
+whenever cinfo->progress is non-NULL.  (This pointer is set to NULL by
+jpeg_create_compress or jpeg_create_decompress; the library will not change
+it thereafter.  So if you allocate dynamic storage for the progress struct,
+make sure it will live as long as the JPEG object does.  Allocating from the
+JPEG memory manager with lifetime JPOOL_PERMANENT will work nicely.)  You
+can use the same callback routine for both compression and decompression.
+
+The jpeg_progress_mgr struct contains four fields which are set by the library:
+        long pass_counter;      /* work units completed in this pass */
+        long pass_limit;        /* total number of work units in this pass */
+        int completed_passes;   /* passes completed so far */
+        int total_passes;       /* total number of passes expected */
+During any one pass, pass_counter increases from 0 up to (not including)
+pass_limit; the step size is usually but not necessarily 1.  The pass_limit
+value may change from one pass to another.  The expected total number of
+passes is in total_passes, and the number of passes already completed is in
+completed_passes.  Thus the fraction of work completed may be estimated as
+                completed_passes + (pass_counter/pass_limit)
+                --------------------------------------------
+                                total_passes
+ignoring the fact that the passes may not be equal amounts of work.
+
+When decompressing, pass_limit can even change within a pass, because it
+depends on the number of scans in the JPEG file, which isn't always known in
+advance.  The computed fraction-of-work-done may jump suddenly (if the library
+discovers it has overestimated the number of scans) or even decrease (in the
+opposite case).  It is not wise to put great faith in the work estimate.
+
+When using the decompressor's buffered-image mode, the progress monitor work
+estimate is likely to be completely unhelpful, because the library has no way
+to know how many output passes will be demanded of it.  Currently, the library
+sets total_passes based on the assumption that there will be one more output
+pass if the input file end hasn't yet been read (jpeg_input_complete() isn't
+TRUE), but no more output passes if the file end has been reached when the
+output pass is started.  This means that total_passes will rise as additional
+output passes are requested.  If you have a way of determining the input file
+size, estimating progress based on the fraction of the file that's been read
+will probably be more useful than using the library's value.
+
+
+Memory management
+-----------------
+
+This section covers some key facts about the JPEG library's built-in memory
+manager.  For more info, please read structure.txt's section about the memory
+manager, and consult the source code if necessary.
+
+All memory and temporary file allocation within the library is done via the
+memory manager.  If necessary, you can replace the "back end" of the memory
+manager to control allocation yourself (for example, if you don't want the
+library to use malloc() and free() for some reason).
+
+Some data is allocated "permanently" and will not be freed until the JPEG
+object is destroyed.  Most data is allocated "per image" and is freed by
+jpeg_finish_compress, jpeg_finish_decompress, or jpeg_abort.  You can call the
+memory manager yourself to allocate structures that will automatically be
+freed at these times.  Typical code for this is
+  ptr = (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, size);
+Use JPOOL_PERMANENT to get storage that lasts as long as the JPEG object.
+Use alloc_large instead of alloc_small for anything bigger than a few Kbytes.
+There are also alloc_sarray and alloc_barray routines that automatically
+build 2-D sample or block arrays.
+
+The library's minimum space requirements to process an image depend on the
+image's width, but not on its height, because the library ordinarily works
+with "strip" buffers that are as wide as the image but just a few rows high.
+Some operating modes (eg, two-pass color quantization) require full-image
+buffers.  Such buffers are treated as "virtual arrays": only the current strip
+need be in memory, and the rest can be swapped out to a temporary file.
+
+When using temporary files, the library will make the in-memory buffers for
+its virtual arrays just big enough to stay within a "maximum memory" setting.
+Your application can set this limit by setting cinfo->mem->max_memory_to_use
+after creating the JPEG object.  (Of course, there is still a minimum size for
+the buffers, so the max-memory setting is effective only if it is bigger than
+the minimum space needed.)  If you allocate any large structures yourself, you
+must allocate them before jpeg_start_compress() or jpeg_start_decompress() in
+order to have them counted against the max memory limit.  Also keep in mind
+that space allocated with alloc_small() is ignored, on the assumption that
+it's too small to be worth worrying about; so a reasonable safety margin
+should be left when setting max_memory_to_use.
+
+NOTE: Unless you develop your own memory manager back end, then temporary files
+will never be used.  The back end provided in libjpeg-turbo (jmemnobs.c) simply
+malloc()s and free()s virtual arrays, and an error occurs if the required
+memory exceeds the limit specified in cinfo->mem->max_memory_to_use.
+
+
+Memory usage
+------------
+
+Working memory requirements while performing compression or decompression
+depend on image dimensions, image characteristics (such as colorspace and
+JPEG process), and operating mode (application-selected options).
+
+As of v6b, the decompressor requires:
+ 1. About 24K in more-or-less-fixed-size data.  This varies a bit depending
+    on operating mode and image characteristics (particularly color vs.
+    grayscale), but it doesn't depend on image dimensions.
+ 2. Strip buffers (of size proportional to the image width) for IDCT and
+    upsampling results.  The worst case for commonly used sampling factors
+    is about 34 bytes * width in pixels for a color image.  A grayscale image
+    only needs about 8 bytes per pixel column.
+ 3. A full-image DCT coefficient buffer is needed to decode a multi-scan JPEG
+    file (including progressive JPEGs), or whenever you select buffered-image
+    mode.  This takes 2 bytes/coefficient.  At typical 2x2 sampling, that's
+    3 bytes per pixel for a color image.  Worst case (1x1 sampling) requires
+    6 bytes/pixel.  For grayscale, figure 2 bytes/pixel.
+ 4. To perform 2-pass color quantization, the decompressor also needs a
+    128K color lookup table and a full-image pixel buffer (3 bytes/pixel).
+This does not count any memory allocated by the application, such as a
+buffer to hold the final output image.
+
+The above figures are valid for 8-bit JPEG data precision and a machine with
+32-bit ints.  For 12-bit and 16-bit JPEG data, double the size of the strip
+buffers and quantization pixel buffer.  The "fixed-size" data will be somewhat
+smaller with 16-bit ints, larger with 64-bit ints.  Also, CMYK or other unusual
+color spaces will require different amounts of space.
+
+The full-image coefficient and pixel buffers, if needed at all, do not
+have to be fully RAM resident; you can have the library use temporary
+files instead when the total memory usage would exceed a limit you set.
+(But if your OS supports virtual memory, it's probably better to just use
+jmemnobs and let the OS do the swapping.)
+
+The compressor's memory requirements are similar, except that it has no need
+for color quantization.  Also, it needs a full-image DCT coefficient buffer
+if Huffman-table optimization is asked for, even if progressive mode is not
+requested.
+
+If you need more detailed information about memory usage in a particular
+situation, you can enable the MEM_STATS code in jmemmgr.c.
+
+
+Library compile-time options
+----------------------------
+
+A number of compile-time options are available by modifying jmorecfg.h.
+
+The maximum number of components (color channels) in the image is determined
+by MAX_COMPONENTS.  The JPEG standard allows up to 255 components, but we
+expect that few applications will need more than four or so.
+
+On machines with unusual data type sizes, you may be able to improve
+performance or reduce memory space by tweaking the various typedefs in
+jmorecfg.h.  In particular, on some RISC CPUs, access to arrays of "short"s
+is quite slow; consider trading memory for speed by making JCOEF, INT16, and
+UINT16 be "int" or "unsigned int".  UINT8 is also a candidate to become int.
+You probably don't want to make J*SAMPLE be int unless you have lots of memory
+to burn.
+
+You can reduce the size of the library by compiling out various optional
+functions.  To do this, undefine xxx_SUPPORTED symbols as necessary.
+
+You can also save a few K by not having text error messages in the library;
+the standard error message table occupies about 5Kb.  This is particularly
+reasonable for embedded applications where there's no good way to display
+a message anyway.  To do this, remove the creation of the message table
+(jpeg_std_message_table[]) from jerror.c, and alter format_message to do
+something reasonable without it.  You could output the numeric value of the
+message code number, for example.  If you do this, you can also save a couple
+more K by modifying the TRACEMSn() macros in jerror.h to expand to nothing;
+you don't need trace capability anyway, right?
+
+
+Portability considerations
+--------------------------
+
+The JPEG library has been written to be extremely portable; the sample
+applications cjpeg and djpeg are slightly less so.  This section summarizes
+the design goals in this area.  (If you encounter any bugs that cause the
+library to be less portable than is claimed here, we'd appreciate hearing
+about them.)
+
+The code works fine on ANSI C and C++ compilers, using any of the popular
+system include file setups, and some not-so-popular ones too.
+
+The code is not dependent on the exact sizes of the C data types.  As
+distributed, we make the assumptions that
+        char    is at least 8 bits wide
+        short   is at least 16 bits wide
+        int     is at least 16 bits wide
+        long    is at least 32 bits wide
+(These are the minimum requirements of the ANSI C standard.)  Wider types will
+work fine, although memory may be used inefficiently if char is much larger
+than 8 bits or short is much bigger than 16 bits.  The code should work
+equally well with 16- or 32-bit ints.
+
+In a system where these assumptions are not met, you may be able to make the
+code work by modifying the typedefs in jmorecfg.h.  However, you will probably
+have difficulty if int is less than 16 bits wide, since references to plain
+int abound in the code.
+
+char can be either signed or unsigned, although the code runs faster if an
+unsigned char type is available.  If char is wider than 8 bits, you will need
+to redefine JOCTET and/or provide custom data source/destination managers so
+that JOCTET represents exactly 8 bits of data on external storage.
+
+The JPEG library proper does not assume ASCII representation of characters.
+But some of the image file I/O modules in cjpeg/djpeg do have ASCII
+dependencies in file-header manipulation; so does cjpeg's select_file_type()
+routine.
+
+The JPEG library does not rely heavily on the C library.  In particular, C
+stdio is used only by the data source/destination modules and the error
+handler, all of which are application-replaceable.  (cjpeg/djpeg are more
+heavily dependent on stdio.)  malloc and free are called only from the memory
+manager "back end" module, so you can use a different memory allocator by
+replacing that one file.
+
+More info about porting the code may be gleaned by reading jconfig.txt,
+jmorecfg.h, and jinclude.h.
diff --git a/3rdparty/libjpeg-turbo/src/rdbmp.c b/3rdparty/libjpeg-turbo/src/rdbmp.c
new file mode 100644
index 0000000000..c2c06fd001
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdbmp.c
@@ -0,0 +1,689 @@
+/*
+ * rdbmp.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2017 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Modified 2011 by Siarhei Siamashka.
+ * Copyright (C) 2015, 2017-2018, 2021-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to read input images in Microsoft "BMP"
+ * format (MS Windows 3.x, OS/2 1.x, and OS/2 2.x flavors).
+ * Currently, only 8-, 24-, and 32-bit images are supported, not 1-bit or
+ * 4-bit (feeding such low-depth images into JPEG would be silly anyway).
+ * Also, we don't support RLE-compressed files.
+ *
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume input from
+ * an ordinary stdio stream.  They further assume that reading begins
+ * at the start of the file; start_input may need work if the
+ * user interface has already read some data (e.g., to determine that
+ * the file is indeed BMP format).
+ *
+ * This code contributed by James Arthur Boucher.
+ */
+
+#include "cmyk.h"
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+
+#ifdef BMP_SUPPORTED
+
+
+/* Macros to deal with unsigned chars as efficiently as compiler allows */
+
+typedef unsigned char U_CHAR;
+#define UCH(x)  ((int)(x))
+
+
+#define ReadOK(file, buffer, len) \
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
+
+static int alpha_index[JPEG_NUMCS] = {
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
+};
+
+
+/* Private version of data source object */
+
+typedef struct _bmp_source_struct *bmp_source_ptr;
+
+typedef struct _bmp_source_struct {
+  struct cjpeg_source_struct pub; /* public fields */
+
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
+
+  JSAMPARRAY colormap;          /* BMP colormap (converted to my format) */
+
+  jvirt_sarray_ptr whole_image; /* Needed to reverse row order */
+  JDIMENSION source_row;        /* Current source row number */
+  JDIMENSION row_width;         /* Physical width of scanlines in file */
+
+  int bits_per_pixel;           /* remembers 8-, 24-, or 32-bit format */
+  int cmap_length;              /* colormap length */
+
+  boolean use_inversion_array;  /* TRUE = preload the whole image, which is
+                                   stored in bottom-up order, and feed it to
+                                   the calling program in top-down order
+
+                                   FALSE = the calling program will maintain
+                                   its own image buffer and read the rows in
+                                   bottom-up order */
+
+  U_CHAR *iobuffer;             /* I/O buffer (used to buffer a single row from
+                                   disk if use_inversion_array == FALSE) */
+} bmp_source_struct;
+
+
+LOCAL(int)
+read_byte(bmp_source_ptr sinfo)
+/* Read next byte from BMP file */
+{
+  register FILE *infile = sinfo->pub.input_file;
+  register int c;
+
+  if ((c = getc(infile)) == EOF)
+    ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  return c;
+}
+
+
+LOCAL(void)
+read_colormap(bmp_source_ptr sinfo, int cmaplen, int mapentrysize)
+/* Read the colormap from a BMP file */
+{
+  int i, gray = 1;
+
+  switch (mapentrysize) {
+  case 3:
+    /* BGR format (occurs in OS/2 files) */
+    for (i = 0; i < cmaplen; i++) {
+      sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo);
+      sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo);
+      sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo);
+      if (sinfo->colormap[2][i] != sinfo->colormap[1][i] ||
+          sinfo->colormap[1][i] != sinfo->colormap[0][i])
+        gray = 0;
+    }
+    break;
+  case 4:
+    /* BGR0 format (occurs in MS Windows files) */
+    for (i = 0; i < cmaplen; i++) {
+      sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo);
+      sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo);
+      sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo);
+      (void)read_byte(sinfo);
+      if (sinfo->colormap[2][i] != sinfo->colormap[1][i] ||
+          sinfo->colormap[1][i] != sinfo->colormap[0][i])
+        gray = 0;
+    }
+    break;
+  default:
+    ERREXIT(sinfo->cinfo, JERR_BMP_BADCMAP);
+    break;
+  }
+
+  if ((sinfo->cinfo->in_color_space == JCS_UNKNOWN ||
+       sinfo->cinfo->in_color_space == JCS_RGB) && gray)
+    sinfo->cinfo->in_color_space = JCS_GRAYSCALE;
+
+  if (sinfo->cinfo->in_color_space == JCS_GRAYSCALE && !gray)
+    ERREXIT(sinfo->cinfo, JERR_BAD_IN_COLORSPACE);
+}
+
+
+/*
+ * Read one row of pixels.
+ * The image has been read into the whole_image array, but is otherwise
+ * unprocessed.  We must read it out in top-to-bottom row order, and if
+ * it is an 8-bit image, we must expand colormapped pixels to 24bit format.
+ */
+
+METHODDEF(JDIMENSION)
+get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 8-bit colormap indexes */
+{
+  bmp_source_ptr source = (bmp_source_ptr)sinfo;
+  register JSAMPARRAY colormap = source->colormap;
+  int cmaplen = source->cmap_length;
+  JSAMPARRAY image_ptr;
+  register int t;
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+
+  if (source->use_inversion_array) {
+    /* Fetch next row from virtual array */
+    source->source_row--;
+    image_ptr = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->whole_image,
+       source->source_row, (JDIMENSION)1, FALSE);
+    inptr = image_ptr[0];
+  } else {
+    if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+    inptr = source->iobuffer;
+  }
+
+  /* Expand the colormap indexes to real data */
+  outptr = source->pub.buffer[0];
+  if (cinfo->in_color_space == JCS_GRAYSCALE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      t = *inptr++;
+      if (t >= cmaplen)
+        ERREXIT(cinfo, JERR_BMP_OUTOFRANGE);
+      *outptr++ = colormap[0][t];
+    }
+  } else if (cinfo->in_color_space == JCS_CMYK) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      t = *inptr++;
+      if (t >= cmaplen)
+        ERREXIT(cinfo, JERR_BMP_OUTOFRANGE);
+      rgb_to_cmyk(colormap[0][t], colormap[1][t], colormap[2][t], outptr,
+                  outptr + 1, outptr + 2, outptr + 3);
+      outptr += 4;
+    }
+  } else {
+    register int rindex = rgb_red[cinfo->in_color_space];
+    register int gindex = rgb_green[cinfo->in_color_space];
+    register int bindex = rgb_blue[cinfo->in_color_space];
+    register int aindex = alpha_index[cinfo->in_color_space];
+    register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+    if (aindex >= 0) {
+      for (col = cinfo->image_width; col > 0; col--) {
+        t = *inptr++;
+        if (t >= cmaplen)
+          ERREXIT(cinfo, JERR_BMP_OUTOFRANGE);
+        outptr[rindex] = colormap[0][t];
+        outptr[gindex] = colormap[1][t];
+        outptr[bindex] = colormap[2][t];
+        outptr[aindex] = 0xFF;
+        outptr += ps;
+      }
+    } else {
+      for (col = cinfo->image_width; col > 0; col--) {
+        t = *inptr++;
+        if (t >= cmaplen)
+          ERREXIT(cinfo, JERR_BMP_OUTOFRANGE);
+        outptr[rindex] = colormap[0][t];
+        outptr[gindex] = colormap[1][t];
+        outptr[bindex] = colormap[2][t];
+        outptr += ps;
+      }
+    }
+  }
+
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_24bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 24-bit pixels */
+{
+  bmp_source_ptr source = (bmp_source_ptr)sinfo;
+  JSAMPARRAY image_ptr;
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+
+  if (source->use_inversion_array) {
+    /* Fetch next row from virtual array */
+    source->source_row--;
+    image_ptr = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->whole_image,
+       source->source_row, (JDIMENSION)1, FALSE);
+    inptr = image_ptr[0];
+  } else {
+    if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+    inptr = source->iobuffer;
+  }
+
+  /* Transfer data.  Note source values are in BGR order
+   * (even though Microsoft's own documents say the opposite).
+   */
+  outptr = source->pub.buffer[0];
+  if (cinfo->in_color_space == JCS_EXT_BGR) {
+    memcpy(outptr, inptr, source->row_width);
+  } else if (cinfo->in_color_space == JCS_CMYK) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++;
+      rgb_to_cmyk(r, g, b, outptr, outptr + 1, outptr + 2, outptr + 3);
+      outptr += 4;
+    }
+  } else {
+    register int rindex = rgb_red[cinfo->in_color_space];
+    register int gindex = rgb_green[cinfo->in_color_space];
+    register int bindex = rgb_blue[cinfo->in_color_space];
+    register int aindex = alpha_index[cinfo->in_color_space];
+    register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+    if (aindex >= 0) {
+      for (col = cinfo->image_width; col > 0; col--) {
+        outptr[bindex] = *inptr++;
+        outptr[gindex] = *inptr++;
+        outptr[rindex] = *inptr++;
+        outptr[aindex] = 0xFF;
+        outptr += ps;
+      }
+    } else {
+      for (col = cinfo->image_width; col > 0; col--) {
+        outptr[bindex] = *inptr++;
+        outptr[gindex] = *inptr++;
+        outptr[rindex] = *inptr++;
+        outptr += ps;
+      }
+    }
+  }
+
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_32bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 32-bit pixels */
+{
+  bmp_source_ptr source = (bmp_source_ptr)sinfo;
+  JSAMPARRAY image_ptr;
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+
+  if (source->use_inversion_array) {
+    /* Fetch next row from virtual array */
+    source->source_row--;
+    image_ptr = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->whole_image,
+       source->source_row, (JDIMENSION)1, FALSE);
+    inptr = image_ptr[0];
+  } else {
+    if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+    inptr = source->iobuffer;
+  }
+
+  /* Transfer data.  Note source values are in BGR order
+   * (even though Microsoft's own documents say the opposite).
+   */
+  outptr = source->pub.buffer[0];
+  if (cinfo->in_color_space == JCS_EXT_BGRX ||
+      cinfo->in_color_space == JCS_EXT_BGRA) {
+    memcpy(outptr, inptr, source->row_width);
+  } else if (cinfo->in_color_space == JCS_CMYK) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++;
+      rgb_to_cmyk(r, g, b, outptr, outptr + 1, outptr + 2, outptr + 3);
+      inptr++;                          /* skip the 4th byte (Alpha channel) */
+      outptr += 4;
+    }
+  } else {
+    register int rindex = rgb_red[cinfo->in_color_space];
+    register int gindex = rgb_green[cinfo->in_color_space];
+    register int bindex = rgb_blue[cinfo->in_color_space];
+    register int aindex = alpha_index[cinfo->in_color_space];
+    register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+    if (aindex >= 0) {
+      for (col = cinfo->image_width; col > 0; col--) {
+        outptr[bindex] = *inptr++;
+        outptr[gindex] = *inptr++;
+        outptr[rindex] = *inptr++;
+        outptr[aindex] = *inptr++;
+        outptr += ps;
+      }
+    } else {
+      for (col = cinfo->image_width; col > 0; col--) {
+        outptr[bindex] = *inptr++;
+        outptr[gindex] = *inptr++;
+        outptr[rindex] = *inptr++;
+        inptr++;                        /* skip the 4th byte (Alpha channel) */
+        outptr += ps;
+      }
+    }
+  }
+
+  return 1;
+}
+
+
+/*
+ * This method loads the image into whole_image during the first call on
+ * get_pixel_rows.  The get_pixel_rows pointer is then adjusted to call
+ * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls.
+ */
+
+METHODDEF(JDIMENSION)
+preload_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  bmp_source_ptr source = (bmp_source_ptr)sinfo;
+  register FILE *infile = source->pub.input_file;
+  register JSAMPROW out_ptr;
+  JSAMPARRAY image_ptr;
+  JDIMENSION row;
+  cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+
+  /* Read the data into a virtual array in input-file row order. */
+  for (row = 0; row < cinfo->image_height; row++) {
+    if (progress != NULL) {
+      progress->pub.pass_counter = (long)row;
+      progress->pub.pass_limit = (long)cinfo->image_height;
+      (*progress->pub.progress_monitor) ((j_common_ptr)cinfo);
+    }
+    image_ptr = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->whole_image, row, (JDIMENSION)1, TRUE);
+    out_ptr = image_ptr[0];
+    if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) {
+      if (feof(infile))
+        ERREXIT(cinfo, JERR_INPUT_EOF);
+      else
+        ERREXIT(cinfo, JERR_FILE_READ);
+    }
+  }
+  if (progress != NULL)
+    progress->completed_extra_passes++;
+
+  /* Set up to read from the virtual array in top-to-bottom order */
+  switch (source->bits_per_pixel) {
+  case 8:
+    source->pub.get_pixel_rows = get_8bit_row;
+    break;
+  case 24:
+    source->pub.get_pixel_rows = get_24bit_row;
+    break;
+  case 32:
+    source->pub.get_pixel_rows = get_32bit_row;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BMP_BADDEPTH);
+  }
+  source->source_row = cinfo->image_height;
+
+  /* And read the first row */
+  return (*source->pub.get_pixel_rows) (cinfo, sinfo);
+}
+
+
+/*
+ * Read the file header; return image size and component count.
+ */
+
+METHODDEF(void)
+start_input_bmp(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  bmp_source_ptr source = (bmp_source_ptr)sinfo;
+  U_CHAR bmpfileheader[14];
+  U_CHAR bmpinfoheader[64];
+
+#define GET_2B(array, offset) \
+  ((unsigned short)UCH(array[offset]) + \
+   (((unsigned short)UCH(array[offset + 1])) << 8))
+#define GET_4B(array, offset) \
+  ((unsigned int)UCH(array[offset]) + \
+   (((unsigned int)UCH(array[offset + 1])) << 8) + \
+   (((unsigned int)UCH(array[offset + 2])) << 16) + \
+   (((unsigned int)UCH(array[offset + 3])) << 24))
+
+  int bfOffBits;
+  int headerSize;
+  int biWidth;
+  int biHeight;
+  unsigned short biPlanes;
+  unsigned int biCompression;
+  int biXPelsPerMeter, biYPelsPerMeter;
+  int biClrUsed = 0;
+  int mapentrysize = 0;         /* 0 indicates no colormap */
+  int bPad;
+  JDIMENSION row_width = 0;
+
+  /* Read and verify the bitmap file header */
+  if (!ReadOK(source->pub.input_file, bmpfileheader, 14))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  if (GET_2B(bmpfileheader, 0) != 0x4D42) /* 'BM' */
+    ERREXIT(cinfo, JERR_BMP_NOT);
+  bfOffBits = GET_4B(bmpfileheader, 10);
+  /* We ignore the remaining fileheader fields */
+
+  /* The infoheader might be 12 bytes (OS/2 1.x), 40 bytes (Windows),
+   * or 64 bytes (OS/2 2.x).  Check the first 4 bytes to find out which.
+   */
+  if (!ReadOK(source->pub.input_file, bmpinfoheader, 4))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  headerSize = GET_4B(bmpinfoheader, 0);
+  if (headerSize < 12 || headerSize > 64 || (headerSize + 14) > bfOffBits)
+    ERREXIT(cinfo, JERR_BMP_BADHEADER);
+  if (!ReadOK(source->pub.input_file, bmpinfoheader + 4, headerSize - 4))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+
+  switch (headerSize) {
+  case 12:
+    /* Decode OS/2 1.x header (Microsoft calls this a BITMAPCOREHEADER) */
+    biWidth = (int)GET_2B(bmpinfoheader, 4);
+    biHeight = (int)GET_2B(bmpinfoheader, 6);
+    biPlanes = GET_2B(bmpinfoheader, 8);
+    source->bits_per_pixel = (int)GET_2B(bmpinfoheader, 10);
+
+    switch (source->bits_per_pixel) {
+    case 8:                     /* colormapped image */
+      mapentrysize = 3;         /* OS/2 uses RGBTRIPLE colormap */
+      TRACEMS2(cinfo, 1, JTRC_BMP_OS2_MAPPED, biWidth, biHeight);
+      break;
+    case 24:                    /* RGB image */
+    case 32:                    /* RGB image + Alpha channel */
+      TRACEMS3(cinfo, 1, JTRC_BMP_OS2, biWidth, biHeight,
+               source->bits_per_pixel);
+      break;
+    default:
+      ERREXIT(cinfo, JERR_BMP_BADDEPTH);
+      break;
+    }
+    break;
+  case 40:
+  case 64:
+    /* Decode Windows 3.x header (Microsoft calls this a BITMAPINFOHEADER) */
+    /* or OS/2 2.x header, which has additional fields that we ignore */
+    biWidth = (int)GET_4B(bmpinfoheader, 4);
+    biHeight = (int)GET_4B(bmpinfoheader, 8);
+    biPlanes = GET_2B(bmpinfoheader, 12);
+    source->bits_per_pixel = (int)GET_2B(bmpinfoheader, 14);
+    biCompression = GET_4B(bmpinfoheader, 16);
+    biXPelsPerMeter = (int)GET_4B(bmpinfoheader, 24);
+    biYPelsPerMeter = (int)GET_4B(bmpinfoheader, 28);
+    biClrUsed = GET_4B(bmpinfoheader, 32);
+    /* biSizeImage, biClrImportant fields are ignored */
+
+    switch (source->bits_per_pixel) {
+    case 8:                     /* colormapped image */
+      mapentrysize = 4;         /* Windows uses RGBQUAD colormap */
+      TRACEMS2(cinfo, 1, JTRC_BMP_MAPPED, biWidth, biHeight);
+      break;
+    case 24:                    /* RGB image */
+    case 32:                    /* RGB image + Alpha channel */
+      TRACEMS3(cinfo, 1, JTRC_BMP, biWidth, biHeight, source->bits_per_pixel);
+      break;
+    default:
+      ERREXIT(cinfo, JERR_BMP_BADDEPTH);
+      break;
+    }
+    if (biCompression != 0)
+      ERREXIT(cinfo, JERR_BMP_COMPRESSED);
+
+    if (biXPelsPerMeter > 0 && biYPelsPerMeter > 0) {
+      /* Set JFIF density parameters from the BMP data */
+      cinfo->X_density = (UINT16)(biXPelsPerMeter / 100); /* 100 cm per meter */
+      cinfo->Y_density = (UINT16)(biYPelsPerMeter / 100);
+      cinfo->density_unit = 2;  /* dots/cm */
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BMP_BADHEADER);
+    return;
+  }
+
+  if (biWidth <= 0 || biHeight <= 0)
+    ERREXIT(cinfo, JERR_BMP_EMPTY);
+  if (sinfo->max_pixels &&
+      (unsigned long long)biWidth * biHeight > sinfo->max_pixels)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, sinfo->max_pixels);
+  if (biPlanes != 1)
+    ERREXIT(cinfo, JERR_BMP_BADPLANES);
+
+  /* Compute distance to bitmap data --- will adjust for colormap below */
+  bPad = bfOffBits - (headerSize + 14);
+
+  /* Read the colormap, if any */
+  if (mapentrysize > 0) {
+    if (biClrUsed <= 0)
+      biClrUsed = 256;          /* assume it's 256 */
+    else if (biClrUsed > 256)
+      ERREXIT(cinfo, JERR_BMP_BADCMAP);
+    /* Allocate space to store the colormap */
+    source->colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)biClrUsed, (JDIMENSION)3);
+    source->cmap_length = (int)biClrUsed;
+    /* and read it from the file */
+    read_colormap(source, (int)biClrUsed, mapentrysize);
+    /* account for size of colormap */
+    bPad -= biClrUsed * mapentrysize;
+  }
+
+  /* Skip any remaining pad bytes */
+  if (bPad < 0)                 /* incorrect bfOffBits value? */
+    ERREXIT(cinfo, JERR_BMP_BADHEADER);
+  while (--bPad >= 0) {
+    (void)read_byte(source);
+  }
+
+  /* Compute row width in file, including padding to 4-byte boundary */
+  switch (source->bits_per_pixel) {
+  case 8:
+    if (cinfo->in_color_space == JCS_UNKNOWN)
+      cinfo->in_color_space = JCS_EXT_RGB;
+    if (IsExtRGB(cinfo->in_color_space))
+      cinfo->input_components = rgb_pixelsize[cinfo->in_color_space];
+    else if (cinfo->in_color_space == JCS_GRAYSCALE)
+      cinfo->input_components = 1;
+    else if (cinfo->in_color_space == JCS_CMYK)
+      cinfo->input_components = 4;
+    else
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    row_width = (JDIMENSION)biWidth;
+    break;
+  case 24:
+    if (cinfo->in_color_space == JCS_UNKNOWN)
+      cinfo->in_color_space = JCS_EXT_BGR;
+    if (IsExtRGB(cinfo->in_color_space))
+      cinfo->input_components = rgb_pixelsize[cinfo->in_color_space];
+    else if (cinfo->in_color_space == JCS_CMYK)
+      cinfo->input_components = 4;
+    else
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    if ((unsigned long long)biWidth * 3ULL > 0xFFFFFFFFULL)
+      ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+    row_width = (JDIMENSION)biWidth * 3;
+    break;
+  case 32:
+    if (cinfo->in_color_space == JCS_UNKNOWN)
+      cinfo->in_color_space = JCS_EXT_BGRA;
+    if (IsExtRGB(cinfo->in_color_space))
+      cinfo->input_components = rgb_pixelsize[cinfo->in_color_space];
+    else if (cinfo->in_color_space == JCS_CMYK)
+      cinfo->input_components = 4;
+    else
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    if ((unsigned long long)biWidth * 4ULL > 0xFFFFFFFFULL)
+      ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+    row_width = (JDIMENSION)biWidth * 4;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BMP_BADDEPTH);
+  }
+  while ((row_width & 3) != 0) row_width++;
+  source->row_width = row_width;
+
+  if (source->use_inversion_array) {
+    /* Allocate space for inversion array, prepare for preload pass */
+    source->whole_image = (*cinfo->mem->request_virt_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+       row_width, (JDIMENSION)biHeight, (JDIMENSION)1);
+    source->pub.get_pixel_rows = preload_image;
+    if (cinfo->progress != NULL) {
+      cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+      progress->total_extra_passes++; /* count file input as separate pass */
+    }
+  } else {
+    source->iobuffer = (U_CHAR *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, row_width);
+    switch (source->bits_per_pixel) {
+    case 8:
+      source->pub.get_pixel_rows = get_8bit_row;
+      break;
+    case 24:
+      source->pub.get_pixel_rows = get_24bit_row;
+      break;
+    case 32:
+      source->pub.get_pixel_rows = get_32bit_row;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_BMP_BADDEPTH);
+    }
+  }
+
+  /* Ensure that biWidth * cinfo->input_components doesn't exceed the maximum
+     value of the JDIMENSION type.  This is only a danger with BMP files, since
+     their width and height fields are 32-bit integers. */
+  if ((unsigned long long)biWidth *
+      (unsigned long long)cinfo->input_components > 0xFFFFFFFFULL)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  /* Allocate one-row buffer for returned data */
+  source->pub.buffer = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)biWidth * (JDIMENSION)cinfo->input_components, (JDIMENSION)1);
+  source->pub.buffer_height = 1;
+
+  cinfo->data_precision = 8;
+  cinfo->image_width = (JDIMENSION)biWidth;
+  cinfo->image_height = (JDIMENSION)biHeight;
+}
+
+
+/*
+ * Finish up at the end of the file.
+ */
+
+METHODDEF(void)
+finish_input_bmp(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * The module selection routine for BMP format input.
+ */
+
+GLOBAL(cjpeg_source_ptr)
+jinit_read_bmp(j_compress_ptr cinfo, boolean use_inversion_array)
+{
+  bmp_source_ptr source;
+
+  if (cinfo->data_precision != 8)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Create module interface object */
+  source = (bmp_source_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(bmp_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
+  /* Fill in method ptrs, except get_pixel_rows which start_input sets */
+  source->pub.start_input = start_input_bmp;
+  source->pub.finish_input = finish_input_bmp;
+  source->pub.max_pixels = 0;
+
+  source->use_inversion_array = use_inversion_array;
+
+  return (cjpeg_source_ptr)source;
+}
+
+#endif /* BMP_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/rdcolmap.c b/3rdparty/libjpeg-turbo/src/rdcolmap.c
new file mode 100644
index 0000000000..836685e1b8
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdcolmap.c
@@ -0,0 +1,261 @@
+/*
+ * rdcolmap.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file implements djpeg's "-map file" switch.  It reads a source image
+ * and constructs a colormap to be supplied to the JPEG decompressor.
+ *
+ * Currently, these file formats are supported for the map file:
+ *   GIF: the contents of the GIF's global colormap are used.
+ *   PPM (either text or raw flavor): the entire file is read and
+ *      each unique pixel value is entered in the map.
+ * Note that reading a large PPM file will be horrendously slow.
+ * Typically, a PPM-format map file should contain just one pixel
+ * of each desired color.  Such a file can be extracted from an
+ * ordinary image PPM file with ppmtomap(1).
+ *
+ * Rescaling a PPM that has a maxval unequal to _MAXJSAMPLE is not
+ * currently implemented.
+ */
+
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jsamplecomp.h"
+
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/* Portions of this code are based on the PBMPLUS library, which is:
+**
+** Copyright (C) 1988 by Jef Poskanzer.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation.  This software is provided "as is" without express or
+** implied warranty.
+*/
+
+
+/*
+ * Add a (potentially) new color to the color map.
+ */
+
+LOCAL(void)
+add_map_entry(j_decompress_ptr cinfo, int R, int G, int B)
+{
+  _JSAMPROW colormap0 = ((_JSAMPARRAY)cinfo->colormap)[0];
+  _JSAMPROW colormap1 = ((_JSAMPARRAY)cinfo->colormap)[1];
+  _JSAMPROW colormap2 = ((_JSAMPARRAY)cinfo->colormap)[2];
+  int ncolors = cinfo->actual_number_of_colors;
+  int index;
+
+  /* Check for duplicate color. */
+  for (index = 0; index < ncolors; index++) {
+    if (colormap0[index] == R && colormap1[index] == G &&
+        colormap2[index] == B)
+      return;                   /* color is already in map */
+  }
+
+  /* Check for map overflow. */
+  if (ncolors >= (_MAXJSAMPLE + 1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, (_MAXJSAMPLE + 1));
+
+  /* OK, add color to map. */
+  colormap0[ncolors] = (_JSAMPLE)R;
+  colormap1[ncolors] = (_JSAMPLE)G;
+  colormap2[ncolors] = (_JSAMPLE)B;
+  cinfo->actual_number_of_colors++;
+}
+
+
+/*
+ * Extract color map from a GIF file.
+ */
+
+LOCAL(void)
+read_gif_map(j_decompress_ptr cinfo, FILE *infile)
+{
+  int header[13];
+  int i, colormaplen;
+  int R, G, B;
+
+  /* Initial 'G' has already been read by read_color_map */
+  /* Read the rest of the GIF header and logical screen descriptor */
+  for (i = 1; i < 13; i++) {
+    if ((header[i] = getc(infile)) == EOF)
+      ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+  }
+
+  /* Verify GIF Header */
+  if (header[1] != 'I' || header[2] != 'F')
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+
+  /* There must be a global color map. */
+  if ((header[10] & 0x80) == 0)
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+
+  /* OK, fetch it. */
+  colormaplen = 2 << (header[10] & 0x07);
+
+  for (i = 0; i < colormaplen; i++) {
+    R = getc(infile);
+    G = getc(infile);
+    B = getc(infile);
+    if (R == EOF || G == EOF || B == EOF)
+      ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+    add_map_entry(cinfo,
+                  R << (BITS_IN_JSAMPLE - 8),
+                  G << (BITS_IN_JSAMPLE - 8),
+                  B << (BITS_IN_JSAMPLE - 8));
+  }
+}
+
+
+/* Support routines for reading PPM */
+
+
+LOCAL(int)
+pbm_getc(FILE *infile)
+/* Read next char, skipping over any comments */
+/* A comment/newline sequence is returned as a newline */
+{
+  register int ch;
+
+  ch = getc(infile);
+  if (ch == '#') {
+    do {
+      ch = getc(infile);
+    } while (ch != '\n' && ch != EOF);
+  }
+  return ch;
+}
+
+
+LOCAL(unsigned int)
+read_pbm_integer(j_decompress_ptr cinfo, FILE *infile)
+/* Read an unsigned decimal integer from the PPM file */
+/* Swallows one trailing character after the integer */
+/* Note that on a 16-bit-int machine, only values up to 64k can be read. */
+/* This should not be a problem in practice. */
+{
+  register int ch;
+  register unsigned int val;
+
+  /* Skip any leading whitespace */
+  do {
+    ch = pbm_getc(infile);
+    if (ch == EOF)
+      ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+
+  if (ch < '0' || ch > '9')
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+
+  val = ch - '0';
+  while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') {
+    val *= 10;
+    val += ch - '0';
+  }
+  return val;
+}
+
+
+/*
+ * Extract color map from a PPM file.
+ */
+
+LOCAL(void)
+read_ppm_map(j_decompress_ptr cinfo, FILE *infile)
+{
+  int c;
+  unsigned int w, h, maxval, row, col;
+  int R, G, B;
+
+  /* Initial 'P' has already been read by read_color_map */
+  c = getc(infile);             /* save format discriminator for a sec */
+
+  /* while we fetch the remaining header info */
+  w = read_pbm_integer(cinfo, infile);
+  h = read_pbm_integer(cinfo, infile);
+  maxval = read_pbm_integer(cinfo, infile);
+
+  if (w <= 0 || h <= 0 || maxval <= 0) /* error check */
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+
+  /* For now, we don't support rescaling from an unusual maxval. */
+  if (maxval != (unsigned int)_MAXJSAMPLE)
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+
+  switch (c) {
+  case '3':                     /* it's a text-format PPM file */
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        R = read_pbm_integer(cinfo, infile);
+        G = read_pbm_integer(cinfo, infile);
+        B = read_pbm_integer(cinfo, infile);
+        add_map_entry(cinfo, R, G, B);
+      }
+    }
+    break;
+
+  case '6':                     /* it's a raw-format PPM file */
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        R = getc(infile);
+        G = getc(infile);
+        B = getc(infile);
+        if (R == EOF || G == EOF || B == EOF)
+          ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+        add_map_entry(cinfo, R, G, B);
+      }
+    }
+    break;
+
+  default:
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+    break;
+  }
+}
+
+
+/*
+ * Main entry point from djpeg.c.
+ *  Input: opened input file (from file name argument on command line).
+ *  Output: colormap and actual_number_of_colors fields are set in cinfo.
+ */
+
+GLOBAL(void)
+_read_color_map(j_decompress_ptr cinfo, FILE *infile)
+{
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Allocate space for a color map of maximum supported size. */
+  cinfo->colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)(_MAXJSAMPLE + 1), (JDIMENSION)3);
+  cinfo->actual_number_of_colors = 0; /* initialize map to empty */
+
+  /* Read first byte to determine file format */
+  switch (getc(infile)) {
+  case 'G':
+    read_gif_map(cinfo, infile);
+    break;
+  case 'P':
+    read_ppm_map(cinfo, infile);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+    break;
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/rdgif.c b/3rdparty/libjpeg-turbo/src/rdgif.c
new file mode 100644
index 0000000000..23e8b9e128
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdgif.c
@@ -0,0 +1,720 @@
+/*
+ * rdgif.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2019 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2021-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to read input images in GIF format.
+ *
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume input from
+ * an ordinary stdio stream.  They further assume that reading begins
+ * at the start of the file; start_input may need work if the
+ * user interface has already read some data (e.g., to determine that
+ * the file is indeed GIF format).
+ */
+
+/*
+ * This code is loosely based on giftoppm from the PBMPLUS distribution
+ * of Feb. 1991.  That file contains the following copyright notice:
+ * +-------------------------------------------------------------------+
+ * | Copyright 1990, David Koblas.                                     |
+ * |   Permission to use, copy, modify, and distribute this software   |
+ * |   and its documentation for any purpose and without fee is hereby |
+ * |   granted, provided that the above copyright notice appear in all |
+ * |   copies and that both that copyright notice and this permission  |
+ * |   notice appear in supporting documentation.  This software is    |
+ * |   provided "as is" without express or implied warranty.           |
+ * +-------------------------------------------------------------------+
+ */
+
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jsamplecomp.h"
+
+#if defined(GIF_SUPPORTED) && \
+    (BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED))
+
+
+/* Macros to deal with unsigned chars as efficiently as compiler allows */
+
+typedef unsigned char U_CHAR;
+#define UCH(x)  ((int)(x))
+
+
+#define ReadOK(file, buffer, len) \
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
+
+
+#define MAXCOLORMAPSIZE  256    /* max # of colors in a GIF colormap */
+#define NUMCOLORS        3      /* # of colors */
+#define CM_RED           0      /* color component numbers */
+#define CM_GREEN         1
+#define CM_BLUE          2
+
+#define MAX_LZW_BITS     12     /* maximum LZW code size */
+#define LZW_TABLE_SIZE   (1 << MAX_LZW_BITS) /* # of possible LZW symbols */
+
+/* Macros for extracting header data --- note we assume chars may be signed */
+
+#define LM_to_uint(array, offset) \
+  ((unsigned int)UCH(array[offset]) + \
+   (((unsigned int)UCH(array[offset + 1])) << 8))
+
+#define BitSet(byte, bit)       ((byte) & (bit))
+#define INTERLACE       0x40    /* mask for bit signifying interlaced image */
+#define COLORMAPFLAG    0x80    /* mask for bit signifying colormap presence */
+
+
+/*
+ * LZW decompression tables look like this:
+ *   symbol_head[K] = prefix symbol of any LZW symbol K (0..LZW_TABLE_SIZE-1)
+ *   symbol_tail[K] = suffix byte   of any LZW symbol K (0..LZW_TABLE_SIZE-1)
+ * Note that entries 0..end_code of the above tables are not used,
+ * since those symbols represent raw bytes or special codes.
+ *
+ * The stack represents the not-yet-used expansion of the last LZW symbol.
+ * In the worst case, a symbol could expand to as many bytes as there are
+ * LZW symbols, so we allocate LZW_TABLE_SIZE bytes for the stack.
+ * (This is conservative since that number includes the raw-byte symbols.)
+ */
+
+
+/* Private version of data source object */
+
+typedef struct {
+  struct cjpeg_source_struct pub; /* public fields */
+
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
+
+  _JSAMPARRAY colormap;         /* GIF colormap (converted to my format) */
+
+  /* State for GetCode and LZWReadByte */
+  U_CHAR code_buf[256 + 4];     /* current input data block */
+  int last_byte;                /* # of bytes in code_buf */
+  int last_bit;                 /* # of bits in code_buf */
+  int cur_bit;                  /* next bit index to read */
+  boolean first_time;           /* flags first call to GetCode */
+  boolean out_of_blocks;        /* TRUE if hit terminator data block */
+
+  int input_code_size;          /* codesize given in GIF file */
+  int clear_code, end_code;     /* values for Clear and End codes */
+
+  int code_size;                /* current actual code size */
+  int limit_code;               /* 2^code_size */
+  int max_code;                 /* first unused code value */
+
+  /* Private state for LZWReadByte */
+  int oldcode;                  /* previous LZW symbol */
+  int firstcode;                /* first byte of oldcode's expansion */
+
+  /* LZW symbol table and expansion stack */
+  UINT16 *symbol_head;          /* => table of prefix symbols */
+  UINT8  *symbol_tail;          /* => table of suffix bytes */
+  UINT8  *symbol_stack;         /* => stack for symbol expansions */
+  UINT8  *sp;                   /* stack pointer */
+
+  /* State for interlaced image processing */
+  boolean is_interlaced;        /* TRUE if have interlaced image */
+  jvirt_sarray_ptr interlaced_image; /* full image in interlaced order */
+  JDIMENSION cur_row_number;    /* need to know actual row number */
+  JDIMENSION pass2_offset;      /* # of pixel rows in pass 1 */
+  JDIMENSION pass3_offset;      /* # of pixel rows in passes 1&2 */
+  JDIMENSION pass4_offset;      /* # of pixel rows in passes 1,2,3 */
+} gif_source_struct;
+
+typedef gif_source_struct *gif_source_ptr;
+
+
+/* Forward declarations */
+METHODDEF(JDIMENSION) get_pixel_rows(j_compress_ptr cinfo,
+                                     cjpeg_source_ptr sinfo);
+METHODDEF(JDIMENSION) load_interlaced_image(j_compress_ptr cinfo,
+                                            cjpeg_source_ptr sinfo);
+METHODDEF(JDIMENSION) get_interlaced_row(j_compress_ptr cinfo,
+                                         cjpeg_source_ptr sinfo);
+
+
+LOCAL(int)
+ReadByte(gif_source_ptr sinfo)
+/* Read next byte from GIF file */
+{
+  register FILE *infile = sinfo->pub.input_file;
+  register int c;
+
+  if ((c = getc(infile)) == EOF)
+    ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  return c;
+}
+
+
+LOCAL(int)
+GetDataBlock(gif_source_ptr sinfo, U_CHAR *buf)
+/* Read a GIF data block, which has a leading count byte */
+/* A zero-length block marks the end of a data block sequence */
+{
+  int count;
+
+  count = ReadByte(sinfo);
+  if (count > 0) {
+    if (!ReadOK(sinfo->pub.input_file, buf, count))
+      ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  }
+  return count;
+}
+
+
+LOCAL(void)
+SkipDataBlocks(gif_source_ptr sinfo)
+/* Skip a series of data blocks, until a block terminator is found */
+{
+  U_CHAR buf[256];
+
+  while (GetDataBlock(sinfo, buf) > 0)
+    /* skip */;
+}
+
+
+LOCAL(void)
+ReInitLZW(gif_source_ptr sinfo)
+/* (Re)initialize LZW state; shared code for startup and Clear processing */
+{
+  sinfo->code_size = sinfo->input_code_size + 1;
+  sinfo->limit_code = sinfo->clear_code << 1;   /* 2^code_size */
+  sinfo->max_code = sinfo->clear_code + 2;      /* first unused code value */
+  sinfo->sp = sinfo->symbol_stack;              /* init stack to empty */
+}
+
+
+LOCAL(void)
+InitLZWCode(gif_source_ptr sinfo)
+/* Initialize for a series of LZWReadByte (and hence GetCode) calls */
+{
+  /* GetCode initialization */
+  sinfo->last_byte = 2;         /* make safe to "recopy last two bytes" */
+  sinfo->code_buf[0] = 0;
+  sinfo->code_buf[1] = 0;
+  sinfo->last_bit = 0;          /* nothing in the buffer */
+  sinfo->cur_bit = 0;           /* force buffer load on first call */
+  sinfo->first_time = TRUE;
+  sinfo->out_of_blocks = FALSE;
+
+  /* LZWReadByte initialization: */
+  /* compute special code values (note that these do not change later) */
+  sinfo->clear_code = 1 << sinfo->input_code_size;
+  sinfo->end_code = sinfo->clear_code + 1;
+  ReInitLZW(sinfo);
+}
+
+
+LOCAL(int)
+GetCode(gif_source_ptr sinfo)
+/* Fetch the next code_size bits from the GIF data */
+/* We assume code_size is less than 16 */
+{
+  register int accum;
+  int offs, count;
+
+  while (sinfo->cur_bit + sinfo->code_size > sinfo->last_bit) {
+    /* Time to reload the buffer */
+    /* First time, share code with Clear case */
+    if (sinfo->first_time) {
+      sinfo->first_time = FALSE;
+      return sinfo->clear_code;
+    }
+    if (sinfo->out_of_blocks) {
+      WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA);
+      return sinfo->end_code;   /* fake something useful */
+    }
+    /* preserve last two bytes of what we have -- assume code_size <= 16 */
+    sinfo->code_buf[0] = sinfo->code_buf[sinfo->last_byte-2];
+    sinfo->code_buf[1] = sinfo->code_buf[sinfo->last_byte-1];
+    /* Load more bytes; set flag if we reach the terminator block */
+    if ((count = GetDataBlock(sinfo, &sinfo->code_buf[2])) == 0) {
+      sinfo->out_of_blocks = TRUE;
+      WARNMS(sinfo->cinfo, JWRN_GIF_NOMOREDATA);
+      return sinfo->end_code;   /* fake something useful */
+    }
+    /* Reset counters */
+    sinfo->cur_bit = (sinfo->cur_bit - sinfo->last_bit) + 16;
+    sinfo->last_byte = 2 + count;
+    sinfo->last_bit = sinfo->last_byte * 8;
+  }
+
+  /* Form up next 24 bits in accum */
+  offs = sinfo->cur_bit >> 3;   /* byte containing cur_bit */
+  accum = UCH(sinfo->code_buf[offs + 2]);
+  accum <<= 8;
+  accum |= UCH(sinfo->code_buf[offs + 1]);
+  accum <<= 8;
+  accum |= UCH(sinfo->code_buf[offs]);
+
+  /* Right-align cur_bit in accum, then mask off desired number of bits */
+  accum >>= (sinfo->cur_bit & 7);
+  sinfo->cur_bit += sinfo->code_size;
+  return accum & ((1 << sinfo->code_size) - 1);
+}
+
+
+LOCAL(int)
+LZWReadByte(gif_source_ptr sinfo)
+/* Read an LZW-compressed byte */
+{
+  register int code;            /* current working code */
+  int incode;                   /* saves actual input code */
+
+  /* If any codes are stacked from a previously read symbol, return them */
+  if (sinfo->sp > sinfo->symbol_stack)
+    return (int)(*(--sinfo->sp));
+
+  /* Time to read a new symbol */
+  code = GetCode(sinfo);
+
+  if (code == sinfo->clear_code) {
+    /* Reinit state, swallow any extra Clear codes, and */
+    /* return next code, which is expected to be a raw byte. */
+    ReInitLZW(sinfo);
+    do {
+      code = GetCode(sinfo);
+    } while (code == sinfo->clear_code);
+    if (code > sinfo->clear_code) { /* make sure it is a raw byte */
+      WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA);
+      code = 0;                 /* use something valid */
+    }
+    /* make firstcode, oldcode valid! */
+    sinfo->firstcode = sinfo->oldcode = code;
+    return code;
+  }
+
+  if (code == sinfo->end_code) {
+    /* Skip the rest of the image, unless GetCode already read terminator */
+    if (!sinfo->out_of_blocks) {
+      SkipDataBlocks(sinfo);
+      sinfo->out_of_blocks = TRUE;
+    }
+    /* Complain that there's not enough data */
+    WARNMS(sinfo->cinfo, JWRN_GIF_ENDCODE);
+    /* Pad data with 0's */
+    return 0;                   /* fake something usable */
+  }
+
+  /* Got normal raw byte or LZW symbol */
+  incode = code;                /* save for a moment */
+
+  if (code >= sinfo->max_code) { /* special case for not-yet-defined symbol */
+    /* code == max_code is OK; anything bigger is bad data */
+    if (code > sinfo->max_code) {
+      WARNMS(sinfo->cinfo, JWRN_GIF_BADDATA);
+      incode = 0;               /* prevent creation of loops in symbol table */
+    }
+    /* this symbol will be defined as oldcode/firstcode */
+    *(sinfo->sp++) = (UINT8)sinfo->firstcode;
+    code = sinfo->oldcode;
+  }
+
+  /* If it's a symbol, expand it into the stack */
+  while (code >= sinfo->clear_code) {
+    *(sinfo->sp++) = sinfo->symbol_tail[code]; /* tail is a byte value */
+    code = sinfo->symbol_head[code]; /* head is another LZW symbol */
+  }
+  /* At this point code just represents a raw byte */
+  sinfo->firstcode = code;      /* save for possible future use */
+
+  /* If there's room in table... */
+  if ((code = sinfo->max_code) < LZW_TABLE_SIZE) {
+    /* Define a new symbol = prev sym + head of this sym's expansion */
+    sinfo->symbol_head[code] = (UINT16)sinfo->oldcode;
+    sinfo->symbol_tail[code] = (UINT8)sinfo->firstcode;
+    sinfo->max_code++;
+    /* Is it time to increase code_size? */
+    if (sinfo->max_code >= sinfo->limit_code &&
+        sinfo->code_size < MAX_LZW_BITS) {
+      sinfo->code_size++;
+      sinfo->limit_code <<= 1;  /* keep equal to 2^code_size */
+    }
+  }
+
+  sinfo->oldcode = incode;      /* save last input symbol for future use */
+  return sinfo->firstcode;      /* return first byte of symbol's expansion */
+}
+
+
+LOCAL(void)
+ReadColorMap(gif_source_ptr sinfo, int cmaplen, _JSAMPARRAY cmap)
+/* Read a GIF colormap */
+{
+  int i, gray = 1;
+
+  for (i = 0; i < cmaplen; i++) {
+#if BITS_IN_JSAMPLE == 8
+#define UPSCALE(x)  (x)
+#else
+#define UPSCALE(x)  ((x) << (BITS_IN_JSAMPLE - 8))
+#endif
+    cmap[CM_RED][i]   = (_JSAMPLE)UPSCALE(ReadByte(sinfo));
+    cmap[CM_GREEN][i] = (_JSAMPLE)UPSCALE(ReadByte(sinfo));
+    cmap[CM_BLUE][i]  = (_JSAMPLE)UPSCALE(ReadByte(sinfo));
+    if (cmap[CM_RED][i] != cmap[CM_GREEN][i] ||
+        cmap[CM_GREEN][i] != cmap[CM_BLUE][i])
+      gray = 0;
+  }
+
+  if (sinfo->cinfo->in_color_space == JCS_RGB && gray) {
+    sinfo->cinfo->in_color_space = JCS_GRAYSCALE;
+    sinfo->cinfo->input_components = 1;
+  }
+}
+
+
+LOCAL(void)
+DoExtension(gif_source_ptr sinfo)
+/* Process an extension block */
+/* Currently we ignore 'em all */
+{
+  int extlabel;
+
+  /* Read extension label byte */
+  extlabel = ReadByte(sinfo);
+  TRACEMS1(sinfo->cinfo, 1, JTRC_GIF_EXTENSION, extlabel);
+  /* Skip the data block(s) associated with the extension */
+  SkipDataBlocks(sinfo);
+}
+
+
+/*
+ * Read the file header; return image size and component count.
+ */
+
+METHODDEF(void)
+start_input_gif(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr)sinfo;
+  U_CHAR hdrbuf[10];            /* workspace for reading control blocks */
+  unsigned int width, height;   /* image dimensions */
+  int colormaplen, aspectRatio;
+  int c;
+
+  /* Read and verify GIF Header */
+  if (!ReadOK(source->pub.input_file, hdrbuf, 6))
+    ERREXIT(cinfo, JERR_GIF_NOT);
+  if (hdrbuf[0] != 'G' || hdrbuf[1] != 'I' || hdrbuf[2] != 'F')
+    ERREXIT(cinfo, JERR_GIF_NOT);
+  /* Check for expected version numbers.
+   * If unknown version, give warning and try to process anyway;
+   * this is per recommendation in GIF89a standard.
+   */
+  if ((hdrbuf[3] != '8' || hdrbuf[4] != '7' || hdrbuf[5] != 'a') &&
+      (hdrbuf[3] != '8' || hdrbuf[4] != '9' || hdrbuf[5] != 'a'))
+    TRACEMS3(cinfo, 1, JTRC_GIF_BADVERSION, hdrbuf[3], hdrbuf[4], hdrbuf[5]);
+
+  /* Read and decipher Logical Screen Descriptor */
+  if (!ReadOK(source->pub.input_file, hdrbuf, 7))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  width = LM_to_uint(hdrbuf, 0);
+  height = LM_to_uint(hdrbuf, 2);
+  if (width == 0 || height == 0)
+    ERREXIT(cinfo, JERR_GIF_EMPTY);
+  if (sinfo->max_pixels &&
+      (unsigned long long)width * height > sinfo->max_pixels)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, sinfo->max_pixels);
+  /* we ignore the color resolution, sort flag, and background color index */
+  aspectRatio = UCH(hdrbuf[6]);
+  if (aspectRatio != 0 && aspectRatio != 49)
+    TRACEMS(cinfo, 1, JTRC_GIF_NONSQUARE);
+
+  /* Allocate space to store the colormap */
+  source->colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)MAXCOLORMAPSIZE,
+     (JDIMENSION)NUMCOLORS);
+  colormaplen = 0;              /* indicate initialization */
+
+  /* Read global colormap if header indicates it is present */
+  if (BitSet(hdrbuf[4], COLORMAPFLAG)) {
+    colormaplen = 2 << (hdrbuf[4] & 0x07);
+    ReadColorMap(source, colormaplen, source->colormap);
+  }
+
+  /* Scan until we reach start of desired image.
+   * We don't currently support skipping images, but could add it easily.
+   */
+  for (;;) {
+    c = ReadByte(source);
+
+    if (c == ';')               /* GIF terminator?? */
+      ERREXIT(cinfo, JERR_GIF_IMAGENOTFOUND);
+
+    if (c == '!') {             /* Extension */
+      DoExtension(source);
+      continue;
+    }
+
+    if (c != ',') {             /* Not an image separator? */
+      WARNMS1(cinfo, JWRN_GIF_CHAR, c);
+      continue;
+    }
+
+    /* Read and decipher Local Image Descriptor */
+    if (!ReadOK(source->pub.input_file, hdrbuf, 9))
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+    /* we ignore top/left position info, also sort flag */
+    width = LM_to_uint(hdrbuf, 4);
+    height = LM_to_uint(hdrbuf, 6);
+    if (width == 0 || height == 0)
+      ERREXIT(cinfo, JERR_GIF_EMPTY);
+    if (sinfo->max_pixels &&
+        (unsigned long long)width * height > sinfo->max_pixels)
+      ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, sinfo->max_pixels);
+    source->is_interlaced = (BitSet(hdrbuf[8], INTERLACE) != 0);
+
+    /* Read local colormap if header indicates it is present */
+    /* Note: if we wanted to support skipping images, */
+    /* we'd need to skip rather than read colormap for ignored images */
+    if (BitSet(hdrbuf[8], COLORMAPFLAG)) {
+      colormaplen = 2 << (hdrbuf[8] & 0x07);
+      ReadColorMap(source, colormaplen, source->colormap);
+    }
+
+    source->input_code_size = ReadByte(source); /* get min-code-size byte */
+    if (source->input_code_size < 2 || source->input_code_size > 8)
+      ERREXIT1(cinfo, JERR_GIF_CODESIZE, source->input_code_size);
+
+    /* Reached desired image, so break out of loop */
+    /* If we wanted to skip this image, */
+    /* we'd call SkipDataBlocks and then continue the loop */
+    break;
+  }
+
+  /* Prepare to read selected image: first initialize LZW decompressor */
+  source->symbol_head = (UINT16 *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                LZW_TABLE_SIZE * sizeof(UINT16));
+  source->symbol_tail = (UINT8 *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                LZW_TABLE_SIZE * sizeof(UINT8));
+  source->symbol_stack = (UINT8 *)
+    (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                LZW_TABLE_SIZE * sizeof(UINT8));
+  InitLZWCode(source);
+
+  /*
+   * If image is interlaced, we read it into a full-size sample array,
+   * decompressing as we go; then get_interlaced_row selects rows from the
+   * sample array in the proper order.
+   */
+  if (source->is_interlaced) {
+    /* We request the virtual array now, but can't access it until virtual
+     * arrays have been allocated.  Hence, the actual work of reading the
+     * image is postponed until the first call to get_pixel_rows.
+     */
+    source->interlaced_image = (*cinfo->mem->request_virt_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+       (JDIMENSION)width, (JDIMENSION)height, (JDIMENSION)1);
+    if (cinfo->progress != NULL) {
+      cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+      progress->total_extra_passes++; /* count file input as separate pass */
+    }
+    source->pub.get_pixel_rows = load_interlaced_image;
+  } else {
+    source->pub.get_pixel_rows = get_pixel_rows;
+  }
+
+  if (cinfo->in_color_space != JCS_GRAYSCALE) {
+    cinfo->in_color_space = JCS_RGB;
+    cinfo->input_components = NUMCOLORS;
+  }
+
+  /* Create compressor input buffer. */
+  source->pub._buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)width * cinfo->input_components, (JDIMENSION)1);
+  source->pub.buffer_height = 1;
+
+  /* Pad colormap for safety. */
+  for (c = colormaplen; c < source->clear_code; c++) {
+    source->colormap[CM_RED][c]   =
+    source->colormap[CM_GREEN][c] =
+    source->colormap[CM_BLUE][c]  = _CENTERJSAMPLE;
+  }
+
+  /* Return info about the image. */
+  cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+
+  TRACEMS3(cinfo, 1, JTRC_GIF, width, height, colormaplen);
+}
+
+
+/*
+ * Read one row of pixels.
+ * This version is used for noninterlaced GIF images:
+ * we read directly from the GIF file.
+ */
+
+METHODDEF(JDIMENSION)
+get_pixel_rows(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr)sinfo;
+  register int c;
+  register _JSAMPROW ptr;
+  register JDIMENSION col;
+  register _JSAMPARRAY colormap = source->colormap;
+
+  ptr = source->pub._buffer[0];
+  if (cinfo->in_color_space == JCS_GRAYSCALE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = LZWReadByte(source);
+      *ptr++ = colormap[CM_RED][c];
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = LZWReadByte(source);
+      *ptr++ = colormap[CM_RED][c];
+      *ptr++ = colormap[CM_GREEN][c];
+      *ptr++ = colormap[CM_BLUE][c];
+    }
+  }
+  return 1;
+}
+
+
+/*
+ * Read one row of pixels.
+ * This version is used for the first call on get_pixel_rows when
+ * reading an interlaced GIF file: we read the whole image into memory.
+ */
+
+METHODDEF(JDIMENSION)
+load_interlaced_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr)sinfo;
+  register _JSAMPROW sptr;
+  register JDIMENSION col;
+  JDIMENSION row;
+  cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+
+  /* Read the interlaced image into the virtual array we've created. */
+  for (row = 0; row < cinfo->image_height; row++) {
+    if (progress != NULL) {
+      progress->pub.pass_counter = (long)row;
+      progress->pub.pass_limit = (long)cinfo->image_height;
+      (*progress->pub.progress_monitor) ((j_common_ptr)cinfo);
+    }
+    sptr = *(_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->interlaced_image, row, (JDIMENSION)1,
+       TRUE);
+    for (col = cinfo->image_width; col > 0; col--) {
+      *sptr++ = (_JSAMPLE)LZWReadByte(source);
+    }
+  }
+  if (progress != NULL)
+    progress->completed_extra_passes++;
+
+  /* Replace method pointer so subsequent calls don't come here. */
+  source->pub.get_pixel_rows = get_interlaced_row;
+  /* Initialize for get_interlaced_row, and perform first call on it. */
+  source->cur_row_number = 0;
+  source->pass2_offset = (cinfo->image_height + 7) / 8;
+  source->pass3_offset = source->pass2_offset + (cinfo->image_height + 3) / 8;
+  source->pass4_offset = source->pass3_offset + (cinfo->image_height + 1) / 4;
+
+  return get_interlaced_row(cinfo, sinfo);
+}
+
+
+/*
+ * Read one row of pixels.
+ * This version is used for interlaced GIF images:
+ * we read from the virtual array.
+ */
+
+METHODDEF(JDIMENSION)
+get_interlaced_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  gif_source_ptr source = (gif_source_ptr)sinfo;
+  register int c;
+  register _JSAMPROW sptr, ptr;
+  register JDIMENSION col;
+  register _JSAMPARRAY colormap = source->colormap;
+  JDIMENSION irow;
+
+  /* Figure out which row of interlaced image is needed, and access it. */
+  switch ((int)(source->cur_row_number & 7)) {
+  case 0:                       /* first-pass row */
+    irow = source->cur_row_number >> 3;
+    break;
+  case 4:                       /* second-pass row */
+    irow = (source->cur_row_number >> 3) + source->pass2_offset;
+    break;
+  case 2:                       /* third-pass row */
+  case 6:
+    irow = (source->cur_row_number >> 2) + source->pass3_offset;
+    break;
+  default:                      /* fourth-pass row */
+    irow = (source->cur_row_number >> 1) + source->pass4_offset;
+  }
+  sptr = *(_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+    ((j_common_ptr)cinfo, source->interlaced_image, irow, (JDIMENSION)1,
+     FALSE);
+  /* Scan the row, expand colormap, and output */
+  ptr = source->pub._buffer[0];
+  if (cinfo->in_color_space == JCS_GRAYSCALE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = *sptr++;
+      *ptr++ = colormap[CM_RED][c];
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      c = *sptr++;
+      *ptr++ = colormap[CM_RED][c];
+      *ptr++ = colormap[CM_GREEN][c];
+      *ptr++ = colormap[CM_BLUE][c];
+    }
+  }
+  source->cur_row_number++;     /* for next time */
+  return 1;
+}
+
+
+/*
+ * Finish up at the end of the file.
+ */
+
+METHODDEF(void)
+finish_input_gif(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * The module selection routine for GIF format input.
+ */
+
+GLOBAL(cjpeg_source_ptr)
+_jinit_read_gif(j_compress_ptr cinfo)
+{
+  gif_source_ptr source;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Create module interface object */
+  source = (gif_source_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(gif_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
+  /* Fill in method ptrs, except get_pixel_rows which start_input sets */
+  source->pub.start_input = start_input_gif;
+  source->pub.finish_input = finish_input_gif;
+  source->pub.max_pixels = 0;
+
+  return (cjpeg_source_ptr)source;
+}
+
+#endif /* defined(GIF_SUPPORTED) &&
+          (BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)) */
diff --git a/3rdparty/libjpeg-turbo/src/rdjpgcom.c b/3rdparty/libjpeg-turbo/src/rdjpgcom.c
new file mode 100644
index 0000000000..d9a6f85a38
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdjpgcom.c
@@ -0,0 +1,493 @@
+/*
+ * rdjpgcom.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2009 by Bill Allombert, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a very simple stand-alone application that displays
+ * the text in COM (comment) markers in a JFIF file.
+ * This may be useful as an example of the minimum logic needed to parse
+ * JPEG markers.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
+#include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
+
+#include <locale.h>             /* Bill Allombert: use locale for isprint */
+#include <ctype.h>              /* to declare isupper(), tolower() */
+#ifdef USE_SETMODE
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
+/* If you have setmode() but not <io.h>, just delete this line: */
+#include <io.h>                 /* to declare setmode() */
+#endif
+
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#else
+#define READ_BINARY     "rb"
+#endif
+
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS  0
+#endif
+
+
+/*
+ * These macros are used to read the input file.
+ * To reuse this code in another application, you might need to change these.
+ */
+
+static FILE *infile;            /* input JPEG file */
+
+/* Return next input byte, or EOF if no more */
+#define NEXTBYTE()  getc(infile)
+
+
+/* Error exit handler */
+#define ERREXIT(msg)  (fprintf(stderr, "%s\n", msg), exit(EXIT_FAILURE))
+
+
+/* Read one byte, testing for EOF */
+static int
+read_1_byte(void)
+{
+  int c;
+
+  c = NEXTBYTE();
+  if (c == EOF)
+    ERREXIT("Premature EOF in JPEG file");
+  return c;
+}
+
+/* Read 2 bytes, convert to unsigned int */
+/* All 2-byte quantities in JPEG markers are MSB first */
+static unsigned int
+read_2_bytes(void)
+{
+  int c1, c2;
+
+  c1 = NEXTBYTE();
+  if (c1 == EOF)
+    ERREXIT("Premature EOF in JPEG file");
+  c2 = NEXTBYTE();
+  if (c2 == EOF)
+    ERREXIT("Premature EOF in JPEG file");
+  return (((unsigned int)c1) << 8) + ((unsigned int)c2);
+}
+
+
+/*
+ * JPEG markers consist of one or more 0xFF bytes, followed by a marker
+ * code byte (which is not an FF).  Here are the marker codes of interest
+ * in this program.  (See jdmarker.c for a more complete list.)
+ */
+
+#define M_SOF0   0xC0           /* Start Of Frame N */
+#define M_SOF1   0xC1           /* N indicates which compression process */
+#define M_SOF2   0xC2           /* Only SOF0-SOF2 are now in common use */
+#define M_SOF3   0xC3
+#define M_SOF5   0xC5           /* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF6   0xC6
+#define M_SOF7   0xC7
+#define M_SOF9   0xC9
+#define M_SOF10  0xCA
+#define M_SOF11  0xCB
+#define M_SOF13  0xCD
+#define M_SOF14  0xCE
+#define M_SOF15  0xCF
+#define M_SOI    0xD8           /* Start Of Image (beginning of datastream) */
+#define M_EOI    0xD9           /* End Of Image (end of datastream) */
+#define M_SOS    0xDA           /* Start Of Scan (begins compressed data) */
+#define M_APP12  0xEC           /* (we don't bother to list all 16 APPn's) */
+#define M_COM    0xFE           /* COMment */
+
+
+/*
+ * Find the next JPEG marker and return its marker code.
+ * We expect at least one FF byte, possibly more if the compressor used FFs
+ * to pad the file.
+ * There could also be non-FF garbage between markers.  The treatment of such
+ * garbage is unspecified; we choose to skip over it but emit a warning msg.
+ * NB: this routine must not be used after seeing SOS marker, since it will
+ * not deal correctly with FF/00 sequences in the compressed image data...
+ */
+
+static int
+next_marker(void)
+{
+  int c;
+  int discarded_bytes = 0;
+
+  /* Find 0xFF byte; count and skip any non-FFs. */
+  c = read_1_byte();
+  while (c != 0xFF) {
+    discarded_bytes++;
+    c = read_1_byte();
+  }
+  /* Get marker code byte, swallowing any duplicate FF bytes.  Extra FFs
+   * are legal as pad bytes, so don't count them in discarded_bytes.
+   */
+  do {
+    c = read_1_byte();
+  } while (c == 0xFF);
+
+  if (discarded_bytes != 0) {
+    fprintf(stderr, "Warning: garbage data found in JPEG file\n");
+  }
+
+  return c;
+}
+
+
+/*
+ * Read the initial marker, which should be SOI.
+ * For a JFIF file, the first two bytes of the file should be literally
+ * 0xFF M_SOI.  To be more general, we could use next_marker, but if the
+ * input file weren't actually JPEG at all, next_marker might read the whole
+ * file and then return a misleading error message...
+ */
+
+static int
+first_marker(void)
+{
+  int c1, c2;
+
+  c1 = NEXTBYTE();
+  c2 = NEXTBYTE();
+  if (c1 != 0xFF || c2 != M_SOI)
+    ERREXIT("Not a JPEG file");
+  return c2;
+}
+
+
+/*
+ * Most types of marker are followed by a variable-length parameter segment.
+ * This routine skips over the parameters for any marker we don't otherwise
+ * want to process.
+ * Note that we MUST skip the parameter segment explicitly in order not to
+ * be fooled by 0xFF bytes that might appear within the parameter segment;
+ * such bytes do NOT introduce new markers.
+ */
+
+static void
+skip_variable(void)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  unsigned int length;
+
+  /* Get the marker parameter length count */
+  length = read_2_bytes();
+  /* Length includes itself, so must be at least 2 */
+  if (length < 2)
+    ERREXIT("Erroneous JPEG marker length");
+  length -= 2;
+  /* Skip over the remaining bytes */
+  while (length > 0) {
+    (void)read_1_byte();
+    length--;
+  }
+}
+
+
+/*
+ * Process a COM marker.
+ * We want to print out the marker contents as legible text;
+ * we must guard against non-text junk and varying newline representations.
+ */
+
+static void
+process_COM(int raw)
+{
+  unsigned int length;
+  int ch;
+  int lastch = 0;
+
+  /* Bill Allombert: set locale properly for isprint */
+  setlocale(LC_CTYPE, "");
+
+  /* Get the marker parameter length count */
+  length = read_2_bytes();
+  /* Length includes itself, so must be at least 2 */
+  if (length < 2)
+    ERREXIT("Erroneous JPEG marker length");
+  length -= 2;
+
+  while (length > 0) {
+    ch = read_1_byte();
+    if (raw) {
+      putc(ch, stdout);
+    /* Emit the character in a readable form.
+     * Nonprintables are converted to \nnn form,
+     * while \ is converted to \\.
+     * Newlines in CR, CR/LF, or LF form will be printed as one newline.
+     */
+    } else if (ch == '\r') {
+      printf("\n");
+    } else if (ch == '\n') {
+      if (lastch != '\r')
+        printf("\n");
+    } else if (ch == '\\') {
+      printf("\\\\");
+    } else if (isprint(ch)) {
+      putc(ch, stdout);
+    } else {
+      printf("\\%03o", (unsigned int)ch);
+    }
+    lastch = ch;
+    length--;
+  }
+  printf("\n");
+
+  /* Bill Allombert: revert to C locale */
+  setlocale(LC_CTYPE, "C");
+}
+
+
+/*
+ * Process a SOFn marker.
+ * This code is only needed if you want to know the image dimensions...
+ */
+
+static void
+process_SOFn(int marker)
+{
+  unsigned int length;
+  unsigned int image_height, image_width;
+  int data_precision, num_components;
+  const char *process;
+  int ci;
+
+  length = read_2_bytes();      /* usual parameter length count */
+
+  data_precision = read_1_byte();
+  image_height = read_2_bytes();
+  image_width = read_2_bytes();
+  num_components = read_1_byte();
+
+  switch (marker) {
+  case M_SOF0:  process = "Baseline";  break;
+  case M_SOF1:  process = "Extended sequential";  break;
+  case M_SOF2:  process = "Progressive";  break;
+  case M_SOF3:  process = "Lossless";  break;
+  case M_SOF5:  process = "Differential sequential";  break;
+  case M_SOF6:  process = "Differential progressive";  break;
+  case M_SOF7:  process = "Differential lossless";  break;
+  case M_SOF9:  process = "Extended sequential, arithmetic coding";  break;
+  case M_SOF10: process = "Progressive, arithmetic coding";  break;
+  case M_SOF11: process = "Lossless, arithmetic coding";  break;
+  case M_SOF13: process = "Differential sequential, arithmetic coding";  break;
+  case M_SOF14:
+    process = "Differential progressive, arithmetic coding";  break;
+  case M_SOF15: process = "Differential lossless, arithmetic coding";  break;
+  default:      process = "Unknown";  break;
+  }
+
+  printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n",
+         image_width, image_height, num_components, data_precision);
+  printf("JPEG process: %s\n", process);
+
+  if (length != (unsigned int)(8 + num_components * 3))
+    ERREXIT("Bogus SOF marker length");
+
+  for (ci = 0; ci < num_components; ci++) {
+    (void)read_1_byte();        /* Component ID code */
+    (void)read_1_byte();        /* H, V sampling factors */
+    (void)read_1_byte();        /* Quantization table number */
+  }
+}
+
+
+/*
+ * Parse the marker stream until SOS or EOI is seen;
+ * display any COM markers.
+ * While the companion program wrjpgcom will always insert COM markers before
+ * SOFn, other implementations might not, so we scan to SOS before stopping.
+ * If we were only interested in the image dimensions, we would stop at SOFn.
+ * (Conversely, if we only cared about COM markers, there would be no need
+ * for special code to handle SOFn; we could treat it like other markers.)
+ */
+
+static int
+scan_JPEG_header(int verbose, int raw)
+{
+  int marker;
+
+  /* Expect SOI at start of file */
+  if (first_marker() != M_SOI)
+    ERREXIT("Expected SOI marker first");
+
+  /* Scan miscellaneous markers until we reach SOS. */
+  for (;;) {
+    marker = next_marker();
+    switch (marker) {
+      /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
+       * treated as SOFn.  C4 in particular is actually DHT.
+       */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_SOF9:                /* Extended sequential, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
+      if (verbose)
+        process_SOFn(marker);
+      else
+        skip_variable();
+      break;
+
+    case M_SOS:                 /* stop before hitting compressed data */
+      return marker;
+
+    case M_EOI:                 /* in case it's a tables-only JPEG stream */
+      return marker;
+
+    case M_COM:
+      process_COM(raw);
+      break;
+
+    case M_APP12:
+      /* Some digital camera makers put useful textual information into
+       * APP12 markers, so we print those out too when in -verbose mode.
+       */
+      if (verbose) {
+        printf("APP12 contains:\n");
+        process_COM(raw);
+      } else
+        skip_variable();
+      break;
+
+    default:                    /* Anything else just gets skipped */
+      skip_variable();          /* we assume it has a parameter count... */
+      break;
+    }
+  } /* end loop */
+}
+
+
+/* Command line parsing code */
+
+static const char *progname;    /* program name for error messages */
+
+
+static void
+usage(void)
+/* complain about bad command line */
+{
+  fprintf(stderr, "rdjpgcom displays any textual comments in a JPEG file.\n");
+
+  fprintf(stderr, "Usage: %s [switches] [inputfile]\n", progname);
+
+  fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -raw        Display non-printable characters in comments (unsafe)\n");
+  fprintf(stderr, "  -verbose    Also display dimensions of JPEG image\n");
+
+  exit(EXIT_FAILURE);
+}
+
+
+static int
+keymatch(char *arg, const char *keyword, int minchars)
+/* Case-insensitive matching of (possibly abbreviated) keyword switches. */
+/* keyword is the constant keyword (must be lower case already), */
+/* minchars is length of minimum legal abbreviation. */
+{
+  register int ca, ck;
+  register int nmatched = 0;
+
+  while ((ca = *arg++) != '\0') {
+    if ((ck = *keyword++) == '\0')
+      return 0;                 /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
+      ca = tolower(ca);
+    if (ca != ck)
+      return 0;                 /* no good */
+    nmatched++;                 /* count matched characters */
+  }
+  /* reached end of argument; fail if it's too short for unique abbrev */
+  if (nmatched < minchars)
+    return 0;
+  return 1;                     /* A-OK */
+}
+
+
+/*
+ * The main program.
+ */
+
+int
+main(int argc, char **argv)
+{
+  int argn;
+  char *arg;
+  int verbose = 0, raw = 0;
+
+  progname = argv[0];
+  if (progname == NULL || progname[0] == 0)
+    progname = "rdjpgcom";      /* in case C library doesn't provide it */
+
+  /* Parse switches, if any */
+  for (argn = 1; argn < argc; argn++) {
+    arg = argv[argn];
+    if (arg[0] != '-')
+      break;                    /* not switch, must be file name */
+    arg++;                      /* advance over '-' */
+    if (keymatch(arg, "verbose", 1)) {
+      verbose++;
+    } else if (keymatch(arg, "raw", 1)) {
+      raw = 1;
+    } else
+      usage();
+  }
+
+  /* Open the input file. */
+  /* Unix style: expect zero or one file name */
+  if (argn < argc - 1) {
+    fprintf(stderr, "%s: only one input file\n", progname);
+    usage();
+  }
+  if (argn < argc) {
+    if ((infile = fopen(argv[argn], READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    /* default input file is stdin */
+#ifdef USE_SETMODE              /* need to hack file mode? */
+    setmode(fileno(stdin), O_BINARY);
+#endif
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
+    if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open stdin\n", progname);
+      exit(EXIT_FAILURE);
+    }
+#else
+    infile = stdin;
+#endif
+  }
+
+  /* Scan the JPEG headers. */
+  (void)scan_JPEG_header(verbose, raw);
+
+  /* All done. */
+  exit(EXIT_SUCCESS);
+  return 0;                     /* suppress no-return-value warnings */
+}
diff --git a/3rdparty/libjpeg-turbo/src/rdppm.c b/3rdparty/libjpeg-turbo/src/rdppm.c
new file mode 100644
index 0000000000..84e26f7b3f
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdppm.c
@@ -0,0 +1,890 @@
+/*
+ * rdppm.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2009 by Bill Allombert, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015-2017, 2020-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to read input images in PPM/PGM format.
+ * The extended 2-byte-per-sample raw PPM/PGM formats are supported.
+ * The PBMPLUS library is NOT required to compile this software
+ * (but it is highly useful as a set of PPM image manipulation programs).
+ *
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume input from
+ * an ordinary stdio stream.  They further assume that reading begins
+ * at the start of the file; start_input may need work if the
+ * user interface has already read some data (e.g., to determine that
+ * the file is indeed PPM format).
+ */
+
+#include "cmyk.h"
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+
+#if defined(PPM_SUPPORTED) && \
+    (BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED))
+
+
+/* Portions of this code are based on the PBMPLUS library, which is:
+**
+** Copyright (C) 1988 by Jef Poskanzer.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation.  This software is provided "as is" without express or
+** implied warranty.
+*/
+
+
+/* Macros to deal with unsigned chars as efficiently as compiler allows */
+
+typedef unsigned char U_CHAR;
+#define UCH(x)  ((int)(x))
+
+
+#define ReadOK(file, buffer, len) \
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
+
+static int alpha_index[JPEG_NUMCS] = {
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
+};
+
+
+/* Private version of data source object */
+
+typedef struct {
+  struct cjpeg_source_struct pub; /* public fields */
+
+  /* Usually these two pointers point to the same place: */
+  U_CHAR *iobuffer;             /* fread's I/O buffer */
+  _JSAMPROW pixrow;             /* compressor input buffer */
+  size_t buffer_width;          /* width of I/O buffer */
+  _JSAMPLE *rescale;            /* => maxval-remapping array, or NULL */
+  unsigned int maxval;
+} ppm_source_struct;
+
+typedef ppm_source_struct *ppm_source_ptr;
+
+
+LOCAL(int)
+pbm_getc(FILE *infile)
+/* Read next char, skipping over any comments */
+/* A comment/newline sequence is returned as a newline */
+{
+  register int ch;
+
+  ch = getc(infile);
+  if (ch == '#') {
+    do {
+      ch = getc(infile);
+    } while (ch != '\n' && ch != EOF);
+  }
+  return ch;
+}
+
+
+LOCAL(unsigned int)
+read_pbm_integer(j_compress_ptr cinfo, FILE *infile, unsigned int maxval)
+/* Read an unsigned decimal integer from the PPM file */
+/* Swallows one trailing character after the integer */
+/* Note that on a 16-bit-int machine, only values up to 64k can be read. */
+/* This should not be a problem in practice. */
+{
+  register int ch;
+  register unsigned int val;
+
+  /* Skip any leading whitespace */
+  do {
+    ch = pbm_getc(infile);
+    if (ch == EOF)
+      ERREXIT(cinfo, JERR_INPUT_EOF);
+  } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+
+  if (ch < '0' || ch > '9')
+    ERREXIT(cinfo, JERR_PPM_NONNUMERIC);
+
+  val = ch - '0';
+  while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') {
+    val *= 10;
+    val += ch - '0';
+    if (val > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+  }
+
+  return val;
+}
+
+
+/*
+ * Read one row of pixels.
+ *
+ * We provide several different versions depending on input file format.
+ * In all cases, input is scaled to the size of _JSAMPLE.
+ *
+ * A really fast path is provided for reading byte/sample raw files with
+ * maxval = _MAXJSAMPLE, which is the normal case for 8-bit data.
+ */
+
+
+METHODDEF(JDIMENSION)
+get_text_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading text-format PGM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  FILE *infile = source->pub.input_file;
+  register _JSAMPROW ptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  ptr = source->pub._buffer[0];
+  for (col = cinfo->image_width; col > 0; col--) {
+    *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)];
+  }
+  return 1;
+}
+
+
+#define GRAY_RGB_READ_LOOP(read_op, alpha_set_op) { \
+  for (col = cinfo->image_width; col > 0; col--) { \
+    ptr[rindex] = ptr[gindex] = ptr[bindex] = read_op; \
+    alpha_set_op \
+    ptr += ps; \
+  } \
+}
+
+METHODDEF(JDIMENSION)
+get_text_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading text-format PGM files with any maxval and
+   converting to extended RGB */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  FILE *infile = source->pub.input_file;
+  register _JSAMPROW ptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  ptr = source->pub._buffer[0];
+  if (maxval == _MAXJSAMPLE) {
+    if (aindex >= 0)
+      GRAY_RGB_READ_LOOP((_JSAMPLE)read_pbm_integer(cinfo, infile, maxval),
+                         ptr[aindex] = _MAXJSAMPLE;)
+    else
+      GRAY_RGB_READ_LOOP((_JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {})
+  } else {
+    if (aindex >= 0)
+      GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],
+                         ptr[aindex] = _MAXJSAMPLE;)
+    else
+      GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {})
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_text_gray_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading text-format PGM files with any maxval and
+   converting to CMYK */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  FILE *infile = source->pub.input_file;
+  register _JSAMPROW ptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  ptr = source->pub._buffer[0];
+  if (maxval == _MAXJSAMPLE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE gray = (_JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE gray = rescale[read_pbm_integer(cinfo, infile, maxval)];
+      rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  }
+  return 1;
+}
+
+
+#define RGB_READ_LOOP(read_op, alpha_set_op) { \
+  for (col = cinfo->image_width; col > 0; col--) { \
+    ptr[rindex] = read_op; \
+    ptr[gindex] = read_op; \
+    ptr[bindex] = read_op; \
+    alpha_set_op \
+    ptr += ps; \
+  } \
+}
+
+METHODDEF(JDIMENSION)
+get_text_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading text-format PPM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  FILE *infile = source->pub.input_file;
+  register _JSAMPROW ptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  ptr = source->pub._buffer[0];
+  if (maxval == _MAXJSAMPLE) {
+    if (aindex >= 0)
+      RGB_READ_LOOP((_JSAMPLE)read_pbm_integer(cinfo, infile, maxval),
+                    ptr[aindex] = _MAXJSAMPLE;)
+    else
+      RGB_READ_LOOP((_JSAMPLE)read_pbm_integer(cinfo, infile, maxval), {})
+  } else {
+    if (aindex >= 0)
+      RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],
+                    ptr[aindex] = _MAXJSAMPLE;)
+    else
+      RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], {})
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_text_rgb_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading text-format PPM files with any maxval and
+   converting to CMYK */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  FILE *infile = source->pub.input_file;
+  register _JSAMPROW ptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  ptr = source->pub._buffer[0];
+  if (maxval == _MAXJSAMPLE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE r = (_JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      _JSAMPLE g = (_JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      _JSAMPLE b = (_JSAMPLE)read_pbm_integer(cinfo, infile, maxval);
+      rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE r = rescale[read_pbm_integer(cinfo, infile, maxval)];
+      _JSAMPLE g = rescale[read_pbm_integer(cinfo, infile, maxval)];
+      _JSAMPLE b = rescale[read_pbm_integer(cinfo, infile, maxval)];
+      rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_scaled_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format PGM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    *ptr++ = rescale[UCH(*bufferptr++)];
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format PGM files with any maxval
+   and converting to extended RGB */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  if (maxval == _MAXJSAMPLE) {
+    if (aindex >= 0)
+      GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = _MAXJSAMPLE;)
+    else
+      GRAY_RGB_READ_LOOP(*bufferptr++, {})
+  } else {
+    if (aindex >= 0)
+      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)],
+                         ptr[aindex] = _MAXJSAMPLE;)
+    else
+      GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {})
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_gray_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format PGM files with any maxval
+   and converting to CMYK */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  if (maxval == _MAXJSAMPLE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE gray = *bufferptr++;
+      rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE gray = rescale[UCH(*bufferptr++)];
+      rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format PPM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  if (maxval == _MAXJSAMPLE) {
+    if (aindex >= 0)
+      RGB_READ_LOOP(*bufferptr++, ptr[aindex] = _MAXJSAMPLE;)
+    else
+      RGB_READ_LOOP(*bufferptr++, {})
+  } else {
+    if (aindex >= 0)
+      RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = _MAXJSAMPLE;)
+    else
+      RGB_READ_LOOP(rescale[UCH(*bufferptr++)], {})
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_rgb_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format PPM files with any maxval and
+   converting to CMYK */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  if (maxval == _MAXJSAMPLE) {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE r = *bufferptr++;
+      _JSAMPLE g = *bufferptr++;
+      _JSAMPLE b = *bufferptr++;
+      rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  } else {
+    for (col = cinfo->image_width; col > 0; col--) {
+      _JSAMPLE r = rescale[UCH(*bufferptr++)];
+      _JSAMPLE g = rescale[UCH(*bufferptr++)];
+      _JSAMPLE b = rescale[UCH(*bufferptr++)];
+      rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3);
+      ptr += 4;
+    }
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_raw_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-byte-format files with maxval = _MAXJSAMPLE.
+ * In this case we just read right into the _JSAMPLE buffer!
+ * Note that same code works for PPM and PGM files.
+ */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_word_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-word-format PGM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    register unsigned int temp;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
+    if (temp > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    *ptr++ = rescale[temp];
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_word_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-word-format PGM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    register unsigned int temp;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
+    if (temp > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    ptr[rindex] = ptr[gindex] = ptr[bindex] = rescale[temp];
+    if (aindex >= 0)
+      ptr[aindex] = _MAXJSAMPLE;
+    ptr += ps;
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_word_gray_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-word-format PGM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    register unsigned int gray;
+    gray  = UCH(*bufferptr++) << 8;
+    gray |= UCH(*bufferptr++);
+    if (gray > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    rgb_to_cmyk(rescale[gray], rescale[gray], rescale[gray], ptr, ptr + 1,
+                ptr + 2, ptr + 3);
+    ptr += 4;
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_word_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-word-format PPM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+  register int rindex = rgb_red[cinfo->in_color_space];
+  register int gindex = rgb_green[cinfo->in_color_space];
+  register int bindex = rgb_blue[cinfo->in_color_space];
+  register int aindex = alpha_index[cinfo->in_color_space];
+  register int ps = rgb_pixelsize[cinfo->in_color_space];
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    register unsigned int temp;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
+    if (temp > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    ptr[rindex] = rescale[temp];
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
+    if (temp > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    ptr[gindex] = rescale[temp];
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
+    if (temp > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    ptr[bindex] = rescale[temp];
+    if (aindex >= 0)
+      ptr[aindex] = _MAXJSAMPLE;
+    ptr += ps;
+  }
+  return 1;
+}
+
+
+METHODDEF(JDIMENSION)
+get_word_rgb_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading raw-word-format PPM files with any maxval */
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  register _JSAMPROW ptr;
+  register U_CHAR *bufferptr;
+  register _JSAMPLE *rescale = source->rescale;
+  JDIMENSION col;
+  unsigned int maxval = source->maxval;
+
+  if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+  ptr = source->pub._buffer[0];
+  bufferptr = source->iobuffer;
+  for (col = cinfo->image_width; col > 0; col--) {
+    register unsigned int r, g, b;
+    r  = UCH(*bufferptr++) << 8;
+    r |= UCH(*bufferptr++);
+    if (r > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    g  = UCH(*bufferptr++) << 8;
+    g |= UCH(*bufferptr++);
+    if (g > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    b  = UCH(*bufferptr++) << 8;
+    b |= UCH(*bufferptr++);
+    if (b > maxval)
+      ERREXIT(cinfo, JERR_PPM_OUTOFRANGE);
+    rgb_to_cmyk(rescale[r], rescale[g], rescale[b], ptr, ptr + 1, ptr + 2,
+                ptr + 3);
+    ptr += 4;
+  }
+  return 1;
+}
+
+
+/*
+ * Read the file header; return image size and component count.
+ */
+
+METHODDEF(void)
+start_input_ppm(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  ppm_source_ptr source = (ppm_source_ptr)sinfo;
+  int c;
+  unsigned int w, h, maxval;
+  boolean need_iobuffer, use_raw_buffer, need_rescale;
+
+  if (getc(source->pub.input_file) != 'P')
+    ERREXIT(cinfo, JERR_PPM_NOT);
+
+  c = getc(source->pub.input_file); /* subformat discriminator character */
+
+  /* detect unsupported variants (ie, PBM) before trying to read header */
+  switch (c) {
+  case '2':                     /* it's a text-format PGM file */
+  case '3':                     /* it's a text-format PPM file */
+  case '5':                     /* it's a raw-format PGM file */
+  case '6':                     /* it's a raw-format PPM file */
+    break;
+  default:
+    ERREXIT(cinfo, JERR_PPM_NOT);
+    break;
+  }
+
+  /* fetch the remaining header info */
+  w = read_pbm_integer(cinfo, source->pub.input_file, 65535);
+  h = read_pbm_integer(cinfo, source->pub.input_file, 65535);
+  maxval = read_pbm_integer(cinfo, source->pub.input_file, 65535);
+
+  if (w <= 0 || h <= 0 || maxval <= 0) /* error check */
+    ERREXIT(cinfo, JERR_PPM_NOT);
+  if (sinfo->max_pixels && (unsigned long long)w * h > sinfo->max_pixels)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, sinfo->max_pixels);
+
+  cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */
+  cinfo->image_width = (JDIMENSION)w;
+  cinfo->image_height = (JDIMENSION)h;
+  source->maxval = maxval;
+
+  /* initialize flags to most common settings */
+  need_iobuffer = TRUE;         /* do we need an I/O buffer? */
+  use_raw_buffer = FALSE;       /* do we map input buffer onto I/O buffer? */
+  need_rescale = TRUE;          /* do we need a rescale array? */
+
+  switch (c) {
+  case '2':                     /* it's a text-format PGM file */
+    if (cinfo->in_color_space == JCS_UNKNOWN ||
+        cinfo->in_color_space == JCS_RGB)
+      cinfo->in_color_space = JCS_GRAYSCALE;
+    TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h);
+    if (cinfo->in_color_space == JCS_GRAYSCALE)
+      source->pub.get_pixel_rows = get_text_gray_row;
+    else if (IsExtRGB(cinfo->in_color_space))
+      source->pub.get_pixel_rows = get_text_gray_rgb_row;
+    else if (cinfo->in_color_space == JCS_CMYK)
+      source->pub.get_pixel_rows = get_text_gray_cmyk_row;
+    else
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    need_iobuffer = FALSE;
+    break;
+
+  case '3':                     /* it's a text-format PPM file */
+    if (cinfo->in_color_space == JCS_UNKNOWN)
+      cinfo->in_color_space = JCS_EXT_RGB;
+    TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h);
+    if (IsExtRGB(cinfo->in_color_space))
+      source->pub.get_pixel_rows = get_text_rgb_row;
+    else if (cinfo->in_color_space == JCS_CMYK)
+      source->pub.get_pixel_rows = get_text_rgb_cmyk_row;
+    else
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    need_iobuffer = FALSE;
+    break;
+
+  case '5':                     /* it's a raw-format PGM file */
+    if (cinfo->in_color_space == JCS_UNKNOWN ||
+        cinfo->in_color_space == JCS_RGB)
+      cinfo->in_color_space = JCS_GRAYSCALE;
+    TRACEMS2(cinfo, 1, JTRC_PGM, w, h);
+    if (maxval > 255) {
+      if (cinfo->in_color_space == JCS_GRAYSCALE)
+        source->pub.get_pixel_rows = get_word_gray_row;
+      else if (IsExtRGB(cinfo->in_color_space))
+        source->pub.get_pixel_rows = get_word_gray_rgb_row;
+      else if (cinfo->in_color_space == JCS_CMYK)
+        source->pub.get_pixel_rows = get_word_gray_cmyk_row;
+      else
+        ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    } else if (maxval == _MAXJSAMPLE && sizeof(_JSAMPLE) == sizeof(U_CHAR) &&
+               cinfo->in_color_space == JCS_GRAYSCALE) {
+      source->pub.get_pixel_rows = get_raw_row;
+      use_raw_buffer = TRUE;
+      need_rescale = FALSE;
+    } else {
+      if (cinfo->in_color_space == JCS_GRAYSCALE)
+        source->pub.get_pixel_rows = get_scaled_gray_row;
+      else if (IsExtRGB(cinfo->in_color_space))
+        source->pub.get_pixel_rows = get_gray_rgb_row;
+      else if (cinfo->in_color_space == JCS_CMYK)
+        source->pub.get_pixel_rows = get_gray_cmyk_row;
+      else
+        ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    }
+    break;
+
+  case '6':                     /* it's a raw-format PPM file */
+    if (cinfo->in_color_space == JCS_UNKNOWN)
+      cinfo->in_color_space = JCS_EXT_RGB;
+    TRACEMS2(cinfo, 1, JTRC_PPM, w, h);
+    if (maxval > 255) {
+      if (IsExtRGB(cinfo->in_color_space))
+        source->pub.get_pixel_rows = get_word_rgb_row;
+      else if (cinfo->in_color_space == JCS_CMYK)
+        source->pub.get_pixel_rows = get_word_rgb_cmyk_row;
+      else
+        ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    } else if (maxval == _MAXJSAMPLE && sizeof(_JSAMPLE) == sizeof(U_CHAR) &&
+#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
+               (cinfo->in_color_space == JCS_EXT_RGB ||
+                cinfo->in_color_space == JCS_RGB)) {
+#else
+               cinfo->in_color_space == JCS_EXT_RGB) {
+#endif
+      source->pub.get_pixel_rows = get_raw_row;
+      use_raw_buffer = TRUE;
+      need_rescale = FALSE;
+    } else {
+      if (IsExtRGB(cinfo->in_color_space))
+        source->pub.get_pixel_rows = get_rgb_row;
+      else if (cinfo->in_color_space == JCS_CMYK)
+        source->pub.get_pixel_rows = get_rgb_cmyk_row;
+      else
+        ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    }
+    break;
+  }
+
+  if (IsExtRGB(cinfo->in_color_space))
+    cinfo->input_components = rgb_pixelsize[cinfo->in_color_space];
+  else if (cinfo->in_color_space == JCS_GRAYSCALE)
+    cinfo->input_components = 1;
+  else if (cinfo->in_color_space == JCS_CMYK)
+    cinfo->input_components = 4;
+
+  /* Allocate space for I/O buffer: 1 or 3 bytes or words/pixel. */
+  if (need_iobuffer) {
+    if (c == '6')
+      source->buffer_width = (size_t)w * 3 *
+        ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR)));
+    else
+      source->buffer_width = (size_t)w *
+        ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR)));
+    source->iobuffer = (U_CHAR *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  source->buffer_width);
+  }
+
+  /* Create compressor input buffer. */
+  if (use_raw_buffer) {
+    /* For unscaled raw-input case, we can just map it onto the I/O buffer. */
+    /* Synthesize a _JSAMPARRAY pointer structure */
+    source->pixrow = (_JSAMPROW)source->iobuffer;
+    source->pub._buffer = &source->pixrow;
+    source->pub.buffer_height = 1;
+  } else {
+    /* Need to translate anyway, so make a separate sample buffer. */
+    source->pub._buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (JDIMENSION)w * cinfo->input_components, (JDIMENSION)1);
+    source->pub.buffer_height = 1;
+  }
+
+  /* Compute the rescaling array if required. */
+  if (need_rescale) {
+    long val, half_maxval;
+
+    /* On 16-bit-int machines we have to be careful of maxval = 65535 */
+    source->rescale = (_JSAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  (size_t)(((long)MAX(maxval, 255) + 1L) *
+                                           sizeof(_JSAMPLE)));
+    memset(source->rescale, 0, (size_t)(((long)MAX(maxval, 255) + 1L) *
+                                        sizeof(_JSAMPLE)));
+    half_maxval = maxval / 2;
+    for (val = 0; val <= (long)maxval; val++) {
+      /* The multiplication here must be done in 32 bits to avoid overflow */
+      source->rescale[val] = (_JSAMPLE)((val * _MAXJSAMPLE + half_maxval) /
+                                        maxval);
+    }
+  }
+}
+
+
+/*
+ * Finish up at the end of the file.
+ */
+
+METHODDEF(void)
+finish_input_ppm(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * The module selection routine for PPM format input.
+ */
+
+GLOBAL(cjpeg_source_ptr)
+_jinit_read_ppm(j_compress_ptr cinfo)
+{
+  ppm_source_ptr source;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Create module interface object */
+  source = (ppm_source_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(ppm_source_struct));
+  /* Fill in method ptrs, except get_pixel_rows which start_input sets */
+  source->pub.start_input = start_input_ppm;
+  source->pub.finish_input = finish_input_ppm;
+  source->pub.max_pixels = 0;
+
+  return (cjpeg_source_ptr)source;
+}
+
+#endif /* defined(PPM_SUPPORTED) &&
+          (BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)) */
diff --git a/3rdparty/libjpeg-turbo/src/rdswitch.c b/3rdparty/libjpeg-turbo/src/rdswitch.c
new file mode 100644
index 0000000000..33449c86ba
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdswitch.c
@@ -0,0 +1,428 @@
+/*
+ * rdswitch.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2018, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to process some of cjpeg's more complicated
+ * command-line switches.  Switches processed here are:
+ *      -qtables file           Read quantization tables from text file
+ *      -scans file             Read scan script from text file
+ *      -quality N[,N,...]      Set quality ratings
+ *      -qslots N[,N,...]       Set component quantization table selectors
+ *      -sample HxV[,HxV,...]   Set component sampling factors
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include <ctype.h>              /* to declare isdigit(), isspace() */
+
+
+LOCAL(int)
+text_getc(FILE *file)
+/* Read next char, skipping over any comments (# to end of line) */
+/* A comment/newline sequence is returned as a newline */
+{
+  register int ch;
+
+  ch = getc(file);
+  if (ch == '#') {
+    do {
+      ch = getc(file);
+    } while (ch != '\n' && ch != EOF);
+  }
+  return ch;
+}
+
+
+LOCAL(boolean)
+read_text_integer(FILE *file, long *result, int *termchar)
+/* Read an unsigned decimal integer from a file, store it in result */
+/* Reads one trailing character after the integer; returns it in termchar */
+{
+  register int ch;
+  register long val;
+
+  /* Skip any leading whitespace, detect EOF */
+  do {
+    ch = text_getc(file);
+    if (ch == EOF) {
+      *termchar = ch;
+      return FALSE;
+    }
+  } while (isspace(ch));
+
+  if (!isdigit(ch)) {
+    *termchar = ch;
+    return FALSE;
+  }
+
+  val = ch - '0';
+  while ((ch = text_getc(file)) != EOF) {
+    if (!isdigit(ch))
+      break;
+    val *= 10;
+    val += ch - '0';
+  }
+  *result = val;
+  *termchar = ch;
+  return TRUE;
+}
+
+
+#if JPEG_LIB_VERSION < 70
+static int q_scale_factor[NUM_QUANT_TBLS] = { 100, 100, 100, 100 };
+#endif
+
+GLOBAL(boolean)
+read_quant_tables(j_compress_ptr cinfo, char *filename, boolean force_baseline)
+/* Read a set of quantization tables from the specified file.
+ * The file is plain ASCII text: decimal numbers with whitespace between.
+ * Comments preceded by '#' may be included in the file.
+ * There may be one to NUM_QUANT_TBLS tables in the file, each of 64 values.
+ * The tables are implicitly numbered 0,1,etc.
+ * NOTE: does not affect the qslots mapping, which will default to selecting
+ * table 0 for luminance (or primary) components, 1 for chrominance components.
+ * You must use -qslots if you want a different component->table mapping.
+ */
+{
+  FILE *fp;
+  int tblno, i, termchar;
+  long val;
+  unsigned int table[DCTSIZE2];
+
+  if ((fp = fopen(filename, "r")) == NULL) {
+    fprintf(stderr, "Can't open table file %s\n", filename);
+    return FALSE;
+  }
+  tblno = 0;
+
+  while (read_text_integer(fp, &val, &termchar)) { /* read 1st element of table */
+    if (tblno >= NUM_QUANT_TBLS) {
+      fprintf(stderr, "Too many tables in file %s\n", filename);
+      fclose(fp);
+      return FALSE;
+    }
+    table[0] = (unsigned int)val;
+    for (i = 1; i < DCTSIZE2; i++) {
+      if (!read_text_integer(fp, &val, &termchar)) {
+        fprintf(stderr, "Invalid table data in file %s\n", filename);
+        fclose(fp);
+        return FALSE;
+      }
+      table[i] = (unsigned int)val;
+    }
+#if JPEG_LIB_VERSION >= 70
+    jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno],
+                         force_baseline);
+#else
+    jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno],
+                         force_baseline);
+#endif
+    tblno++;
+  }
+
+  if (termchar != EOF) {
+    fprintf(stderr, "Non-numeric data in file %s\n", filename);
+    fclose(fp);
+    return FALSE;
+  }
+
+  fclose(fp);
+  return TRUE;
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(boolean)
+read_scan_integer(FILE *file, long *result, int *termchar)
+/* Variant of read_text_integer that always looks for a non-space termchar;
+ * this simplifies parsing of punctuation in scan scripts.
+ */
+{
+  register int ch;
+
+  if (!read_text_integer(file, result, termchar))
+    return FALSE;
+  ch = *termchar;
+  while (ch != EOF && isspace(ch))
+    ch = text_getc(file);
+  if (isdigit(ch)) {            /* oops, put it back */
+    if (ungetc(ch, file) == EOF)
+      return FALSE;
+    ch = ' ';
+  } else {
+    /* Any separators other than ';' and ':' are ignored;
+     * this allows user to insert commas, etc, if desired.
+     */
+    if (ch != EOF && ch != ';' && ch != ':')
+      ch = ' ';
+  }
+  *termchar = ch;
+  return TRUE;
+}
+
+
+GLOBAL(boolean)
+read_scan_script(j_compress_ptr cinfo, char *filename)
+/* Read a scan script from the specified text file.
+ * Each entry in the file defines one scan to be emitted.
+ * Entries are separated by semicolons ';'.
+ * An entry contains one to four component indexes,
+ * optionally followed by a colon ':' and four progressive-JPEG parameters.
+ * The component indexes denote which component(s) are to be transmitted
+ * in the current scan.  The first component has index 0.
+ * Sequential JPEG is used if the progressive-JPEG parameters are omitted.
+ * The file is free format text: any whitespace may appear between numbers
+ * and the ':' and ';' punctuation marks.  Also, other punctuation (such
+ * as commas or dashes) can be placed between numbers if desired.
+ * Comments preceded by '#' may be included in the file.
+ * Note: we do very little validity checking here;
+ * jcmaster.c will validate the script parameters.
+ */
+{
+  FILE *fp;
+  int scanno, ncomps, termchar;
+  long val;
+  jpeg_scan_info *scanptr;
+#define MAX_SCANS  100          /* quite arbitrary limit */
+  jpeg_scan_info scans[MAX_SCANS];
+
+  if ((fp = fopen(filename, "r")) == NULL) {
+    fprintf(stderr, "Can't open scan definition file %s\n", filename);
+    return FALSE;
+  }
+  scanptr = scans;
+  scanno = 0;
+
+  while (read_scan_integer(fp, &val, &termchar)) {
+    if (scanno >= MAX_SCANS) {
+      fprintf(stderr, "Too many scans defined in file %s\n", filename);
+      fclose(fp);
+      return FALSE;
+    }
+    scanptr->component_index[0] = (int)val;
+    ncomps = 1;
+    while (termchar == ' ') {
+      if (ncomps >= MAX_COMPS_IN_SCAN) {
+        fprintf(stderr, "Too many components in one scan in file %s\n",
+                filename);
+        fclose(fp);
+        return FALSE;
+      }
+      if (!read_scan_integer(fp, &val, &termchar))
+        goto bogus;
+      scanptr->component_index[ncomps] = (int)val;
+      ncomps++;
+    }
+    scanptr->comps_in_scan = ncomps;
+    if (termchar == ':') {
+      if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ')
+        goto bogus;
+      scanptr->Ss = (int)val;
+      if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ')
+        goto bogus;
+      scanptr->Se = (int)val;
+      if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ')
+        goto bogus;
+      scanptr->Ah = (int)val;
+      if (!read_scan_integer(fp, &val, &termchar))
+        goto bogus;
+      scanptr->Al = (int)val;
+    } else {
+      /* set non-progressive parameters */
+      scanptr->Ss = 0;
+      scanptr->Se = DCTSIZE2 - 1;
+      scanptr->Ah = 0;
+      scanptr->Al = 0;
+    }
+    if (termchar != ';' && termchar != EOF) {
+bogus:
+      fprintf(stderr, "Invalid scan entry format in file %s\n", filename);
+      fclose(fp);
+      return FALSE;
+    }
+    scanptr++, scanno++;
+  }
+
+  if (termchar != EOF) {
+    fprintf(stderr, "Non-numeric data in file %s\n", filename);
+    fclose(fp);
+    return FALSE;
+  }
+
+  if (scanno > 0) {
+    /* Stash completed scan list in cinfo structure.
+     * NOTE: for cjpeg's use, JPOOL_IMAGE is the right lifetime for this data,
+     * but if you want to compress multiple images you'd want JPOOL_PERMANENT.
+     */
+    scanptr = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  scanno * sizeof(jpeg_scan_info));
+    memcpy(scanptr, scans, scanno * sizeof(jpeg_scan_info));
+    cinfo->scan_info = scanptr;
+    cinfo->num_scans = scanno;
+  }
+
+  fclose(fp);
+  return TRUE;
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+#if JPEG_LIB_VERSION < 70
+/* These are the sample quantization tables given in Annex K (Clause K.1) of
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  17,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+LOCAL(void)
+jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline)
+{
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, q_scale_factor[0],
+                       force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, q_scale_factor[1],
+                       force_baseline);
+}
+#endif
+
+
+GLOBAL(boolean)
+set_quality_ratings(j_compress_ptr cinfo, char *arg, boolean force_baseline)
+/* Process a quality-ratings parameter string, of the form
+ *     N[,N,...]
+ * If there are more q-table slots than parameters, the last value is replicated.
+ */
+{
+  int val = 75;                 /* default value */
+  int tblno;
+  char ch;
+
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (*arg) {
+      ch = ',';                 /* if not set by sscanf, will be ',' */
+      if (sscanf(arg, "%d%c", &val, &ch) < 1)
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
+      /* Convert user 0-100 rating to percentage scaling */
+#if JPEG_LIB_VERSION >= 70
+      cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#else
+      q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#endif
+      while (*arg && *arg++ != ','); /* advance to next segment of arg
+                                        string */
+    } else {
+      /* reached end of parameter, set remaining factors to last value */
+#if JPEG_LIB_VERSION >= 70
+      cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#else
+      q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#endif
+    }
+  }
+  jpeg_default_qtables(cinfo, force_baseline);
+  return TRUE;
+}
+
+
+GLOBAL(boolean)
+set_quant_slots(j_compress_ptr cinfo, char *arg)
+/* Process a quantization-table-selectors parameter string, of the form
+ *     N[,N,...]
+ * If there are more components than parameters, the last value is replicated.
+ */
+{
+  int val = 0;                  /* default table # */
+  int ci;
+  char ch;
+
+  for (ci = 0; ci < MAX_COMPONENTS; ci++) {
+    if (*arg) {
+      ch = ',';                 /* if not set by sscanf, will be ',' */
+      if (sscanf(arg, "%d%c", &val, &ch) < 1)
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
+      if (val < 0 || val >= NUM_QUANT_TBLS) {
+        fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n",
+                NUM_QUANT_TBLS - 1);
+        return FALSE;
+      }
+      cinfo->comp_info[ci].quant_tbl_no = val;
+      while (*arg && *arg++ != ','); /* advance to next segment of arg
+                                        string */
+    } else {
+      /* reached end of parameter, set remaining components to last table */
+      cinfo->comp_info[ci].quant_tbl_no = val;
+    }
+  }
+  return TRUE;
+}
+
+
+GLOBAL(boolean)
+set_sample_factors(j_compress_ptr cinfo, char *arg)
+/* Process a sample-factors parameter string, of the form
+ *     HxV[,HxV,...]
+ * If there are more components than parameters, "1x1" is assumed for the rest.
+ */
+{
+  int ci, val1, val2;
+  char ch1, ch2;
+
+  for (ci = 0; ci < MAX_COMPONENTS; ci++) {
+    if (*arg) {
+      ch2 = ',';                /* if not set by sscanf, will be ',' */
+      if (sscanf(arg, "%d%c%d%c", &val1, &ch1, &val2, &ch2) < 3)
+        return FALSE;
+      if ((ch1 != 'x' && ch1 != 'X') || ch2 != ',') /* syntax check */
+        return FALSE;
+      if (val1 <= 0 || val1 > 4 || val2 <= 0 || val2 > 4) {
+        fprintf(stderr, "JPEG sampling factors must be 1..4\n");
+        return FALSE;
+      }
+      cinfo->comp_info[ci].h_samp_factor = val1;
+      cinfo->comp_info[ci].v_samp_factor = val2;
+      while (*arg && *arg++ != ',');  /* advance to next segment of arg
+                                         string */
+    } else {
+      /* reached end of parameter, set remaining components to 1x1 sampling */
+      cinfo->comp_info[ci].h_samp_factor = 1;
+      cinfo->comp_info[ci].v_samp_factor = 1;
+    }
+  }
+  return TRUE;
+}
diff --git a/3rdparty/libjpeg-turbo/src/rdtarga.c b/3rdparty/libjpeg-turbo/src/rdtarga.c
new file mode 100644
index 0000000000..b78a16539e
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/rdtarga.c
@@ -0,0 +1,507 @@
+/*
+ * rdtarga.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2017 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2018, 2021-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to read input images in Targa format.
+ *
+ * These routines may need modification for non-Unix environments or
+ * specialized applications.  As they stand, they assume input from
+ * an ordinary stdio stream.  They further assume that reading begins
+ * at the start of the file; start_input may need work if the
+ * user interface has already read some data (e.g., to determine that
+ * the file is indeed Targa format).
+ *
+ * Based on code contributed by Lee Daniel Crocker.
+ */
+
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+
+#ifdef TARGA_SUPPORTED
+
+
+/* Macros to deal with unsigned chars as efficiently as compiler allows */
+
+typedef unsigned char U_CHAR;
+#define UCH(x)  ((int)(x))
+
+
+#define ReadOK(file, buffer, len) \
+  (fread(buffer, 1, len, file) == ((size_t)(len)))
+
+
+/* Private version of data source object */
+
+typedef struct _tga_source_struct *tga_source_ptr;
+
+typedef struct _tga_source_struct {
+  struct cjpeg_source_struct pub; /* public fields */
+
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
+
+  JSAMPARRAY colormap;          /* Targa colormap (converted to my format) */
+
+  jvirt_sarray_ptr whole_image; /* Needed if funny input row order */
+  JDIMENSION current_row;       /* Current logical row number to read */
+
+  /* Pointer to routine to extract next Targa pixel from input file */
+  void (*read_pixel) (tga_source_ptr sinfo);
+
+  /* Result of read_pixel is delivered here: */
+  U_CHAR tga_pixel[4];
+
+  int pixel_size;               /* Bytes per Targa pixel (1 to 4) */
+  int cmap_length;              /* colormap length */
+
+  /* State info for reading RLE-coded pixels; both counts must be init to 0 */
+  int block_count;              /* # of pixels remaining in RLE block */
+  int dup_pixel_count;          /* # of times to duplicate previous pixel */
+
+  /* This saves the correct pixel-row-expansion method for preload_image */
+  JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+} tga_source_struct;
+
+
+/* For expanding 5-bit pixel values to 8-bit with best rounding */
+
+static const UINT8 c5to8bits[32] = {
+    0,   8,  16,  25,  33,  41,  49,  58,
+   66,  74,  82,  90,  99, 107, 115, 123,
+  132, 140, 148, 156, 165, 173, 181, 189,
+  197, 206, 214, 222, 230, 239, 247, 255
+};
+
+
+
+LOCAL(int)
+read_byte(tga_source_ptr sinfo)
+/* Read next byte from Targa file */
+{
+  register FILE *infile = sinfo->pub.input_file;
+  register int c;
+
+  if ((c = getc(infile)) == EOF)
+    ERREXIT(sinfo->cinfo, JERR_INPUT_EOF);
+  return c;
+}
+
+
+LOCAL(void)
+read_colormap(tga_source_ptr sinfo, int cmaplen, int mapentrysize)
+/* Read the colormap from a Targa file */
+{
+  int i;
+
+  /* Presently only handles 24-bit BGR format */
+  if (mapentrysize != 24)
+    ERREXIT(sinfo->cinfo, JERR_TGA_BADCMAP);
+
+  for (i = 0; i < cmaplen; i++) {
+    sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo);
+    sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo);
+    sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo);
+  }
+}
+
+
+/*
+ * read_pixel methods: get a single pixel from Targa file into tga_pixel[]
+ */
+
+METHODDEF(void)
+read_non_rle_pixel(tga_source_ptr sinfo)
+/* Read one Targa pixel from the input file; no RLE expansion */
+{
+  register int i;
+
+  for (i = 0; i < sinfo->pixel_size; i++) {
+    sinfo->tga_pixel[i] = (U_CHAR)read_byte(sinfo);
+  }
+}
+
+
+METHODDEF(void)
+read_rle_pixel(tga_source_ptr sinfo)
+/* Read one Targa pixel from the input file, expanding RLE data as needed */
+{
+  register int i;
+
+  /* Duplicate previously read pixel? */
+  if (sinfo->dup_pixel_count > 0) {
+    sinfo->dup_pixel_count--;
+    return;
+  }
+
+  /* Time to read RLE block header? */
+  if (--sinfo->block_count < 0) { /* decrement pixels remaining in block */
+    i = read_byte(sinfo);
+    if (i & 0x80) {             /* Start of duplicate-pixel block? */
+      sinfo->dup_pixel_count = i & 0x7F; /* number of dups after this one */
+      sinfo->block_count = 0;   /* then read new block header */
+    } else {
+      sinfo->block_count = i & 0x7F; /* number of pixels after this one */
+    }
+  }
+
+  /* Read next pixel */
+  for (i = 0; i < sinfo->pixel_size; i++) {
+    sinfo->tga_pixel[i] = (U_CHAR)read_byte(sinfo);
+  }
+}
+
+
+/*
+ * Read one row of pixels.
+ *
+ * We provide several different versions depending on input file format.
+ */
+
+
+METHODDEF(JDIMENSION)
+get_8bit_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 8-bit grayscale pixels */
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  register JSAMPROW ptr;
+  register JDIMENSION col;
+
+  ptr = source->pub.buffer[0];
+  for (col = cinfo->image_width; col > 0; col--) {
+    (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
+    *ptr++ = (JSAMPLE)UCH(source->tga_pixel[0]);
+  }
+  return 1;
+}
+
+METHODDEF(JDIMENSION)
+get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 8-bit colormap indexes */
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  register int t;
+  register JSAMPROW ptr;
+  register JDIMENSION col;
+  register JSAMPARRAY colormap = source->colormap;
+  int cmaplen = source->cmap_length;
+
+  ptr = source->pub.buffer[0];
+  for (col = cinfo->image_width; col > 0; col--) {
+    (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
+    t = UCH(source->tga_pixel[0]);
+    if (t >= cmaplen)
+      ERREXIT(cinfo, JERR_TGA_BADPARMS);
+    *ptr++ = colormap[0][t];
+    *ptr++ = colormap[1][t];
+    *ptr++ = colormap[2][t];
+  }
+  return 1;
+}
+
+METHODDEF(JDIMENSION)
+get_16bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 16-bit pixels */
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  register int t;
+  register JSAMPROW ptr;
+  register JDIMENSION col;
+
+  ptr = source->pub.buffer[0];
+  for (col = cinfo->image_width; col > 0; col--) {
+    (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
+    t = UCH(source->tga_pixel[0]);
+    t += UCH(source->tga_pixel[1]) << 8;
+    /* We expand 5 bit data to 8 bit sample width.
+     * The format of the 16-bit (LSB first) input word is
+     *     xRRRRRGGGGGBBBBB
+     */
+    ptr[2] = (JSAMPLE)c5to8bits[t & 0x1F];
+    t >>= 5;
+    ptr[1] = (JSAMPLE)c5to8bits[t & 0x1F];
+    t >>= 5;
+    ptr[0] = (JSAMPLE)c5to8bits[t & 0x1F];
+    ptr += 3;
+  }
+  return 1;
+}
+
+METHODDEF(JDIMENSION)
+get_24bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version is for reading 24-bit pixels */
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  register JSAMPROW ptr;
+  register JDIMENSION col;
+
+  ptr = source->pub.buffer[0];
+  for (col = cinfo->image_width; col > 0; col--) {
+    (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
+    *ptr++ = (JSAMPLE)UCH(source->tga_pixel[2]); /* change BGR to RGB order */
+    *ptr++ = (JSAMPLE)UCH(source->tga_pixel[1]);
+    *ptr++ = (JSAMPLE)UCH(source->tga_pixel[0]);
+  }
+  return 1;
+}
+
+/*
+ * Targa also defines a 32-bit pixel format with order B,G,R,A.
+ * We presently ignore the attribute byte, so the code for reading
+ * these pixels is identical to the 24-bit routine above.
+ * This works because the actual pixel length is only known to read_pixel.
+ */
+
+#define get_32bit_row  get_24bit_row
+
+
+/*
+ * This method is for re-reading the input data in standard top-down
+ * row order.  The entire image has already been read into whole_image
+ * with proper conversion of pixel format, but it's in a funny row order.
+ */
+
+METHODDEF(JDIMENSION)
+get_memory_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  JDIMENSION source_row;
+
+  /* Compute row of source that maps to current_row of normal order */
+  /* For now, assume image is bottom-up and not interlaced. */
+  /* NEEDS WORK to support interlaced images! */
+  source_row = cinfo->image_height - source->current_row - 1;
+
+  /* Fetch that row from virtual array */
+  source->pub.buffer = (*cinfo->mem->access_virt_sarray)
+    ((j_common_ptr)cinfo, source->whole_image,
+     source_row, (JDIMENSION)1, FALSE);
+
+  source->current_row++;
+  return 1;
+}
+
+
+/*
+ * This method loads the image into whole_image during the first call on
+ * get_pixel_rows.  The get_pixel_rows pointer is then adjusted to call
+ * get_memory_row on subsequent calls.
+ */
+
+METHODDEF(JDIMENSION)
+preload_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  JDIMENSION row;
+  cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+
+  /* Read the data into a virtual array in input-file row order. */
+  for (row = 0; row < cinfo->image_height; row++) {
+    if (progress != NULL) {
+      progress->pub.pass_counter = (long)row;
+      progress->pub.pass_limit = (long)cinfo->image_height;
+      (*progress->pub.progress_monitor) ((j_common_ptr)cinfo);
+    }
+    source->pub.buffer = (*cinfo->mem->access_virt_sarray)
+      ((j_common_ptr)cinfo, source->whole_image, row, (JDIMENSION)1, TRUE);
+    (*source->get_pixel_rows) (cinfo, sinfo);
+  }
+  if (progress != NULL)
+    progress->completed_extra_passes++;
+
+  /* Set up to read from the virtual array in unscrambled order */
+  source->pub.get_pixel_rows = get_memory_row;
+  source->current_row = 0;
+  /* And read the first row */
+  return get_memory_row(cinfo, sinfo);
+}
+
+
+/*
+ * Read the file header; return image size and component count.
+ */
+
+METHODDEF(void)
+start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  tga_source_ptr source = (tga_source_ptr)sinfo;
+  U_CHAR targaheader[18];
+  int idlen, cmaptype, subtype, flags, interlace_type, components;
+  unsigned int width, height, maplen;
+  boolean is_bottom_up;
+
+#define GET_2B(offset) \
+  ((unsigned int)UCH(targaheader[offset]) + \
+   (((unsigned int)UCH(targaheader[offset + 1])) << 8))
+
+  if (!ReadOK(source->pub.input_file, targaheader, 18))
+    ERREXIT(cinfo, JERR_INPUT_EOF);
+
+  /* Pretend "15-bit" pixels are 16-bit --- we ignore attribute bit anyway */
+  if (targaheader[16] == 15)
+    targaheader[16] = 16;
+
+  idlen = UCH(targaheader[0]);
+  cmaptype = UCH(targaheader[1]);
+  subtype = UCH(targaheader[2]);
+  maplen = GET_2B(5);
+  width = GET_2B(12);
+  height = GET_2B(14);
+  source->pixel_size = UCH(targaheader[16]) >> 3;
+  flags = UCH(targaheader[17]); /* Image Descriptor byte */
+
+  is_bottom_up = ((flags & 0x20) == 0); /* bit 5 set => top-down */
+  interlace_type = flags >> 6;  /* bits 6/7 are interlace code */
+
+  if (cmaptype > 1 ||           /* cmaptype must be 0 or 1 */
+      source->pixel_size < 1 || source->pixel_size > 4 ||
+      (UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */
+      interlace_type != 0 ||      /* currently don't allow interlaced image */
+      width == 0 || height == 0)  /* image width/height must be non-zero */
+    ERREXIT(cinfo, JERR_TGA_BADPARMS);
+  if (sinfo->max_pixels &&
+      (unsigned long long)width * height > sinfo->max_pixels)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, sinfo->max_pixels);
+
+  if (subtype > 8) {
+    /* It's an RLE-coded file */
+    source->read_pixel = read_rle_pixel;
+    source->block_count = source->dup_pixel_count = 0;
+    subtype -= 8;
+  } else {
+    /* Non-RLE file */
+    source->read_pixel = read_non_rle_pixel;
+  }
+
+  /* Now should have subtype 1, 2, or 3 */
+  components = 3;               /* until proven different */
+  cinfo->in_color_space = JCS_RGB;
+
+  switch (subtype) {
+  case 1:                       /* Colormapped image */
+    if (source->pixel_size == 1 && cmaptype == 1)
+      source->get_pixel_rows = get_8bit_row;
+    else
+      ERREXIT(cinfo, JERR_TGA_BADPARMS);
+    TRACEMS2(cinfo, 1, JTRC_TGA_MAPPED, width, height);
+    break;
+  case 2:                       /* RGB image */
+    switch (source->pixel_size) {
+    case 2:
+      source->get_pixel_rows = get_16bit_row;
+      break;
+    case 3:
+      source->get_pixel_rows = get_24bit_row;
+      break;
+    case 4:
+      source->get_pixel_rows = get_32bit_row;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_TGA_BADPARMS);
+      break;
+    }
+    TRACEMS2(cinfo, 1, JTRC_TGA, width, height);
+    break;
+  case 3:                       /* Grayscale image */
+    components = 1;
+    cinfo->in_color_space = JCS_GRAYSCALE;
+    if (source->pixel_size == 1)
+      source->get_pixel_rows = get_8bit_gray_row;
+    else
+      ERREXIT(cinfo, JERR_TGA_BADPARMS);
+    TRACEMS2(cinfo, 1, JTRC_TGA_GRAY, width, height);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_TGA_BADPARMS);
+    break;
+  }
+
+  if (is_bottom_up) {
+    /* Create a virtual array to buffer the upside-down image. */
+    source->whole_image = (*cinfo->mem->request_virt_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+       (JDIMENSION)width * components, (JDIMENSION)height, (JDIMENSION)1);
+    if (cinfo->progress != NULL) {
+      cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress;
+      progress->total_extra_passes++; /* count file input as separate pass */
+    }
+    /* source->pub.buffer will point to the virtual array. */
+    source->pub.buffer_height = 1; /* in case anyone looks at it */
+    source->pub.get_pixel_rows = preload_image;
+  } else {
+    /* Don't need a virtual array, but do need a one-row input buffer. */
+    source->whole_image = NULL;
+    source->pub.buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (JDIMENSION)width * components, (JDIMENSION)1);
+    source->pub.buffer_height = 1;
+    source->pub.get_pixel_rows = source->get_pixel_rows;
+  }
+
+  while (idlen--)               /* Throw away ID field */
+    (void)read_byte(source);
+
+  if (maplen > 0) {
+    if (maplen > 256 || GET_2B(3) != 0)
+      ERREXIT(cinfo, JERR_TGA_BADCMAP);
+    /* Allocate space to store the colormap */
+    source->colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)maplen, (JDIMENSION)3);
+    source->cmap_length = (int)maplen;
+    /* and read it from the file */
+    read_colormap(source, (int)maplen, UCH(targaheader[7]));
+  } else {
+    if (cmaptype)               /* but you promised a cmap! */
+      ERREXIT(cinfo, JERR_TGA_BADPARMS);
+    source->colormap = NULL;
+    source->cmap_length = 0;
+  }
+
+  cinfo->input_components = components;
+  cinfo->data_precision = 8;
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+}
+
+
+/*
+ * Finish up at the end of the file.
+ */
+
+METHODDEF(void)
+finish_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * The module selection routine for Targa format input.
+ */
+
+GLOBAL(cjpeg_source_ptr)
+jinit_read_targa(j_compress_ptr cinfo)
+{
+  tga_source_ptr source;
+
+  if (cinfo->data_precision != 8)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Create module interface object */
+  source = (tga_source_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(tga_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
+  /* Fill in method ptrs, except get_pixel_rows which start_input sets */
+  source->pub.start_input = start_input_tga;
+  source->pub.finish_input = finish_input_tga;
+  source->pub.max_pixels = 0;
+
+  return (cjpeg_source_ptr)source;
+}
+
+#endif /* TARGA_SUPPORTED */
diff --git a/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt b/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
index 5055302004..023795598a 100644
--- a/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
@@ -1,13 +1,9 @@
 macro(simd_fail message)
-    message(STATUS "libjpeg-turbo(SIMD): ${message}.  Performance will suffer.")
-    set(WITH_SIMD 0 PARENT_SCOPE)
-endmacro()
-
-macro(boolean_number var)
-  if(${var})
-    set(${var} 1 ${ARGN})
+  if(REQUIRE_SIMD)
+    message(FATAL_ERROR "${message}.")
   else()
-    set(${var} 0 ${ARGN})
+    message(WARNING "${message}.  Performance will suffer.")
+    set(WITH_SIMD 0 PARENT_SCOPE)
   endif()
 endmacro()
 
@@ -45,14 +41,14 @@ elseif(CPU_TYPE STREQUAL "i386")
   endif()
 endif()
 
-
-include(CheckLanguage)
-check_language(ASM_NASM)
-if(NOT CMAKE_ASM_NASM_COMPILER)
-  simd_fail("SIMD extensions disabled: could not find NASM compiler")
-  return()
+if(NOT REQUIRE_SIMD)
+  include(CheckLanguage)
+  check_language(ASM_NASM)
+  if(NOT CMAKE_ASM_NASM_COMPILER)
+    simd_fail("SIMD extensions disabled: could not find NASM compiler")
+    return()
+  endif()
 endif()
-
 enable_language(ASM_NASM)
 message(STATUS "CMAKE_ASM_NASM_COMPILER = ${CMAKE_ASM_NASM_COMPILER}")
 
@@ -75,6 +71,8 @@ elseif(CPU_TYPE STREQUAL "i386")
   endif()
 endif()
 
+message(STATUS "CMAKE_ASM_NASM_OBJECT_FORMAT = ${CMAKE_ASM_NASM_OBJECT_FORMAT}")
+
 if(NOT CMAKE_ASM_NASM_OBJECT_FORMAT)
   simd_fail("SIMD extensions disabled: could not determine NASM object format")
   return()
@@ -98,8 +96,21 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
   set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
 endif()
 
+if(CPU_TYPE STREQUAL "x86_64" AND CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf")
+  check_c_source_compiles("
+    #if (__CET__ & 3) == 0
+    #error \"CET not enabled\"
+    #endif
+    int main(void) { return 0; }" HAVE_CET)
+
+  if(HAVE_CET)
+    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__")
+  endif()
+endif()
+
 string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
 set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
+message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}")
 
 set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -I\"${CMAKE_CURRENT_SOURCE_DIR}/nasm/\" -I\"${CMAKE_CURRENT_SOURCE_DIR}/${CPU_TYPE}/\"")
 
@@ -113,7 +124,6 @@ add_custom_target(jsimdcfg COMMAND
     ${CMAKE_CURRENT_SOURCE_DIR}/nasm/jsimdcfg.inc.h |
   ${GREP} -E '^[\;%]|^\ %' | sed 's%_cpp_protection_%%' |
   sed 's@% define@%define@g' >${CMAKE_CURRENT_SOURCE_DIR}/nasm/jsimdcfg.inc)
-set_target_properties(jsimdcfg PROPERTIES FOLDER "3rdparty")
 
 if(CPU_TYPE STREQUAL "x86_64")
   set(SIMD_SOURCES x86_64/jsimdcpu.asm x86_64/jfdctflt-sse.asm
@@ -198,16 +208,14 @@ endforeach()
 
 if(MSVC_IDE OR XCODE)
   set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE)
-  add_library(jsimd OBJECT ${CPU_TYPE}/jsimd.c)
-  add_custom_target(jsimd-objs DEPENDS ${SIMD_OBJS})
-  add_dependencies(jsimd jsimd-objs)
-  set_target_properties(jsimd PROPERTIES FOLDER "3rdparty")
-  set_target_properties(jsimd-objs PROPERTIES FOLDER "3rdparty")
+  add_library(simd OBJECT ${CPU_TYPE}/jsimd.c)
+  add_custom_target(simd-objs DEPENDS ${SIMD_OBJS})
+  add_dependencies(simd simd-objs)
 else()
-  add_library(jsimd OBJECT ${SIMD_SOURCES} ${CPU_TYPE}/jsimd.c)
+  add_library(simd OBJECT ${SIMD_SOURCES} ${CPU_TYPE}/jsimd.c)
 endif()
 if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
-  set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
+  set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
 endif()
 
 
@@ -228,7 +236,6 @@ elseif(CPU_TYPE STREQUAL "arm64" OR CPU_TYPE STREQUAL "arm")
 # following test determines whether -mfloat-abi=softfp should be explicitly
 # added to the compile flags for the intrinsics implementation of the Neon SIMD
 # extensions.
-
 if(BITS EQUAL 32)
   check_c_source_compiles("
     #if defined(__ARM_NEON__) || (!defined(__linux__) && !defined(ANDROID) && !defined(__ANDROID__))
@@ -361,7 +368,7 @@ if(NOT NEON_INTRINSICS)
       -x assembler-with-cpp -c ${CMAKE_CURRENT_BINARY_DIR}/gastest.S
     RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR)
   if(NOT RESULT EQUAL 0)
-    message(STATUS "libjpeg-turbo(SIMD): GAS appears to be broken.  Using the full Neon SIMD intrinsics implementation.")
+    message(WARNING "GAS appears to be broken.  Using the full Neon SIMD intrinsics implementation.")
     set(NEON_INTRINSICS 1 CACHE INTERNAL "" FORCE)
   endif()
 endif()
@@ -397,10 +404,10 @@ if(NOT NEON_INTRINSICS)
   set(SIMD_SOURCES ${SIMD_SOURCES} arm/aarch${BITS}/jsimd_neon.S)
 endif()
 
-add_library(jsimd OBJECT ${SIMD_SOURCES} arm/aarch${BITS}/jsimd.c)
+add_library(simd OBJECT ${SIMD_SOURCES} arm/aarch${BITS}/jsimd.c)
 
 if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
-  set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
+  set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
 endif()
 
 
@@ -439,10 +446,10 @@ if(NOT HAVE_DSPR2)
   return()
 endif()
 
-add_library(jsimd OBJECT mips/jsimd_dspr2.S mips/jsimd.c)
+add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c)
 
 if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
-  set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
+  set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
 endif()
 
 ###############################################################################
@@ -454,6 +461,9 @@ elseif(CPU_TYPE STREQUAL "loongson" OR CPU_TYPE MATCHES "^mips64")
 set(CMAKE_REQUIRED_FLAGS -Wa,-mloongson-mmi,-mloongson-ext)
 
 check_c_source_compiles("
+  #if !(defined(__mips__) && __mips_isa_rev < 6)
+  #error Loongson MMI can't work with MIPS Release 6+
+  #endif
   int main(void) {
     int c = 0, a = 0, b = 0;
     asm (
@@ -487,10 +497,10 @@ foreach(file ${SIMD_SOURCES})
     " -Wa,-mloongson-mmi,-mloongson-ext")
 endforeach()
 
-add_library(jsimd OBJECT ${SIMD_SOURCES} mips64/jsimd.c)
+add_library(simd OBJECT ${SIMD_SOURCES} mips64/jsimd.c)
 
 if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
-  set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
+  set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
 endif()
 
 ###############################################################################
@@ -527,10 +537,10 @@ set(SIMD_SOURCES powerpc/jccolor-altivec.c powerpc/jcgray-altivec.c
 set_source_files_properties(${SIMD_SOURCES} PROPERTIES
   COMPILE_FLAGS -maltivec)
 
-add_library(jsimd OBJECT ${SIMD_SOURCES} powerpc/jsimd.c)
+add_library(simd OBJECT ${SIMD_SOURCES} powerpc/jsimd.c)
 
 if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)
-  set_target_properties(jsimd PROPERTIES POSITION_INDEPENDENT_CODE 1)
+  set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1)
 endif()
 
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
index e3adf23d50..04d64526fb 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
@@ -4,7 +4,7 @@
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2019, Google LLC.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -25,12 +25,10 @@
 #include "../../../jsimddct.h"
 #include "../../jsimd.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 #if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
 
@@ -98,8 +96,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -947,7 +943,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -972,7 +968,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start, Sl,
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
index 604d5472f6..358e1597b1 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
@@ -4,7 +4,7 @@
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -23,20 +23,17 @@
 #include "../../../jdct.h"
 #include "../../../jsimddct.h"
 #include "../../jsimd.h"
-#include "jconfigint.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
 #define JSIMD_FASTLD3  1
 #define JSIMD_FASTST3  2
 #define JSIMD_FASTTBL  4
 
-static unsigned int simd_support = ~0;
-static unsigned int simd_huffman = 1;
-static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 |
-                                    JSIMD_FASTTBL;
+static THREAD_LOCAL unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
+                                                 JSIMD_FASTST3 | JSIMD_FASTTBL;
 
 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
 
@@ -111,8 +108,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 
 /*
@@ -1023,7 +1018,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1050,7 +1045,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
                                                  jpeg_natural_order_start,
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c b/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
index b91c5db478..51db3c5f39 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
@@ -2,6 +2,8 @@
  * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon)
  *
  * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2022, Matthieu Darbois.  All Rights Reserved.
+ * Copyright (C) 2022, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -21,7 +23,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
@@ -41,10 +42,10 @@
 
 void jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits)
+   UJCOEF *values, size_t *zerobits)
 {
-  JCOEF *values_ptr = values;
-  JCOEF *diff_values_ptr = values + DCTSIZE2;
+  UJCOEF *values_ptr = values;
+  UJCOEF *diff_values_ptr = values + DCTSIZE2;
 
   /* Rows of coefficients to zero (since they haven't been processed) */
   int i, rows_to_zero = 8;
@@ -68,23 +69,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     coefs2 = vld1q_lane_s16(block + jpeg_natural_order_start[15], coefs2, 7);
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     jpeg_natural_order_start += 16;
@@ -130,23 +131,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs1 = vshrq_n_s16(coefs1, 15);
-    int16x8_t sign_coefs2 = vshrq_n_s16(coefs2, 15);
+    uint16x8_t sign_coefs1 = vreinterpretq_u16_s16(vshrq_n_s16(coefs1, 15));
+    uint16x8_t sign_coefs2 = vreinterpretq_u16_s16(vshrq_n_s16(coefs2, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff1 = veorq_s16(coefs1, sign_coefs1);
-    int16x8_t diff2 = veorq_s16(coefs2, sign_coefs2);
+    uint16x8_t diff1 = veorq_u16(abs_coefs1, sign_coefs1);
+    uint16x8_t diff2 = veorq_u16(abs_coefs2, sign_coefs2);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs1);
-    vst1q_s16(values_ptr + DCTSIZE, coefs2);
-    vst1q_s16(diff_values_ptr, diff1);
-    vst1q_s16(diff_values_ptr + DCTSIZE, diff2);
+    vst1q_u16(values_ptr, abs_coefs1);
+    vst1q_u16(values_ptr + DCTSIZE, abs_coefs2);
+    vst1q_u16(diff_values_ptr, diff1);
+    vst1q_u16(diff_values_ptr + DCTSIZE, diff2);
     values_ptr += 16;
     diff_values_ptr += 16;
     rows_to_zero -= 2;
@@ -184,17 +185,17 @@ void jsimd_encode_mcu_AC_first_prepare_neon
     }
 
     /* Isolate sign of coefficients. */
-    int16x8_t sign_coefs = vshrq_n_s16(coefs, 15);
+    uint16x8_t sign_coefs = vreinterpretq_u16_s16(vshrq_n_s16(coefs, 15));
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
 
     /* Compute diff values. */
-    int16x8_t diff = veorq_s16(coefs, sign_coefs);
+    uint16x8_t diff = veorq_u16(abs_coefs, sign_coefs);
 
     /* Store transformed coefficients and diff values. */
-    vst1q_s16(values_ptr, coefs);
-    vst1q_s16(diff_values_ptr, diff);
+    vst1q_u16(values_ptr, abs_coefs);
+    vst1q_u16(diff_values_ptr, diff);
     values_ptr += 8;
     diff_values_ptr += 8;
     rows_to_zero--;
@@ -202,8 +203,8 @@ void jsimd_encode_mcu_AC_first_prepare_neon
 
   /* Zero remaining memory in the values and diff_values blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(values_ptr, vdupq_n_s16(0));
-    vst1q_s16(diff_values_ptr, vdupq_n_s16(0));
+    vst1q_u16(values_ptr, vdupq_n_u16(0));
+    vst1q_u16(diff_values_ptr, vdupq_n_u16(0));
     values_ptr += 8;
     diff_values_ptr += 8;
   }
@@ -211,23 +212,23 @@ void jsimd_encode_mcu_AC_first_prepare_neon
   /* Construct zerobits bitmap.  A set bit means that the corresponding
    * coefficient != 0.
    */
-  int16x8_t row0 = vld1q_s16(values + 0 * DCTSIZE);
-  int16x8_t row1 = vld1q_s16(values + 1 * DCTSIZE);
-  int16x8_t row2 = vld1q_s16(values + 2 * DCTSIZE);
-  int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE);
-  int16x8_t row4 = vld1q_s16(values + 4 * DCTSIZE);
-  int16x8_t row5 = vld1q_s16(values + 5 * DCTSIZE);
-  int16x8_t row6 = vld1q_s16(values + 6 * DCTSIZE);
-  int16x8_t row7 = vld1q_s16(values + 7 * DCTSIZE);
+  uint16x8_t row0 = vld1q_u16(values + 0 * DCTSIZE);
+  uint16x8_t row1 = vld1q_u16(values + 1 * DCTSIZE);
+  uint16x8_t row2 = vld1q_u16(values + 2 * DCTSIZE);
+  uint16x8_t row3 = vld1q_u16(values + 3 * DCTSIZE);
+  uint16x8_t row4 = vld1q_u16(values + 4 * DCTSIZE);
+  uint16x8_t row5 = vld1q_u16(values + 5 * DCTSIZE);
+  uint16x8_t row6 = vld1q_u16(values + 6 * DCTSIZE);
+  uint16x8_t row7 = vld1q_u16(values + 7 * DCTSIZE);
 
-  uint8x8_t row0_eq0 = vmovn_u16(vceqq_s16(row0, vdupq_n_s16(0)));
-  uint8x8_t row1_eq0 = vmovn_u16(vceqq_s16(row1, vdupq_n_s16(0)));
-  uint8x8_t row2_eq0 = vmovn_u16(vceqq_s16(row2, vdupq_n_s16(0)));
-  uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0)));
-  uint8x8_t row4_eq0 = vmovn_u16(vceqq_s16(row4, vdupq_n_s16(0)));
-  uint8x8_t row5_eq0 = vmovn_u16(vceqq_s16(row5, vdupq_n_s16(0)));
-  uint8x8_t row6_eq0 = vmovn_u16(vceqq_s16(row6, vdupq_n_s16(0)));
-  uint8x8_t row7_eq0 = vmovn_u16(vceqq_s16(row7, vdupq_n_s16(0)));
+  uint8x8_t row0_eq0 = vmovn_u16(vceqq_u16(row0, vdupq_n_u16(0)));
+  uint8x8_t row1_eq0 = vmovn_u16(vceqq_u16(row1, vdupq_n_u16(0)));
+  uint8x8_t row2_eq0 = vmovn_u16(vceqq_u16(row2, vdupq_n_u16(0)));
+  uint8x8_t row3_eq0 = vmovn_u16(vceqq_u16(row3, vdupq_n_u16(0)));
+  uint8x8_t row4_eq0 = vmovn_u16(vceqq_u16(row4, vdupq_n_u16(0)));
+  uint8x8_t row5_eq0 = vmovn_u16(vceqq_u16(row5, vdupq_n_u16(0)));
+  uint8x8_t row6_eq0 = vmovn_u16(vceqq_u16(row6, vdupq_n_u16(0)));
+  uint8x8_t row7_eq0 = vmovn_u16(vceqq_u16(row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
@@ -274,7 +275,7 @@ void jsimd_encode_mcu_AC_first_prepare_neon
 
 int jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits)
+   UJCOEF *absvalues, size_t *bits)
 {
   /* Temporary storage buffers for data used to compute the signbits bitmap and
    * the end-of-block (EOB) position
@@ -282,7 +283,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
   uint8_t coef_sign_bits[64];
   uint8_t coef_eq1_bits[64];
 
-  JCOEF *absvalues_ptr = absvalues;
+  UJCOEF *absvalues_ptr = absvalues;
   uint8_t *coef_sign_bits_ptr = coef_sign_bits;
   uint8_t *eq1_bits_ptr = coef_eq1_bits;
 
@@ -316,18 +317,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -385,18 +386,18 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr + DCTSIZE, sign_coefs2);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs1 = vabsq_s16(coefs1);
-    int16x8_t abs_coefs2 = vabsq_s16(coefs2);
-    coefs1 = vshlq_s16(abs_coefs1, vdupq_n_s16(-Al));
-    coefs2 = vshlq_s16(abs_coefs2, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs1);
-    vst1q_s16(absvalues_ptr + DCTSIZE, coefs2);
+    uint16x8_t abs_coefs1 = vreinterpretq_u16_s16(vabsq_s16(coefs1));
+    uint16x8_t abs_coefs2 = vreinterpretq_u16_s16(vabsq_s16(coefs2));
+    abs_coefs1 = vshlq_u16(abs_coefs1, vdupq_n_s16(-Al));
+    abs_coefs2 = vshlq_u16(abs_coefs2, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs1);
+    vst1q_u16(absvalues_ptr + DCTSIZE, abs_coefs2);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_s16(coefs1, vdupq_n_s16(1)));
-    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_s16(coefs2, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq11 = vmovn_u16(vceqq_u16(abs_coefs1, vdupq_n_u16(1)));
+    uint8x8_t coefs_eq12 = vmovn_u16(vceqq_u16(abs_coefs2, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq11);
     vst1_u8(eq1_bits_ptr + DCTSIZE, coefs_eq12);
 
@@ -444,14 +445,14 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
     vst1_u8(coef_sign_bits_ptr, sign_coefs);
 
     /* Compute absolute value of coefficients and apply point transform Al. */
-    int16x8_t abs_coefs = vabsq_s16(coefs);
-    coefs = vshlq_s16(abs_coefs, vdupq_n_s16(-Al));
-    vst1q_s16(absvalues_ptr, coefs);
+    uint16x8_t abs_coefs = vreinterpretq_u16_s16(vabsq_s16(coefs));
+    abs_coefs = vshlq_u16(abs_coefs, vdupq_n_s16(-Al));
+    vst1q_u16(absvalues_ptr, abs_coefs);
 
     /* Test whether transformed coefficient values == 1 (used to find EOB
      * position.)
      */
-    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_s16(coefs, vdupq_n_s16(1)));
+    uint8x8_t coefs_eq1 = vmovn_u16(vceqq_u16(abs_coefs, vdupq_n_u16(1)));
     vst1_u8(eq1_bits_ptr, coefs_eq1);
 
     absvalues_ptr += 8;
@@ -462,7 +463,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
 
   /* Zero remaining memory in blocks. */
   for (i = 0; i < rows_to_zero; i++) {
-    vst1q_s16(absvalues_ptr, vdupq_n_s16(0));
+    vst1q_u16(absvalues_ptr, vdupq_n_u16(0));
     vst1_u8(coef_sign_bits_ptr, vdup_n_u8(0));
     vst1_u8(eq1_bits_ptr, vdup_n_u8(0));
     absvalues_ptr += 8;
@@ -471,23 +472,23 @@ int jsimd_encode_mcu_AC_refine_prepare_neon
   }
 
   /* Construct zerobits bitmap. */
-  int16x8_t abs_row0 = vld1q_s16(absvalues + 0 * DCTSIZE);
-  int16x8_t abs_row1 = vld1q_s16(absvalues + 1 * DCTSIZE);
-  int16x8_t abs_row2 = vld1q_s16(absvalues + 2 * DCTSIZE);
-  int16x8_t abs_row3 = vld1q_s16(absvalues + 3 * DCTSIZE);
-  int16x8_t abs_row4 = vld1q_s16(absvalues + 4 * DCTSIZE);
-  int16x8_t abs_row5 = vld1q_s16(absvalues + 5 * DCTSIZE);
-  int16x8_t abs_row6 = vld1q_s16(absvalues + 6 * DCTSIZE);
-  int16x8_t abs_row7 = vld1q_s16(absvalues + 7 * DCTSIZE);
+  uint16x8_t abs_row0 = vld1q_u16(absvalues + 0 * DCTSIZE);
+  uint16x8_t abs_row1 = vld1q_u16(absvalues + 1 * DCTSIZE);
+  uint16x8_t abs_row2 = vld1q_u16(absvalues + 2 * DCTSIZE);
+  uint16x8_t abs_row3 = vld1q_u16(absvalues + 3 * DCTSIZE);
+  uint16x8_t abs_row4 = vld1q_u16(absvalues + 4 * DCTSIZE);
+  uint16x8_t abs_row5 = vld1q_u16(absvalues + 5 * DCTSIZE);
+  uint16x8_t abs_row6 = vld1q_u16(absvalues + 6 * DCTSIZE);
+  uint16x8_t abs_row7 = vld1q_u16(absvalues + 7 * DCTSIZE);
 
-  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_s16(abs_row0, vdupq_n_s16(0)));
-  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_s16(abs_row1, vdupq_n_s16(0)));
-  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_s16(abs_row2, vdupq_n_s16(0)));
-  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_s16(abs_row3, vdupq_n_s16(0)));
-  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_s16(abs_row4, vdupq_n_s16(0)));
-  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_s16(abs_row5, vdupq_n_s16(0)));
-  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_s16(abs_row6, vdupq_n_s16(0)));
-  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_s16(abs_row7, vdupq_n_s16(0)));
+  uint8x8_t abs_row0_eq0 = vmovn_u16(vceqq_u16(abs_row0, vdupq_n_u16(0)));
+  uint8x8_t abs_row1_eq0 = vmovn_u16(vceqq_u16(abs_row1, vdupq_n_u16(0)));
+  uint8x8_t abs_row2_eq0 = vmovn_u16(vceqq_u16(abs_row2, vdupq_n_u16(0)));
+  uint8x8_t abs_row3_eq0 = vmovn_u16(vceqq_u16(abs_row3, vdupq_n_u16(0)));
+  uint8x8_t abs_row4_eq0 = vmovn_u16(vceqq_u16(abs_row4, vdupq_n_u16(0)));
+  uint8x8_t abs_row5_eq0 = vmovn_u16(vceqq_u16(abs_row5, vdupq_n_u16(0)));
+  uint8x8_t abs_row6_eq0 = vmovn_u16(vceqq_u16(abs_row6, vdupq_n_u16(0)));
+  uint8x8_t abs_row7_eq0 = vmovn_u16(vceqq_u16(abs_row7, vdupq_n_u16(0)));
 
   /* { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 } */
   const uint8x8_t bitmap_mask =
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
index ea4668f1d3..28dbc57243 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
@@ -21,7 +21,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c b/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
index e4f91fdc0e..18fb9d8a55 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
@@ -21,7 +21,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c b/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
index 043b652e6c..d25112ef7f 100644
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
@@ -22,7 +22,6 @@
  */
 
 #define JPEG_INTERNALS
-#include "jconfigint.h"
 #include "../../jinclude.h"
 #include "../../jpeglib.h"
 #include "../../jsimd.h"
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
index c46d684436..af6418f0a6 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
@@ -2,7 +2,7 @@
 ; jccolext.asm - colorspace conversion (AVX2)
 ;
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -154,7 +154,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -278,7 +278,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -552,7 +552,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
index 6357a42b2c..dbec80e787 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
@@ -2,7 +2,7 @@
 ; jccolext.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_MMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -143,7 +143,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -211,7 +211,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -449,7 +449,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
index c6c80852ac..8d41145178 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jccolext.asm - colorspace conversion (SSE2)
 ;
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -48,15 +48,15 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -79,9 +79,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edx
     push        ebx
     push        edi
@@ -92,11 +92,11 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     mov         edi, JSAMPROW [edi]     ; outptr0
     mov         ebx, JSAMPROW [ebx]     ; outptr1
     mov         edx, JSAMPROW [edx]     ; outptr2
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -147,7 +147,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -232,7 +232,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_ycc_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -478,7 +478,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
     pop         edi
     pop         ebx
     pop         edx
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
index 14944e952f..3d6dfa6f8c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
@@ -1,7 +1,7 @@
 ;
 ; jccolor.asm - colorspace conversion (AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
 
 EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 8 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
index 8cb399bdc4..0527488500 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
@@ -2,7 +2,7 @@
 ; jccolor.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
 
 EXTN(jconst_rgb_ycc_convert_mmx):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 2 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 2 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
index 686d222ff7..ff6a2ecd13 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jccolor.asm - colorspace conversion (SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 4 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
index 560ee0c71e..564974f849 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
 
 EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
 PW_F0114_F0250 times 8 dw F_0_114, F_0_250
 PD_ONEHALF     times 8 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
index 79fdf082a8..e791ea4aa6 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
@@ -2,7 +2,7 @@
 ; jcgray.asm - grayscale colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
 
 EXTN(jconst_rgb_gray_convert_mmx):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
 PW_F0114_F0250 times 2 dw F_0_114, F_0_250
 PD_ONEHALF     times 2 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
index cb4b28e8f4..70c0177db3 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
 PW_F0114_F0250 times 4 dw F_0_114, F_0_250
 PD_ONEHALF     times 4 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
index 3fa7973d72..0fb284aaf9 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -146,7 +146,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -270,7 +270,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
     vmovdqu     ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -433,7 +433,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
index 8af42e5a33..1c69d38291 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
@@ -2,7 +2,7 @@
 ; jcgryext.asm - grayscale colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_MMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -135,7 +135,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmG, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -203,7 +203,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
     movq        mmF, MMWORD [esi+1*SIZEOF_MMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -330,7 +330,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
index c9d6ff1e35..f710816a44 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -48,15 +48,15 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [img_width(eax)]
     test        ecx, ecx
@@ -75,20 +75,20 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
-    pushpic     eax
+    PUSHPIC     eax
     push        edi
     push        esi
     push        ecx                     ; col
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr0
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 %if RGB_PIXELSIZE == 3  ; ---------------
 
@@ -139,7 +139,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -224,7 +224,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
     movdqu      xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
     jmp         short .rgb_gray_cnv
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqu      xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -359,7 +359,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
     pop         ecx                     ; col
     pop         esi
     pop         edi
-    poppic      eax
+    POPPIC      eax
 
     add         esi, byte SIZEOF_JSAMPROW  ; input_buf
     add         edi, byte SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
index 278cf5e83a..4adf5eb514 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2017, 2019, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2017, 2019, 2024, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -42,7 +42,7 @@ endstruc
 
 EXTN(jconst_huff_encode_one_block):
 
-    alignz      32
+    ALIGNZ      32
 
 jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
                dq 0x000f, 0x001f, 0x003f, 0x007f
@@ -65,7 +65,8 @@ times 1 <<  2 db  3
 times 1 <<  1 db  2
 times 1 <<  0 db  1
 times 1       db  0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
 times 1       db  0
 times 1 <<  0 db  1
 times 1 <<  1 db  2
@@ -83,14 +84,14 @@ times 1 << 12 db 13
 times 1 << 13 db 14
 times 1 << 14 db 15
 
-    alignz      32
+    ALIGNZ      32
 
 %ifdef PIC
 %define NBITS(x)      nbits_base + x
 %else
-%define NBITS(x)      jpeg_nbits_table + x
+%define NBITS(x)      EXTN(jpeg_nbits_table) + x
 %endif
-%define MASK_BITS(x)  NBITS((x) * 8) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x)  NBITS((x) * 8) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -235,7 +236,7 @@ times 1 << 14 db 15
 
 ; If PIC is defined, load the address of a symbol defined in this file into a
 ; register.  Equivalent to
-;   get_GOT     %1
+;   GET_GOT     %1
 ;   lea         %1, [GOTOFF(%1, %2)]
 ; without using the GOT.
 ;
@@ -469,7 +470,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     pcmpeqw     mm_all_0xff, mm_all_0xff                  ;Z:     all_0xff[i] = 0xFF;
 %endmacro
 
-    GET_SYM     nbits_base, jpeg_nbits_table, GET_SYM_BEFORE, GET_SYM_AFTER
+    GET_SYM     nbits_base, EXTN(jpeg_nbits_table), GET_SYM_BEFORE, GET_SYM_AFTER
 
     psrldq      xmm4, 1 * SIZEOF_WORD                     ;G: w4 = 37 44 45 38 39 46 47 --
     shufpd      xmm1, xmm5, 10b                           ;F: w1 = 36 37 44 45 50 51 58 59
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
index 0a20802dd8..3d40f1d9fb 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
@@ -3,7 +3,7 @@
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -70,7 +70,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -106,7 +106,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -117,7 +117,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r24:
     ; ecx can possibly be 8, 16, 24
@@ -141,7 +141,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
     vpxor       ymm1, ymm1, ymm1
     mov         ecx, SIZEOF_YMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -243,7 +243,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -279,7 +279,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -291,7 +291,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
     cmp         ecx, byte SIZEOF_YMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r24:
     cmp         ecx, 24
@@ -320,7 +320,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
     vpxor       ymm3, ymm3, ymm3
     mov         ecx, SIZEOF_YMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
index 2c223eebe8..38d5b322b6 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
@@ -2,7 +2,7 @@
 ; jcsample.asm - downsampling (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
 
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -212,7 +212,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -247,7 +247,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
     mov         edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; inptr0
     mov         esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; inptr1
     mov         edi, JSAMPROW [edi]                    ; outptr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [edx+0*SIZEOF_MMWORD]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
index 4fea60d2e2..26c5d7407e 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
@@ -2,7 +2,7 @@
 ; jcsample.asm - downsampling (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -115,14 +115,14 @@ EXTN(jsimd_h2v1_downsample_sse2):
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r8:
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
     pxor        xmm1, xmm1
     mov         ecx, SIZEOF_XMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -225,7 +225,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     cld
     mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .expandloop:
     push        eax
     push        ecx
@@ -260,7 +260,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
     mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        ecx
     push        edi
@@ -272,7 +272,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
 
     cmp         ecx, byte SIZEOF_XMMWORD
     jae         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_r8:
     movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
@@ -281,7 +281,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
     pxor        xmm3, xmm3
     mov         ecx, SIZEOF_XMMWORD
     jmp         short .downsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
index 015be0416c..53ea3128fc 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
@@ -2,7 +2,7 @@
 ; jdcolext.asm - colorspace conversion (AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -50,15 +50,15 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -81,7 +81,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -94,8 +94,8 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     vmovdqu     ymm5, YMMWORD [ebx]     ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -295,7 +295,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -436,7 +436,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     cmp         ecx, byte SIZEOF_YMMWORD/2
@@ -479,7 +479,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
index 5813cfcb66..d97faee004 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
@@ -2,7 +2,7 @@
 ; jdcolext.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
@@ -255,7 +255,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -344,7 +344,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     add         edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     cmp         ecx, byte SIZEOF_MMWORD/2
@@ -369,7 +369,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
index d5572b3294..682efc730f 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
@@ -2,7 +2,7 @@
 ; jdcolext.asm - colorspace conversion (SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [out_width(eax)]  ; num_cols
     test        ecx, ecx
@@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         eax, INT [num_rows(eax)]
     test        eax, eax
     jle         near .return
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax
     push        edi
@@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     mov         ebx, JSAMPROW [ebx]     ; inptr1
     mov         edx, JSAMPROW [edx]     ; inptr2
     mov         edi, JSAMPROW [edi]     ; outptr
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
-    alignx      16, 7
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm5, XMMWORD [ebx]     ; xmm5=Cb(0123456789ABCDEF)
@@ -275,7 +275,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -387,7 +387,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     cmp         ecx, byte SIZEOF_XMMWORD/2
@@ -423,7 +423,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 
 %endif  ; RGB_PIXELSIZE ; ---------------
 
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         ecx
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
index e05b60d001..0f9baf840c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
@@ -3,7 +3,7 @@
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
 
 EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
index fb7e7bcce4..21e833292c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
@@ -2,7 +2,7 @@
 ; jdcolor.asm - colorspace conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
 
 EXTN(jconst_ycc_rgb_convert_mmx):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
 PW_ONE          times 4 dw  1
 PD_ONEHALF      times 2 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
index b736255317..481d0e4c95 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
@@ -2,7 +2,7 @@
 ; jdcolor.asm - colorspace conversion (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
index 711e6792d0..00201dc419 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
@@ -2,7 +2,7 @@
 ; jdmerge.asm - merged upsampling/color conversion (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_avx2)
 
 EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
index 6e8311d408..be28c63f53 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
@@ -2,7 +2,7 @@
 ; jdmerge.asm - merged upsampling/color conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_mmx)
 
 EXTN(jconst_merged_upsample_mmx):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
 PW_ONE          times 4 dw  1
 PD_ONEHALF      times 2 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
index e32f90aa17..9b40a67dbd 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
@@ -2,7 +2,7 @@
 ; jdmerge.asm - merged upsampling/color conversion (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
index e35f7282bc..97988eb602 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
@@ -2,7 +2,7 @@
 ; jdmrgext.asm - merged upsampling/color conversion (AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -50,15 +50,15 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -79,9 +79,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     vmovdqu     ymm6, YMMWORD [ebx]     ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
     vmovdqu     ymm7, YMMWORD [edx]     ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -168,13 +168,13 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     vmovdqa     ymm0, YMMWORD [wk(1)]   ; ymm0=(R-Y)H
     vmovdqa     ymm2, YMMWORD [wk(2)]   ; ymm2=(G-Y)H
     vmovdqa     ymm4, YMMWORD [wk(0)]   ; ymm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     vmovdqu     ymm7, YMMWORD [esi]     ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -301,7 +301,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -445,7 +445,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     add         ebx, byte SIZEOF_YMMWORD  ; inptr1
     add         edx, byte SIZEOF_YMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st64:
     cmp         ecx, byte SIZEOF_YMMWORD/2
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
index eb3e36b475..79cee73dbd 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
@@ -2,7 +2,7 @@
 ; jdmrgext.asm - merged upsampling/color conversion (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -47,15 +47,15 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -76,9 +76,9 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     movq        mm6, MMWORD [ebx]       ; mm6=Cb(01234567)
     movq        mm7, MMWORD [edx]       ; mm7=Cr(01234567)
@@ -171,13 +171,13 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     movq        mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
     movq        mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
     movq        mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     movq        mm7, MMWORD [esi]       ; mm7=Y(01234567)
@@ -258,7 +258,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     add         ebx, byte SIZEOF_MMWORD                ; inptr1
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     lea         ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
@@ -350,7 +350,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
     add         ebx, byte SIZEOF_MMWORD                ; inptr1
     add         edx, byte SIZEOF_MMWORD                ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st16:
     cmp         ecx, byte SIZEOF_MMWORD/2
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
index c113dc4d27..331344358b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
@@ -2,7 +2,7 @@
 ; jdmrgext.asm - merged upsampling/color conversion (SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2012, 2016, D. R. Commander.
+; Copyright (C) 2012, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         ecx, JDIMENSION [output_width(eax)]  ; col
     test        ecx, ecx
@@ -78,9 +78,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
     pop         ecx                     ; col
 
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
-    movpic      eax, POINTER [gotptr]   ; load GOT address (eax)
+    MOVPIC      eax, POINTER [gotptr]   ; load GOT address (eax)
 
     movdqa      xmm6, XMMWORD [ebx]     ; xmm6=Cb(0123456789ABCDEF)
     movdqa      xmm7, XMMWORD [edx]     ; xmm7=Cr(0123456789ABCDEF)
@@ -173,13 +173,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
     mov         al, 2                   ; Yctr
     jmp         short .Yloop_1st
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_2nd:
     movdqa      xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
     movdqa      xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
     movdqa      xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .Yloop_1st:
     movdqa      xmm7, XMMWORD [esi]     ; xmm7=Y(0123456789ABCDEF)
@@ -280,7 +280,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     lea         ecx, [ecx+ecx*2]            ; imul ecx, RGB_PIXELSIZE
@@ -395,7 +395,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     add         ebx, byte SIZEOF_XMMWORD  ; inptr1
     add         edx, byte SIZEOF_XMMWORD  ; inptr2
     jmp         near .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .column_st32:
     cmp         ecx, byte SIZEOF_XMMWORD/2
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
index a800c35e08..b0507aa5d6 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
@@ -3,7 +3,7 @@
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 ; Copyright (C) 2015, Intel Corporation.
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -20,7 +20,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_avx2)
 
 EXTN(jconst_fancy_upsample_avx2):
@@ -31,7 +31,7 @@ PW_THREE times 16 dw 3
 PW_SEVEN times 16 dw 7
 PW_EIGHT times 16 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -62,13 +62,13 @@ PW_EIGHT times 16 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -81,7 +81,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     and         eax, byte -SIZEOF_YMMWORD
     cmp         eax, byte SIZEOF_YMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     vpcmpeqb    xmm6, xmm6, xmm6
@@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     vperm2i128  ymm6, ymm6, ymm6, 1             ; (---- ---- ... ---- ---- ff) MSB is ff
     vpand       ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     vmovdqu     ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
@@ -196,7 +196,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -234,15 +234,15 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -286,8 +286,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD]  ; ymm1=row[-1][0]
     vmovdqu     ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD]  ; ymm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpxor       ymm3, ymm3, ymm3        ; ymm3=(all 0's)
 
@@ -328,19 +328,19 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqa     YMMWORD [wk(0)], ymm1
     vmovdqa     YMMWORD [wk(1)], ymm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_YMMWORD-1
     and         eax, byte -SIZEOF_YMMWORD
     cmp         eax, byte SIZEOF_YMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpcmpeqb    xmm1, xmm1, xmm1
     vpslldq     xmm1, xmm1, (SIZEOF_XMMWORD-2)
@@ -353,7 +353,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqa     YMMWORD [wk(3)], ymm2          ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
 
     jmp         near .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -362,8 +362,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD]  ; ymm1=row[-1][1]
     vmovdqu     ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD]  ; ymm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     vpxor       ymm3, ymm3, ymm3        ; ymm3=(all 0's)
 
@@ -516,7 +516,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     vmovdqu     YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
     vmovdqu     YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_YMMWORD
     add         ecx, byte 1*SIZEOF_YMMWORD  ; inptr1(above)
@@ -590,7 +590,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -598,7 +598,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     cmp         eax, byte SIZEOF_YMMWORD
@@ -629,7 +629,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
     add         esi, byte SIZEOF_YMMWORD    ; inptr
     add         edi, byte 2*SIZEOF_YMMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -689,7 +689,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -698,7 +698,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     cmp         eax, byte SIZEOF_YMMWORD
@@ -734,7 +734,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
     add         ebx, 2*SIZEOF_YMMWORD     ; outptr0
     add         edi, 2*SIZEOF_YMMWORD     ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
index 12c49f0eab..6f70499c97 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
@@ -2,7 +2,7 @@
 ; jdsample.asm - upsampling (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -19,7 +19,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_mmx)
 
 EXTN(jconst_fancy_upsample_mmx):
@@ -30,7 +30,7 @@ PW_THREE times 4 dw 3
 PW_SEVEN times 4 dw 7
 PW_EIGHT times 4 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,13 +61,13 @@ PW_EIGHT times 4 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_mmx):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     and         eax, byte -SIZEOF_MMWORD
     cmp         eax, byte SIZEOF_MMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     pcmpeqb     mm6, mm6
     psllq       mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
     pand        mm6, MMWORD [esi+0*SIZEOF_MMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movq        mm6, MMWORD [esi+1*SIZEOF_MMWORD]
@@ -187,7 +187,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -224,15 +224,15 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -246,7 +246,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -276,8 +276,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        mm1, MMWORD [ecx+0*SIZEOF_MMWORD]  ; mm1=row[-1][0]
     movq        mm2, MMWORD [esi+0*SIZEOF_MMWORD]  ; mm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        mm3, mm3                ; mm3=(all 0's)
     movq        mm4, mm0
@@ -312,19 +312,19 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [wk(0)], mm1
     movq        MMWORD [wk(1)], mm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_MMWORD-1
     and         eax, byte -SIZEOF_MMWORD
     cmp         eax, byte SIZEOF_MMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pcmpeqb     mm1, mm1
     psllq       mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
@@ -337,7 +337,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [wk(3)], mm2
 
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -346,8 +346,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        mm1, MMWORD [ecx+1*SIZEOF_MMWORD]  ; mm1=row[-1][1]
     movq        mm2, MMWORD [esi+1*SIZEOF_MMWORD]  ; mm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        mm3, mm3                ; mm3=(all 0's)
     movq        mm4, mm0
@@ -486,7 +486,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
     movq        MMWORD [edi+0*SIZEOF_MMWORD], mm1
     movq        MMWORD [edi+1*SIZEOF_MMWORD], mm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_MMWORD
     add         ecx, byte 1*SIZEOF_MMWORD  ; inptr1(above)
@@ -561,7 +561,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -569,7 +569,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -599,7 +599,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
     add         esi, byte 2*SIZEOF_MMWORD  ; inptr
     add         edi, byte 4*SIZEOF_MMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -660,7 +660,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -669,7 +669,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -704,7 +704,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
     add         ebx, byte 4*SIZEOF_MMWORD  ; outptr0
     add         edi, byte 4*SIZEOF_MMWORD  ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
index 4e28d2f4b8..f68c5ea545 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
@@ -2,7 +2,7 @@
 ; jdsample.asm - upsampling (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -19,7 +19,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
@@ -30,7 +30,7 @@ PW_THREE times 8 dw 3
 PW_SEVEN times 8 dw 7
 PW_EIGHT times 8 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,13 +61,13 @@ PW_EIGHT times 8 dw 8
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     mov         eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
     test        eax, eax
@@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        edi
@@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     and         eax, byte -SIZEOF_XMMWORD
     cmp         eax, byte SIZEOF_XMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     pcmpeqb     xmm6, xmm6
     pslldq      xmm6, (SIZEOF_XMMWORD-1)
     pand        xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
     jmp         short .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     movdqa      xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
@@ -185,7 +185,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
@@ -223,15 +223,15 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     eax                     ; make a room for GOT address
+    PUSHPIC     eax                     ; make a room for GOT address
     push        ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
-    movpic      POINTER [gotptr], ebx   ; save GOT address
+    GET_GOT     ebx                     ; get GOT address
+    MOVPIC      POINTER [gotptr], ebx   ; save GOT address
 
     mov         edx, eax                ; edx = original ebp
     mov         eax, JDIMENSION [downsamp_width(edx)]  ; colctr
@@ -245,7 +245,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(edx)]    ; input_data
     mov         edi, POINTER [output_data_ptr(edx)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        eax                     ; colctr
     push        ecx
@@ -275,8 +275,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]  ; xmm1=row[-1][0]
     movdqa      xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]  ; xmm2=row[+1][0]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        xmm3, xmm3              ; xmm3=(all 0's)
     movdqa      xmm4, xmm0
@@ -311,19 +311,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [wk(0)], xmm1
     movdqa      XMMWORD [wk(1)], xmm2
 
-    poppic      ebx
+    POPPIC      ebx
 
     add         eax, byte SIZEOF_XMMWORD-1
     and         eax, byte -SIZEOF_XMMWORD
     cmp         eax, byte SIZEOF_XMMWORD
     ja          short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop_last:
     ; -- process the last column block
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pcmpeqb     xmm1, xmm1
     pslldq      xmm1, (SIZEOF_XMMWORD-2)
@@ -336,7 +336,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
 
     jmp         near .upsample
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .columnloop:
     ; -- process the next column block
@@ -345,8 +345,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]  ; xmm1=row[-1][1]
     movdqa      xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]  ; xmm2=row[+1][1]
 
-    pushpic     ebx
-    movpic      ebx, POINTER [gotptr]   ; load GOT address
+    PUSHPIC     ebx
+    MOVPIC      ebx, POINTER [gotptr]   ; load GOT address
 
     pxor        xmm3, xmm3              ; xmm3=(all 0's)
     movdqa      xmm4, xmm0
@@ -485,7 +485,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     movdqa      XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
     movdqa      XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
 
-    poppic      ebx
+    POPPIC      ebx
 
     sub         eax, byte SIZEOF_XMMWORD
     add         ecx, byte 1*SIZEOF_XMMWORD  ; inptr1(above)
@@ -558,7 +558,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -566,7 +566,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     mov         esi, JSAMPROW [esi]     ; inptr
     mov         edi, JSAMPROW [edi]     ; outptr
     mov         eax, edx                ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -596,7 +596,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     add         esi, byte 2*SIZEOF_XMMWORD  ; inptr
     add         edi, byte 4*SIZEOF_XMMWORD  ; outptr
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
@@ -655,7 +655,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     mov         esi, JSAMPARRAY [input_data(ebp)]    ; input_data
     mov         edi, POINTER [output_data_ptr(ebp)]
     mov         edi, JSAMPARRAY [edi]                ; output_data
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
     push        edi
     push        esi
@@ -664,7 +664,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     mov         ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; outptr0
     mov         edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; outptr1
     mov         eax, edx                               ; colctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -699,7 +699,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
     add         ebx, byte 4*SIZEOF_XMMWORD  ; outptr0
     add         edi, byte 4*SIZEOF_XMMWORD  ; outptr1
     jmp         short .columnloop
-    alignx      16, 7
+    ALIGNX      16, 7
 
 .nextrow:
     pop         esi
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
index 322ab16325..34af2bf0ba 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
@@ -2,7 +2,7 @@
 ; jfdctflt.asm - floating-point FDCT (3DNow!)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -24,7 +24,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_3dnow)
 
 EXTN(jconst_fdct_float_3dnow):
@@ -34,7 +34,7 @@ PD_0_707 times 2 dd 0.707106781186547524400844
 PD_0_541 times 2 dd 0.541196100146196984399723
 PD_1_306 times 2 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -63,19 +63,19 @@ EXTN(jsimd_fdct_float_3dnow):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -190,7 +190,7 @@ EXTN(jsimd_fdct_float_3dnow):
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -307,7 +307,7 @@ EXTN(jsimd_fdct_float_3dnow):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
index 86952c6499..d247094b64 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
@@ -2,7 +2,7 @@
 ; jfdctflt.asm - floating-point FDCT (SSE)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -34,7 +34,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
@@ -44,7 +44,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
 PD_0_541 times 4 dd 0.541196100146196984399723
 PD_1_306 times 4 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -74,19 +74,19 @@ EXTN(jsimd_fdct_float_sse):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movaps      xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -222,7 +222,7 @@ EXTN(jsimd_fdct_float_sse):
 
     mov         edx, POINTER [data(eax)]  ; (FAST_FLOAT *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movaps      xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -358,7 +358,7 @@ EXTN(jsimd_fdct_float_sse):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
index 80645a50d7..8c55a9876d 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
@@ -2,7 +2,7 @@
 ; jfdctfst.asm - fast integer FDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_mmx)
 
 EXTN(jconst_fdct_ifast_mmx):
@@ -59,7 +59,7 @@ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,19 +88,19 @@ EXTN(jsimd_fdct_ifast_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -241,7 +241,7 @@ EXTN(jsimd_fdct_ifast_mmx):
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -384,7 +384,7 @@ EXTN(jsimd_fdct_ifast_mmx):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
index 446fa7a68f..c1ba533d6d 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
@@ -2,7 +2,7 @@
 ; jfdctfst.asm - fast integer FDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +59,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -89,13 +89,13 @@ EXTN(jsimd_fdct_ifast_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -392,7 +392,7 @@ EXTN(jsimd_fdct_ifast_sse2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
index 23cf733135..21c3d5b223 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
@@ -2,7 +2,7 @@
 ; jfdctint.asm - accurate integer FDCT (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %1=(00 01 02 03 04 05 06 07  40 41 42 43 44 45 46 47)
     ; %2=(10 11 12 13 14 15 16 17  50 51 52 53 54 55 56 57)
     ; %3=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
@@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%8: Temp registers
 ; %9:    Pass (1 or 2)
 
-%macro dodct 9
+%macro DODCT 9
     vpsubw      %5, %1, %4              ; %5=data1_0-data6_7=tmp6_7
     vpaddw      %6, %1, %4              ; %6=data1_0+data6_7=tmp1_0
     vpaddw      %7, %2, %3              ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_avx2)
 
 EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +242,7 @@ PW_DESCALE_P2X             times 16 dw  1 << (PASS1_BITS - 1)
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -262,13 +262,13 @@ PW_1_NEG1                  times 8  dw  1
 EXTN(jsimd_fdct_islow_avx2):
     push        ebp
     mov         ebp, esp
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -292,9 +292,9 @@ EXTN(jsimd_fdct_islow_avx2):
     ; ymm2=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
     ; ymm3=(30 31 32 33 34 35 36 37  70 71 72 73 74 75 76 77)
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+    DODCT       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
 
     ; ---- Pass 2: process columns.
@@ -302,9 +302,9 @@ EXTN(jsimd_fdct_islow_avx2):
     vperm2i128  ymm4, ymm1, ymm3, 0x20  ; ymm4=data3_7
     vperm2i128  ymm1, ymm1, ymm3, 0x31  ; ymm1=data1_5
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+    DODCT       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
 
     vperm2i128 ymm3, ymm0, ymm1, 0x30   ; ymm3=data0_1
@@ -322,7 +322,7 @@ EXTN(jsimd_fdct_islow_avx2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     pop         ebp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
index 34a43b9e5e..c2f308ed3b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
@@ -2,7 +2,7 @@
 ; jfdctint.asm - accurate integer FDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_mmx)
 
 EXTN(jconst_fdct_islow_mmx):
@@ -80,7 +80,7 @@ PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 4 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -109,19 +109,19 @@ EXTN(jsimd_fdct_islow_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -363,7 +363,7 @@ EXTN(jsimd_fdct_islow_mmx):
 
     mov         edx, POINTER [data(eax)]  ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 
     movq        mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -609,7 +609,7 @@ EXTN(jsimd_fdct_islow_mmx):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
index 6f8e18cb9d..b6e679918d 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
@@ -2,7 +2,7 @@
 ; jfdctint.asm - accurate integer FDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +80,7 @@ PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -110,13 +110,13 @@ EXTN(jsimd_fdct_islow_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
 ;   push        esi                     ; unused
 ;   push        edi                     ; unused
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process rows.
 
@@ -622,7 +622,7 @@ EXTN(jsimd_fdct_islow_sse2):
 ;   pop         esi                     ; unused
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
index 87951910d8..1f696cb59b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
@@ -2,7 +2,7 @@
 ; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -24,7 +24,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_3dnow)
 
 EXTN(jconst_idct_float_3dnow):
@@ -36,7 +36,7 @@ PD_2_613        times 2 dd 2.613125929752753055713286
 PD_RNDINT_MAGIC times 2 dd 100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 8 db CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -78,7 +78,7 @@ EXTN(jsimd_idct_float_3dnow):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -87,21 +87,21 @@ EXTN(jsimd_idct_float_3dnow):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/2                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
     or          eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
     jnz         short .columnDCT
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
     mov         ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
     mov         eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
     or          ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
     or          eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
     or          ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
     or          eax, ebx
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
     jnz         short .columnDCT
 
     ; -- AC terms all zero
@@ -127,7 +127,7 @@ EXTN(jsimd_idct_float_3dnow):
     movq        MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
     movq        MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -293,7 +293,7 @@ EXTN(jsimd_idct_float_3dnow):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/2                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -420,14 +420,14 @@ EXTN(jsimd_idct_float_3dnow):
     punpckldq   mm6, mm4                ; mm6=(00 01 02 03 04 05 06 07)
     punpckhdq   mm7, mm4                ; mm7=(10 11 12 13 14 15 16 17)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 2*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 2*SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
index b27ecfdf46..daeef22afc 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
@@ -2,7 +2,7 @@
 ; jidctflt.asm - floating-point IDCT (SSE & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,18 +23,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse)
 
 EXTN(jconst_idct_float_sse):
@@ -46,7 +46,7 @@ PD_M2_613      times 4 dd -2.613125929752753055713286
 PD_0_125       times 4 dd  0.125        ; 1/8
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -149,7 +149,7 @@ EXTN(jsimd_idct_float_sse):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
     movaps      XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -325,11 +325,11 @@ EXTN(jsimd_idct_float_sse):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -340,11 +340,11 @@ EXTN(jsimd_idct_float_sse):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -372,7 +372,7 @@ EXTN(jsimd_idct_float_sse):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -536,7 +536,7 @@ EXTN(jsimd_idct_float_sse):
     punpckldq   mm5, mm6                ; mm5=(20 21 22 23 24 25 26 27)
     punpckhdq   mm4, mm6                ; mm4=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -547,7 +547,7 @@ EXTN(jsimd_idct_float_sse):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
index c646eaef76..c39ffbe71b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
@@ -2,7 +2,7 @@
 ; jidctflt.asm - floating-point IDCT (SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,18 +23,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
@@ -46,7 +46,7 @@ PD_M2_613       times 4  dd -2.613125929752753055713286
 PD_RNDINT_MAGIC times 4  dd  100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse2):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse2):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; FAST_FLOAT *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -150,7 +150,7 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
     movaps      XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -287,11 +287,11 @@ EXTN(jsimd_idct_float_sse2):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -302,11 +302,11 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -334,7 +334,7 @@ EXTN(jsimd_idct_float_sse2):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -464,7 +464,7 @@ EXTN(jsimd_idct_float_sse2):
     pshufd      xmm5, xmm6, 0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
     pshufd      xmm3, xmm7, 0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
@@ -475,7 +475,7 @@ EXTN(jsimd_idct_float_sse2):
     movq        XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
     movq        XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
index 24622d4369..19de457f78 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
@@ -2,7 +2,7 @@
 ; jidctfst.asm - fast integer IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))       ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_mmx)
 
 EXTN(jconst_idct_ifast_mmx):
@@ -67,7 +67,7 @@ PW_MF1613      times 4 dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 4 dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -109,7 +109,7 @@ EXTN(jsimd_idct_ifast_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -118,7 +118,7 @@ EXTN(jsimd_idct_ifast_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -163,7 +163,7 @@ EXTN(jsimd_idct_ifast_mmx):
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     movq        MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -326,7 +326,7 @@ EXTN(jsimd_idct_ifast_mmx):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -464,7 +464,7 @@ EXTN(jsimd_idct_ifast_mmx):
     punpckldq   mm5, mm4                ; mm5=(20 21 22 23 24 25 26 27)
     punpckhdq   mm1, mm4                ; mm1=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -475,7 +475,7 @@ EXTN(jsimd_idct_ifast_mmx):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_JCOEF     ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
index 19704ffa48..966311eda7 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
@@ -2,7 +2,7 @@
 ; jidctfst.asm - fast integer IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))       ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
@@ -67,7 +67,7 @@ PW_MF1613      times 8  dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 8  dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -101,13 +101,13 @@ EXTN(jsimd_idct_ifast_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -155,7 +155,7 @@ EXTN(jsimd_idct_ifast_sse2):
     movdqa      XMMWORD [wk(0)], xmm2   ; wk(0)=col1
     movdqa      XMMWORD [wk(1)], xmm0   ; wk(1)=col3
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -490,7 +490,7 @@ EXTN(jsimd_idct_ifast_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
index 199c7df3b6..dd4a3d5e8c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
@@ -2,7 +2,7 @@
 ; jidctint.asm - accurate integer IDCT (AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %5=(00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71)
     ; %6=(03 13 23 33 43 53 63 73  02 12 22 32 42 52 62 72)
     ; %7=(04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75)
@@ -118,7 +118,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%12: Temp registers
 ; %9:     Pass (1 or 2)
 
-%macro dodct 13
+%macro DODCT 13
     ; -- Even part
 
     ; (Original)
@@ -250,7 +250,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_avx2)
 
 EXTN(jconst_idct_islow_avx2):
@@ -269,7 +269,7 @@ PB_CENTERJSAMP             times 32 db  CENTERJSAMPLE
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -303,13 +303,13 @@ EXTN(jsimd_idct_islow_avx2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns.
 
@@ -353,7 +353,7 @@ EXTN(jsimd_idct_islow_avx2):
     vpshufd     ymm3, ymm4, 0xFF        ; ymm3=col3_7=(03 03 03 03 03 03 03 03  07 07 07 07 07 07 07 07)
 
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -371,10 +371,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm2, ymm5, ymm7, 0x20  ; ymm2=in2_6
     vperm2i128  ymm3, ymm7, ymm6, 0x31  ; ymm3=in7_5
 
-    dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
+    DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
 
 .column_end:
@@ -395,10 +395,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm4, ymm3, ymm1, 0x31  ; ymm3=in7_5
     vperm2i128  ymm1, ymm3, ymm1, 0x20  ; ymm1=in3_1
 
-    dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
+    DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
 
     vpacksswb   ymm0, ymm0, ymm1        ; ymm0=data01_45
@@ -442,7 +442,7 @@ EXTN(jsimd_idct_islow_avx2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
index f15c8d34bc..e2e1b3ff79 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
@@ -2,7 +2,7 @@
 ; jidctint.asm - accurate integer IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_mmx)
 
 EXTN(jconst_idct_islow_mmx):
@@ -80,7 +80,7 @@ PD_DESCALE_P1  times 2 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 2 dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -122,7 +122,7 @@ EXTN(jsimd_idct_islow_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -131,7 +131,7 @@ EXTN(jsimd_idct_islow_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -178,7 +178,7 @@ EXTN(jsimd_idct_islow_mmx):
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     movq        MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -513,7 +513,7 @@ EXTN(jsimd_idct_islow_mmx):
     mov         edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
     mov         eax, JDIMENSION [output_col(eax)]
     mov         ecx, DCTSIZE/4                     ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .rowloop:
 
     ; -- Even part
@@ -816,7 +816,7 @@ EXTN(jsimd_idct_islow_mmx):
     punpckldq   mm7, mm5                ; mm7=(20 21 22 23 24 25 26 27)
     punpckhdq   mm4, mm5                ; mm4=(30 31 32 33 34 35 36 37)
 
-    pushpic     ebx                     ; save GOT address
+    PUSHPIC     ebx                     ; save GOT address
 
     mov         edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
     mov         ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -827,7 +827,7 @@ EXTN(jsimd_idct_islow_mmx):
     movq        MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
     movq        MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
 
-    poppic      ebx                     ; restore GOT address
+    POPPIC      ebx                     ; restore GOT address
 
     add         esi, byte 4*SIZEOF_JCOEF     ; wsptr
     add         edi, byte 4*SIZEOF_JSAMPROW
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
index 43e320189b..42be940d72 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
@@ -2,7 +2,7 @@
 ; jidctint.asm - accurate integer IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, 2020, D. R. Commander.
+; Copyright (C) 2016, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
@@ -80,7 +80,7 @@ PD_DESCALE_P1  times 4  dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4  dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -114,13 +114,13 @@ EXTN(jsimd_idct_islow_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -172,7 +172,7 @@ EXTN(jsimd_idct_islow_sse2):
     movdqa      XMMWORD [wk(10)], xmm3  ; wk(10)=col5
     movdqa      XMMWORD [wk(11)], xmm4  ; wk(11)=col7
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -847,7 +847,7 @@ EXTN(jsimd_idct_islow_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
index e2307e1cb6..920dad90bd 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
@@ -2,7 +2,7 @@
 ; jidctred.asm - reduced-size IDCT (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_mmx)
 
 EXTN(jconst_idct_red_mmx):
@@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 2 dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 2 dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -124,13 +124,13 @@ EXTN(jsimd_idct_4x4_mmx):
     mov         [esp], eax
     mov         ebp, esp                    ; ebp = aligned ebp
     lea         esp, [workspace]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; need not be preserved
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input, store into work array.
 
@@ -139,7 +139,7 @@ EXTN(jsimd_idct_4x4_mmx):
     mov         esi, JCOEFPTR [coef_block(eax)]  ; inptr
     lea         edi, [workspace]                 ; JCOEF *wsptr
     mov         ecx, DCTSIZE/4                   ; ctr
-    alignx      16, 7
+    ALIGNX      16, 7
 .columnloop:
 %ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
     mov         eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -181,7 +181,7 @@ EXTN(jsimd_idct_4x4_mmx):
     movq        MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
     movq        MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
     jmp         near .nextcolumn
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -479,7 +479,7 @@ EXTN(jsimd_idct_4x4_mmx):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; need not be preserved
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
@@ -512,7 +512,7 @@ EXTN(jsimd_idct_2x2_mmx):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
index 6e56494e97..9a6f9946e7 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
@@ -2,7 +2,7 @@
 ; jidctred.asm - reduced-size IDCT (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
@@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 4  dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 4  dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -122,13 +122,13 @@ EXTN(jsimd_idct_4x4_sse2):
     mov         [esp], eax
     mov         ebp, esp                     ; ebp = aligned ebp
     lea         esp, [wk(0)]
-    pushpic     ebx
+    PUSHPIC     ebx
 ;   push        ecx                     ; unused
 ;   push        edx                     ; need not be preserved
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
@@ -171,7 +171,7 @@ EXTN(jsimd_idct_4x4_sse2):
     pshufd      xmm3, xmm3, 0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
 
     jmp         near .column_end
-    alignx      16, 7
+    ALIGNX      16, 7
 %endif
 .columnDCT:
 
@@ -400,7 +400,7 @@ EXTN(jsimd_idct_4x4_sse2):
     pop         esi
 ;   pop         edx                     ; need not be preserved
 ;   pop         ecx                     ; unused
-    poppic      ebx
+    POPPIC      ebx
     mov         esp, ebp                ; esp <- aligned ebp
     pop         esp                     ; esp <- original ebp
     pop         ebp
@@ -433,7 +433,7 @@ EXTN(jsimd_idct_2x2_sse2):
     push        esi
     push        edi
 
-    get_GOT     ebx                     ; get GOT address
+    GET_GOT     ebx                     ; get GOT address
 
     ; ---- Pass 1: process columns from input.
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
index 5cb60caa94..6436bad1ec 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
@@ -2,7 +2,7 @@
 ; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_3dnow):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -154,7 +154,7 @@ EXTN(jsimd_quantize_float_3dnow):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movq        mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movq        mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
index 61305c625d..e525ba9e7b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
@@ -2,7 +2,7 @@
 ; jquant.asm - sample data conversion and quantization (MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_mmx):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -157,10 +157,10 @@ EXTN(jsimd_quantize_mmx):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         ah, 2
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop1:
     mov         al, DCTSIZE2/8/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop2:
     movq        mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
     movq        mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
index 218adc976f..1cf2cc0ce5 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
@@ -2,7 +2,7 @@
 ; jquant.asm - sample data conversion and quantization (SSE & MMX)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -150,7 +150,7 @@ EXTN(jsimd_quantize_float_sse):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movaps      xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movaps      xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
index a881ab50f9..66efd3eeca 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
@@ -2,7 +2,7 @@
 ; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse2):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/2
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -127,7 +127,7 @@ EXTN(jsimd_quantize_float_sse2):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/16
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movaps      xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
     movaps      xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
index 0a509408aa..2a69af9c95 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample data conversion and quantization (SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_sse2):
     mov         eax, JDIMENSION [start_col]
     mov         edi, POINTER [workspace]       ; (DCTELEM *)
     mov         ecx, DCTSIZE/4
-    alignx      16, 7
+    ALIGNX      16, 7
 .convloop:
     mov         ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
     mov         edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; (JSAMPLE *)
@@ -133,7 +133,7 @@ EXTN(jsimd_quantize_sse2):
     mov         edx, POINTER [divisors]
     mov         edi, JCOEFPTR [coef_block]
     mov         eax, DCTSIZE2/32
-    alignx      16, 7
+    ALIGNX      16, 7
 .quantloop:
     movdqa      xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
     movdqa      xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
diff --git a/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
index 80bc821ff4..b429b0a532 100644
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_i386.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +21,6 @@
 #include "../../jdct.h"
 #include "../../jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -161,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -220,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -279,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -382,6 +388,9 @@ GLOBAL(void)
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -402,6 +411,9 @@ GLOBAL(void)
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -464,6 +476,9 @@ GLOBAL(void)
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -479,6 +494,9 @@ GLOBAL(void)
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -540,6 +558,9 @@ GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -558,6 +579,9 @@ GLOBAL(void)
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -626,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -684,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -788,6 +818,9 @@ GLOBAL(void)
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -800,6 +833,9 @@ GLOBAL(void)
 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -870,6 +906,9 @@ jsimd_can_fdct_float(void)
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else if (simd_support & JSIMD_SSE2)
@@ -881,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
 GLOBAL(void)
 jsimd_fdct_ifast(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
     jsimd_fdct_ifast_sse2(data);
   else
@@ -890,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
 GLOBAL(void)
 jsimd_fdct_float(FAST_FLOAT *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
     jsimd_fdct_float_sse(data);
   else if (simd_support & JSIMD_3DNOW)
@@ -945,6 +990,9 @@ jsimd_can_quantize_float(void)
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE2)
@@ -957,6 +1005,9 @@ GLOBAL(void)
 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
                      FAST_FLOAT *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_SSE2)
     jsimd_quantize_float_sse2(coef_block, divisors, workspace);
   else if (simd_support & JSIMD_SSE)
@@ -1020,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1032,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                JCOEFPTR coef_block, JSAMPARRAY output_buf,
                JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
     jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
                         output_col);
@@ -1126,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1142,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
     jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1155,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
     jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1212,7 +1278,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1238,7 +1304,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/3rdparty/libjpeg-turbo/src/simd/jsimd.h b/3rdparty/libjpeg-turbo/src/simd/jsimd.h
index 64747c6360..a28754adb9 100644
--- a/3rdparty/libjpeg-turbo/src/simd/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/simd/jsimd.h
@@ -2,10 +2,10 @@
  * simd/jsimd.h
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2011, 2014-2016, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2011, 2014-2016, 2018, 2020, 2022, D. R. Commander.
  * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
  * Copyright (C) 2014, Linaro Limited.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
  * Copyright (C) 2020, Arm Limited.
  *
@@ -1243,16 +1243,16 @@ EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl
 /* Progressive Huffman encoding */
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(void) jsimd_encode_mcu_AC_first_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *values, size_t *zerobits);
+   UJCOEF *values, size_t *zerobits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
 
 EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_neon
   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
-   JCOEF *absvalues, size_t *bits);
+   UJCOEF *absvalues, size_t *bits);
diff --git a/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c
index d2546eed32..c6e789aa2f 100644
--- a/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c
@@ -2,9 +2,9 @@
  * jsimd_mips.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2020, 2022, D. R. Commander.
  * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,11 +23,9 @@
 #include "../../jsimddct.h"
 #include "../jsimd.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
-static unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_support = ~0;
 
 #if !(defined(__mips_dsp) && (__mips_dsp_rev >= 2)) && defined(__linux__)
 
@@ -57,8 +55,6 @@ parse_proc_cpuinfo(const char *search_string)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -1128,7 +1124,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
 }
 
@@ -1141,7 +1137,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return 0;
 }
diff --git a/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c
index e8f1af562b..917440b43b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c
@@ -2,9 +2,9 @@
  * jsimd_mips64.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
  * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015, 2018, Matthieu Darbois.
+ * Copyright (C) 2015, 2018, 2022, Matthieu Darbois.
  * Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
@@ -24,11 +24,9 @@
 #include "../../jsimddct.h"
 #include "../jsimd.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
-static unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_support = ~0;
 
 #if defined(__linux__)
 
@@ -96,8 +94,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -851,7 +847,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
 }
 
@@ -864,7 +860,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return 0;
 }
diff --git a/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc b/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc
index e8d50b0349..b5341ed275 100644
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc
@@ -2,9 +2,10 @@
 ; jsimdext.inc - common declarations
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
+; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
 ;
@@ -75,6 +76,14 @@
 ; mark stack as non-executable
 section .note.GNU-stack noalloc noexec nowrite progbits
 
+%ifdef __CET__
+%ifdef __x86_64__
+section .note.gnu.property note alloc noexec align=8
+    dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
+    dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
+%endif
+%endif
+
 ; -- segment definition --
 ;
 %ifdef __x86_64__
@@ -271,7 +280,7 @@ const_base:
 
 %define GOTOFF(got, sym)  (got) + (sym) - const_base
 
-%imacro get_GOT 1
+%imacro GET_GOT 1
     ; NOTE: this macro destroys ecx resister.
     call        %%geteip
     add         ecx, byte (%%ref - $)
@@ -303,7 +312,7 @@ const_base:
 
 %define GOTOFF(got, sym)  (got) + (sym) wrt ..gotoff
 
-%imacro get_GOT 1
+%imacro GET_GOT 1
     extern      GOT_SYMBOL
     call        %%geteip
     add         %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
@@ -316,13 +325,13 @@ const_base:
 
 %endif    ; GOT_SYMBOL == _MACHO_PIC_ ----------------
 
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
     push        %1
 %endmacro
-%imacro poppic  1.nolist
+%imacro POPPIC  1.nolist
     pop         %1
 %endmacro
-%imacro movpic  2.nolist
+%imacro MOVPIC  2.nolist
     mov         %1, %2
 %endmacro
 
@@ -330,13 +339,13 @@ const_base:
 
 %define GOTOFF(got, sym)  (sym)
 
-%imacro get_GOT 1.nolist
+%imacro GET_GOT 1.nolist
 %endmacro
-%imacro pushpic 1.nolist
+%imacro PUSHPIC 1.nolist
 %endmacro
-%imacro poppic  1.nolist
+%imacro POPPIC  1.nolist
 %endmacro
-%imacro movpic  2.nolist
+%imacro MOVPIC  2.nolist
 %endmacro
 
 %endif   ;  PIC -----------------------------------------
@@ -348,7 +357,7 @@ const_base:
 %define MSKLE(x, y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
 %define FILLB(b, n)  (($$-(b)) & ((n)-1))
 
-%imacro alignx 1-2.nolist 0xFFFF
+%imacro ALIGNX 1-2.nolist 0xFFFF
 %%bs: \
   times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
         db 0x90                                      ; nop
@@ -370,7 +379,7 @@ const_base:
 
 ; Align the next data on {2,4,8,16,..}-byte boundary.
 ;
-%imacro alignz 1.nolist
+%imacro ALIGNZ 1.nolist
     align       %1, db 0                ; filling zeros
 %endmacro
 
@@ -378,7 +387,7 @@ const_base:
 
 %ifdef WIN64
 
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
     sub         rsp, SIZEOF_XMMWORD
     movaps      XMMWORD [rsp], xmm6
     sub         rsp, SIZEOF_XMMWORD
@@ -397,17 +406,17 @@ const_base:
 %endif
 %if %1 > 4
     push        r14
-    mov         r14, [rax+48]
+    mov         r14, [rbp+48]
 %endif
 %if %1 > 5
     push        r15
-    mov         r15, [rax+56]
+    mov         r15, [rbp+56]
 %endif
     push        rsi
     push        rdi
 %endmacro
 
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
     pop         rdi
     pop         rsi
 %if %1 > 5
@@ -428,7 +437,7 @@ const_base:
     add         rsp, SIZEOF_XMMWORD
 %endmacro
 
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
     sub         rsp, %1 * SIZEOF_XMMWORD
     movaps      XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
 %if %1 > 1
@@ -442,7 +451,7 @@ const_base:
 %endif
 %endmacro
 
-%imacro pop_xmm 1
+%imacro POP_XMM 1
     movaps      xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
 %if %1 > 1
     movaps      xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
@@ -458,7 +467,7 @@ const_base:
 
 %else
 
-%imacro collect_args 1
+%imacro COLLECT_ARGS 1
     push        r10
     mov         r10, rdi
 %if %1 > 1
@@ -483,7 +492,7 @@ const_base:
 %endif
 %endmacro
 
-%imacro uncollect_args 1
+%imacro UNCOLLECT_ARGS 1
 %if %1 > 5
     pop         r15
 %endif
@@ -502,16 +511,29 @@ const_base:
     pop         r10
 %endmacro
 
-%imacro push_xmm 1
+%imacro PUSH_XMM 1
 %endmacro
 
-%imacro pop_xmm 1
+%imacro POP_XMM 1
 %endmacro
 
 %endif
 
 %endif
 
+%ifdef __CET__
+
+%imacro ENDBR64 0
+    dd 0xfa1e0ff3
+%endmacro
+
+%else
+
+%imacro ENDBR64 0
+%endmacro
+
+%endif
+
 ; --------------------------------------------------------------------------
 ;  Defines picked up from the C headers
 ;
diff --git a/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c
index b9e86dcfac..461f603633 100644
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_powerpc.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014-2016, 2018, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -27,11 +27,12 @@
 #include "../../jsimddct.h"
 #include "../jsimd.h"
 
-#include <stdio.h>
-#include <string.h>
 #include <ctype.h>
 
-#if defined(__OpenBSD__)
+#if defined(__APPLE__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#elif defined(__OpenBSD__)
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <machine/cpu.h>
@@ -40,7 +41,7 @@
 #include <sys/auxv.h>
 #endif
 
-static unsigned int simd_support = ~0;
+static THREAD_LOCAL unsigned int simd_support = ~0;
 
 #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
 
@@ -108,8 +109,6 @@ parse_proc_cpuinfo(int bufsize)
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -121,6 +120,10 @@ init_simd(void)
   int bufsize = 1024; /* an initial guess for the line buffer size limit */
 #elif defined(__amigaos4__)
   uint32 altivec = 0;
+#elif defined(__APPLE__)
+  int mib[2] = { CTL_HW, HW_VECTORUNIT };
+  int altivec;
+  size_t len = sizeof(altivec);
 #elif defined(__OpenBSD__)
   int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
   int altivec;
@@ -134,7 +137,7 @@ init_simd(void)
 
   simd_support = 0;
 
-#if defined(__ALTIVEC__) || defined(__APPLE__)
+#if defined(__ALTIVEC__)
   simd_support |= JSIMD_ALTIVEC;
 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
   while (!parse_proc_cpuinfo(bufsize)) {
@@ -146,7 +149,7 @@ init_simd(void)
   IExec->GetCPUInfoTags(GCIT_VectorUnit, &altivec, TAG_DONE);
   if (altivec == VECTORTYPE_ALTIVEC)
     simd_support |= JSIMD_ALTIVEC;
-#elif defined(__OpenBSD__)
+#elif defined(__APPLE__) || defined(__OpenBSD__)
   if (sysctl(mib, 2, &altivec, &len, NULL, 0) == 0 && altivec != 0)
     simd_support |= JSIMD_ALTIVEC;
 #elif defined(__FreeBSD__)
@@ -862,7 +865,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
 }
 
@@ -875,7 +878,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return 0;
 }
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm
index ffb527db00..39e6f207ca 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm
@@ -1,9 +1,10 @@
 ;
 ; jccolext.asm - colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,21 +34,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  8
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
 
 EXTN(jsimd_rgb_ycc_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_YMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -548,9 +550,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm
index af70ed6010..2073988d33 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm
@@ -1,8 +1,9 @@
 ;
 ; jccolext.asm - colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,21 +33,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  8
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
 
 EXTN(jsimd_rgb_ycc_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -473,9 +475,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm
index 16b78298dc..1f069caad9 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm
@@ -1,7 +1,7 @@
 ;
 ; jccolor.asm - colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
 
 EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 8 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm
index e2955c2134..c0c1526d8c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jccolor.asm - colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
 
 EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS - 1)) - 1 + \
                             (CENTERJSAMPLE << SCALEBITS)
 PD_ONEHALF      times 4 dd  (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm
index 591255bb11..354683ca42 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
 
 EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
 PW_F0114_F0250 times 8 dw F_0_114, F_0_250
 PD_ONEHALF     times 8 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm
index e389904f2f..d27c4b9a82 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm
@@ -1,7 +1,7 @@
 ;
 ; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250)  ; FIX(0.58700) - FIX(0.25000)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
 
 EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
 PW_F0114_F0250 times 4 dw F_0_114, F_0_250
 PD_ONEHALF     times 4 dd (1 << (SCALEBITS - 1))
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm
index ddcc2c0a2f..d2ae6d63a4 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm
@@ -1,9 +1,10 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,21 +34,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
 
 EXTN(jsimd_rgb_gray_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_YMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -427,9 +429,9 @@ EXTN(jsimd_rgb_gray_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm
index f1d399a63b..3c2834e964 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm
@@ -1,8 +1,9 @@
 ;
 ; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
 ;
-; Copyright (C) 2011, 2016, D. R. Commander.
+; Copyright (C) 2011, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,21 +33,22 @@
 ; r13d = JDIMENSION output_row
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
 
 EXTN(jsimd_rgb_gray_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d
@@ -352,9 +354,9 @@ EXTN(jsimd_rgb_gray_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm
index 9ea6df946e..39aa24650c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm
@@ -1,9 +1,10 @@
 ;
 ; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
 ;
-; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
+; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023-2024, D. R. Commander.
 ; Copyright (C) 2015, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -38,7 +39,7 @@ endstruc
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_huff_encode_one_block)
 
 EXTN(jconst_huff_encode_one_block):
@@ -48,7 +49,7 @@ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
                dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
                dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
 
-    alignz      32
+    ALIGNZ      32
 
 times 1 << 14 db 15
 times 1 << 13 db 14
@@ -66,7 +67,8 @@ times 1 <<  2 db  3
 times 1 <<  1 db  2
 times 1 <<  0 db  1
 times 1       db  0
-jpeg_nbits_table:
+GLOBAL_DATA(jpeg_nbits_table)
+EXTN(jpeg_nbits_table):
 times 1       db  0
 times 1 <<  0 db  1
 times 1 <<  1 db  2
@@ -85,10 +87,10 @@ times 1 << 13 db 14
 times 1 << 14 db 15
 times 1 << 15 db 16
 
-    alignz      32
+    ALIGNZ      32
 
 %define NBITS(x)      nbits_base + x
-%define MASK_BITS(x)  NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
+%define MASK_BITS(x)  NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -208,15 +210,15 @@ times 1 << 15 db 16
 ; rax - buffer
 ; rbx - temp
 ; rcx - nbits
-; rdx - block --> free_bits
+; rdx - code
 ; rsi - nbits_base
 ; rdi - t
-; rbp - code
 ; r8  - dctbl --> code_temp
 ; r9  - actbl
 ; r10 - state
 ; r11 - index
 ; r12 - put_buffer
+; r15 - block --> free_bits
 
 %define buffer       rax
 %ifdef WIN64
@@ -231,12 +233,11 @@ times 1 << 15 db 16
 %define nbitsq       rcx
 %define nbits        ecx
 %define nbitsb       cl
-%define block        rdx
+%define codeq        rdx
+%define code         edx
 %define nbits_base   rsi
 %define t            rdi
 %define td           edi
-%define codeq        rbp
-%define code         ebp
 %define dctbl        r8
 %define actbl        r9
 %define state        r10
@@ -244,6 +245,7 @@ times 1 << 15 db 16
 %define indexd       r11d
 %define put_buffer   r12
 %define put_bufferd  r12d
+%define block        r15
 
 ; Step 1: Re-arrange input data according to jpeg_natural_order
 ; xx 01 02 03 04 05 06 07      xx 01 08 16 09 02 03 10
@@ -259,6 +261,9 @@ times 1 << 15 db 16
     GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
 
 EXTN(jsimd_huff_encode_one_block_sse2):
+    ENDBR64
+    push        rbp
+    mov         rbp, rsp
 
 %ifdef WIN64
 
@@ -266,15 +271,15 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 ; rdx = JOCTET *buffer
 ; r8 = JCOEFPTR block
 ; r9 = int last_dc_val
-; [rax+48] = c_derived_tbl *dctbl
-; [rax+56] = c_derived_tbl *actbl
+; [rbp+48] = c_derived_tbl *dctbl
+; [rbp+56] = c_derived_tbl *actbl
 
                                                           ;X: X = code stream
     mov         buffer, rdx
+    push        r15
     mov         block, r8
     movups      xmm3, XMMWORD [block + 0 * SIZEOF_WORD]   ;D: w3 = xx 01 02 03 04 05 06 07
     push        rbx
-    push        rbp
     movdqa      xmm0, xmm3                                ;A: w0 = xx 01 02 03 04 05 06 07
     push        rsi
     push        rdi
@@ -284,12 +289,10 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     movsx       code, word [block]                        ;Z:     code = block[0];
     pxor        xmm4, xmm4                                ;A: w4[i] = 0;
     sub         code, r9d                                 ;Z:     code -= last_dc_val;
-    mov         dctbl, POINTER [rsp+6*8+4*8]
-    mov         actbl, POINTER [rsp+6*8+5*8]
+    mov         dctbl, POINTER [rbp+48]
+    mov         actbl, POINTER [rbp+56]
     punpckldq   xmm0, xmm1                                ;A: w0 = xx 01 08 09 02 03 10 11
-    lea         nbits_base, [rel jpeg_nbits_table]
-    add         rsp, -DCTSIZE2 * SIZEOF_WORD
-    mov         t, rsp
+    lea         nbits_base, [rel EXTN(jpeg_nbits_table)]
 
 %else
 
@@ -301,23 +304,27 @@ EXTN(jsimd_huff_encode_one_block_sse2):
 ; r9 = c_derived_tbl *actbl
 
                                                           ;X: X = code stream
+    push        r15
+    mov         block, rdx
     movups      xmm3, XMMWORD [block + 0 * SIZEOF_WORD]   ;D: w3 = xx 01 02 03 04 05 06 07
     push        rbx
-    push        rbp
     movdqa      xmm0, xmm3                                ;A: w0 = xx 01 02 03 04 05 06 07
     push        r12
     mov         state, rdi
     mov         buffer, rsi
     movups      xmm1, XMMWORD [block + 8 * SIZEOF_WORD]   ;B: w1 = 08 09 10 11 12 13 14 15
     movsx       codeq, word [block]                       ;Z:     code = block[0];
-    lea         nbits_base, [rel jpeg_nbits_table]
+    lea         nbits_base, [rel EXTN(jpeg_nbits_table)]
     pxor        xmm4, xmm4                                ;A: w4[i] = 0;
     sub         codeq, rcx                                ;Z:     code -= last_dc_val;
     punpckldq   xmm0, xmm1                                ;A: w0 = xx 01 08 09 02 03 10 11
-    lea         t, [rsp - DCTSIZE2 * SIZEOF_WORD]         ;   use red zone for t_
 
 %endif
 
+    ; Allocate stack space for t array, and realign stack.
+    add         rsp, -DCTSIZE2 * SIZEOF_WORD - 8
+    mov         t, rsp
+
     pshuflw     xmm0, xmm0, 11001001b                     ;A: w0 = 01 08 xx 09 02 03 10 11
     pinsrw      xmm0, word [block + 16 * SIZEOF_WORD], 2  ;A: w0 = 01 08 16 09 02 03 10 11
     punpckhdq   xmm3, xmm1                                ;D: w3 = 04 05 12 13 06 07 14 15
@@ -443,9 +450,9 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     pinsrw      xmm5, word [block + 29 * SIZEOF_WORD], 7  ;E: w5 = 42 49 56 57 50 43 36 29
                                                           ;        (Row 4, offset 1)
 %undef block
-%define free_bitsq  rdx
-%define free_bitsd  edx
-%define free_bitsb  dl
+%define free_bitsq  r15
+%define free_bitsd  r15d
+%define free_bitsb  r15b
     pcmpeqw     xmm1, xmm0                                ;F: w1[i] = (w1[i] == 0 ? -1 : 0);
     shl         tempq, 48                                 ;Z:     temp <<= 48;
     pxor        xmm2, xmm2                                ;E: w2[i] = 0;
@@ -534,12 +541,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
     test        index, index
     jnz         .BLOOP                                    ;   } while (index != 0);
 .ELOOP:                                                   ; }  /* index != 0 */
-    sub         td, esp                                   ; t -= (WIN64: &t_[0], UNIX: &t_[64]);
-%ifdef WIN64
+    sub         td, esp                                   ; t -= &t_[0];
     cmp         td, (DCTSIZE2 - 2) * SIZEOF_WORD          ; if (t != 62)
-%else
-    cmp         td, -2 * SIZEOF_WORD                      ; if (t != -2)
-%endif
     je          .EFN                                      ; {
     movzx       nbits, byte [actbl + c_derived_tbl.ehufsi + 0]
                                                           ;   nbits = actbl->ehufsi[0];
@@ -556,18 +559,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
                                                           ; state->cur.put_buffer.simd = put_buffer;
     mov         byte [state + working_state.cur.free_bits], free_bitsb
                                                           ; state->cur.free_bits = free_bits;
-%ifdef WIN64
-    sub         rsp, -DCTSIZE2 * SIZEOF_WORD
+    sub         rsp, -DCTSIZE2 * SIZEOF_WORD - 8
     pop         r12
+%ifdef WIN64
     pop         rdi
     pop         rsi
-    pop         rbp
     pop         rbx
 %else
-    pop         r12
-    pop         rbp
     pop         rbx
 %endif
+    pop         r15
+    pop         rbp
     ret
 
 ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm
index 01b5c0235f..0e2740462e 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm
@@ -3,6 +3,8 @@
 ; (64-bit SSE2)
 ;
 ; Copyright (C) 2016, 2018, Matthieu Darbois
+; Copyright (C) 2023, Aliaksiej Kandracienka.
+; Copyright (C) 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -281,16 +283,13 @@
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [rbp - 16]
-    collect_args 6
-
-    movdqa      XMMWORD [rbp - 16], ZERO
+    sub         rsp, SIZEOF_XMMWORD
+    movdqa      XMMWORD [rsp], ZERO
+    COLLECT_ARGS 6
 
     movd        AL, r13d
     pxor        ZERO, ZERO
@@ -384,10 +383,9 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
 
     REDUCE0
 
-    movdqa      ZERO, XMMWORD [rbp - 16]
-    uncollect_args 6
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 6
+    movdqa      ZERO, XMMWORD [rsp]
+    mov         rsp, rbp
     pop         rbp
     ret
 
@@ -449,16 +447,13 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
     GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
 
 EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [rbp - 16]
-    collect_args 6
-
-    movdqa      XMMWORD [rbp - 16], ZERO
+    sub         rsp, SIZEOF_XMMWORD
+    movdqa      XMMWORD [rsp], ZERO
+    COLLECT_ARGS 6
 
     xor         SIGN, SIGN
     xor         EOB, EOB
@@ -606,10 +601,9 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
     REDUCE0
 
     mov         eax, EOB
-    movdqa      ZERO, XMMWORD [rbp - 16]
-    uncollect_args 6
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 6
+    movdqa      ZERO, XMMWORD [rsp]
+    mov         rsp, rbp
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm
index b32527aebe..fede6b38b4 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm
@@ -2,7 +2,7 @@
 ; jcsample.asm - downsampling (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -44,10 +44,10 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
 
 EXTN(jsimd_h2v1_downsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -178,7 +178,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
@@ -206,10 +206,10 @@ EXTN(jsimd_h2v1_downsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
 
 EXTN(jsimd_h2v2_downsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -358,7 +358,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm
index 2fcfe4567a..0a0ee65e5a 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm
@@ -2,7 +2,7 @@
 ; jcsample.asm - downsampling (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -43,10 +43,10 @@
     GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
 
 EXTN(jsimd_h2v1_downsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -160,7 +160,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
@@ -188,10 +188,10 @@ EXTN(jsimd_h2v1_downsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
 
 EXTN(jsimd_h2v2_downsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 6
+    COLLECT_ARGS 6
 
     mov         ecx, r13d
     shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -321,7 +321,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 6
+    UNCOLLECT_ARGS 6
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm
index 2370fda642..a8384cb560 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm
@@ -2,9 +2,10 @@
 ; jdcolext.asm - colorspace conversion (64-bit AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -34,21 +35,22 @@
 ; r13 = JSAMPARRAY output_buf
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
 
 EXTN(jsimd_ycc_rgb_convert_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (WK_NUM * SIZEOF_YMMWORD)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d               ; num_cols
@@ -485,9 +487,9 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm
index e07c8d7518..bfb59abf12 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm
@@ -2,8 +2,9 @@
 ; jdcolext.asm - colorspace conversion (64-bit SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,21 +34,22 @@
 ; r13 = JSAMPARRAY output_buf
 ; r14d = int num_rows
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
 
 EXTN(jsimd_ycc_rgb_convert_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 5
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 5
     push        rbx
 
     mov         ecx, r10d               ; num_cols
@@ -428,9 +430,9 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 5
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 5
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm
index 43de9db04d..4d52a0f16a 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm
@@ -2,7 +2,7 @@
 ; jdcolor.asm - colorspace conversion (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
 
 EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm
index b3f1fec07e..93d3c8dd4a 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm
@@ -2,7 +2,7 @@
 ; jdcolor.asm - colorspace conversion (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
 
 EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm
index 9515a17013..4be435624e 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm
@@ -2,7 +2,7 @@
 ; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_avx2)
 
 EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285
 PW_ONE          times 16 dw  1
 PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm
index aedccc20f6..a22f6ac733 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm
@@ -2,7 +2,7 @@
 ; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772)  ; FIX(2) - FIX(1.77200)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_merged_upsample_sse2)
 
 EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
 PW_ONE          times 8 dw  1
 PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm
index 8b264b4f03..3392f3a383 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm
@@ -2,9 +2,10 @@
 ; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -34,21 +35,22 @@
 ; r12d = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  3
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
 
 EXTN(jsimd_h2v1_merged_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, SIZEOF_YMMWORD * WK_NUM
+    COLLECT_ARGS 4
     push        rbx
 
     mov         ecx, r10d               ; col
@@ -479,9 +481,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -505,10 +507,10 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
 
 EXTN(jsimd_h2v2_merged_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r10d
@@ -587,7 +589,7 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
     add         rsp, SIZEOF_JSAMPARRAY*4
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm
index eb3ab9dbd9..901db984f9 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm
@@ -2,8 +2,9 @@
 ; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
 ;
 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,21 +34,22 @@
 ; r12d = JDIMENSION in_row_group_ctr
 ; r13 = JSAMPARRAY output_buf
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  3
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
 
 EXTN(jsimd_h2v1_merged_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
     push        rbx
 
     mov         ecx, r10d               ; col
@@ -421,9 +423,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -447,10 +449,10 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
 
 EXTN(jsimd_h2v2_merged_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r10d
@@ -529,7 +531,7 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
     add         rsp, SIZEOF_JSAMPARRAY*4
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm
index 1e4979f933..017427a158 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm
@@ -2,9 +2,10 @@
 ; jdsample.asm - upsampling (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2015, Intel Corporation.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +22,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_avx2)
 
 EXTN(jconst_fancy_upsample_avx2):
@@ -32,7 +33,7 @@ PW_THREE times 16 dw 3
 PW_SEVEN times 16 dw 7
 PW_EIGHT times 16 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -61,11 +62,11 @@ PW_EIGHT times 16 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v1_fancy_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    push_xmm    3
-    collect_args 4
+    PUSH_XMM    3
+    COLLECT_ARGS 4
 
     mov         eax, r11d               ; colctr
     test        rax, rax
@@ -186,8 +187,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 4
-    pop_xmm     3
+    UNCOLLECT_ARGS 4
+    POP_XMM     3
     pop         rbp
     ret
 
@@ -208,22 +209,23 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY *output_data_ptr
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
 %define WK_NUM  4
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
 
 EXTN(jsimd_h2v2_fancy_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
-    and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    push_xmm    3
-    collect_args 4
+    mov         rbp, rsp
+    push        r15
+    and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 128 bits
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_YMMWORD * WK_NUM)
+    PUSH_XMM    3
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r11d               ; colctr
@@ -498,10 +500,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
-    pop_xmm     3
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    POP_XMM     3
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -524,10 +526,10 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
 
 EXTN(jsimd_h2v1_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         edx, r11d
     add         rdx, byte (SIZEOF_YMMWORD-1)
@@ -590,7 +592,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
 
 .return:
     vzeroupper
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -613,10 +615,10 @@ EXTN(jsimd_h2v1_upsample_avx2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
 
 EXTN(jsimd_h2v2_upsample_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         edx, r11d
@@ -687,7 +689,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
 .return:
     pop         rbx
     vzeroupper
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm
index 38dbceec26..95c4d4c9ed 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm
@@ -2,8 +2,9 @@
 ; jdsample.asm - upsampling (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -20,7 +21,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fancy_upsample_sse2)
 
 EXTN(jconst_fancy_upsample_sse2):
@@ -31,7 +32,7 @@ PW_THREE times 8 dw 3
 PW_SEVEN times 8 dw 7
 PW_EIGHT times 8 dw 8
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -60,10 +61,10 @@ PW_EIGHT times 8 dw 8
     GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v1_fancy_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         eax, r11d               ; colctr
     test        rax, rax
@@ -174,7 +175,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
     jg          near .rowloop
 
 .return:
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -195,21 +196,22 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
 ; r12 = JSAMPARRAY input_data
 ; r13 = JSAMPARRAY *output_data_ptr
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  4
 
     align       32
     GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
 
 EXTN(jsimd_h2v2_fancy_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
     push        rbx
 
     mov         eax, r11d               ; colctr
@@ -472,9 +474,9 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -497,10 +499,10 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
 
 EXTN(jsimd_h2v1_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
 
     mov         edx, r11d
     add         rdx, byte (2*SIZEOF_XMMWORD)-1
@@ -561,7 +563,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
     jg          short .rowloop
 
 .return:
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
@@ -584,10 +586,10 @@ EXTN(jsimd_h2v1_upsample_sse2):
     GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
 
 EXTN(jsimd_h2v2_upsample_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     mov         edx, r11d
@@ -656,7 +658,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
 
 .return:
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm
index ef2796649b..cf46d93d61 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm
@@ -2,7 +2,8 @@
 ; jfdctflt.asm - floating-point FDCT (64-bit SSE)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -34,7 +35,7 @@
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_float_sse)
 
 EXTN(jconst_fdct_float_sse):
@@ -44,7 +45,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
 PD_0_541 times 4 dd 0.541196100146196984399723
 PD_1_306 times 4 dd 1.306562964876376527856643
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -58,21 +59,22 @@ PD_1_306 times 4 dd 1.306562964876376527856643
 
 ; r10 = FAST_FLOAT *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_float_sse)
 
 EXTN(jsimd_fdct_float_sse):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -344,9 +346,9 @@ EXTN(jsimd_fdct_float_sse):
     dec         rcx
     jnz         near .columnloop
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm
index 2e1bfe6e8c..cdc6236585 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm
@@ -2,7 +2,8 @@
 ; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,7 +50,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS)  ; FIX(1.306562965)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_ifast_sse2)
 
 EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +60,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
 PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
 PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -73,21 +74,22 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
 
 ; r10 = DCTELEM *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  2
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
 
 EXTN(jsimd_fdct_ifast_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -378,9 +380,9 @@ EXTN(jsimd_fdct_ifast_sse2):
     movdqa      XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
     movdqa      XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm
index e56258b48a..b6b4c73a50 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm
@@ -2,7 +2,7 @@
 ; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %1=(00 01 02 03 04 05 06 07  40 41 42 43 44 45 46 47)
     ; %2=(10 11 12 13 14 15 16 17  50 51 52 53 54 55 56 57)
     ; %3=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
@@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%8: Temp registers
 ; %9:    Pass (1 or 2)
 
-%macro dodct 9
+%macro DODCT 9
     vpsubw      %5, %1, %4              ; %5=data1_0-data6_7=tmp6_7
     vpaddw      %6, %1, %4              ; %6=data1_0+data6_7=tmp1_0
     vpaddw      %7, %2, %3              ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_avx2)
 
 EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +242,7 @@ PW_DESCALE_P2X             times 16 dw  1 << (PASS1_BITS - 1)
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -260,10 +260,10 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
 
 EXTN(jsimd_fdct_islow_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 1
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -285,9 +285,9 @@ EXTN(jsimd_fdct_islow_avx2):
     ; ymm2=(20 21 22 23 24 25 26 27  60 61 62 63 64 65 66 67)
     ; ymm3=(30 31 32 33 34 35 36 37  70 71 72 73 74 75 76 77)
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
+    DODCT       ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
 
     ; ---- Pass 2: process columns.
@@ -295,9 +295,9 @@ EXTN(jsimd_fdct_islow_avx2):
     vperm2i128  ymm4, ymm1, ymm3, 0x20  ; ymm4=data3_7
     vperm2i128  ymm1, ymm1, ymm3, 0x31  ; ymm1=data1_5
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
 
-    dodct       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
+    DODCT       ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
     ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
 
     vperm2i128 ymm3, ymm0, ymm1, 0x30   ; ymm3=data0_1
@@ -311,7 +311,7 @@ EXTN(jsimd_fdct_islow_avx2):
     vmovdqu     YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
 
     vzeroupper
-    uncollect_args 1
+    UNCOLLECT_ARGS 1
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm
index ec1f383ccb..44e7cd0554 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm
@@ -2,7 +2,8 @@
 ; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +64,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_fdct_islow_sse2)
 
 EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +81,7 @@ PD_DESCALE_P1  times 4 dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4 dd  1 << (DESCALE_P2 - 1)
 PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -94,21 +95,22 @@ PW_DESCALE_P2X times 8 dw  1 << (PASS1_BITS - 1)
 
 ; r10 = DCTELEM *data
 
-%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
+%define wk(i)   r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD  ; xmmword wk[WK_NUM]
 %define WK_NUM  6
 
     align       32
     GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
 
 EXTN(jsimd_fdct_islow_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 1
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 1
 
     ; ---- Pass 1: process rows.
 
@@ -608,9 +610,9 @@ EXTN(jsimd_fdct_islow_sse2):
     movdqa      XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
     movdqa      XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
 
-    uncollect_args 1
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 1
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm
index 60bf961896..c7cb39a072 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm
@@ -2,8 +2,9 @@
 ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -24,18 +25,18 @@
 
 ; --------------------------------------------------------------------------
 
-%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+%macro UNPCKLPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
     shufps      %1, %2, 0x44
 %endmacro
 
-%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+%macro UNPCKHPS2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
     shufps      %1, %2, 0xEE
 %endmacro
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_float_sse2)
 
 EXTN(jconst_idct_float_sse2):
@@ -47,7 +48,7 @@ PD_M2_613       times 4  dd -2.613125929752753055713286
 PD_RNDINT_MAGIC times 4  dd  100663296.0  ; (float)(0x00C00000 << 3)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -65,8 +66,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 %define workspace     wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
@@ -76,14 +76,15 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_float_sse2)
 
 EXTN(jsimd_idct_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
     lea         rsp, [workspace]
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     ; ---- Pass 1: process columns from input, store into work array.
@@ -280,11 +281,11 @@ EXTN(jsimd_idct_float_sse2):
     unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
 
     movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
-    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
-    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    UNPCKLPS2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    UNPCKHPS2   xmm3, xmm7              ; xmm3=(01 11 21 31)
     movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
-    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
-    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+    UNPCKLPS2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    UNPCKHPS2   xmm0, xmm2              ; xmm0=(03 13 23 33)
 
     movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
     movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
@@ -295,11 +296,11 @@ EXTN(jsimd_idct_float_sse2):
     movaps      XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
 
     movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
-    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
-    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    UNPCKLPS2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    UNPCKHPS2   xmm6, xmm7              ; xmm6=(41 51 61 71)
     movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
-    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
-    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+    UNPCKLPS2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    UNPCKHPS2   xmm3, xmm2              ; xmm3=(43 53 63 73)
 
     movaps      XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
     movaps      XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
@@ -322,7 +323,6 @@ EXTN(jsimd_idct_float_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     lea         rsi, [workspace]        ; FAST_FLOAT *wsptr
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
@@ -471,9 +471,9 @@ EXTN(jsimd_idct_float_sse2):
     jnz         near .rowloop
 
     pop         rbx
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm
index cb97fdfbb2..fd3bc32c16 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm
@@ -2,8 +2,9 @@
 ; jidctfst.asm - fast integer IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -57,7 +58,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS))         ; FIX(2.613125930) - FIX(1)
 %define PRE_MULTIPLY_SCALE_BITS  2
 %define CONST_SHIFT              (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_ifast_sse2)
 
 EXTN(jconst_idct_ifast_sse2):
@@ -68,7 +69,7 @@ PW_MF1613      times 8  dw -F_1_613 << CONST_SHIFT
 PW_F1082       times 8  dw  F_1_082 << CONST_SHIFT
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -86,8 +87,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 
@@ -95,14 +95,15 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
 
 EXTN(jsimd_idct_ifast_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -320,7 +321,6 @@ EXTN(jsimd_idct_ifast_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -479,9 +479,9 @@ EXTN(jsimd_idct_ifast_sse2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
     ret
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm
index ca7e317f6e..84d125bd43 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm
@@ -2,7 +2,7 @@
 ; jidctint.asm - accurate integer IDCT (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -66,7 +66,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %1-%4: Input/output registers
 ; %5-%8: Temp registers
 
-%macro dotranspose 8
+%macro DOTRANSPOSE 8
     ; %5=(00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71)
     ; %6=(03 13 23 33 43 53 63 73  02 12 22 32 42 52 62 72)
     ; %7=(04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75)
@@ -119,7 +119,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; %5-%12: Temp registers
 ; %9:     Pass (1 or 2)
 
-%macro dodct 13
+%macro DODCT 13
     ; -- Even part
 
     ; (Original)
@@ -241,7 +241,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_avx2)
 
 EXTN(jconst_idct_islow_avx2):
@@ -260,7 +260,7 @@ PB_CENTERJSAMP             times 32 db  CENTERJSAMPLE
 PW_1_NEG1                  times 8  dw  1
                            times 8  dw -1
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -282,11 +282,11 @@ PW_1_NEG1                  times 8  dw  1
     GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
 
 EXTN(jsimd_idct_islow_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
     mov         rbp, rsp                     ; rbp = aligned rbp
-    push_xmm    4
-    collect_args 4
+    PUSH_XMM    4
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns.
 
@@ -343,10 +343,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm2, ymm5, ymm7, 0x20  ; ymm2=in2_6
     vperm2i128  ymm3, ymm7, ymm6, 0x31  ; ymm3=in7_5
 
-    dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
+    DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
 
 .column_end:
@@ -363,10 +363,10 @@ EXTN(jsimd_idct_islow_avx2):
     vperm2i128  ymm4, ymm3, ymm1, 0x31  ; ymm3=in7_5
     vperm2i128  ymm1, ymm3, ymm1, 0x20  ; ymm1=in3_1
 
-    dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
+    DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
     ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
 
-    dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
+    DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
     ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
 
     vpacksswb   ymm0, ymm0, ymm1        ; ymm0=data01_45
@@ -408,8 +408,8 @@ EXTN(jsimd_idct_islow_avx2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
 
-    uncollect_args 4
-    pop_xmm     4
+    UNCOLLECT_ARGS 4
+    POP_XMM     4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm
index 7aa869bc0b..3f098b2c50 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm
@@ -2,8 +2,9 @@
 ; jidctint.asm - accurate integer IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
+; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -64,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS)  ; FIX(3.072711026)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_islow_sse2)
 
 EXTN(jconst_idct_islow_sse2):
@@ -81,7 +82,7 @@ PD_DESCALE_P1  times 4  dd  1 << (DESCALE_P1 - 1)
 PD_DESCALE_P2  times 4  dd  1 << (DESCALE_P2 - 1)
 PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -99,8 +100,7 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        12
 
@@ -108,14 +108,15 @@ PB_CENTERJSAMP times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
 
 EXTN(jsimd_idct_islow_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -512,7 +513,6 @@ EXTN(jsimd_idct_islow_sse2):
 
     ; ---- Pass 2: process rows from work array, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -836,9 +836,9 @@ EXTN(jsimd_idct_islow_sse2):
     movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
     movq        XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm
index 4ece9d891c..2657cf3cb1 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm
@@ -2,8 +2,9 @@
 ; jidctred.asm - reduced-size IDCT (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -70,7 +71,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
 ; --------------------------------------------------------------------------
     SECTION     SEG_CONST
 
-    alignz      32
+    ALIGNZ      32
     GLOBAL_DATA(jconst_idct_red_sse2)
 
 EXTN(jconst_idct_red_sse2):
@@ -88,7 +89,7 @@ PD_DESCALE_P1_2 times 4  dd  1 << (DESCALE_P1_2 - 1)
 PD_DESCALE_P2_2 times 4  dd  1 << (DESCALE_P2_2 - 1)
 PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 
-    alignz      32
+    ALIGNZ      32
 
 ; --------------------------------------------------------------------------
     SECTION     SEG_TEXT
@@ -107,8 +108,7 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
 ; r12 = JSAMPARRAY output_buf
 ; r13d = JDIMENSION output_col
 
-%define original_rbp  rbp + 0
-%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+%define wk(i)         r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
                                         ; xmmword wk[WK_NUM]
 %define WK_NUM        2
 
@@ -116,14 +116,15 @@ PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
     GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
 
 EXTN(jsimd_idct_4x4_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp                     ; rax = original rbp
-    sub         rsp, byte 4
+    mov         rbp, rsp
+    push        r15
     and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
-    mov         [rsp], rax
-    mov         rbp, rsp                     ; rbp = aligned rbp
-    lea         rsp, [wk(0)]
-    collect_args 4
+    ; Allocate stack space for wk array.  r15 is used to access it.
+    mov         r15, rsp
+    sub         rsp, byte (SIZEOF_XMMWORD * WK_NUM)
+    COLLECT_ARGS 4
 
     ; ---- Pass 1: process columns from input.
 
@@ -309,7 +310,6 @@ EXTN(jsimd_idct_4x4_sse2):
 
     ; ---- Pass 2: process rows, store into output array.
 
-    mov         rax, [original_rbp]
     mov         rdi, r12                ; (JSAMPROW *)
     mov         eax, r13d
 
@@ -389,9 +389,9 @@ EXTN(jsimd_idct_4x4_sse2):
     movd        XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
     movd        XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
 
-    uncollect_args 4
-    mov         rsp, rbp                ; rsp <- aligned rbp
-    pop         rsp                     ; rsp <- original rbp
+    UNCOLLECT_ARGS 4
+    lea         rsp, [rbp-8]
+    pop         r15
     pop         rbp
     ret
 
@@ -414,10 +414,10 @@ EXTN(jsimd_idct_4x4_sse2):
     GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
 
 EXTN(jsimd_idct_2x2_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 4
+    COLLECT_ARGS 4
     push        rbx
 
     ; ---- Pass 1: process columns from input.
@@ -565,7 +565,7 @@ EXTN(jsimd_idct_2x2_sse2):
     mov         word [rsi+rax*SIZEOF_JSAMPLE], cx
 
     pop         rbx
-    uncollect_args 4
+    UNCOLLECT_ARGS 4
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm
index ab2e3954f6..8bd79662e6 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm
@@ -2,7 +2,7 @@
 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -37,10 +37,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
 
 EXTN(jsimd_convsamp_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
     push        rbx
 
     pcmpeqw     xmm7, xmm7
@@ -89,7 +89,7 @@ EXTN(jsimd_convsamp_float_sse2):
     jnz         short .convloop
 
     pop         rbx
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -110,10 +110,10 @@ EXTN(jsimd_convsamp_float_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
 
 EXTN(jsimd_quantize_float_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         rsi, r12
     mov         rdx, r11
@@ -146,7 +146,7 @@ EXTN(jsimd_quantize_float_sse2):
     dec         rax
     jnz         short .quantloop
 
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm
index 70fe81139c..c8ebd7966b 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, 2018, D. R. Commander.
+; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
 ; Copyright (C) 2016, Matthieu Darbois.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
@@ -38,10 +38,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_avx2)
 
 EXTN(jsimd_convsamp_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         eax, r11d
 
@@ -84,7 +84,7 @@ EXTN(jsimd_convsamp_avx2):
     vmovdqu     YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -116,10 +116,10 @@ EXTN(jsimd_convsamp_avx2):
     GLOBAL_FUNCTION(jsimd_quantize_avx2)
 
 EXTN(jsimd_quantize_avx2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     vmovdqu     ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
     vmovdqu     ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
@@ -154,7 +154,7 @@ EXTN(jsimd_quantize_avx2):
     vmovdqu     [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
 
     vzeroupper
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm
index 3ee442027a..352d74055c 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm
@@ -2,7 +2,7 @@
 ; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
-; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2009, 2016, 2024, D. R. Commander.
 ; Copyright (C) 2018, Matthias Räncker.
 ;
 ; Based on the x86 SIMD extension for IJG JPEG library
@@ -37,10 +37,10 @@
     GLOBAL_FUNCTION(jsimd_convsamp_sse2)
 
 EXTN(jsimd_convsamp_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
     push        rbx
 
     pxor        xmm6, xmm6              ; xmm6=(all 0's)
@@ -84,7 +84,7 @@ EXTN(jsimd_convsamp_sse2):
     jnz         short .convloop
 
     pop         rbx
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
@@ -116,10 +116,10 @@ EXTN(jsimd_convsamp_sse2):
     GLOBAL_FUNCTION(jsimd_quantize_sse2)
 
 EXTN(jsimd_quantize_sse2):
+    ENDBR64
     push        rbp
-    mov         rax, rsp
     mov         rbp, rsp
-    collect_args 3
+    COLLECT_ARGS 3
 
     mov         rsi, r12
     mov         rdx, r11
@@ -179,7 +179,7 @@ EXTN(jsimd_quantize_sse2):
     dec         rax
     jnz         near .quantloop
 
-    uncollect_args 3
+    UNCOLLECT_ARGS 3
     pop         rbp
     ret
 
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c
index 584a010ad3..3f5ee77eb9 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c
@@ -2,8 +2,8 @@
  * jsimd_x86_64.c
  *
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
- * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -21,7 +21,6 @@
 #include "../../jdct.h"
 #include "../../jsimddct.h"
 #include "../jsimd.h"
-#include "jconfigint.h"
 
 /*
  * In the PIC cases, we have no guarantee that constants will keep
@@ -32,13 +31,11 @@
 #define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
 #define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
 
-static unsigned int simd_support = (unsigned int)(~0);
-static unsigned int simd_huffman = 1;
+static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
+static THREAD_LOCAL unsigned int simd_huffman = 1;
 
 /*
  * Check what SIMD accelerations are supported.
- *
- * FIXME: This code is racy under a multi-threaded environment.
  */
 LOCAL(void)
 init_simd(void)
@@ -148,6 +145,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -197,6 +197,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->in_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -246,6 +249,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -336,6 +342,9 @@ GLOBAL(void)
 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -352,6 +361,9 @@ GLOBAL(void)
 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
                       JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
                                compptr->v_samp_factor,
@@ -406,6 +418,9 @@ GLOBAL(void)
 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -418,6 +433,9 @@ GLOBAL(void)
 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
                              input_data, output_data_ptr);
@@ -472,6 +490,9 @@ GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -486,6 +507,9 @@ GLOBAL(void)
 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
                                    compptr->downsampled_width, input_data,
@@ -545,6 +569,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -593,6 +620,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
 
+  if (simd_support == ~0U)
+    init_simd();
+
   switch (cinfo->out_color_space) {
   case JCS_EXT_RGB:
     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -682,6 +712,9 @@ GLOBAL(void)
 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
                DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_convsamp_avx2(sample_data, start_col, workspace);
   else
@@ -751,6 +784,9 @@ jsimd_can_fdct_float(void)
 GLOBAL(void)
 jsimd_fdct_islow(DCTELEM *data)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_fdct_islow_avx2(data);
   else
@@ -812,6 +848,9 @@ jsimd_can_quantize_float(void)
 GLOBAL(void)
 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_quantize_avx2(coef_block, divisors, workspace);
   else
@@ -966,6 +1005,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
                  JDIMENSION output_col)
 {
+  if (simd_support == ~0U)
+    init_simd();
+
   if (simd_support & JSIMD_AVX2)
     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
                           output_col);
@@ -1036,7 +1078,7 @@ jsimd_can_encode_mcu_AC_first_prepare(void)
 GLOBAL(void)
 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
                                   const int *jpeg_natural_order_start, int Sl,
-                                  int Al, JCOEF *values, size_t *zerobits)
+                                  int Al, UJCOEF *values, size_t *zerobits)
 {
   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
                                          Sl, Al, values, zerobits);
@@ -1060,7 +1102,7 @@ jsimd_can_encode_mcu_AC_refine_prepare(void)
 GLOBAL(int)
 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
                                    const int *jpeg_natural_order_start, int Sl,
-                                   int Al, JCOEF *absvalues, size_t *bits)
+                                   int Al, UJCOEF *absvalues, size_t *bits)
 {
   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
                                                  jpeg_natural_order_start,
diff --git a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm
index 705f813d7d..251bc4cdae 100644
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm
@@ -3,6 +3,7 @@
 ;
 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 ; Copyright (C) 2016, D. R. Commander.
+; Copyright (C) 2023, Aliaksiej Kandracienka.
 ;
 ; Based on
 ; x86 SIMD extension for IJG JPEG library
@@ -31,6 +32,8 @@
     GLOBAL_FUNCTION(jpeg_simd_cpu_support)
 
 EXTN(jpeg_simd_cpu_support):
+    push        rbp
+    mov         rbp, rsp
     push        rbx
     push        rdi
 
@@ -79,6 +82,7 @@ EXTN(jpeg_simd_cpu_support):
 
     pop         rdi
     pop         rbx
+    pop         rbp
     ret
 
 ; For some reason, the OS X linker does not honor the request to align the
diff --git a/3rdparty/libjpeg-turbo/src/structure.txt b/3rdparty/libjpeg-turbo/src/structure.txt
new file mode 100644
index 0000000000..030b8e8801
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/structure.txt
@@ -0,0 +1,981 @@
+IJG JPEG LIBRARY:  SYSTEM ARCHITECTURE
+
+This file was part of the Independent JPEG Group's software:
+Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+Lossless JPEG Modifications:
+Copyright (C) 1999, Ken Murchison.
+libjpeg-turbo Modifications:
+Copyright (C) 2022-2023, D. R. Commander.
+For conditions of distribution and use, see the accompanying README.ijg file.
+
+
+This file provides an overview of the architecture of the IJG JPEG software;
+that is, the functions of the various modules in the system and the interfaces
+between modules.  For more precise details about any data structure or calling
+convention, see the include files and comments in the source code.
+
+We assume that the reader is already somewhat familiar with the JPEG standard.
+The README.ijg file includes references for learning about JPEG.  The file
+libjpeg.txt describes the library from the viewpoint of an application
+programmer using the library; it's best to read that file before this one.
+Also, the file coderules.txt describes the coding style conventions we use.
+
+In this document, JPEG-specific terminology follows the JPEG standard:
+  A "component" means a color channel, e.g., Red or Luminance.
+  A "sample" is a single component value (i.e., one number in the image data).
+  A "coefficient" is a frequency coefficient (a DCT transform output number).
+  A "block" is an 8x8 group of samples or coefficients.
+  A "data unit" is an abstract data type that is either a block for lossy
+        (DCT-based) codecs or a sample for lossless (predictive) codecs.
+  An "MCU" (minimum coded unit) is an interleaved set of data units of size
+        determined by the sampling factors, or a single data unit in a
+        noninterleaved scan.
+We do not use the terms "pixel" and "sample" interchangeably.  When we say
+pixel, we mean an element of the full-size image, while a sample is an element
+of the downsampled image.  Thus the number of samples may vary across
+components while the number of pixels does not.  (This terminology is not used
+rigorously throughout the code, but it is used in places where confusion would
+otherwise result.)
+
+
+*** System features ***
+
+The IJG distribution contains two parts:
+  * A subroutine library for JPEG compression and decompression.
+  * cjpeg/djpeg, two sample applications that use the library to transform
+    JFIF JPEG files to and from several other image formats.
+cjpeg/djpeg are of no great intellectual complexity: they merely add a simple
+command-line user interface and I/O routines for several uncompressed image
+formats.  This document concentrates on the library itself.
+
+We desire the library to be capable of supporting all JPEG baseline, extended
+sequential, progressive DCT, and lossless (spatial) processes.  Hierarchical
+processes are not supported.
+
+Within these limits, any set of compression parameters allowed by the JPEG
+spec should be readable for decompression.  (We can be more restrictive about
+what formats we can generate.)  Although the system design allows for all
+parameter values, some uncommon settings are not yet implemented and may
+never be; nonintegral sampling ratios are the prime example.
+
+By itself, the library handles only interchange JPEG datastreams --- in
+particular the widely used JFIF file format.  The library can be used by
+surrounding code to process interchange or abbreviated JPEG datastreams that
+are embedded in more complex file formats.  (For example, libtiff uses this
+library to implement JPEG compression within the TIFF file format.)
+
+The library includes a substantial amount of code that is not covered by the
+JPEG standard but is necessary for typical applications of JPEG.  These
+functions preprocess the image before JPEG compression or postprocess it after
+decompression.  They include colorspace conversion, downsampling/upsampling,
+and color quantization.  This code can be omitted if not needed.
+
+A wide range of quality vs. speed tradeoffs are possible in JPEG processing,
+and even more so in decompression postprocessing.  The decompression library
+provides multiple implementations that cover most of the useful tradeoffs,
+ranging from very-high-quality down to fast-preview operation.  On the
+compression side we have generally not provided low-quality choices, since
+compression is normally less time-critical.  It should be understood that the
+low-quality modes may not meet the JPEG standard's accuracy requirements;
+nonetheless, they are useful for viewers.
+
+
+*** System overview ***
+
+The compressor and decompressor are each divided into two main sections:
+the JPEG compressor or decompressor proper, and the preprocessing or
+postprocessing functions.  The interface between these two sections is the
+image data that Rec. ITU-T T.81 | ISO/IEC 10918-1 regards as its input or
+output: this data is in the colorspace to be used for compression, and it is
+downsampled to the sampling factors to be used.  The preprocessing and
+postprocessing steps are responsible for converting a normal image
+representation to or from this form.  (Those few applications that want to deal
+with YCbCr downsampled data can skip the preprocessing or postprocessing step.)
+
+Looking more closely, the compressor library contains the following main
+elements:
+
+  Preprocessing:
+    * Color space conversion (e.g., RGB to YCbCr).
+    * Edge expansion and downsampling.  Optionally, this step can do simple
+      smoothing --- this is often helpful for low-quality source data.
+  Lossy JPEG proper:
+    * MCU assembly, DCT, quantization.
+    * Entropy coding (sequential or progressive, Huffman or arithmetic).
+  Lossless JPEG proper:
+    * Point transform.
+    * Prediction, differencing.
+    * Entropy coding (Huffman or arithmetic)
+
+In addition to these modules we need overall control, marker generation,
+and support code (memory management & error handling).  There is also a
+module responsible for physically writing the output data --- typically
+this is just an interface to fwrite(), but some applications may need to
+do something else with the data.
+
+The decompressor library contains the following main elements:
+
+  Lossy JPEG proper:
+    * Entropy decoding (sequential or progressive, Huffman or arithmetic).
+    * Dequantization, inverse DCT, MCU disassembly.
+  Lossless JPEG proper:
+    * Entropy decoding (Huffman or arithmetic).
+    * Prediction, undifferencing.
+    * Point transform, sample size scaling.
+  Postprocessing:
+    * Upsampling.  Optionally, this step may be able to do more general
+      rescaling of the image.
+    * Color space conversion (e.g., YCbCr to RGB).  This step may also
+      provide gamma adjustment [ currently it does not ].
+    * Optional color quantization (e.g., reduction to 256 colors).
+    * Optional color precision reduction (e.g., 24-bit to 15-bit color).
+      [This feature is not currently implemented.]
+
+We also need overall control, marker parsing, and a data source module.
+The support code (memory management & error handling) can be shared with
+the compression half of the library.
+
+There may be several implementations of each of these elements, particularly
+in the decompressor, where a wide range of speed/quality tradeoffs is very
+useful.  It must be understood that some of the best speedups involve
+merging adjacent steps in the pipeline.  For example, upsampling, color space
+conversion, and color quantization might all be done at once when using a
+low-quality ordered-dither technique.  The system architecture is designed to
+allow such merging where appropriate.
+
+
+Note: it is convenient to regard edge expansion (padding to block boundaries)
+as a preprocessing/postprocessing function, even though
+Rec. ITU-T T.81 | ISO/IEC 10918-1 includes it in compression/decompression.  We
+do this because downsampling/upsampling can be simplified a little if they work
+on padded data: it's not necessary to have special cases at the right and
+bottom edges.  Therefore the interface buffer is always an integral number of
+blocks wide and high, and we expect compression preprocessing to pad the source
+data properly.  Padding will occur only to the next block (8-sample) boundary.
+In an interleaved-scan situation, additional dummy blocks may be used to fill
+out MCUs, but the MCU assembly and disassembly logic will create or discard
+these blocks internally.  (This is advantageous for speed reasons, since we
+avoid DCTing the dummy blocks.  It also permits a small reduction in file size,
+because the compressor can choose dummy block contents so as to minimize their
+size in compressed form.  Finally, it makes the interface buffer specification
+independent of whether the file is actually interleaved or not.)  Applications
+that wish to deal directly with the downsampled data must provide similar
+buffering and padding for odd-sized images.
+
+
+*** Poor man's object-oriented programming ***
+
+It should be clear by now that we have a lot of quasi-independent processing
+steps, many of which have several possible behaviors.  To avoid cluttering the
+code with lots of switch statements, we use a simple form of object-style
+programming to separate out the different possibilities.
+
+For example, two different color quantization algorithms could be implemented
+as two separate modules that present the same external interface; at runtime,
+the calling code will access the proper module indirectly through an "object".
+
+We can get the limited features we need while staying within portable C.
+The basic tool is a function pointer.  An "object" is just a struct
+containing one or more function pointer fields, each of which corresponds to
+a method name in real object-oriented languages.  During initialization we
+fill in the function pointers with references to whichever module we have
+determined we need to use in this run.  Then invocation of the module is done
+by indirecting through a function pointer; on most machines this is no more
+expensive than a switch statement, which would be the only other way of
+making the required run-time choice.  The really significant benefit, of
+course, is keeping the source code clean and well structured.
+
+We can also arrange to have private storage that varies between different
+implementations of the same kind of object.  We do this by making all the
+module-specific object structs be separately allocated entities, which will
+be accessed via pointers in the master compression or decompression struct.
+The "public" fields or methods for a given kind of object are specified by
+a commonly known struct.  But a module's initialization code can allocate
+a larger struct that contains the common struct as its first member, plus
+additional private fields.  With appropriate pointer casting, the module's
+internal functions can access these private fields.  (For a simple example,
+see jdatadst.c, which implements the external interface specified by struct
+jpeg_destination_mgr, but adds extra fields.)
+
+(Of course this would all be a lot easier if we were using C++, but we are
+not yet prepared to assume that everyone has a C++ compiler.)
+
+An important benefit of this scheme is that it is easy to provide multiple
+versions of any method, each tuned to a particular case.  While a lot of
+precalculation might be done to select an optimal implementation of a method,
+the cost per invocation is constant.  For example, the upsampling step might
+have a "generic" method, plus one or more "hardwired" methods for the most
+popular sampling factors; the hardwired methods would be faster because they'd
+use straight-line code instead of for-loops.  The cost to determine which
+method to use is paid only once, at startup, and the selection criteria are
+hidden from the callers of the method.
+
+This plan differs a little bit from usual object-oriented structures, in that
+only one instance of each object class will exist during execution.  The
+reason for having the class structure is that on different runs we may create
+different instances (choose to execute different modules).  You can think of
+the term "method" as denoting the common interface presented by a particular
+set of interchangeable functions, and "object" as denoting a group of related
+methods, or the total shared interface behavior of a group of modules.
+
+
+*** Overall control structure ***
+
+We previously mentioned the need for overall control logic in the compression
+and decompression libraries.  In IJG implementations prior to v5, overall
+control was mostly provided by "pipeline control" modules, which proved to be
+large, unwieldy, and hard to understand.  To improve the situation, the
+control logic has been subdivided into multiple modules.  The control modules
+consist of:
+
+1. Master control for module selection and initialization.  This has two
+responsibilities:
+
+   1A.  Startup initialization at the beginning of image processing.
+        The individual processing modules to be used in this run are selected
+        and given initialization calls.
+
+   1B.  Per-pass control.  This determines how many passes will be performed
+        and calls each active processing module to configure itself
+        appropriately at the beginning of each pass.  End-of-pass processing,
+        where necessary, is also invoked from the master control module.
+
+   Method selection is partially distributed, in that a particular processing
+   module may contain several possible implementations of a particular method,
+   which it will select among when given its initialization call.  The master
+   control code need only be concerned with decisions that affect more than
+   one module.
+
+2. Data buffering control.  A separate control module exists for each
+   inter-processing-step data buffer.  This module is responsible for
+   invoking the processing steps that write or read that data buffer.
+
+Each buffer controller sees the world as follows:
+
+input data => processing step A => buffer => processing step B => output data
+                      |              |               |
+              ------------------ controller ------------------
+
+The controller knows the dataflow requirements of steps A and B: how much data
+they want to accept in one chunk and how much they output in one chunk.  Its
+function is to manage its buffer and call A and B at the proper times.
+
+A data buffer control module may itself be viewed as a processing step by a
+higher-level control module; thus the control modules form a binary tree with
+elementary processing steps at the leaves of the tree.
+
+The control modules are objects.  A considerable amount of flexibility can
+be had by replacing implementations of a control module.  For example:
+* Merging of adjacent steps in the pipeline is done by replacing a control
+  module and its pair of processing-step modules with a single processing-
+  step module.  (Hence the possible merges are determined by the tree of
+  control modules.)
+* In some processing modes, a given interstep buffer need only be a "strip"
+  buffer large enough to accommodate the desired data chunk sizes.  In other
+  modes, a full-image buffer is needed and several passes are required.
+  The control module determines which kind of buffer is used and manipulates
+  virtual array buffers as needed.  One or both processing steps may be
+  unaware of the multi-pass behavior.
+
+In theory, we might be able to make all of the data buffer controllers
+interchangeable and provide just one set of implementations for all.  In
+practice, each one contains considerable special-case processing for its
+particular job.  The buffer controller concept should be regarded as an
+overall system structuring principle, not as a complete description of the
+task performed by any one controller.
+
+
+*** Compression object structure ***
+
+Here is a sketch of the logical structure of the JPEG compression library in
+lossy mode:
+
+                                                 |-- Colorspace conversion
+                  |-- Preprocessing controller --|
+                  |                              |-- Downsampling
+Main controller --|
+                  |                            |-- Forward DCT, quantize
+                  |-- Coefficient controller --|
+                                               |-- Entropy encoding
+
+... and in lossless mode:
+
+                                                 |-- Colorspace conversion
+                  |-- Preprocessing controller --|
+                  |                              |-- Downsampling
+Main controller --|
+                  |                           |-- Point transform
+                  |                           |
+                  |-- Difference controller --|-- Prediction, differencing
+                                              |
+                                              |-- Lossless mode entropy
+                                                  encoding
+
+This sketch also describes the flow of control (subroutine calls) during
+typical image data processing.  Each of the components shown in the diagram is
+an "object" which may have several different implementations available.  One
+or more source code files contain the actual implementation(s) of each object.
+
+The objects shown above are:
+
+* Main controller: buffer controller for the subsampled-data buffer, which
+  holds the preprocessed input data.  This controller invokes preprocessing to
+  fill the subsampled-data buffer, and JPEG compression to empty it.  There is
+  usually no need for a full-image buffer here; a strip buffer is adequate.
+
+* Preprocessing controller: buffer controller for the downsampling input data
+  buffer, which lies between colorspace conversion and downsampling.  Note
+  that a unified conversion/downsampling module would probably replace this
+  controller entirely.
+
+* Colorspace conversion: converts application image data into the desired
+  JPEG color space; also changes the data from pixel-interleaved layout to
+  separate component planes.  Processes one pixel row at a time.
+
+* Downsampling: performs reduction of chroma components as required.
+  Optionally may perform pixel-level smoothing as well.  Processes a "row
+  group" at a time, where a row group is defined as Vmax pixel rows of each
+  component before downsampling, and Vk sample rows afterwards (remember Vk
+  differs across components).  Some downsampling or smoothing algorithms may
+  require context rows above and below the current row group; the
+  preprocessing controller is responsible for supplying these rows via proper
+  buffering.  The downsampler is responsible for edge expansion at the right
+  edge (i.e., extending each sample row to a multiple of 8 samples); but the
+  preprocessing controller is responsible for vertical edge expansion (i.e.,
+  duplicating the bottom sample row as needed to make a multiple of 8 rows).
+
+* Coefficient controller: buffer controller for the DCT-coefficient data.
+  This controller handles MCU assembly, including insertion of dummy DCT
+  blocks when needed at the right or bottom edge.  When performing
+  Huffman-code optimization or emitting a multiscan JPEG file, this
+  controller is responsible for buffering the full image.  The equivalent of
+  one fully interleaved MCU row of subsampled data is processed per call,
+  even when the JPEG file is noninterleaved.
+
+* Forward DCT and quantization: Perform DCT, quantize, and emit coefficients.
+  Works on one or more DCT blocks at a time.  (Note: the coefficients are now
+  emitted in normal array order, which the entropy encoder is expected to
+  convert to zigzag order as necessary.  Prior versions of the IJG code did
+  the conversion to zigzag order within the quantization step.)
+
+* Entropy encoding: Perform Huffman or arithmetic entropy coding and emit the
+  coded data to the data destination module.  Works on one MCU per call.
+  For progressive JPEG, the same DCT blocks are fed to the entropy coder
+  during each pass, and the coder must emit the appropriate subset of
+  coefficients.
+
+* Difference controller: buffer controller for the spatial difference data.
+  When emitting a multiscan JPEG file, this controller is responsible for
+  buffering the full image.  The equivalent of one fully interleaved MCU row
+  of subsampled data is processed per call, even when the JPEG file is
+  noninterleaved.
+
+* Point transform: Downscale the data by the point transform value.
+
+* Prediction and differencing: Calculate the predictor and subtract it
+  from the input.  Works on one scanline per call.  The difference
+  controller supplies the prior scanline, which is used for prediction.
+
+* Lossless mode entropy encoding: Perform Huffman or arithmetic entropy coding
+  and emit the coded data to the data destination module.  This module handles
+  MCU assembly.  Works on one MCU row per call.
+
+In addition to the above objects, the compression library includes these
+objects:
+
+* Master control: determines the number of passes required, controls overall
+  and per-pass initialization of the other modules.
+
+* Marker writing: generates JPEG markers (except for RSTn, which is emitted
+  by the entropy encoder when needed).
+
+* Data destination manager: writes the output JPEG datastream to its final
+  destination (e.g., a file).  The destination manager supplied with the
+  library knows how to write to a stdio stream or to a memory buffer;
+  for other behaviors, the surrounding application may provide its own
+  destination manager.
+
+* Memory manager: allocates and releases memory, controls virtual arrays
+  (with backing store management, where required).
+
+* Error handler: performs formatting and output of error and trace messages;
+  determines handling of nonfatal errors.  The surrounding application may
+  override some or all of this object's methods to change error handling.
+
+* Progress monitor: supports output of "percent-done" progress reports.
+  This object represents an optional callback to the surrounding application:
+  if wanted, it must be supplied by the application.
+
+The error handler, destination manager, and progress monitor objects are
+defined as separate objects in order to simplify application-specific
+customization of the JPEG library.  A surrounding application may override
+individual methods or supply its own all-new implementation of one of these
+objects.  The object interfaces for these objects are therefore treated as
+part of the application interface of the library, whereas the other objects
+are internal to the library.
+
+The error handler and memory manager are shared by JPEG compression and
+decompression; the progress monitor, if used, may be shared as well.
+
+
+*** Decompression object structure ***
+
+Here is a sketch of the logical structure of the JPEG decompression library in
+lossy mode:
+
+                                               |-- Entropy decoding
+                  |-- Coefficient controller --|
+                  |                            |-- Dequantize, Inverse DCT
+Main controller --|
+                  |                               |-- Upsampling
+                  |-- Postprocessing controller --|   |-- Colorspace conversion
+                                                  |-- Color quantization
+                                                  |-- Color precision reduction
+
+... and in lossless mode:
+
+                                              |-- Lossless mode entropy
+                                              |   decoding
+                                              |
+                  |-- Difference controller --|-- Prediction, undifferencing
+                  |                           |
+                  |                           |-- Point transform, sample size
+                  |                               scaling
+Main controller --|
+                  |                               |-- Upsampling
+                  |-- Postprocessing controller --|
+                                                  |-- Color precision reduction
+
+As before, this diagram also represents typical control flow.  The objects
+shown are:
+
+* Main controller: buffer controller for the subsampled-data buffer, which
+  holds the output of JPEG decompression proper.  This controller's primary
+  task is to feed the postprocessing procedure.  Some upsampling algorithms
+  may require context rows above and below the current row group; when this
+  is true, the main controller is responsible for managing its buffer so as
+  to make context rows available.  In the current design, the main buffer is
+  always a strip buffer; a full-image buffer is never required.
+
+* Coefficient controller: buffer controller for the DCT-coefficient data.
+  This controller handles MCU disassembly, including deletion of any dummy
+  DCT blocks at the right or bottom edge.  When reading a multiscan JPEG
+  file, this controller is responsible for buffering the full image.
+  (Buffering DCT coefficients, rather than samples, is necessary to support
+  progressive JPEG.)  The equivalent of one fully interleaved MCU row of
+  subsampled data is processed per call, even when the source JPEG file is
+  noninterleaved.
+
+* Entropy decoding: Read coded data from the data source module and perform
+  Huffman or arithmetic entropy decoding.  Works on one MCU per call.
+  For progressive JPEG decoding, the coefficient controller supplies the prior
+  coefficients of each MCU (initially all zeroes), which the entropy decoder
+  modifies in each scan.
+
+* Dequantization and inverse DCT: like it says.  Note that the coefficients
+  buffered by the coefficient controller have NOT been dequantized; we
+  merge dequantization and inverse DCT into a single step for speed reasons.
+  When scaled-down output is asked for, simplified DCT algorithms may be used
+  that emit fewer samples per DCT block, not the full 8x8.  Works on one DCT
+  block at a time.
+
+* Difference controller: buffer controller for the spatial difference data.
+  When reading a multiscan JPEG file, this controller is responsible for
+  buffering the full image. The equivalent of one fully interleaved MCU row
+  is processed per call, even when the source JPEG file is noninterleaved.
+
+* Lossless mode entropy decoding: Read coded data from the data source module
+  and perform Huffman or arithmetic entropy decoding.  Works on one MCU row per
+  call.
+
+* Prediction and undifferencing: Calculate the predictor and add it to the
+  decoded difference.  Works on one scanline per call.  The difference
+  controller supplies the prior scanline, which is used for prediction.
+
+* Point transform and sample size scaling: Upscale the data by the point
+  transform value and downscale it to fit into the compiled-in sample size.
+
+* Postprocessing controller: buffer controller for the color quantization
+  input buffer, when quantization is in use.  (Without quantization, this
+  controller just calls the upsampler.)  For two-pass quantization, this
+  controller is responsible for buffering the full-image data.
+
+* Upsampling: restores chroma components to full size.  (May support more
+  general output rescaling, too.  Note that if undersized DCT outputs have
+  been emitted by the DCT module, this module must adjust so that properly
+  sized outputs are created.)  Works on one row group at a time.  This module
+  also calls the color conversion module, so its top level is effectively a
+  buffer controller for the upsampling->color conversion buffer.  However, in
+  all but the highest-quality operating modes, upsampling and color
+  conversion are likely to be merged into a single step.
+
+* Colorspace conversion: convert from JPEG color space to output color space,
+  and change data layout from separate component planes to pixel-interleaved.
+  Works on one pixel row at a time.
+
+* Color quantization: reduce the data to colormapped form, using either an
+  externally specified colormap or an internally generated one.  This module
+  is not used for full-color output.  Works on one pixel row at a time; may
+  require two passes to generate a color map.  Note that the output will
+  always be a single component representing colormap indexes.  In the current
+  design, the output values are JSAMPLEs, J12SAMPLEs, or J16SAMPLEs, so the
+  library cannot quantize to more than 256 colors when using 8-bit data
+  precision.  This is unlikely to be a problem in practice.
+
+* Color reduction: this module handles color precision reduction, e.g.,
+  generating 15-bit color (5 bits/primary) from JPEG's 24-bit output.
+  Not quite clear yet how this should be handled... should we merge it with
+  colorspace conversion???
+
+Note that some high-speed operating modes might condense the entire
+postprocessing sequence to a single module (upsample, color convert, and
+quantize in one step).
+
+In addition to the above objects, the decompression library includes these
+objects:
+
+* Master control: determines the number of passes required, controls overall
+  and per-pass initialization of the other modules.  This is subdivided into
+  input and output control: jdinput.c controls only input-side processing,
+  while jdmaster.c handles overall initialization and output-side control.
+
+* Marker reading: decodes JPEG markers (except for RSTn).
+
+* Data source manager: supplies the input JPEG datastream.  The source
+  manager supplied with the library knows how to read from a stdio stream
+  or from a memory buffer;  for other behaviors, the surrounding application
+  may provide its own source manager.
+
+* Memory manager: same as for compression library.
+
+* Error handler: same as for compression library.
+
+* Progress monitor: same as for compression library.
+
+As with compression, the data source manager, error handler, and progress
+monitor are candidates for replacement by a surrounding application.
+
+
+*** Decompression input and output separation ***
+
+To support efficient incremental display of progressive JPEG files, the
+decompressor is divided into two sections that can run independently:
+
+1. Data input includes marker parsing, entropy decoding, and input into the
+   coefficient controller's DCT coefficient buffer.  Note that this
+   processing is relatively cheap and fast.
+
+2. Data output reads from the DCT coefficient buffer and performs the IDCT
+   and all postprocessing steps.
+
+For a progressive JPEG file, the data input processing is allowed to get
+arbitrarily far ahead of the data output processing.  (This occurs only
+if the application calls jpeg_consume_input(); otherwise input and output
+run in lockstep, since the input section is called only when the output
+section needs more data.)  In this way the application can avoid making
+extra display passes when data is arriving faster than the display pass
+can run.  Furthermore, it is possible to abort an output pass without
+losing anything, since the coefficient buffer is read-only as far as the
+output section is concerned.  See libjpeg.txt for more detail.
+
+A full-image coefficient array is only created if the JPEG file has multiple
+scans (or if the application specifies buffered-image mode anyway).  When
+reading a single-scan file, the coefficient controller normally creates only
+a one-MCU buffer, so input and output processing must run in lockstep in this
+case.  jpeg_consume_input() is effectively a no-op in this situation.
+
+The main impact of dividing the decompressor in this fashion is that we must
+be very careful with shared variables in the cinfo data structure.  Each
+variable that can change during the course of decompression must be
+classified as belonging to data input or data output, and each section must
+look only at its own variables.  For example, the data output section may not
+depend on any of the variables that describe the current scan in the JPEG
+file, because these may change as the data input section advances into a new
+scan.
+
+The progress monitor is (somewhat arbitrarily) defined to treat input of the
+file as one pass when buffered-image mode is not used, and to ignore data
+input work completely when buffered-image mode is used.  Note that the
+library has no reliable way to predict the number of passes when dealing
+with a progressive JPEG file, nor can it predict the number of output passes
+in buffered-image mode.  So the work estimate is inherently bogus anyway.
+
+No comparable division is currently made in the compression library, because
+there isn't any real need for it.
+
+
+*** Data formats ***
+
+Arrays of 8-bit pixel sample values use the following data structure:
+
+    typedef something JSAMPLE;          a pixel component value, 0..MAXJSAMPLE
+    typedef JSAMPLE *JSAMPROW;          ptr to a row of samples
+    typedef JSAMPROW *JSAMPARRAY;       ptr to a list of rows
+    typedef JSAMPARRAY *JSAMPIMAGE;     ptr to a list of color-component arrays
+
+Arrays of 12-bit pixel sample values use the following data structure:
+
+    typedef something J12SAMPLE;        a pixel component value, 0..MAXJ12SAMPLE
+    typedef J12SAMPLE *J12SAMPROW;      ptr to a row of samples
+    typedef J12SAMPROW *J12SAMPARRAY;   ptr to a list of rows
+    typedef J12SAMPARRAY *J12SAMPIMAGE; ptr to a list of color-component arrays
+
+Arrays of 16-bit pixel sample values use the following data structure:
+
+    typedef something J16SAMPLE;        a pixel component value, 0..MAXJ16SAMPLE
+    typedef J16SAMPLE *J16SAMPROW;      ptr to a row of samples
+    typedef J16SAMPROW *J16SAMPARRAY;   ptr to a list of rows
+    typedef J16SAMPARRAY *J16SAMPIMAGE; ptr to a list of color-component arrays
+
+The basic element type JSAMPLE (8-bit sample) will be unsigned char, the basic
+element type J12SAMPLE (12-bit sample) will be short, and the basic element
+type J16SAMPLE (16-bit sample) will be unsigned short.
+
+With these conventions, J*SAMPLE values can be assumed to be >= 0.  This helps
+simplify correct rounding during downsampling, etc.  The JPEG standard's
+specification that 8-bit sample values run from -128..127 is accommodated by
+subtracting 128 from the sample value in the DCT step.  Similarly, during
+decompression the output of the IDCT step will be immediately shifted back to
+0..255.  (NOTE: different values are required when 12-bit samples are in use.
+When 8-bit samples are in use, the code uses MAXJSAMPLE and CENTERJSAMPLE,
+which are defined as 255 and 128 respectively.  When 12-bit samples are in use,
+the code uses MAXJ12SAMPLE and CENTERJ12SAMPLE, which are defined as 4095 and
+2048 respectively.  When 16-bit samples are in use, the code uses MAXJ16SAMPLE
+and CENTERJ16SAMPLE, which are defined as 65535 and 32768 respectively.)
+
+We use a pointer per row, rather than a two-dimensional J*SAMPLE array.  This
+choice costs only a small amount of memory and has several benefits:
+* Code using the data structure doesn't need to know the allocated width of
+  the rows.  This simplifies edge expansion/compression, since we can work
+  in an array that's wider than the logical picture width.
+* Indexing doesn't require multiplication; this is a performance win on many
+  machines.
+* Arrays with more than 64K total elements can be supported even on machines
+  where malloc() cannot allocate chunks larger than 64K.
+* The rows forming a component array may be allocated at different times
+  without extra copying.  This trick allows some speedups in smoothing steps
+  that need access to the previous and next rows.
+
+Note that each color component is stored in a separate array; we don't use the
+traditional layout in which the components of a pixel are stored together.
+This simplifies coding of modules that work on each component independently,
+because they don't need to know how many components there are.  Furthermore,
+we can read or write each component to a temporary file independently, which
+is helpful when dealing with noninterleaved JPEG files.
+
+In general, a specific sample value is accessed by code such as
+        image[colorcomponent][row][col]
+where col is measured from the image left edge, but row is measured from the
+first sample row currently in memory.  Either of the first two indexings can
+be precomputed by copying the relevant pointer.
+
+
+Since most image-processing applications prefer to work on images in which
+the components of a pixel are stored together, the data passed to or from the
+surrounding application uses the traditional convention: a single pixel is
+represented by N consecutive J*SAMPLE values, and an image row is an array of
+(# of color components)*(image width) J*SAMPLEs.  One or more rows of data can
+be represented by a pointer of type J*SAMPARRAY in this scheme.  This scheme is
+converted to component-wise storage inside the JPEG library.  (Applications
+that want to skip JPEG preprocessing or postprocessing will have to contend
+with component-wise storage.)
+
+
+Arrays of DCT-coefficient values use the following data structure:
+
+    typedef short JCOEF;                a 16-bit signed integer
+    typedef JCOEF JBLOCK[DCTSIZE2];     an 8x8 block of coefficients
+    typedef JBLOCK *JBLOCKROW;          ptr to one horizontal row of 8x8 blocks
+    typedef JBLOCKROW *JBLOCKARRAY;     ptr to a list of such rows
+    typedef JBLOCKARRAY *JBLOCKIMAGE;   ptr to a list of color component arrays
+
+The underlying type is at least a 16-bit signed integer; while "short" is big
+enough on all machines of interest, on some machines it is preferable to use
+"int" for speed reasons, despite the storage cost.  Coefficients are grouped
+into 8x8 blocks (but we always use #defines DCTSIZE and DCTSIZE2 rather than
+"8" and "64").
+
+The contents of a coefficient block may be in either "natural" or zigzagged
+order, and may be true values or divided by the quantization coefficients,
+depending on where the block is in the processing pipeline.  In the current
+library, coefficient blocks are kept in natural order everywhere; the entropy
+codecs zigzag or dezigzag the data as it is written or read.  The blocks
+contain quantized coefficients everywhere outside the DCT/IDCT subsystems.
+(This latter decision may need to be revisited to support variable
+quantization a la JPEG Part 3.)
+
+Notice that the allocation unit is now a row of 8x8 blocks, corresponding to
+eight rows of samples.  Otherwise the structure is much the same as for
+samples, and for the same reasons.
+
+
+*** Suspendable processing ***
+
+In some applications it is desirable to use the JPEG library as an
+incremental, memory-to-memory filter.  In this situation the data source or
+destination may be a limited-size buffer, and we can't rely on being able to
+empty or refill the buffer at arbitrary times.  Instead the application would
+like to have control return from the library at buffer overflow/underrun, and
+then resume compression or decompression at a later time.
+
+This scenario is supported for simple cases.  (For anything more complex, we
+recommend that the application "bite the bullet" and develop real multitasking
+capability.)  The libjpeg.txt file goes into more detail about the usage and
+limitations of this capability; here we address the implications for library
+structure.
+
+The essence of the problem is that the entropy codec (coder or decoder) must
+be prepared to stop at arbitrary times.  In turn, the controllers that call
+the entropy codec must be able to stop before having produced or consumed all
+the data that they normally would handle in one call.  That part is reasonably
+straightforward: we make the controller call interfaces include "progress
+counters" which indicate the number of data chunks successfully processed, and
+we require callers to test the counter rather than just assume all of the data
+was processed.
+
+Rather than trying to restart at an arbitrary point, the current Huffman
+codecs are designed to restart at the beginning of the current MCU after a
+suspension due to buffer overflow/underrun.  At the start of each call, the
+codec's internal state is loaded from permanent storage (in the JPEG object
+structures) into local variables.  On successful completion of the MCU, the
+permanent state is updated.  (This copying is not very expensive, and may even
+lead to *improved* performance if the local variables can be registerized.)
+If a suspension occurs, the codec simply returns without updating the state,
+thus effectively reverting to the start of the MCU.  Note that this implies
+leaving some data unprocessed in the source/destination buffer (ie, the
+compressed partial MCU).  The data source/destination module interfaces are
+specified so as to make this possible.  This also implies that the data buffer
+must be large enough to hold a worst-case compressed MCU; a couple thousand
+bytes should be enough.
+
+In a successive-approximation AC refinement scan, the progressive Huffman
+decoder has to be able to undo assignments of newly nonzero coefficients if it
+suspends before the MCU is complete, since decoding requires distinguishing
+previously-zero and previously-nonzero coefficients.  This is a bit tedious
+but probably won't have much effect on performance.  Other variants of Huffman
+decoding need not worry about this, since they will just store the same values
+again if forced to repeat the MCU.
+
+This approach would probably not work for an arithmetic codec, since its
+modifiable state is quite large and couldn't be copied cheaply.  Instead it
+would have to suspend and resume exactly at the point of the buffer end.
+
+The JPEG marker reader is designed to cope with suspension at an arbitrary
+point.  It does so by backing up to the start of the marker parameter segment,
+so the data buffer must be big enough to hold the largest marker of interest.
+Again, a couple KB should be adequate.  (A special "skip" convention is used
+to bypass COM and APPn markers, so these can be larger than the buffer size
+without causing problems; otherwise a 64K buffer would be needed in the worst
+case.)
+
+The JPEG marker writer currently does *not* cope with suspension.
+We feel that this is not necessary; it is much easier simply to require
+the application to ensure there is enough buffer space before starting.  (An
+empty 2K buffer is more than sufficient for the header markers; and ensuring
+there are a dozen or two bytes available before calling jpeg_finish_compress()
+will suffice for the trailer.)  This would not work for writing multi-scan
+JPEG files, but we simply do not intend to support that capability with
+suspension.
+
+
+*** Memory manager services ***
+
+The JPEG library's memory manager controls allocation and deallocation of
+memory, and it manages large "virtual" data arrays on machines where the
+operating system does not provide virtual memory.  Note that the same
+memory manager serves both compression and decompression operations.
+
+In all cases, allocated objects are tied to a particular compression or
+decompression master record, and they will be released when that master
+record is destroyed.
+
+The memory manager does not provide explicit deallocation of objects.
+Instead, objects are created in "pools" of free storage, and a whole pool
+can be freed at once.  This approach helps prevent storage-leak bugs, and
+it speeds up operations whenever malloc/free are slow (as they often are).
+The pools can be regarded as lifetime identifiers for objects.  Two
+pools/lifetimes are defined:
+  * JPOOL_PERMANENT     lasts until master record is destroyed
+  * JPOOL_IMAGE         lasts until done with image (JPEG datastream)
+Permanent lifetime is used for parameters and tables that should be carried
+across from one datastream to another; this includes all application-visible
+parameters.  Image lifetime is used for everything else.  (A third lifetime,
+JPOOL_PASS = one processing pass, was originally planned.  However it was
+dropped as not being worthwhile.  The actual usage patterns are such that the
+peak memory usage would be about the same anyway; and having per-pass storage
+substantially complicates the virtual memory allocation rules --- see below.)
+
+The memory manager deals with three kinds of object:
+1. "Small" objects.  Typically these require no more than 10K-20K total.
+2. "Large" objects.  These may require tens to hundreds of K depending on
+   image size.  Semantically they behave the same as small objects, but we
+   distinguish them because pool allocation heuristics may differ for large and
+   small objects (historically, large objects were also referenced by far
+   pointers on MS-DOS machines.)  Note that individual "large" objects cannot
+   exceed the size allowed by type size_t, which may be 64K or less on some
+   machines.
+3. "Virtual" objects.  These are large 2-D arrays of J*SAMPLEs or JBLOCKs
+   (typically large enough for the entire image being processed).  The
+   memory manager provides stripwise access to these arrays.  On machines
+   without virtual memory, the rest of the array may be swapped out to a
+   temporary file.
+
+(Note: J*SAMPARRAY and JBLOCKARRAY data structures are a combination of large
+objects for the data proper and small objects for the row pointers.  For
+convenience and speed, the memory manager provides single routines to create
+these structures.  Similarly, virtual arrays include a small control block
+and a J*SAMPARRAY or JBLOCKARRAY working buffer, all created with one call.)
+
+In the present implementation, virtual arrays are only permitted to have image
+lifespan.  (Permanent lifespan would not be reasonable, and pass lifespan is
+not very useful since a virtual array's raison d'etre is to store data for
+multiple passes through the image.)  We also expect that only "small" objects
+will be given permanent lifespan, though this restriction is not required by
+the memory manager.
+
+In a non-virtual-memory machine, some performance benefit can be gained by
+making the in-memory buffers for virtual arrays be as large as possible.
+(For small images, the buffers might fit entirely in memory, so blind
+swapping would be very wasteful.)  The memory manager will adjust the height
+of the buffers to fit within a prespecified maximum memory usage.  In order
+to do this in a reasonably optimal fashion, the manager needs to allocate all
+of the virtual arrays at once.  Therefore, there isn't a one-step allocation
+routine for virtual arrays; instead, there is a "request" routine that simply
+allocates the control block, and a "realize" routine (called just once) that
+determines space allocation and creates all of the actual buffers.  The
+realize routine must allow for space occupied by non-virtual large objects.
+(We don't bother to factor in the space needed for small objects, on the
+grounds that it isn't worth the trouble.)
+
+To support all this, we establish the following protocol for doing business
+with the memory manager:
+  1. Modules must request virtual arrays (which may have only image lifespan)
+     during the initial setup phase, i.e., in their jinit_xxx routines.
+  2. All "large" objects (including J*SAMPARRAYs and JBLOCKARRAYs) must also be
+     allocated during initial setup.
+  3. realize_virt_arrays will be called at the completion of initial setup.
+     The above conventions ensure that sufficient information is available
+     for it to choose a good size for virtual array buffers.
+Small objects of any lifespan may be allocated at any time.  We expect that
+the total space used for small objects will be small enough to be negligible
+in the realize_virt_arrays computation.
+
+In a virtual-memory machine, we simply pretend that the available space is
+infinite, thus causing realize_virt_arrays to decide that it can allocate all
+the virtual arrays as full-size in-memory buffers.  The overhead of the
+virtual-array access protocol is very small when no swapping occurs.
+
+A virtual array can be specified to be "pre-zeroed"; when this flag is set,
+never-yet-written sections of the array are set to zero before being made
+available to the caller.  If this flag is not set, never-written sections
+of the array contain garbage.  (This feature exists primarily because the
+equivalent logic would otherwise be needed in jdcoefct.c for progressive
+JPEG mode; we may as well make it available for possible other uses.)
+
+The first write pass on a virtual array is required to occur in top-to-bottom
+order; read passes, as well as any write passes after the first one, may
+access the array in any order.  This restriction exists partly to simplify
+the virtual array control logic, and partly because some file systems may not
+support seeking beyond the current end-of-file in a temporary file.  The main
+implication of this restriction is that rearrangement of rows (such as
+converting top-to-bottom data order to bottom-to-top) must be handled while
+reading data out of the virtual array, not while putting it in.
+
+
+*** Memory manager internal structure ***
+
+To isolate system dependencies as much as possible, we have broken the
+memory manager into two parts.  There is a reasonably system-independent
+"front end" (jmemmgr.c) and a "back end" that contains only the code
+likely to change across systems.  All of the memory management methods
+outlined above are implemented by the front end.  The back end provides
+the following routines for use by the front end (none of these routines
+are known to the rest of the JPEG code):
+
+jpeg_mem_init, jpeg_mem_term    system-dependent initialization/shutdown
+
+jpeg_get_small, jpeg_free_small interface to malloc and free library routines
+                                (or their equivalents)
+
+jpeg_get_large, jpeg_free_large historically was used to interface with
+                                FAR malloc/free on MS-DOS machines;  now the
+                                same as jpeg_get_small/jpeg_free_small
+
+jpeg_mem_available              estimate available memory
+
+jpeg_open_backing_store         create a backing-store object
+
+read_backing_store,             manipulate a backing-store object
+write_backing_store,
+close_backing_store
+
+On some systems there will be more than one type of backing-store object.
+jpeg_open_backing_store is responsible for choosing how to implement a given
+object.  The read/write/close routines are method pointers in the structure
+that describes a given object; this lets them be different for different object
+types.
+
+It may be necessary to ensure that backing store objects are explicitly
+released upon abnormal program termination.  To support this, we will expect
+the main program or surrounding application to arrange to call self_destruct
+(typically via jpeg_destroy) upon abnormal termination.  This may require a
+SIGINT signal handler or equivalent.  We don't want to have the back end module
+install its own signal handler, because that would pre-empt the surrounding
+application's ability to control signal handling.
+
+The IJG distribution includes several memory manager back end implementations.
+Usually the same back end should be suitable for all applications on a given
+system, but it is possible for an application to supply its own back end at
+need.
+
+
+*** Implications of DNL marker ***
+
+Some JPEG files may use a DNL marker to postpone definition of the image
+height (this would be useful for a fax-like scanner's output, for instance).
+In these files the SOF marker claims the image height is 0, and you only
+find out the true image height at the end of the first scan.
+
+We could read these files as follows:
+1. Upon seeing zero image height, replace it by 65535 (the maximum allowed).
+2. When the DNL is found, update the image height in the global image
+   descriptor.
+This implies that control modules must avoid making copies of the image
+height, and must re-test for termination after each MCU row.  This would
+be easy enough to do.
+
+In cases where image-size data structures are allocated, this approach will
+result in very inefficient use of virtual memory or much-larger-than-necessary
+temporary files.  This seems acceptable for something that probably won't be a
+mainstream usage.  People might have to forgo use of memory-hogging options
+(such as two-pass color quantization or noninterleaved JPEG files) if they
+want efficient conversion of such files.  (One could improve efficiency by
+demanding a user-supplied upper bound for the height, less than 65536; in most
+cases it could be much less.)
+
+The standard also permits the SOF marker to overestimate the image height,
+with a DNL to give the true, smaller height at the end of the first scan.
+This would solve the space problems if the overestimate wasn't too great.
+However, it implies that you don't even know whether DNL will be used.
+
+This leads to a couple of very serious objections:
+1. Testing for a DNL marker must occur in the inner loop of the decompressor's
+   Huffman decoder; this implies a speed penalty whether the feature is used
+   or not.
+2. There is no way to hide the last-minute change in image height from an
+   application using the decoder.  Thus *every* application using the IJG
+   library would suffer a complexity penalty whether it cared about DNL or
+   not.
+We currently do not support DNL because of these problems.
+
+A different approach is to insist that DNL-using files be preprocessed by a
+separate program that reads ahead to the DNL, then goes back and fixes the SOF
+marker.  This is a much simpler solution and is probably far more efficient.
+Even if one wants piped input, buffering the first scan of the JPEG file needs
+a lot smaller temp file than is implied by the maximum-height method.  For
+this approach we'd simply treat DNL as a no-op in the decompressor (at most,
+check that it matches the SOF image height).
+
+We will not worry about making the compressor capable of outputting DNL.
+Something similar to the first scheme above could be applied if anyone ever
+wants to make that work.
diff --git a/3rdparty/libjpeg-turbo/src/tjbench.c b/3rdparty/libjpeg-turbo/src/tjbench.c
new file mode 100644
index 0000000000..9dc6427880
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/tjbench.c
@@ -0,0 +1,1323 @@
+/*
+ * Copyright (C)2009-2019, 2021-2024 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <limits.h>
+#if !defined(_MSC_VER) || _MSC_VER > 1600
+#include <stdint.h>
+#endif
+#include <cdjpeg.h>
+#include "./tjutil.h"
+#include "./turbojpeg.h"
+
+
+#define THROW(op, err) { \
+  printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err); \
+  retval = -1;  goto bailout; \
+}
+#define THROW_UNIX(m)  THROW(m, strerror(errno))
+
+static char tjErrorStr[JMSG_LENGTH_MAX] = "\0";
+static int tjErrorLine = -1, tjErrorCode = -1;
+
+#define THROW_TJG() { \
+  printf("ERROR in line %d\n%s\n", __LINE__, tj3GetErrorStr(NULL)); \
+  retval = -1;  goto bailout; \
+}
+
+#define THROW_TJ() { \
+  int _tjErrorCode = tj3GetErrorCode(handle); \
+  char *_tjErrorStr = tj3GetErrorStr(handle); \
+  \
+  if (!tj3Get(handle, TJPARAM_STOPONWARNING) && \
+      _tjErrorCode == TJERR_WARNING) { \
+    if (strncmp(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX) || \
+        tjErrorCode != _tjErrorCode || tjErrorLine != __LINE__) { \
+      strncpy(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX); \
+      tjErrorStr[JMSG_LENGTH_MAX - 1] = '\0'; \
+      tjErrorCode = _tjErrorCode; \
+      tjErrorLine = __LINE__; \
+      printf("WARNING in line %d:\n%s\n", __LINE__, _tjErrorStr); \
+    } \
+  } else { \
+    printf("%s in line %d:\n%s\n", \
+           _tjErrorCode == TJERR_WARNING ? "WARNING" : "ERROR", __LINE__, \
+           _tjErrorStr); \
+    retval = -1;  goto bailout; \
+  } \
+}
+
+#define IS_CROPPED(cr)  (cr.x != 0 || cr.y != 0 || cr.w != 0 || cr.h != 0)
+
+#define CROPPED_WIDTH(width) \
+  (IS_CROPPED(cr) ? (cr.w != 0 ? cr.w : TJSCALED(width, sf) - cr.x) : \
+                    TJSCALED(width, sf))
+
+#define CROPPED_HEIGHT(height) \
+  (IS_CROPPED(cr) ? (cr.h != 0 ? cr.h : TJSCALED(height, sf) - cr.y) : \
+                    TJSCALED(height, sf))
+
+static int stopOnWarning = 0, bottomUp = 0, noRealloc = 1, fastUpsample = 0,
+  fastDCT = 0, optimize = 0, progressive = 0, limitScans = 0, maxMemory = 0,
+  maxPixels = 0, arithmetic = 0, lossless = 0, restartIntervalBlocks = 0,
+  restartIntervalRows = 0;
+static int precision = 8, sampleSize, compOnly = 0, decompOnly = 0, doYUV = 0,
+  quiet = 0, doTile = 0, pf = TJPF_BGR, yuvAlign = 1, doWrite = 1;
+static char *ext = "ppm";
+static const char *pixFormatStr[TJ_NUMPF] = {
+  "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK"
+};
+static const char *subNameLong[TJ_NUMSAMP] = {
+  "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1", "4:4:1"
+};
+static const char *csName[TJ_NUMCS] = {
+  "RGB", "YCbCr", "GRAY", "CMYK", "YCCK"
+};
+static const char *subName[TJ_NUMSAMP] = {
+  "444", "422", "420", "GRAY", "440", "411", "441"
+};
+static tjscalingfactor *scalingFactors = NULL, sf = { 1, 1 };
+static tjregion cr = { 0, 0, 0, 0 };
+static int nsf = 0, xformOp = TJXOP_NONE, xformOpt = 0;
+static int (*customFilter) (short *, tjregion, tjregion, int, int,
+                            tjtransform *);
+static double benchTime = 5.0, warmup = 1.0;
+
+
+static char *formatName(int subsamp, int cs, char *buf)
+{
+  if (quiet == 1) {
+    if (lossless)
+      SNPRINTF(buf, 80, "%-2d/LOSSLESS   ", precision);
+    else if (subsamp == TJSAMP_UNKNOWN)
+      SNPRINTF(buf, 80, "%-2d/%-5s      ", precision, csName[cs]);
+    else
+      SNPRINTF(buf, 80, "%-2d/%-5s/%-5s", precision, csName[cs],
+               subNameLong[subsamp]);
+    return buf;
+  } else {
+    if (lossless)
+      return (char *)"Lossless";
+    else if (subsamp == TJSAMP_UNKNOWN)
+      return (char *)csName[cs];
+    else {
+      SNPRINTF(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]);
+      return buf;
+    }
+  }
+}
+
+
+static char *sigfig(double val, int figs, char *buf, int len)
+{
+  char format[80];
+  int digitsAfterDecimal = figs - (int)ceil(log10(fabs(val)));
+
+  if (digitsAfterDecimal < 1)
+    SNPRINTF(format, 80, "%%.0f");
+  else
+    SNPRINTF(format, 80, "%%.%df", digitsAfterDecimal);
+  SNPRINTF(buf, len, format, val);
+  return buf;
+}
+
+
+/* Custom DCT filter which produces a negative of the image */
+static int dummyDCTFilter(short *coeffs, tjregion arrayRegion,
+                          tjregion planeRegion, int componentIndex,
+                          int transformIndex, tjtransform *transform)
+{
+  int i;
+
+  for (i = 0; i < arrayRegion.w * arrayRegion.h; i++)
+    coeffs[i] = -coeffs[i];
+  return 0;
+}
+
+
+/* Decompression test */
+static int decomp(unsigned char **jpegBufs, size_t *jpegSizes, void *dstBuf,
+                  int w, int h, int subsamp, int jpegQual, char *fileName,
+                  int tilew, int tileh)
+{
+  char tempStr[1024], sizeStr[24] = "\0", qualStr[16] = "\0";
+  FILE *file = NULL;
+  tjhandle handle = NULL;
+  int i, row, col, iter = 0, dstBufAlloc = 0, retval = 0;
+  double elapsed, elapsedDecode;
+  int ps = tjPixelSize[pf];
+  int scaledw, scaledh, pitch;
+  int ntilesw = (w + tilew - 1) / tilew, ntilesh = (h + tileh - 1) / tileh;
+  unsigned char *dstPtr, *dstPtr2, *yuvBuf = NULL;
+
+  if (lossless) sf = TJUNSCALED;
+
+  scaledw = TJSCALED(w, sf);
+  scaledh = TJSCALED(h, sf);
+
+  if (jpegQual > 0) {
+    SNPRINTF(qualStr, 16, "_%s%d", lossless ? "PSV" : "Q", jpegQual);
+    qualStr[15] = 0;
+  }
+
+  if ((handle = tj3Init(TJINIT_DECOMPRESS)) == NULL)
+    THROW_TJG();
+  if (tj3Set(handle, TJPARAM_STOPONWARNING, stopOnWarning) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_BOTTOMUP, bottomUp) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_FASTUPSAMPLE, fastUpsample) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_FASTDCT, fastDCT) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_SCANLIMIT, limitScans ? 500 : 0) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_MAXMEMORY, maxMemory) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_MAXPIXELS, maxPixels) == -1)
+    THROW_TJ();
+
+  if (IS_CROPPED(cr)) {
+    if (tj3DecompressHeader(handle, jpegBufs[0], jpegSizes[0]) == -1)
+      THROW_TJ();
+  }
+  if (tj3SetScalingFactor(handle, sf) == -1)
+    THROW_TJ();
+  if (tj3SetCroppingRegion(handle, cr) == -1)
+    THROW_TJ();
+  if (IS_CROPPED(cr)) {
+    scaledw = cr.w ? cr.w : scaledw - cr.x;
+    scaledh = cr.h ? cr.h : scaledh - cr.y;
+  }
+  pitch = scaledw * ps;
+
+  if (dstBuf == NULL) {
+#if ULLONG_MAX > SIZE_MAX
+    if ((unsigned long long)pitch * (unsigned long long)scaledh *
+        (unsigned long long)sampleSize > (unsigned long long)((size_t)-1))
+      THROW("allocating destination buffer", "Image is too large");
+#endif
+    if ((dstBuf = malloc((size_t)pitch * scaledh * sampleSize)) == NULL)
+      THROW_UNIX("allocating destination buffer");
+    dstBufAlloc = 1;
+  }
+
+  /* Set the destination buffer to gray so we know whether the decompressor
+     attempted to write to it */
+  if (precision == 8)
+    memset((unsigned char *)dstBuf, 127, (size_t)pitch * scaledh);
+  else if (precision == 12) {
+    for (i = 0; i < pitch * scaledh; i++)
+      ((short *)dstBuf)[i] = (short)2047;
+  } else {
+    for (i = 0; i < pitch * scaledh; i++)
+      ((unsigned short *)dstBuf)[i] = (unsigned short)32767;
+  }
+
+  if (doYUV) {
+    int width = doTile ? tilew : scaledw;
+    int height = doTile ? tileh : scaledh;
+    size_t yuvSize = tj3YUVBufSize(width, yuvAlign, height, subsamp);
+
+    if (yuvSize == 0)
+      THROW_TJG();
+    if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL)
+      THROW_UNIX("allocating YUV buffer");
+    memset(yuvBuf, 127, yuvSize);
+  }
+
+  /* Benchmark */
+  iter = -1;
+  elapsed = elapsedDecode = 0.;
+  while (1) {
+    int tile = 0;
+    double start = getTime();
+
+    for (row = 0, dstPtr = dstBuf; row < ntilesh;
+         row++, dstPtr += (size_t)pitch * tileh * sampleSize) {
+      for (col = 0, dstPtr2 = dstPtr; col < ntilesw;
+           col++, tile++, dstPtr2 += ps * tilew * sampleSize) {
+        int width = doTile ? min(tilew, w - col * tilew) : scaledw;
+        int height = doTile ? min(tileh, h - row * tileh) : scaledh;
+
+        if (doYUV) {
+          double startDecode;
+
+          if (tj3DecompressToYUV8(handle, jpegBufs[tile], jpegSizes[tile],
+                                  yuvBuf, yuvAlign) == -1)
+            THROW_TJ();
+          startDecode = getTime();
+          if (tj3DecodeYUV8(handle, yuvBuf, yuvAlign, dstPtr2, width, pitch,
+                            height, pf) == -1)
+            THROW_TJ();
+          if (iter >= 0) elapsedDecode += getTime() - startDecode;
+        } else {
+          if (precision == 8) {
+            if (tj3Decompress8(handle, jpegBufs[tile], jpegSizes[tile],
+                               dstPtr2, pitch, pf) == -1)
+              THROW_TJ();
+          } else if (precision == 12) {
+            if (tj3Decompress12(handle, jpegBufs[tile], jpegSizes[tile],
+                                (short *)dstPtr2, pitch, pf) == -1)
+              THROW_TJ();
+          } else {
+            if (tj3Decompress16(handle, jpegBufs[tile], jpegSizes[tile],
+                                (unsigned short *)dstPtr2, pitch, pf) == -1)
+              THROW_TJ();
+          }
+        }
+      }
+    }
+    elapsed += getTime() - start;
+    if (iter >= 0) {
+      iter++;
+      if (elapsed >= benchTime) break;
+    } else if (elapsed >= warmup) {
+      iter = 0;
+      elapsed = elapsedDecode = 0.;
+    }
+  }
+  if (doYUV) elapsed -= elapsedDecode;
+
+  if (quiet) {
+    printf("%-6s%s",
+           sigfig((double)(w * h) / 1000000. * (double)iter / elapsed, 4,
+                  tempStr, 1024),
+           quiet == 2 ? "\n" : "  ");
+    if (doYUV)
+      printf("%s\n",
+             sigfig((double)(w * h) / 1000000. * (double)iter / elapsedDecode,
+                    4, tempStr, 1024));
+    else if (quiet != 2) printf("\n");
+  } else {
+    printf("%s --> Frame rate:         %f fps\n",
+           doYUV ? "Decomp to YUV" : "Decompress   ", (double)iter / elapsed);
+    printf("                  Throughput:         %f Megapixels/sec\n",
+           (double)(w * h) / 1000000. * (double)iter / elapsed);
+    if (doYUV) {
+      printf("YUV Decode    --> Frame rate:         %f fps\n",
+             (double)iter / elapsedDecode);
+      printf("                  Throughput:         %f Megapixels/sec\n",
+             (double)(w * h) / 1000000. * (double)iter / elapsedDecode);
+    }
+  }
+
+  if (!doWrite) goto bailout;
+
+  if (sf.num != 1 || sf.denom != 1)
+    SNPRINTF(sizeStr, 24, "%d_%d", sf.num, sf.denom);
+  else if (tilew != w || tileh != h)
+    SNPRINTF(sizeStr, 24, "%dx%d", tilew, tileh);
+  else SNPRINTF(sizeStr, 24, "full");
+  if (decompOnly)
+    SNPRINTF(tempStr, 1024, "%s_%s.%s", fileName, sizeStr, ext);
+  else
+    SNPRINTF(tempStr, 1024, "%s_%s%s_%s.%s", fileName,
+             lossless ? "LOSSLS" : subName[subsamp], qualStr, sizeStr, ext);
+
+  if (precision == 8) {
+    if (tj3SaveImage8(handle, tempStr, (unsigned char *)dstBuf, scaledw, 0,
+                      scaledh, pf) == -1)
+      THROW_TJ();
+  } else if (precision == 12) {
+    if (tj3SaveImage12(handle, tempStr, (short *)dstBuf, scaledw, 0, scaledh,
+                       pf) == -1)
+      THROW_TJ();
+  } else {
+    if (tj3SaveImage16(handle, tempStr, (unsigned short *)dstBuf, scaledw, 0,
+                      scaledh, pf) == -1)
+      THROW_TJ();
+  }
+
+bailout:
+  if (file) fclose(file);
+  tj3Destroy(handle);
+  if (dstBufAlloc) free(dstBuf);
+  free(yuvBuf);
+  return retval;
+}
+
+
+static int fullTest(tjhandle handle, void *srcBuf, int w, int h, int subsamp,
+                    int jpegQual, char *fileName)
+{
+  char tempStr[1024], tempStr2[80];
+  FILE *file = NULL;
+  unsigned char **jpegBufs = NULL, *yuvBuf = NULL, *srcPtr, *srcPtr2;
+  void *tmpBuf = NULL;
+  double start, elapsed, elapsedEncode;
+  int row, col, i, tilew = w, tileh = h, retval = 0;
+  int iter;
+  size_t totalJpegSize = 0, *jpegSizes = NULL, yuvSize = 0;
+  int ps = tjPixelSize[pf];
+  int ntilesw = 1, ntilesh = 1, pitch = w * ps;
+  const char *pfStr = pixFormatStr[pf];
+
+#if ULLONG_MAX > SIZE_MAX
+  if ((unsigned long long)pitch * (unsigned long long)h *
+      (unsigned long long)sampleSize > (unsigned long long)((size_t)-1))
+    THROW("allocating temporary image buffer", "Image is too large");
+#endif
+  if ((tmpBuf = malloc((size_t)pitch * h * sampleSize)) == NULL)
+    THROW_UNIX("allocating temporary image buffer");
+
+  if (!quiet)
+    printf(">>>>>  %s (%s) <--> %d-bit JPEG (%s %s%d)  <<<<<\n", pfStr,
+           bottomUp ? "Bottom-up" : "Top-down", precision,
+           lossless ? "Lossless" : subNameLong[subsamp],
+           lossless ? "PSV" : "Q", jpegQual);
+
+  for (tilew = doTile ? 8 : w, tileh = doTile ? 8 : h; ;
+       tilew *= 2, tileh *= 2) {
+    if (tilew > w) tilew = w;
+    if (tileh > h) tileh = h;
+    ntilesw = (w + tilew - 1) / tilew;
+    ntilesh = (h + tileh - 1) / tileh;
+
+    if ((jpegBufs = (unsigned char **)malloc(sizeof(unsigned char *) *
+                                             ntilesw * ntilesh)) == NULL)
+      THROW_UNIX("allocating JPEG tile array");
+    memset(jpegBufs, 0, sizeof(unsigned char *) * ntilesw * ntilesh);
+    if ((jpegSizes = (size_t *)malloc(sizeof(size_t) * ntilesw *
+                                      ntilesh)) == NULL)
+      THROW_UNIX("allocating JPEG size array");
+    memset(jpegSizes, 0, sizeof(size_t) * ntilesw * ntilesh);
+
+    if (noRealloc) {
+      for (i = 0; i < ntilesw * ntilesh; i++) {
+        size_t jpegBufSize = tj3JPEGBufSize(tilew, tileh, subsamp);
+
+        if (jpegBufSize == 0)
+          THROW_TJG();
+        if ((jpegBufs[i] = tj3Alloc(jpegBufSize)) == NULL)
+          THROW_UNIX("allocating JPEG tiles");
+      }
+    }
+
+    /* Compression test */
+    if (quiet == 1)
+      printf("%-4s(%s)  %-2d/%-6s %-3d   ", pfStr, bottomUp ? "BU" : "TD",
+             precision, lossless ? "LOSSLS" : subNameLong[subsamp], jpegQual);
+    if (precision == 8) {
+      for (i = 0; i < h; i++)
+        memcpy(&((unsigned char *)tmpBuf)[pitch * i],
+               &((unsigned char *)srcBuf)[w * ps * i], w * ps);
+    } else {
+      for (i = 0; i < h; i++)
+        memcpy(&((unsigned short *)tmpBuf)[pitch * i],
+               &((unsigned short *)srcBuf)[w * ps * i], w * ps * sampleSize);
+    }
+
+    if (tj3Set(handle, TJPARAM_NOREALLOC, noRealloc) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_SUBSAMP, subsamp) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_FASTDCT, fastDCT) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_OPTIMIZE, optimize) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_PROGRESSIVE, progressive) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_ARITHMETIC, arithmetic) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_LOSSLESS, lossless) == -1)
+      THROW_TJ();
+    if (lossless) {
+      if (tj3Set(handle, TJPARAM_LOSSLESSPSV, jpegQual) == -1)
+        THROW_TJ();
+    } else {
+      if (tj3Set(handle, TJPARAM_QUALITY, jpegQual) == -1)
+        THROW_TJ();
+    }
+    if (tj3Set(handle, TJPARAM_RESTARTBLOCKS, restartIntervalBlocks) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_RESTARTROWS, restartIntervalRows) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_MAXMEMORY, maxMemory) == -1)
+      THROW_TJ();
+
+    if (doYUV) {
+      yuvSize = tj3YUVBufSize(tilew, yuvAlign, tileh, subsamp);
+      if (yuvSize == 0)
+        THROW_TJG();
+      if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL)
+        THROW_UNIX("allocating YUV buffer");
+      memset(yuvBuf, 127, yuvSize);
+    }
+
+    /* Benchmark */
+    iter = -1;
+    elapsed = elapsedEncode = 0.;
+    while (1) {
+      int tile = 0;
+
+      totalJpegSize = 0;
+      start = getTime();
+      for (row = 0, srcPtr = srcBuf; row < ntilesh;
+           row++, srcPtr += pitch * tileh * sampleSize) {
+        for (col = 0, srcPtr2 = srcPtr; col < ntilesw;
+             col++, tile++, srcPtr2 += ps * tilew * sampleSize) {
+          int width = min(tilew, w - col * tilew);
+          int height = min(tileh, h - row * tileh);
+
+          if (doYUV) {
+            double startEncode = getTime();
+
+            if (tj3EncodeYUV8(handle, srcPtr2, width, pitch, height, pf,
+                              yuvBuf, yuvAlign) == -1)
+              THROW_TJ();
+            if (iter >= 0) elapsedEncode += getTime() - startEncode;
+            if (tj3CompressFromYUV8(handle, yuvBuf, width, yuvAlign, height,
+                                    &jpegBufs[tile], &jpegSizes[tile]) == -1)
+              THROW_TJ();
+          } else {
+            if (precision == 8) {
+              if (tj3Compress8(handle, srcPtr2, width, pitch, height, pf,
+                               &jpegBufs[tile], &jpegSizes[tile]) == -1)
+                THROW_TJ();
+            } else if (precision == 12) {
+              if (tj3Compress12(handle, (short *)srcPtr2, width, pitch, height,
+                                pf, &jpegBufs[tile], &jpegSizes[tile]) == -1)
+                THROW_TJ();
+            } else {
+              if (tj3Compress16(handle, (unsigned short *)srcPtr2, width,
+                                pitch, height, pf, &jpegBufs[tile],
+                                &jpegSizes[tile]) == -1)
+                THROW_TJ();
+            }
+          }
+          totalJpegSize += jpegSizes[tile];
+        }
+      }
+      elapsed += getTime() - start;
+      if (iter >= 0) {
+        iter++;
+        if (elapsed >= benchTime) break;
+      } else if (elapsed >= warmup) {
+        iter = 0;
+        elapsed = elapsedEncode = 0.;
+      }
+    }
+    if (doYUV) elapsed -= elapsedEncode;
+
+    if (quiet == 1) printf("%-5d  %-5d   ", tilew, tileh);
+    if (quiet) {
+      if (doYUV)
+        printf("%-6s%s",
+               sigfig((double)(w * h) / 1000000. *
+                      (double)iter / elapsedEncode, 4, tempStr, 1024),
+               quiet == 2 ? "\n" : "  ");
+      printf("%-6s%s",
+             sigfig((double)(w * h) / 1000000. * (double)iter / elapsed, 4,
+                    tempStr, 1024),
+             quiet == 2 ? "\n" : "  ");
+      printf("%-6s%s",
+             sigfig((double)(w * h * ps) / (double)totalJpegSize, 4, tempStr2,
+                    80),
+             quiet == 2 ? "\n" : "  ");
+    } else {
+      printf("\n%s size: %d x %d\n", doTile ? "Tile" : "Image", tilew, tileh);
+      if (doYUV) {
+        printf("Encode YUV    --> Frame rate:         %f fps\n",
+               (double)iter / elapsedEncode);
+        printf("                  Output image size:  %lu bytes\n",
+               (unsigned long)yuvSize);
+        printf("                  Compression ratio:  %f:1\n",
+               (double)(w * h * ps) / (double)yuvSize);
+        printf("                  Throughput:         %f Megapixels/sec\n",
+               (double)(w * h) / 1000000. * (double)iter / elapsedEncode);
+        printf("                  Output bit stream:  %f Megabits/sec\n",
+               (double)yuvSize * 8. / 1000000. * (double)iter / elapsedEncode);
+      }
+      printf("%s --> Frame rate:         %f fps\n",
+             doYUV ? "Comp from YUV" : "Compress     ",
+             (double)iter / elapsed);
+      printf("                  Output image size:  %lu bytes\n",
+             (unsigned long)totalJpegSize);
+      printf("                  Compression ratio:  %f:1\n",
+             (double)(w * h * ps) / (double)totalJpegSize);
+      printf("                  Throughput:         %f Megapixels/sec\n",
+             (double)(w * h) / 1000000. * (double)iter / elapsed);
+      printf("                  Output bit stream:  %f Megabits/sec\n",
+             (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed);
+    }
+    if (tilew == w && tileh == h && doWrite) {
+     SNPRINTF(tempStr, 1024, "%s_%s_%s%d.jpg", fileName,
+              lossless ? "LOSSLS" : subName[subsamp],
+              lossless ? "PSV" : "Q", jpegQual);
+      if ((file = fopen(tempStr, "wb")) == NULL)
+        THROW_UNIX("opening reference image");
+      if (fwrite(jpegBufs[0], jpegSizes[0], 1, file) != 1)
+        THROW_UNIX("writing reference image");
+      fclose(file);  file = NULL;
+      if (!quiet) printf("Reference image written to %s\n", tempStr);
+    }
+
+    /* Decompression test */
+    if (!compOnly) {
+      if (decomp(jpegBufs, jpegSizes, tmpBuf, w, h, subsamp, jpegQual,
+                 fileName, tilew, tileh) == -1)
+        goto bailout;
+    } else if (quiet == 1) printf("N/A\n");
+
+    for (i = 0; i < ntilesw * ntilesh; i++) {
+      tj3Free(jpegBufs[i]);
+      jpegBufs[i] = NULL;
+    }
+    free(jpegBufs);  jpegBufs = NULL;
+    free(jpegSizes);  jpegSizes = NULL;
+    if (doYUV) {
+      free(yuvBuf);  yuvBuf = NULL;
+    }
+
+    if (tilew == w && tileh == h) break;
+  }
+
+bailout:
+  if (file) fclose(file);
+  if (jpegBufs) {
+    for (i = 0; i < ntilesw * ntilesh; i++)
+      tj3Free(jpegBufs[i]);
+  }
+  free(jpegBufs);
+  free(yuvBuf);
+  free(jpegSizes);
+  free(tmpBuf);
+  return retval;
+}
+
+
+static int decompTest(char *fileName)
+{
+  FILE *file = NULL;
+  tjhandle handle = NULL;
+  unsigned char **jpegBufs = NULL, *srcBuf = NULL;
+  size_t *jpegSizes = NULL, srcSize, totalJpegSize;
+  tjtransform *t = NULL;
+  double start, elapsed;
+  int ps = tjPixelSize[pf], tile, row, col, i, iter, retval = 0, decompsrc = 0;
+  char *temp = NULL, tempStr[80], tempStr2[80];
+  /* Original image */
+  int w = 0, h = 0, minTile = 16, tilew, tileh, ntilesw = 1, ntilesh = 1,
+    subsamp = -1, cs = -1;
+  /* Transformed image */
+  int tw, th, ttilew, ttileh, tntilesw, tntilesh, tsubsamp;
+
+  if ((file = fopen(fileName, "rb")) == NULL)
+    THROW_UNIX("opening file");
+  if (fseek(file, 0, SEEK_END) < 0 ||
+      (srcSize = ftell(file)) == (size_t)-1)
+    THROW_UNIX("determining file size");
+  if ((srcBuf = (unsigned char *)malloc(srcSize)) == NULL)
+    THROW_UNIX("allocating memory");
+  if (fseek(file, 0, SEEK_SET) < 0)
+    THROW_UNIX("setting file position");
+  if (fread(srcBuf, srcSize, 1, file) < 1)
+    THROW_UNIX("reading JPEG data");
+  fclose(file);  file = NULL;
+
+  temp = strrchr(fileName, '.');
+  if (temp != NULL) *temp = '\0';
+
+  if ((handle = tj3Init(TJINIT_TRANSFORM)) == NULL)
+    THROW_TJG();
+  if (tj3Set(handle, TJPARAM_STOPONWARNING, stopOnWarning) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_BOTTOMUP, bottomUp) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_NOREALLOC, noRealloc) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_FASTUPSAMPLE, fastUpsample) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_FASTDCT, fastDCT) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_SCANLIMIT, limitScans ? 500 : 0) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_MAXMEMORY, maxMemory) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_MAXPIXELS, maxPixels) == -1)
+    THROW_TJ();
+
+  if (tj3DecompressHeader(handle, srcBuf, srcSize) == -1)
+    THROW_TJ();
+  w = tj3Get(handle, TJPARAM_JPEGWIDTH);
+  h = tj3Get(handle, TJPARAM_JPEGHEIGHT);
+  subsamp = tj3Get(handle, TJPARAM_SUBSAMP);
+  precision = tj3Get(handle, TJPARAM_PRECISION);
+  if (tj3Get(handle, TJPARAM_PROGRESSIVE) == 1)
+    printf("JPEG image uses progressive entropy coding\n\n");
+  if (tj3Get(handle, TJPARAM_ARITHMETIC) == 1)
+    printf("JPEG image uses arithmetic entropy coding\n\n");
+  if (tj3Set(handle, TJPARAM_PROGRESSIVE, progressive) == -1)
+    THROW_TJ();
+  if (tj3Set(handle, TJPARAM_ARITHMETIC, arithmetic) == -1)
+    THROW_TJ();
+
+  lossless = tj3Get(handle, TJPARAM_LOSSLESS);
+  sampleSize = (precision == 8 ? sizeof(unsigned char) : sizeof(short));
+  cs = tj3Get(handle, TJPARAM_COLORSPACE);
+  if (w < 1 || h < 1)
+    THROW("reading JPEG header", "Invalid image dimensions");
+  if (cs == TJCS_YCCK || cs == TJCS_CMYK) {
+    pf = TJPF_CMYK;  ps = tjPixelSize[pf];
+  }
+  if (lossless) sf = TJUNSCALED;
+
+  if (tj3SetScalingFactor(handle, sf) == -1)
+    THROW_TJ();
+  if (tj3SetCroppingRegion(handle, cr) == -1)
+    THROW_TJ();
+
+  if (quiet == 1) {
+    printf("All performance values in Mpixels/sec\n\n");
+    printf("Pixel     JPEG             %s  %s   Xform   Comp    Decomp  ",
+           doTile ? "Tile " : "Image", doTile ? "Tile " : "Image");
+    if (doYUV) printf("Decode");
+    printf("\n");
+    printf("Format    Format           Width  Height  Perf    Ratio   Perf    ");
+    if (doYUV) printf("Perf");
+    printf("\n\n");
+  } else if (!quiet)
+    printf(">>>>>  %d-bit JPEG (%s) --> %s (%s)  <<<<<\n", precision,
+           formatName(subsamp, cs, tempStr), pixFormatStr[pf],
+           bottomUp ? "Bottom-up" : "Top-down");
+
+  if (doTile) {
+    if (subsamp == TJSAMP_UNKNOWN)
+      THROW("transforming",
+            "Could not determine subsampling level of JPEG image");
+    minTile = max(tjMCUWidth[subsamp], tjMCUHeight[subsamp]);
+  }
+  for (tilew = doTile ? minTile : w, tileh = doTile ? minTile : h; ;
+       tilew *= 2, tileh *= 2) {
+    if (tilew > w) tilew = w;
+    if (tileh > h) tileh = h;
+    ntilesw = (w + tilew - 1) / tilew;
+    ntilesh = (h + tileh - 1) / tileh;
+
+    if ((jpegBufs = (unsigned char **)malloc(sizeof(unsigned char *) *
+                                             ntilesw * ntilesh)) == NULL)
+      THROW_UNIX("allocating JPEG tile array");
+    memset(jpegBufs, 0, sizeof(unsigned char *) * ntilesw * ntilesh);
+    if ((jpegSizes = (size_t *)malloc(sizeof(size_t) * ntilesw *
+                                      ntilesh)) == NULL)
+      THROW_UNIX("allocating JPEG size array");
+    memset(jpegSizes, 0, sizeof(size_t) * ntilesw * ntilesh);
+
+    if (noRealloc &&
+        (doTile || xformOp != TJXOP_NONE || xformOpt != 0 || customFilter)) {
+      for (i = 0; i < ntilesw * ntilesh; i++) {
+        size_t jpegBufSize;
+
+        if (xformOp == TJXOP_TRANSPOSE || xformOp == TJXOP_TRANSVERSE ||
+            xformOp == TJXOP_ROT90 || xformOp == TJXOP_ROT270)
+          jpegBufSize = tj3JPEGBufSize(tileh, tilew, subsamp);
+        else
+          jpegBufSize = tj3JPEGBufSize(tilew, tileh, subsamp);
+        if (jpegBufSize == 0)
+          THROW_TJG();
+        if ((jpegBufs[i] = tj3Alloc(jpegBufSize)) == NULL)
+          THROW_UNIX("allocating JPEG tiles");
+      }
+    }
+
+    tw = w;  th = h;  ttilew = tilew;  ttileh = tileh;
+    if (!quiet) {
+      printf("\n%s size: %d x %d", doTile ? "Tile" : "Image", ttilew, ttileh);
+      if (sf.num != 1 || sf.denom != 1 || IS_CROPPED(cr))
+        printf(" --> %d x %d", CROPPED_WIDTH(tw), CROPPED_HEIGHT(th));
+      printf("\n");
+    } else if (quiet == 1) {
+      printf("%-4s(%s)  %-14s   ", pixFormatStr[pf],
+             bottomUp ? "BU" : "TD", formatName(subsamp, cs, tempStr));
+      printf("%-5d  %-5d   ", CROPPED_WIDTH(tilew), CROPPED_HEIGHT(tileh));
+    }
+
+    tsubsamp = subsamp;
+    if (doTile || xformOp != TJXOP_NONE || xformOpt != 0 || customFilter) {
+      if ((t = (tjtransform *)malloc(sizeof(tjtransform) * ntilesw *
+                                     ntilesh)) == NULL)
+        THROW_UNIX("allocating image transform array");
+
+      if (xformOp == TJXOP_TRANSPOSE || xformOp == TJXOP_TRANSVERSE ||
+          xformOp == TJXOP_ROT90 || xformOp == TJXOP_ROT270) {
+        tw = h;  th = w;  ttilew = tileh;  ttileh = tilew;
+      }
+
+      if (xformOp != TJXOP_NONE && xformOp != TJXOP_TRANSPOSE &&
+          subsamp == TJSAMP_UNKNOWN)
+        THROW("transforming",
+              "Could not determine subsampling level of JPEG image");
+      if (xformOpt & TJXOPT_GRAY) tsubsamp = TJSAMP_GRAY;
+      if (xformOp == TJXOP_HFLIP || xformOp == TJXOP_ROT180)
+        tw = tw - (tw % tjMCUWidth[tsubsamp]);
+      if (xformOp == TJXOP_VFLIP || xformOp == TJXOP_ROT180)
+        th = th - (th % tjMCUHeight[tsubsamp]);
+      if (xformOp == TJXOP_TRANSVERSE || xformOp == TJXOP_ROT90)
+        tw = tw - (tw % tjMCUHeight[tsubsamp]);
+      if (xformOp == TJXOP_TRANSVERSE || xformOp == TJXOP_ROT270)
+        th = th - (th % tjMCUWidth[tsubsamp]);
+      tntilesw = (tw + ttilew - 1) / ttilew;
+      tntilesh = (th + ttileh - 1) / ttileh;
+
+      if (xformOp == TJXOP_TRANSPOSE || xformOp == TJXOP_TRANSVERSE ||
+          xformOp == TJXOP_ROT90 || xformOp == TJXOP_ROT270) {
+        if (tsubsamp == TJSAMP_422) tsubsamp = TJSAMP_440;
+        else if (tsubsamp == TJSAMP_440) tsubsamp = TJSAMP_422;
+        else if (tsubsamp == TJSAMP_411) tsubsamp = TJSAMP_441;
+        else if (tsubsamp == TJSAMP_441) tsubsamp = TJSAMP_411;
+      }
+
+      for (row = 0, tile = 0; row < tntilesh; row++) {
+        for (col = 0; col < tntilesw; col++, tile++) {
+          t[tile].r.w = min(ttilew, tw - col * ttilew);
+          t[tile].r.h = min(ttileh, th - row * ttileh);
+          t[tile].r.x = col * ttilew;
+          t[tile].r.y = row * ttileh;
+          t[tile].op = xformOp;
+          t[tile].options = xformOpt | TJXOPT_TRIM;
+          t[tile].customFilter = customFilter;
+          if (t[tile].options & TJXOPT_NOOUTPUT && jpegBufs[tile]) {
+            tj3Free(jpegBufs[tile]);  jpegBufs[tile] = NULL;
+          }
+        }
+      }
+
+      iter = -1;
+      elapsed = 0.;
+      while (1) {
+        start = getTime();
+        if (tj3Transform(handle, srcBuf, srcSize, tntilesw * tntilesh,
+                         jpegBufs, jpegSizes, t) == -1)
+          THROW_TJ();
+        elapsed += getTime() - start;
+        if (iter >= 0) {
+          iter++;
+          if (elapsed >= benchTime) break;
+        } else if (elapsed >= warmup) {
+          iter = 0;
+          elapsed = 0.;
+        }
+      }
+
+      free(t);  t = NULL;
+
+      for (tile = 0, totalJpegSize = 0; tile < tntilesw * tntilesh; tile++)
+        totalJpegSize += jpegSizes[tile];
+
+      if (quiet) {
+        printf("%-6s%s%-6s%s",
+               sigfig((double)(w * h) / 1000000. / elapsed, 4, tempStr, 80),
+               quiet == 2 ? "\n" : "  ",
+               sigfig((double)(w * h * ps) / (double)totalJpegSize, 4,
+                      tempStr2, 80),
+               quiet == 2 ? "\n" : "  ");
+      } else {
+        printf("Transform     --> Frame rate:         %f fps\n",
+               1.0 / elapsed);
+        printf("                  Output image size:  %lu bytes\n",
+               (unsigned long)totalJpegSize);
+        printf("                  Compression ratio:  %f:1\n",
+               (double)(w * h * ps) / (double)totalJpegSize);
+        printf("                  Throughput:         %f Megapixels/sec\n",
+               (double)(w * h) / 1000000. / elapsed);
+        printf("                  Output bit stream:  %f Megabits/sec\n",
+               (double)totalJpegSize * 8. / 1000000. / elapsed);
+      }
+    } else {
+      if (quiet == 1) printf("N/A     N/A     ");
+      tj3Free(jpegBufs[0]);
+      jpegBufs[0] = NULL;
+      decompsrc = 1;
+    }
+
+    if (w == tilew) ttilew = tw;
+    if (h == tileh) ttileh = th;
+    if (!(xformOpt & TJXOPT_NOOUTPUT)) {
+      if (decomp(decompsrc ? &srcBuf : jpegBufs,
+                 decompsrc ? &srcSize : jpegSizes, NULL, tw, th, tsubsamp, 0,
+                 fileName, ttilew, ttileh) == -1)
+        goto bailout;
+    } else if (quiet == 1) printf("N/A\n");
+
+    for (i = 0; i < ntilesw * ntilesh; i++) {
+      tj3Free(jpegBufs[i]);
+      jpegBufs[i] = NULL;
+    }
+    free(jpegBufs);  jpegBufs = NULL;
+    free(jpegSizes);  jpegSizes = NULL;
+
+    if (tilew == w && tileh == h) break;
+  }
+
+bailout:
+  if (file) fclose(file);
+  if (jpegBufs) {
+    for (i = 0; i < ntilesw * ntilesh; i++)
+      tj3Free(jpegBufs[i]);
+  }
+  free(jpegBufs);
+  free(jpegSizes);
+  free(srcBuf);
+  free(t);
+  tj3Destroy(handle);
+  return retval;
+}
+
+
+static void usage(char *progName)
+{
+  int i;
+
+  printf("USAGE: %s\n", progName);
+  printf("       <Inputimage (BMP|PPM)> <Quality or PSV> [options]\n\n");
+  printf("       %s\n", progName);
+  printf("       <Inputimage (JPG)> [options]\n");
+
+  printf("\nGENERAL OPTIONS\n");
+  printf("---------------\n");
+  printf("-alloc = Dynamically allocate JPEG buffers\n");
+  printf("-benchtime T = Run each benchmark for at least T seconds [default = 5.0]\n");
+  printf("-bmp = Use Windows Bitmap format for output images [default = PPM]\n");
+  printf("     ** 8-bit data precision only **\n");
+  printf("-bottomup = Use bottom-up row order for packed-pixel source/destination buffers\n");
+  printf("-componly = Stop after running compression tests.  Do not test decompression.\n");
+  printf("-lossless = Generate lossless JPEG images when compressing (implies\n");
+  printf("     -subsamp 444).  PSV is the predictor selection value (1-7).\n");
+  printf("-maxmemory = Memory limit (in megabytes) for intermediate buffers used with\n");
+  printf("     progressive JPEG compression and decompression, optimized baseline entropy\n");
+  printf("     coding, lossless JPEG compression, and lossless transformation\n");
+  printf("     [default = no limit]\n");
+  printf("-maxpixels = Input image size limit (in pixels) [default = no limit]\n");
+  printf("-nowrite = Do not write reference or output images (improves consistency of\n");
+  printf("     benchmark results)\n");
+  printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n");
+  printf("     Use the specified pixel format for packed-pixel source/destination buffers\n");
+  printf("     [default = BGR]\n");
+  printf("-cmyk = Indirectly test YCCK JPEG compression/decompression\n");
+  printf("     (use the CMYK pixel format for packed-pixel source/destination buffers)\n");
+  printf("-precision N = Use N-bit data precision when compressing [N is 8, 12, or 16;\n");
+  printf("     default = 8; if N is 16, then -lossless must also be specified]\n");
+  printf("     (-precision 12 implies -optimize unless -arithmetic is also specified)\n");
+  printf("-quiet = Output results in tabular rather than verbose format\n");
+  printf("-restart N = When compressing, add a restart marker every N MCU rows (lossy) or\n");
+  printf("     N sample rows (lossless) [default = 0 (no restart markers)].  Append 'B'\n");
+  printf("     to specify the restart marker interval in MCU blocks (lossy) or samples\n");
+  printf("     (lossless).\n");
+  printf("-stoponwarning = Immediately discontinue the current\n");
+  printf("     compression/decompression/transform operation if a warning (non-fatal\n");
+  printf("     error) occurs\n");
+  printf("-tile = Compress/transform the input image into separate JPEG tiles of varying\n");
+  printf("     sizes (useful for measuring JPEG overhead)\n");
+  printf("-warmup T = Run each benchmark for T seconds [default = 1.0] prior to starting\n");
+  printf("     the timer, in order to prime the caches and thus improve the consistency\n");
+  printf("     of the benchmark results\n");
+
+  printf("\nLOSSY JPEG OPTIONS\n");
+  printf("------------------\n");
+  printf("-arithmetic = Use arithmetic entropy coding in JPEG images generated by\n");
+  printf("     compression and transform operations (can be combined with -progressive)\n");
+  printf("-crop WxH+X+Y = Decompress only the specified region of the JPEG image, where W\n");
+  printf("     and H are the width and height of the region (0 = maximum possible width\n");
+  printf("     or height) and X and Y are the left and upper boundary of the region, all\n");
+  printf("     specified relative to the scaled image dimensions.  X must be divible by\n");
+  printf("     the scaled MCU width.\n");
+  printf("-fastdct = Use the fastest DCT/IDCT algorithm available\n");
+  printf("-fastupsample = Use the fastest chrominance upsampling algorithm available\n");
+  printf("-optimize = Use optimized baseline entropy coding in JPEG images generated by\n");
+  printf("     compession and transform operations\n");
+  printf("-progressive = Use progressive entropy coding in JPEG images generated by\n");
+  printf("     compression and transform operations (can be combined with -arithmetic;\n");
+  printf("     implies -optimize unless -arithmetic is also specified)\n");
+  printf("-limitscans = Refuse to decompress or transform progressive JPEG images that\n");
+  printf("     have an unreasonably large number of scans\n");
+  printf("-scale M/N = When decompressing, scale the width/height of the JPEG image by a\n");
+  printf("     factor of M/N (M/N = ");
+  for (i = 0; i < nsf; i++) {
+    printf("%d/%d", scalingFactors[i].num, scalingFactors[i].denom);
+    if (nsf == 2 && i != nsf - 1) printf(" or ");
+    else if (nsf > 2) {
+      if (i != nsf - 1) printf(", ");
+      if (i == nsf - 2) printf("or ");
+    }
+    if (i % 8 == 0 && i != 0) printf("\n     ");
+  }
+  printf(")\n");
+  printf("-subsamp S = When compressing, use the specified level of chrominance\n");
+  printf("     subsampling (S = 444, 422, 440, 420, 411, 441, or GRAY) [default = test\n");
+  printf("     Grayscale, 4:2:0, 4:2:2, and 4:4:4 in sequence]\n");
+  printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n");
+  printf("     Perform the specified lossless transform operation on the input image\n");
+  printf("     prior to decompression (these operations are mutually exclusive)\n");
+  printf("-grayscale = Transform the input image into a grayscale JPEG image prior to\n");
+  printf("     decompression (can be combined with the other transform operations above)\n");
+  printf("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)\n");
+  printf("     when transforming the input image\n");
+  printf("-yuv = Compress from/decompress to intermediate planar YUV images\n");
+  printf("     ** 8-bit data precision only **\n");
+  printf("-yuvpad N = The number of bytes by which each row in each plane of an\n");
+  printf("     intermediate YUV image is evenly divisible (N must be a power of 2)\n");
+  printf("     [default = 1]\n");
+
+  printf("\nNOTE:  If the quality/PSV is specified as a range (e.g. 90-100 or 1-4), a\n");
+  printf("separate test will be performed for all values in the range.\n\n");
+  exit(1);
+}
+
+
+int main(int argc, char *argv[])
+{
+  void *srcBuf = NULL;
+  int w = 0, h = 0, i, j, minQual = -1, maxQual = -1;
+  char *temp;
+  int minArg = 2, retval = 0, subsamp = -1;
+  tjhandle handle = NULL;
+
+  if ((scalingFactors = tj3GetScalingFactors(&nsf)) == NULL || nsf == 0)
+    THROW("executing tj3GetScalingFactors()", tj3GetErrorStr(NULL));
+
+  if (argc < minArg) usage(argv[0]);
+
+  temp = strrchr(argv[1], '.');
+  if (temp != NULL) {
+    if (!strcasecmp(temp, ".bmp")) ext = "bmp";
+    if (!strcasecmp(temp, ".jpg") || !strcasecmp(temp, ".jpeg"))
+      decompOnly = 1;
+  }
+
+  printf("\n");
+
+  if (!decompOnly) {
+    minArg = 3;
+    if (argc < minArg) usage(argv[0]);
+    minQual = atoi(argv[2]);
+    if ((temp = strchr(argv[2], '-')) != NULL && strlen(temp) > 1 &&
+        sscanf(&temp[1], "%d", &maxQual) == 1 && maxQual > minQual) {}
+    else maxQual = minQual;
+  }
+
+  if (argc > minArg) {
+    for (i = minArg; i < argc; i++) {
+      if (!strcasecmp(argv[i], "-tile")) {
+        doTile = 1;  xformOpt |= TJXOPT_CROP;
+      } else if (!strcasecmp(argv[i], "-precision") && i < argc - 1) {
+        int tempi = atoi(argv[++i]);
+
+        if (tempi != 8 && tempi != 12 && tempi != 16)
+          usage(argv[0]);
+        precision = tempi;
+      } else if (!strcasecmp(argv[i], "-fastupsample")) {
+        printf("Using fastest upsampling algorithm\n\n");
+        fastUpsample = 1;
+      } else if (!strcasecmp(argv[i], "-fastdct")) {
+        printf("Using fastest DCT/IDCT algorithm\n\n");
+        fastDCT = 1;
+      } else if (!strcasecmp(argv[i], "-optimize")) {
+        printf("Using optimized baseline entropy coding\n\n");
+        optimize = 1;
+        xformOpt |= TJXOPT_OPTIMIZE;
+      } else if (!strcasecmp(argv[i], "-progressive")) {
+        printf("Using progressive entropy coding\n\n");
+        progressive = 1;
+        xformOpt |= TJXOPT_PROGRESSIVE;
+      } else if (!strcasecmp(argv[i], "-arithmetic")) {
+        printf("Using arithmetic entropy coding\n\n");
+        arithmetic = 1;
+        xformOpt |= TJXOPT_ARITHMETIC;
+      } else if (!strcasecmp(argv[i], "-lossless")) {
+        lossless = 1;
+        subsamp = TJSAMP_444;
+      } else if (!strcasecmp(argv[i], "-rgb"))
+        pf = TJPF_RGB;
+      else if (!strcasecmp(argv[i], "-rgbx"))
+        pf = TJPF_RGBX;
+      else if (!strcasecmp(argv[i], "-bgr"))
+        pf = TJPF_BGR;
+      else if (!strcasecmp(argv[i], "-bgrx"))
+        pf = TJPF_BGRX;
+      else if (!strcasecmp(argv[i], "-xbgr"))
+        pf = TJPF_XBGR;
+      else if (!strcasecmp(argv[i], "-xrgb"))
+        pf = TJPF_XRGB;
+      else if (!strcasecmp(argv[i], "-cmyk"))
+        pf = TJPF_CMYK;
+      else if (!strcasecmp(argv[i], "-bottomup"))
+        bottomUp = 1;
+      else if (!strcasecmp(argv[i], "-quiet"))
+        quiet = 1;
+      else if (!strcasecmp(argv[i], "-qq"))
+        quiet = 2;
+      else if (!strcasecmp(argv[i], "-scale") && i < argc - 1) {
+        int temp1 = 0, temp2 = 0, match = 0;
+
+        if (sscanf(argv[++i], "%d/%d", &temp1, &temp2) == 2) {
+          for (j = 0; j < nsf; j++) {
+            if (temp1 == scalingFactors[j].num &&
+                temp2 == scalingFactors[j].denom) {
+              sf = scalingFactors[j];
+              match = 1;  break;
+            }
+          }
+          if (!match) usage(argv[0]);
+        } else usage(argv[0]);
+      } else if (!strcasecmp(argv[i], "-crop") && i < argc - 1) {
+        int temp1 = -1, temp2 = -1, temp3 = -1, temp4 = -1;
+
+        if (sscanf(argv[++i], "%dx%d+%d+%d", &temp1, &temp2, &temp3,
+                   &temp4) == 4 && temp1 >= 0 && temp2 >= 0 && temp3 >= 0 &&
+                   temp4 >= 0) {
+          cr.w = temp1;  cr.h = temp2;  cr.x = temp3;  cr.y = temp4;
+        } else usage(argv[0]);
+      } else if (!strcasecmp(argv[i], "-hflip"))
+        xformOp = TJXOP_HFLIP;
+      else if (!strcasecmp(argv[i], "-vflip"))
+        xformOp = TJXOP_VFLIP;
+      else if (!strcasecmp(argv[i], "-transpose"))
+        xformOp = TJXOP_TRANSPOSE;
+      else if (!strcasecmp(argv[i], "-transverse"))
+        xformOp = TJXOP_TRANSVERSE;
+      else if (!strcasecmp(argv[i], "-rot90"))
+        xformOp = TJXOP_ROT90;
+      else if (!strcasecmp(argv[i], "-rot180"))
+        xformOp = TJXOP_ROT180;
+      else if (!strcasecmp(argv[i], "-rot270"))
+        xformOp = TJXOP_ROT270;
+      else if (!strcasecmp(argv[i], "-grayscale"))
+        xformOpt |= TJXOPT_GRAY;
+      else if (!strcasecmp(argv[i], "-custom"))
+        customFilter = dummyDCTFilter;
+      else if (!strcasecmp(argv[i], "-nooutput"))
+        xformOpt |= TJXOPT_NOOUTPUT;
+      else if (!strcasecmp(argv[i], "-copynone"))
+        xformOpt |= TJXOPT_COPYNONE;
+      else if (!strcasecmp(argv[i], "-benchtime") && i < argc - 1) {
+        double tempd = atof(argv[++i]);
+
+        if (tempd > 0.0) benchTime = tempd;
+        else usage(argv[0]);
+      } else if (!strcasecmp(argv[i], "-warmup") && i < argc - 1) {
+        double tempd = atof(argv[++i]);
+
+        if (tempd >= 0.0) warmup = tempd;
+        else usage(argv[0]);
+        printf("Warmup time = %.1f seconds\n\n", warmup);
+      } else if (!strcasecmp(argv[i], "-alloc"))
+        noRealloc = 0;
+      else if (!strcasecmp(argv[i], "-bmp"))
+        ext = "bmp";
+      else if (!strcasecmp(argv[i], "-yuv")) {
+        printf("Testing planar YUV encoding/decoding\n\n");
+        doYUV = 1;
+      } else if (!strcasecmp(argv[i], "-yuvpad") && i < argc - 1) {
+        int tempi = atoi(argv[++i]);
+
+        if (tempi >= 1 && (tempi & (tempi - 1)) == 0) yuvAlign = tempi;
+        else usage(argv[0]);
+      } else if (!strcasecmp(argv[i], "-subsamp") && i < argc - 1) {
+        i++;
+        if (toupper(argv[i][0]) == 'G') subsamp = TJSAMP_GRAY;
+        else {
+          int tempi = atoi(argv[i]);
+
+          switch (tempi) {
+          case 444:  subsamp = TJSAMP_444;  break;
+          case 422:  subsamp = TJSAMP_422;  break;
+          case 440:  subsamp = TJSAMP_440;  break;
+          case 420:  subsamp = TJSAMP_420;  break;
+          case 411:  subsamp = TJSAMP_411;  break;
+          case 441:  subsamp = TJSAMP_441;  break;
+          default:  usage(argv[0]);
+          }
+        }
+      } else if (!strcasecmp(argv[i], "-componly"))
+        compOnly = 1;
+      else if (!strcasecmp(argv[i], "-nowrite"))
+        doWrite = 0;
+      else if (!strcasecmp(argv[i], "-limitscans"))
+        limitScans = 1;
+      else if (!strcasecmp(argv[i], "-maxmemory") && i < argc - 1) {
+        int tempi = atoi(argv[++i]);
+
+        if (tempi < 0) usage(argv[0]);
+        maxMemory = tempi;
+      } else if (!strcasecmp(argv[i], "-maxpixels") && i < argc - 1) {
+        int tempi = atoi(argv[++i]);
+
+        if (tempi < 0) usage(argv[0]);
+        maxPixels = tempi;
+      } else if (!strcasecmp(argv[i], "-restart") && i < argc - 1) {
+        int tempi = -1, nscan;  char tempc = 0;
+
+        if ((nscan = sscanf(argv[++i], "%d%c", &tempi, &tempc)) < 1 ||
+            tempi < 0 || tempi > 65535 ||
+            (nscan == 2 && tempc != 'B' && tempc != 'b'))
+          usage(argv[0]);
+
+        if (tempc == 'B' || tempc == 'b')
+          restartIntervalBlocks = tempi;
+        else
+          restartIntervalRows = tempi;
+      } else if (!strcasecmp(argv[i], "-stoponwarning"))
+        stopOnWarning = 1;
+      else usage(argv[0]);
+    }
+  }
+
+  if (precision == 16 && !lossless) {
+    printf("ERROR: -lossless must be specified along with -precision 16\n");
+    retval = -1;  goto bailout;
+  }
+  if (precision != 8 && doYUV) {
+    printf("ERROR: -yuv requires 8-bit data precision\n");
+    retval = -1;  goto bailout;
+  }
+  if (lossless && doYUV) {
+    printf("ERROR: -lossless and -yuv are incompatible\n");
+    retval = -1;  goto bailout;
+  }
+  sampleSize = (precision == 8 ? sizeof(unsigned char) : sizeof(short));
+
+  if ((sf.num != 1 || sf.denom != 1) && doTile) {
+    printf("Disabling tiled compression/decompression tests, because those tests do not\n");
+    printf("work when scaled decompression is enabled.\n\n");
+    doTile = 0;  xformOpt &= (~TJXOPT_CROP);
+  }
+
+  if (IS_CROPPED(cr)) {
+    if (!decompOnly) {
+      printf("ERROR: Partial image decompression can only be enabled for JPEG input images\n");
+      retval = -1;  goto bailout;
+    }
+    if (doTile) {
+      printf("Disabling tiled compression/decompression tests, because those tests do not\n");
+      printf("work when partial image decompression is enabled.\n\n");
+      doTile = 0;  xformOpt &= (~TJXOPT_CROP);
+    }
+    if (doYUV) {
+      printf("ERROR: -crop and -yuv are incompatible\n");
+      retval = -1;  goto bailout;
+    }
+  }
+
+  if (!noRealloc && doTile) {
+    printf("Disabling tiled compression/decompression tests, because those tests do not\n");
+    printf("work when dynamic JPEG buffer allocation is enabled.\n\n");
+    doTile = 0;  xformOpt &= (~TJXOPT_CROP);
+  }
+
+  if (!decompOnly) {
+    if ((handle = tj3Init(TJINIT_COMPRESS)) == NULL)
+      THROW_TJG();
+    if (tj3Set(handle, TJPARAM_STOPONWARNING, stopOnWarning) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_BOTTOMUP, bottomUp) == -1)
+      THROW_TJ();
+    if (tj3Set(handle, TJPARAM_MAXPIXELS, maxPixels) == -1)
+      THROW_TJ();
+
+    if (precision == 8) {
+      if ((srcBuf = tj3LoadImage8(handle, argv[1], &w, 1, &h, &pf)) == NULL)
+        THROW_TJ();
+    } else if (precision == 12) {
+      if ((srcBuf = tj3LoadImage12(handle, argv[1], &w, 1, &h, &pf)) == NULL)
+        THROW_TJ();
+    } else {
+      if ((srcBuf = tj3LoadImage16(handle, argv[1], &w, 1, &h, &pf)) == NULL)
+        THROW_TJ();
+    }
+    temp = strrchr(argv[1], '.');
+    if (temp != NULL) *temp = '\0';
+  }
+
+  if (quiet == 1 && !decompOnly) {
+    printf("All performance values in Mpixels/sec\n\n");
+    printf("Pixel     JPEG      JPEG  %s  %s   ",
+           doTile ? "Tile " : "Image", doTile ? "Tile " : "Image");
+    if (doYUV) printf("Encode  ");
+    printf("Comp    Comp    Decomp  ");
+    if (doYUV) printf("Decode");
+    printf("\n");
+    printf("Format    Format    %s  Width  Height  ",
+           lossless ? "PSV " : "Qual");
+    if (doYUV) printf("Perf    ");
+    printf("Perf    Ratio   Perf    ");
+    if (doYUV) printf("Perf");
+    printf("\n\n");
+  }
+
+  if (decompOnly) {
+    decompTest(argv[1]);
+    printf("\n");
+    goto bailout;
+  }
+  if (lossless) {
+    if (minQual < 1 || minQual > 7 || maxQual < 1 || maxQual > 7) {
+      puts("ERROR: PSV must be between 1 and 7.");
+      exit(1);
+    }
+  } else {
+    if (minQual < 1 || minQual > 100 || maxQual < 1 || maxQual > 100) {
+      puts("ERROR: Quality must be between 1 and 100.");
+      exit(1);
+    }
+  }
+  if (subsamp >= 0 && subsamp < TJ_NUMSAMP) {
+    for (i = maxQual; i >= minQual; i--)
+      fullTest(handle, srcBuf, w, h, subsamp, i, argv[1]);
+    printf("\n");
+  } else {
+    if (pf != TJPF_CMYK) {
+      for (i = maxQual; i >= minQual; i--)
+        fullTest(handle, srcBuf, w, h, TJSAMP_GRAY, i, argv[1]);
+      printf("\n");
+    }
+    for (i = maxQual; i >= minQual; i--)
+      fullTest(handle, srcBuf, w, h, TJSAMP_420, i, argv[1]);
+    printf("\n");
+    for (i = maxQual; i >= minQual; i--)
+      fullTest(handle, srcBuf, w, h, TJSAMP_422, i, argv[1]);
+    printf("\n");
+    for (i = maxQual; i >= minQual; i--)
+      fullTest(handle, srcBuf, w, h, TJSAMP_444, i, argv[1]);
+    printf("\n");
+  }
+
+bailout:
+  tj3Destroy(handle);
+  tj3Free(srcBuf);
+  return retval;
+}
diff --git a/3rdparty/libjpeg-turbo/src/tjutil.c b/3rdparty/libjpeg-turbo/src/tjutil.c
new file mode 100644
index 0000000000..2018160b16
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/tjutil.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C)2011, 2019 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include "tjutil.h"
+
+static double getFreq(void)
+{
+  LARGE_INTEGER freq;
+
+  if (!QueryPerformanceFrequency(&freq)) return 0.0;
+  return (double)freq.QuadPart;
+}
+
+static double f = -1.0;
+
+double getTime(void)
+{
+  LARGE_INTEGER t;
+
+  if (f < 0.0) f = getFreq();
+  if (f == 0.0) return (double)GetTickCount() / 1000.;
+  else {
+    QueryPerformanceCounter(&t);
+    return (double)t.QuadPart / f;
+  }
+}
+
+#else
+
+#include <stdlib.h>
+#include <sys/time.h>
+#include "tjutil.h"
+
+double getTime(void)
+{
+  struct timeval tv;
+
+  if (gettimeofday(&tv, NULL) < 0) return 0.0;
+  else return (double)tv.tv_sec + ((double)tv.tv_usec / 1000000.);
+}
+
+#endif
diff --git a/3rdparty/libjpeg-turbo/src/tjutil.h b/3rdparty/libjpeg-turbo/src/tjutil.h
new file mode 100644
index 0000000000..10272e9886
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/tjutil.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C)2011, 2022 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+#ifndef strcasecmp
+#define strcasecmp  stricmp
+#endif
+#ifndef strncasecmp
+#define strncasecmp  strnicmp
+#endif
+#endif
+
+#ifdef _MSC_VER
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
+#else
+#define SNPRINTF  snprintf
+#endif
+
+#ifndef min
+#define min(a, b)  ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef max
+#define max(a, b)  ((a) > (b) ? (a) : (b))
+#endif
+
+extern double getTime(void);
diff --git a/3rdparty/libjpeg-turbo/src/transupp.c b/3rdparty/libjpeg-turbo/src/transupp.c
new file mode 100644
index 0000000000..62587d3865
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/transupp.c
@@ -0,0 +1,2377 @@
+/*
+ * transupp.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2017, 2021-2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains image transformation routines and other utility code
+ * used by the jpegtran sample application.  These are NOT part of the core
+ * JPEG library.  But we keep these routines separate from jpegtran.c to
+ * ease the task of maintaining jpegtran-like programs that have other user
+ * interfaces.
+ */
+
+/* Although this file really shouldn't have access to the library internals,
+ * it's helpful to let it call jround_up() and jcopy_block_row().
+ */
+#define JPEG_INTERNALS
+
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "transupp.h"           /* My own external interface */
+#include "jpegapicomp.h"
+#include <ctype.h>              /* to declare isdigit() */
+
+
+#if JPEG_LIB_VERSION >= 70
+#define dstinfo_min_DCT_h_scaled_size  dstinfo->min_DCT_h_scaled_size
+#define dstinfo_min_DCT_v_scaled_size  dstinfo->min_DCT_v_scaled_size
+#else
+#define dstinfo_min_DCT_h_scaled_size  DCTSIZE
+#define dstinfo_min_DCT_v_scaled_size  DCTSIZE
+#endif
+
+
+#if TRANSFORMS_SUPPORTED
+
+/*
+ * Lossless image transformation routines.  These routines work on DCT
+ * coefficient arrays and thus do not require any lossy decompression
+ * or recompression of the image.
+ * Thanks to Guido Vollbeding for the initial design and code of this feature,
+ * and to Ben Jackson for introducing the cropping feature.
+ *
+ * Horizontal flipping is done in-place, using a single top-to-bottom
+ * pass through the virtual source array.  It will thus be much the
+ * fastest option for images larger than main memory.
+ *
+ * The other routines require a set of destination virtual arrays, so they
+ * need twice as much memory as jpegtran normally does.  The destination
+ * arrays are always written in normal scan order (top to bottom) because
+ * the virtual array manager expects this.  The source arrays will be scanned
+ * in the corresponding order, which means multiple passes through the source
+ * arrays for most of the transforms.  That could result in much thrashing
+ * if the image is larger than main memory.
+ *
+ * If cropping or trimming is involved, the destination arrays may be smaller
+ * than the source arrays.  Note it is not possible to do horizontal flip
+ * in-place when a nonzero Y crop offset is specified, since we'd have to move
+ * data from one block row to another but the virtual array manager doesn't
+ * guarantee we can touch more than one row at a time.  So in that case,
+ * we have to use a separate destination array.
+ *
+ * Some notes about the operating environment of the individual transform
+ * routines:
+ * 1. Both the source and destination virtual arrays are allocated from the
+ *    source JPEG object, and therefore should be manipulated by calling the
+ *    source's memory manager.
+ * 2. The destination's component count should be used.  It may be smaller
+ *    than the source's when forcing to grayscale.
+ * 3. Likewise the destination's sampling factors should be used.  When
+ *    forcing to grayscale the destination's sampling factors will be all 1,
+ *    and we may as well take that as the effective iMCU size.
+ * 4. When "trim" is in effect, the destination's dimensions will be the
+ *    trimmed values but the source's will be untrimmed.
+ * 5. When "crop" is in effect, the destination's dimensions will be the
+ *    cropped values but the source's will be uncropped.  Each transform
+ *    routine is responsible for picking up source data starting at the
+ *    correct X and Y offset for the crop region.  (The X and Y offsets
+ *    passed to the transform routines are measured in iMCU blocks of the
+ *    destination.)
+ * 6. All the routines assume that the source and destination buffers are
+ *    padded out to a full iMCU boundary.  This is true, although for the
+ *    source buffer it is an undocumented property of jdcoefct.c.
+ */
+
+
+LOCAL(void)
+dequant_comp(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+             jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
+{
+  JDIMENSION blk_x, blk_y;
+  int offset_y, k;
+  JQUANT_TBL *qtblptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW block;
+  JCOEFPTR ptr;
+
+  qtblptr = compptr->quant_table;
+  for (blk_y = 0; blk_y < compptr->height_in_blocks;
+       blk_y += compptr->v_samp_factor) {
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef_array, blk_y,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+      block = buffer[offset_y];
+      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+        ptr = block[blk_x];
+        for (k = 0; k < DCTSIZE2; k++)
+          if (qtblptr->quantval[k] != qtblptr1->quantval[k])
+            ptr[k] *= qtblptr->quantval[k] / qtblptr1->quantval[k];
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+requant_comp(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+             jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
+{
+  JDIMENSION blk_x, blk_y;
+  int offset_y, k;
+  JQUANT_TBL *qtblptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW block;
+  JCOEFPTR ptr;
+  JCOEF temp, qval;
+
+  qtblptr = compptr->quant_table;
+  for (blk_y = 0; blk_y < compptr->height_in_blocks;
+       blk_y += compptr->v_samp_factor) {
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef_array, blk_y,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+      block = buffer[offset_y];
+      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+        ptr = block[blk_x];
+        for (k = 0; k < DCTSIZE2; k++) {
+          temp = qtblptr->quantval[k];
+          qval = qtblptr1->quantval[k];
+          if (temp != qval && qval != 0) {
+            temp *= ptr[k];
+            /* The following quantization code is copied from jcdctmgr.c */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a, b)  a /= b
+#else
+#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
+#endif
+            if (temp < 0) {
+              temp = -temp;
+              temp += qval >> 1; /* for rounding */
+              DIVIDE_BY(temp, qval);
+              temp = -temp;
+            } else {
+              temp += qval >> 1; /* for rounding */
+              DIVIDE_BY(temp, qval);
+            }
+            ptr[k] = temp;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*
+ * Calculate largest common denominator using Euclid's algorithm.
+ */
+LOCAL(JCOEF)
+largest_common_denominator(JCOEF a, JCOEF b)
+{
+  JCOEF c;
+
+  do {
+    c = a % b;
+    a = b;
+    b = c;
+  } while (c);
+
+  return a;
+}
+
+
+LOCAL(void)
+adjust_quant(j_decompress_ptr srcinfo, jvirt_barray_ptr *src_coef_arrays,
+             j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
+             boolean trim, j_compress_ptr dstinfo)
+{
+  jpeg_component_info *compptr1, *compptr2;
+  JQUANT_TBL *qtblptr1, *qtblptr2, *qtblptr3;
+  int ci, k;
+
+  for (ci = 0; ci < dstinfo->num_components && ci < dropinfo->num_components;
+       ci++) {
+    compptr1 = srcinfo->comp_info + ci;
+    compptr2 = dropinfo->comp_info + ci;
+    qtblptr1 = compptr1->quant_table;
+    if (qtblptr1 == NULL)
+      ERREXIT1(srcinfo, JERR_NO_QUANT_TABLE, compptr1->quant_tbl_no);
+    qtblptr2 = compptr2->quant_table;
+    if (qtblptr2 == NULL)
+      ERREXIT1(dropinfo, JERR_NO_QUANT_TABLE, compptr2->quant_tbl_no);
+    for (k = 0; k < DCTSIZE2; k++) {
+      if (qtblptr1->quantval[k] != qtblptr2->quantval[k]) {
+        if (trim)
+          requant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr1);
+        else {
+          qtblptr3 = dstinfo->quant_tbl_ptrs[compptr1->quant_tbl_no];
+          for (k = 0; k < DCTSIZE2; k++)
+            if (qtblptr1->quantval[k] != qtblptr2->quantval[k])
+              qtblptr3->quantval[k] =
+                largest_common_denominator(qtblptr1->quantval[k],
+                                           qtblptr2->quantval[k]);
+          dequant_comp(srcinfo, compptr1, src_coef_arrays[ci], qtblptr3);
+          dequant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr3);
+        }
+        break;
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_drop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
+        JDIMENSION drop_width, JDIMENSION drop_height)
+/* Drop (insert) the contents of another image into the source image.  If the
+ * number of components in the drop image is smaller than the number of
+ * components in the destination image, then we fill in the remaining
+ * components with zero.  This allows for dropping the contents of grayscale
+ * images into (arbitrarily sampled) color images.
+ */
+{
+  JDIMENSION comp_width, comp_height;
+  JDIMENSION blk_y, x_drop_blocks, y_drop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = drop_width * compptr->h_samp_factor;
+    comp_height = drop_height * compptr->v_samp_factor;
+    x_drop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_drop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (blk_y = 0; blk_y < comp_height; blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], blk_y + y_drop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (ci < dropinfo->num_components) {
+        src_buffer = (*dropinfo->mem->access_virt_barray)
+          ((j_common_ptr)dropinfo, drop_coef_arrays[ci], blk_y,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+        for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+          jcopy_block_row(src_buffer[offset_y],
+                          dst_buffer[offset_y] + x_drop_blocks, comp_width);
+        }
+      } else {
+        for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+          memset(dst_buffer[offset_y] + x_drop_blocks, 0,
+                 comp_width * sizeof(JBLOCK));
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop. */
+{
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  /* We simply have to copy the right amount of data (the destination's
+   * image size) starting at the given X and Y offsets in the source.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                        dst_buffer[offset_y], compptr->width_in_blocks);
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_zero(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                 jvirt_barray_ptr *src_coef_arrays,
+                 jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: If the destination size is larger than the source, we fill in the
+ * expanded region with zero (neutral gray).  Note that we also have to zero
+ * partial iMCUs at the right and bottom edge of the source image area in this
+ * case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (dstinfo->_jpeg_width > srcinfo->output_width) {
+          if (x_crop_blocks > 0) {
+            memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
+          }
+          jcopy_block_row(src_buffer[offset_y],
+                          dst_buffer[offset_y] + x_crop_blocks, comp_width);
+          if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+            memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                   (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                   sizeof(JBLOCK));
+          }
+        } else {
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y], compptr->width_in_blocks);
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_flat(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                 jvirt_barray_ptr *src_coef_arrays,
+                 jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: The destination width is larger than the source, and we fill in
+ * the expanded region with the DC coefficient of the adjacent block.  Note
+ * that we also have to fill partial iMCUs at the right and bottom edge of the
+ * source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JCOEF dc;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+          FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (x_crop_blocks > 0) {
+          memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
+          dc = src_buffer[offset_y][0][0];
+          for (dst_blk_x = 0; dst_blk_x < x_crop_blocks; dst_blk_x++) {
+            dst_buffer[offset_y][dst_blk_x][0] = dc;
+          }
+        }
+        jcopy_block_row(src_buffer[offset_y],
+                        dst_buffer[offset_y] + x_crop_blocks, comp_width);
+        if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+          memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                 (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                 sizeof(JBLOCK));
+          dc = src_buffer[offset_y][comp_width - 1][0];
+          for (dst_blk_x = x_crop_blocks + comp_width;
+               dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            dst_buffer[offset_y][dst_blk_x][0] = dc;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_reflect(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                    jvirt_barray_ptr *src_coef_arrays,
+                    jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: The destination width is larger than the source, and we fill in
+ * the expanded region with repeated reflections of the source image.  Note
+ * that we also have to fill partial iMCUs at the right and bottom edge of the
+ * source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, src_blk_x;
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        /* Copy source region */
+        jcopy_block_row(src_buffer[offset_y],
+                        dst_buffer[offset_y] + x_crop_blocks, comp_width);
+        if (x_crop_blocks > 0) {
+          /* Reflect to left */
+          dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks;
+          for (dst_blk_x = x_crop_blocks; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;      /* (re)set axis of reflection */
+            for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
+                 src_blk_x--, dst_blk_x--) {
+              dst_ptr = *(--dst_row_ptr);   /* destination goes left */
+              src_ptr = *src_row_ptr++;     /* source goes right */
+              /* This unrolled loop doesn't need to know which row it's on. */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;    /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                               change */
+              }
+            }
+          }
+        }
+        if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+          /* Reflect to right */
+          dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks + comp_width;
+          for (dst_blk_x = compptr->width_in_blocks - x_crop_blocks - comp_width;
+               dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;      /* (re)set axis of reflection */
+            for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
+                 src_blk_x--, dst_blk_x--) {
+              dst_ptr = *dst_row_ptr++;     /* destination goes right */
+              src_ptr = *(--src_row_ptr);   /* source goes left */
+              /* This unrolled loop doesn't need to know which row it's on. */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;    /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                               change */
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_wipe(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        JDIMENSION drop_width, JDIMENSION drop_height)
+/* Wipe - discard image contents of specified region and fill with zero
+ * (neutral gray)
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width;
+  JDIMENSION y_wipe_blocks, wipe_bottom;
+  int ci, offset_y;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
+    for (; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flatten(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           JDIMENSION drop_width, JDIMENSION drop_height)
+/* Flatten - discard image contents of specified region, similarly to wipe,
+ * but fill with the average of adjacent blocks instead of zero.
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width, wipe_right;
+  JDIMENSION y_wipe_blocks, wipe_bottom, blk_x;
+  int ci, offset_y, dc_left_value, dc_right_value, average;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    wipe_right = wipe_width + x_wipe_blocks;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
+    for (; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
+        if (x_wipe_blocks > 0) {
+          dc_left_value = buffer[offset_y][x_wipe_blocks - 1][0];
+          if (wipe_right < compptr->width_in_blocks) {
+            dc_right_value = buffer[offset_y][wipe_right][0];
+            average = (dc_left_value + dc_right_value) >> 1;
+          } else {
+            average = dc_left_value;
+          }
+        } else if (wipe_right < compptr->width_in_blocks) {
+          average = buffer[offset_y][wipe_right][0];
+        } else continue;
+        for (blk_x = x_wipe_blocks; blk_x < wipe_right; blk_x++) {
+          buffer[offset_y][blk_x][0] = (JCOEF)average;
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_reflect(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, jvirt_barray_ptr *src_coef_arrays,
+           JDIMENSION drop_width, JDIMENSION drop_height)
+/* Reflect - discard image contents of specified region, similarly to wipe,
+ * but fill with repeated reflections of the outside region instead of zero.
+ * NB: y_crop_offset is assumed to be zero.
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width;
+  JDIMENSION y_wipe_blocks, wipe_bottom;
+  JDIMENSION src_blk_x, dst_blk_x;
+  int ci, k, offset_y;
+  JBLOCKARRAY buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor;
+    for (y_wipe_blocks = 0; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (x_wipe_blocks > 0) {
+          /* Reflect from left */
+          dst_row_ptr = buffer[offset_y] + x_wipe_blocks;
+          for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;     /* (re)set axis of reflection */
+            for (src_blk_x = x_wipe_blocks;
+                 src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
+              dst_ptr = *dst_row_ptr++;    /* destination goes right */
+              src_ptr = *(--src_row_ptr);  /* source goes left */
+              /* this unrolled loop doesn't need to know which row it's on... */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;   /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
+              }
+            }
+          }
+        } else if (compptr->width_in_blocks > x_wipe_blocks + wipe_width) {
+          /* Reflect from right */
+          dst_row_ptr = buffer[offset_y] + x_wipe_blocks + wipe_width;
+          for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;     /* (re)set axis of reflection */
+            src_blk_x = compptr->width_in_blocks - x_wipe_blocks - wipe_width;
+            for (; src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
+              dst_ptr = *(--dst_row_ptr);  /* destination goes left */
+              src_ptr = *src_row_ptr++;    /* source goes right */
+              /* this unrolled loop doesn't need to know which row it's on... */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;   /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
+              }
+            }
+          }
+        } else {
+          memset(buffer[offset_y] + x_wipe_blocks, 0,
+                 wipe_width * sizeof(JBLOCK));
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h_no_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                  JDIMENSION x_crop_offset, jvirt_barray_ptr *src_coef_arrays)
+/* Horizontal flip; done in-place, so no separate dest array is required.
+ * NB: this only works when y_crop_offset is zero.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY buffer;
+  JCOEFPTR ptr1, ptr2;
+  JCOEF temp1, temp2;
+  jpeg_component_info *compptr;
+
+  /* Horizontal mirroring of DCT blocks is accomplished by swapping
+   * pairs of blocks in-place.  Within a DCT block, we perform horizontal
+   * mirroring by changing the signs of odd-numbered columns.
+   * Partial iMCUs at the right edge are left untouched.
+   */
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    for (blk_y = 0; blk_y < compptr->height_in_blocks;
+         blk_y += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        /* Do the mirroring */
+        for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
+          ptr1 = buffer[offset_y][blk_x];
+          ptr2 = buffer[offset_y][comp_width - blk_x - 1];
+          /* this unrolled loop doesn't need to know which row it's on... */
+          for (k = 0; k < DCTSIZE2; k += 2) {
+            temp1 = *ptr1;      /* swap even column */
+            temp2 = *ptr2;
+            *ptr1++ = temp2;
+            *ptr2++ = temp1;
+            temp1 = *ptr1;      /* swap odd column with sign change */
+            temp2 = *ptr2;
+            *ptr1++ = -temp2;
+            *ptr2++ = -temp1;
+          }
+        }
+        if (x_crop_blocks > 0) {
+          /* Now left-justify the portion of the data to be kept.
+           * We can't use a single jcopy_block_row() call because that routine
+           * depends on memcpy(), whose behavior is unspecified for overlapping
+           * source and destination areas.  Sigh.
+           */
+          for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+            jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+                            buffer[offset_y] + blk_x, (JDIMENSION)1);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* Horizontal flip in general cropping case */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Here we must output into a separate array because we can't touch
+   * different rows of a single virtual array simultaneously.  Otherwise,
+   * this is essentially the same as the routine above.
+   */
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        dst_row_ptr = dst_buffer[offset_y];
+        src_row_ptr = src_buffer[offset_y];
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x++) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Do the mirrorable blocks */
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+            /* this unrolled loop doesn't need to know which row it's on... */
+            for (k = 0; k < DCTSIZE2; k += 2) {
+              *dst_ptr++ = *src_ptr++;    /* copy even column */
+              *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                             change */
+            }
+          } else {
+            /* Copy last partial block(s) verbatim */
+            jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                            dst_row_ptr + dst_blk_x, (JDIMENSION)1);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_v(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* Vertical flip */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* We output into a separate array because we can't touch different
+   * rows of the source virtual array simultaneously.  Otherwise, this
+   * is a pretty straightforward analog of horizontal flip.
+   * Within a DCT block, vertical mirroring is done by changing the signs
+   * of odd-numbered rows.
+   * Partial iMCUs at the bottom edge are copied verbatim.
+   */
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION)compptr->v_samp_factor,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      } else {
+        /* Bottom-edge blocks will be copied verbatim. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          dst_row_ptr = dst_buffer[offset_y];
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          src_row_ptr += x_crop_blocks;
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[dst_blk_x];
+            for (i = 0; i < DCTSIZE; i += 2) {
+              /* copy even row */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = *src_ptr++;
+              /* copy odd row with sign change */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = -(*src_ptr++);
+            }
+          }
+        } else {
+          /* Just copy row verbatim. */
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y], compptr->width_in_blocks);
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transpose(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+             JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+             jvirt_barray_ptr *src_coef_arrays,
+             jvirt_barray_ptr *dst_coef_arrays)
+/* Transpose source into destination */
+{
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Transposing pixels within a block just requires transposing the
+   * DCT coefficients.
+   * Partial iMCUs at the edges require no special treatment; we simply
+   * process all the available DCT blocks for every component.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION)compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            src_ptr =
+              src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
+            for (i = 0; i < DCTSIZE; i++)
+              for (j = 0; j < DCTSIZE; j++)
+                dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_90(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* 90 degree rotation is equivalent to
+ *   1. Transposing the image;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) right edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_cols = srcinfo->output_height /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION)compptr->h_samp_factor,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          } else {
+            /* Edge blocks are transposed but not mirrored. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                i++;
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_270(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
+/* 270 degree rotation is equivalent to
+ *   1. Horizontal mirroring;
+ *   2. Transposing the image.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) bottom edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_rows = srcinfo->output_width /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION)compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[offset_x]
+                [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++) {
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  j++;
+                  dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                }
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_180(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
+/* 180 degree rotation is equivalent to
+ *   1. Vertical mirroring;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the vertically mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION)compptr->v_samp_factor,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      } else {
+        /* Bottom-edge rows are only mirrored horizontally. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        dst_row_ptr = dst_buffer[offset_y];
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored both ways. */
+              src_ptr =
+                src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                /* For even row, negate every odd column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = *src_ptr++;
+                  *dst_ptr++ = -(*src_ptr++);
+                }
+                /* For odd row, negate every even column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = -(*src_ptr++);
+                  *dst_ptr++ = *src_ptr++;
+                }
+              }
+            } else {
+              /* Any remaining right-edge blocks are only mirrored vertically. */
+              src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = *src_ptr++;
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = -(*src_ptr++);
+              }
+            }
+          }
+        } else {
+          /* Remaining rows are just mirrored horizontally. */
+          src_row_ptr = src_buffer[offset_y];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored. */
+              dst_ptr = dst_row_ptr[dst_blk_x];
+              src_ptr =
+                src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE2; i += 2) {
+                *dst_ptr++ = *src_ptr++;
+                *dst_ptr++ = -(*src_ptr++);
+              }
+            } else {
+              /* Any remaining right-edge blocks are only copied. */
+              jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                              dst_row_ptr + dst_blk_x, (JDIMENSION)1);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transverse(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+              JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+              jvirt_barray_ptr *src_coef_arrays,
+              jvirt_barray_ptr *dst_coef_arrays)
+/* Transverse transpose is equivalent to
+ *   1. 180 degree rotation;
+ *   2. Transposition;
+ * or
+ *   1. Horizontal mirroring;
+ *   2. Transposition;
+ *   3. Horizontal mirroring.
+ * These steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_height /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_width /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION)compptr->h_samp_factor,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          } else {
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Block is within the mirrorable area. */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                  }
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  }
+                }
+              } else {
+                /* Right-edge blocks are mirrored in y only */
+                src_ptr = src_buffer[offset_x]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                  }
+                }
+              }
+            } else {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Bottom-edge blocks are mirrored in x only */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                }
+              } else {
+                /* At lower right corner, just transpose, no mirroring */
+                src_ptr = src_buffer[offset_x]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++)
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
+ * Returns TRUE if valid integer found, FALSE if not.
+ * *strptr is advanced over the digit string, and *result is set to its value.
+ */
+
+LOCAL(boolean)
+jt_read_integer(const char **strptr, JDIMENSION *result)
+{
+  const char *ptr = *strptr;
+  JDIMENSION val = 0;
+
+  for (; isdigit(*ptr); ptr++) {
+    val = val * 10 + (JDIMENSION)(*ptr - '0');
+  }
+  *result = val;
+  if (ptr == *strptr)
+    return FALSE;               /* oops, no digits */
+  *strptr = ptr;
+  return TRUE;
+}
+
+
+/* Parse a crop specification (written in X11 geometry style).
+ * The routine returns TRUE if the spec string is valid, FALSE if not.
+ *
+ * The crop spec string should have the format
+ *      <width>[{fr}]x<height>[{fr}]{+-}<xoffset>{+-}<yoffset>
+ * where width, height, xoffset, and yoffset are unsigned integers.
+ * Each of the elements can be omitted to indicate a default value.
+ * (A weakness of this style is that it is not possible to omit xoffset
+ * while specifying yoffset, since they look alike.)
+ *
+ * This code is loosely based on XParseGeometry from the X11 distribution.
+ */
+
+GLOBAL(boolean)
+jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
+{
+  info->crop = FALSE;
+  info->crop_width_set = JCROP_UNSET;
+  info->crop_height_set = JCROP_UNSET;
+  info->crop_xoffset_set = JCROP_UNSET;
+  info->crop_yoffset_set = JCROP_UNSET;
+
+  if (isdigit(*spec)) {
+    /* fetch width */
+    if (!jt_read_integer(&spec, &info->crop_width))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_width_set = JCROP_FORCE;
+    } else if (*spec == 'r' || *spec == 'R') {
+      spec++;
+      info->crop_width_set = JCROP_REFLECT;
+    } else
+      info->crop_width_set = JCROP_POS;
+  }
+  if (*spec == 'x' || *spec == 'X') {
+    /* fetch height */
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_height))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_height_set = JCROP_FORCE;
+    } else if (*spec == 'r' || *spec == 'R') {
+      spec++;
+      info->crop_height_set = JCROP_REFLECT;
+    } else
+      info->crop_height_set = JCROP_POS;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch xoffset */
+    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_xoffset))
+      return FALSE;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch yoffset */
+    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_yoffset))
+      return FALSE;
+  }
+  /* We had better have gotten to the end of the string. */
+  if (*spec != '\0')
+    return FALSE;
+  info->crop = TRUE;
+  return TRUE;
+}
+
+
+/* Trim off any partial iMCUs on the indicated destination edge */
+
+LOCAL(void)
+trim_right_edge(jpeg_transform_info *info, JDIMENSION full_width)
+{
+  JDIMENSION MCU_cols;
+
+  MCU_cols = info->output_width / info->iMCU_sample_width;
+  if (MCU_cols > 0 && info->x_crop_offset + MCU_cols ==
+      full_width / info->iMCU_sample_width)
+    info->output_width = MCU_cols * info->iMCU_sample_width;
+}
+
+LOCAL(void)
+trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
+{
+  JDIMENSION MCU_rows;
+
+  MCU_rows = info->output_height / info->iMCU_sample_height;
+  if (MCU_rows > 0 && info->y_crop_offset + MCU_rows ==
+      full_height / info->iMCU_sample_height)
+    info->output_height = MCU_rows * info->iMCU_sample_height;
+}
+
+
+/* Request any required workspace.
+ *
+ * This routine figures out the size that the output image will be
+ * (which implies that all the transform parameters must be set before
+ * it is called).
+ *
+ * We allocate the workspace virtual arrays from the source decompression
+ * object, so that all the arrays (both the original data and the workspace)
+ * will be taken into account while making memory management decisions.
+ * Hence, this routine must be called after jpeg_read_header (which reads
+ * the image dimensions) and before jpeg_read_coefficients (which realizes
+ * the source's virtual arrays).
+ *
+ * This function returns FALSE right away if -perfect is given
+ * and transformation is not perfect.  Otherwise returns TRUE.
+ */
+
+GLOBAL(boolean)
+jtransform_request_workspace(j_decompress_ptr srcinfo,
+                             jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *coef_arrays;
+  boolean need_workspace, transpose_it;
+  jpeg_component_info *compptr;
+  JDIMENSION xoffset, yoffset, dtemp;
+  JDIMENSION width_in_iMCUs, height_in_iMCUs;
+  JDIMENSION width_in_blocks, height_in_blocks;
+  int itemp, ci, h_samp_factor, v_samp_factor;
+
+  /* Determine number of components in output image */
+  if (info->force_grayscale &&
+      srcinfo->jpeg_color_space == JCS_YCbCr &&
+      srcinfo->num_components == 3)
+    /* We'll only process the first component */
+    info->num_components = 1;
+  else
+    /* Process all the components */
+    info->num_components = srcinfo->num_components;
+
+  /* Compute output image dimensions and related values. */
+#if JPEG_LIB_VERSION >= 80
+  jpeg_core_output_dimensions(srcinfo);
+#else
+  srcinfo->output_width = srcinfo->image_width;
+  srcinfo->output_height = srcinfo->image_height;
+#endif
+
+  /* Return right away if -perfect is given and transformation is not perfect.
+   */
+  if (info->perfect) {
+    if (info->num_components == 1) {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    } else {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    }
+  }
+
+  /* If there is only one output component, force the iMCU size to be 1;
+   * else use the source iMCU size.  (This allows us to do the right thing
+   * when reducing color to grayscale, and also provides a handy way of
+   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
+   */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+    info->output_width = srcinfo->output_height;
+    info->output_height = srcinfo->output_width;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+    }
+    break;
+  default:
+    info->output_width = srcinfo->output_width;
+    info->output_height = srcinfo->output_height;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+    }
+    break;
+  }
+
+  /* If cropping has been requested, compute the crop area's position and
+   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
+   */
+  if (info->crop) {
+    /* Insert default values for unset crop parameters */
+    if (info->crop_xoffset_set == JCROP_UNSET)
+      info->crop_xoffset = 0;   /* default to +0 */
+    if (info->crop_yoffset_set == JCROP_UNSET)
+      info->crop_yoffset = 0;   /* default to +0 */
+    if (info->crop_width_set == JCROP_UNSET) {
+      if (info->crop_xoffset >= info->output_width)
+        ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      info->crop_width = info->output_width - info->crop_xoffset;
+    } else {
+      /* Check for crop extension */
+      if (info->crop_width > info->output_width) {
+        /* Crop extension does not work when transforming! */
+        if (info->transform != JXFORM_NONE ||
+            info->crop_xoffset >= info->crop_width ||
+            info->crop_xoffset > info->crop_width - info->output_width)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+        if (info->crop_xoffset >= info->output_width ||
+            info->crop_width <= 0 ||
+            info->crop_xoffset > info->output_width - info->crop_width)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    if (info->crop_height_set == JCROP_UNSET) {
+      if (info->crop_yoffset >= info->output_height)
+        ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      info->crop_height = info->output_height - info->crop_yoffset;
+    } else {
+      /* Check for crop extension */
+      if (info->crop_height > info->output_height) {
+        /* Crop extension does not work when transforming! */
+        if (info->transform != JXFORM_NONE ||
+            info->crop_yoffset >= info->crop_height ||
+            info->crop_yoffset > info->crop_height - info->output_height)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+        if (info->crop_yoffset >= info->output_height ||
+            info->crop_height <= 0 ||
+            info->crop_yoffset > info->output_height - info->crop_height)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    /* Convert negative crop offsets into regular offsets */
+    if (info->crop_xoffset_set != JCROP_NEG)
+      xoffset = info->crop_xoffset;
+    else if (info->crop_width > info->output_width) /* crop extension */
+      xoffset = info->crop_width - info->output_width - info->crop_xoffset;
+    else
+      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    if (info->crop_yoffset_set != JCROP_NEG)
+      yoffset = info->crop_yoffset;
+    else if (info->crop_height > info->output_height) /* crop extension */
+      yoffset = info->crop_height - info->output_height - info->crop_yoffset;
+    else
+      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+    /* Now adjust so that upper left corner falls at an iMCU boundary */
+    switch (info->transform) {
+    case JXFORM_DROP:
+      /* Ensure the effective drop region will not exceed the requested */
+      itemp = info->iMCU_sample_width;
+      dtemp = itemp - 1 - ((xoffset + itemp - 1) % itemp);
+      xoffset += dtemp;
+      if (info->crop_width <= dtemp)
+        info->drop_width = 0;
+      else if (xoffset + info->crop_width - dtemp == info->output_width)
+        /* Matching right edge: include partial iMCU */
+        info->drop_width = (info->crop_width - dtemp + itemp - 1) / itemp;
+      else
+        info->drop_width = (info->crop_width - dtemp) / itemp;
+      itemp = info->iMCU_sample_height;
+      dtemp = itemp - 1 - ((yoffset + itemp - 1) % itemp);
+      yoffset += dtemp;
+      if (info->crop_height <= dtemp)
+        info->drop_height = 0;
+      else if (yoffset + info->crop_height - dtemp == info->output_height)
+        /* Matching bottom edge: include partial iMCU */
+        info->drop_height = (info->crop_height - dtemp + itemp - 1) / itemp;
+      else
+        info->drop_height = (info->crop_height - dtemp) / itemp;
+      /* Check if sampling factors match for dropping */
+      if (info->drop_width != 0 && info->drop_height != 0)
+        for (ci = 0; ci < info->num_components &&
+                     ci < info->drop_ptr->num_components; ci++) {
+          if (info->drop_ptr->comp_info[ci].h_samp_factor *
+              srcinfo->max_h_samp_factor !=
+              srcinfo->comp_info[ci].h_samp_factor *
+              info->drop_ptr->max_h_samp_factor)
+            ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
+              info->drop_ptr->comp_info[ci].h_samp_factor,
+              info->drop_ptr->max_h_samp_factor,
+              srcinfo->comp_info[ci].h_samp_factor,
+              srcinfo->max_h_samp_factor, 'h');
+          if (info->drop_ptr->comp_info[ci].v_samp_factor *
+              srcinfo->max_v_samp_factor !=
+              srcinfo->comp_info[ci].v_samp_factor *
+              info->drop_ptr->max_v_samp_factor)
+            ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
+              info->drop_ptr->comp_info[ci].v_samp_factor,
+              info->drop_ptr->max_v_samp_factor,
+              srcinfo->comp_info[ci].v_samp_factor,
+              srcinfo->max_v_samp_factor, 'v');
+        }
+      break;
+    case JXFORM_WIPE:
+      /* Ensure the effective wipe region will cover the requested */
+      info->drop_width = (JDIMENSION)jdiv_round_up
+        ((long)(info->crop_width + (xoffset % info->iMCU_sample_width)),
+         (long)info->iMCU_sample_width);
+      info->drop_height = (JDIMENSION)jdiv_round_up
+        ((long)(info->crop_height + (yoffset % info->iMCU_sample_height)),
+         (long)info->iMCU_sample_height);
+      break;
+    default:
+      /* Ensure the effective crop region will cover the requested */
+      if (info->crop_width_set == JCROP_FORCE ||
+          info->crop_width > info->output_width)
+        info->output_width = info->crop_width;
+      else
+        info->output_width =
+          info->crop_width + (xoffset % info->iMCU_sample_width);
+      if (info->crop_height_set == JCROP_FORCE ||
+          info->crop_height > info->output_height)
+        info->output_height = info->crop_height;
+      else
+        info->output_height =
+          info->crop_height + (yoffset % info->iMCU_sample_height);
+    }
+    /* Save x/y offsets measured in iMCUs */
+    info->x_crop_offset = xoffset / info->iMCU_sample_width;
+    info->y_crop_offset = yoffset / info->iMCU_sample_height;
+  } else {
+    info->x_crop_offset = 0;
+    info->y_crop_offset = 0;
+  }
+
+  /* Figure out whether we need workspace arrays,
+   * and if so whether they are transposed relative to the source.
+   */
+  need_workspace = FALSE;
+  transpose_it = FALSE;
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0 ||
+        info->output_width > srcinfo->output_width ||
+        info->output_height > srcinfo->output_height)
+      need_workspace = TRUE;
+    /* No workspace needed if neither cropping nor transforming */
+    break;
+  case JXFORM_FLIP_H:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_width);
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      need_workspace = TRUE;
+    /* do_flip_h_no_crop doesn't need a workspace array */
+    break;
+  case JXFORM_FLIP_V:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_TRANSPOSE:
+    /* transpose does NOT have to trim anything */
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_height);
+      trim_bottom_edge(info, srcinfo->output_width);
+    }
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_90:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_180:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_width);
+      trim_bottom_edge(info, srcinfo->output_height);
+    }
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_ROT_270:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_width);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_WIPE:
+    break;
+  case JXFORM_DROP:
+    break;
+  }
+
+  /* Allocate workspace if needed.
+   * Note that we allocate arrays padded out to the next iMCU boundary,
+   * so that transform routines need not worry about missing edge blocks.
+   */
+  if (need_workspace) {
+    coef_arrays = (jvirt_barray_ptr *)
+      (*srcinfo->mem->alloc_small) ((j_common_ptr)srcinfo, JPOOL_IMAGE,
+                sizeof(jvirt_barray_ptr) * info->num_components);
+    width_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long)info->output_width, (long)info->iMCU_sample_width);
+    height_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long)info->output_height, (long)info->iMCU_sample_height);
+    for (ci = 0; ci < info->num_components; ci++) {
+      compptr = srcinfo->comp_info + ci;
+      if (info->num_components == 1) {
+        /* we're going to force samp factors to 1x1 in this case */
+        h_samp_factor = v_samp_factor = 1;
+      } else if (transpose_it) {
+        h_samp_factor = compptr->v_samp_factor;
+        v_samp_factor = compptr->h_samp_factor;
+      } else {
+        h_samp_factor = compptr->h_samp_factor;
+        v_samp_factor = compptr->v_samp_factor;
+      }
+      width_in_blocks = width_in_iMCUs * h_samp_factor;
+      height_in_blocks = height_in_iMCUs * v_samp_factor;
+      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
+        ((j_common_ptr)srcinfo, JPOOL_IMAGE, FALSE,
+         width_in_blocks, height_in_blocks, (JDIMENSION)v_samp_factor);
+    }
+    info->workspace_coef_arrays = coef_arrays;
+  } else
+    info->workspace_coef_arrays = NULL;
+
+  return TRUE;
+}
+
+
+/* Transpose destination image parameters */
+
+LOCAL(void)
+transpose_critical_parameters(j_compress_ptr dstinfo)
+{
+  int tblno, i, j, ci, itemp;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtblptr;
+  JDIMENSION jtemp;
+  UINT16 qtemp;
+
+  /* Transpose image dimensions */
+  jtemp = dstinfo->image_width;
+  dstinfo->image_width = dstinfo->image_height;
+  dstinfo->image_height = jtemp;
+#if JPEG_LIB_VERSION >= 70
+  itemp = dstinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = itemp;
+#endif
+
+  /* Transpose sampling factors */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    itemp = compptr->h_samp_factor;
+    compptr->h_samp_factor = compptr->v_samp_factor;
+    compptr->v_samp_factor = itemp;
+  }
+
+  /* Transpose quantization tables */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    qtblptr = dstinfo->quant_tbl_ptrs[tblno];
+    if (qtblptr != NULL) {
+      for (i = 0; i < DCTSIZE; i++) {
+        for (j = 0; j < i; j++) {
+          qtemp = qtblptr->quantval[i * DCTSIZE + j];
+          qtblptr->quantval[i * DCTSIZE + j] =
+            qtblptr->quantval[j * DCTSIZE + i];
+          qtblptr->quantval[j * DCTSIZE + i] = qtemp;
+        }
+      }
+    }
+  }
+}
+
+
+/* Adjust Exif image parameters.
+ *
+ * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
+ */
+
+LOCAL(void)
+adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
+                       JDIMENSION new_height)
+{
+  boolean is_motorola; /* Flag for byte order */
+  unsigned int number_of_tags, tagnum;
+  unsigned int firstoffset, offset;
+  JDIMENSION new_value;
+
+  if (length < 12) return; /* Length of an IFD entry */
+
+  /* Discover byte order */
+  if (data[0] == 0x49 && data[1] == 0x49)
+    is_motorola = FALSE;
+  else if (data[0] == 0x4D && data[1] == 0x4D)
+    is_motorola = TRUE;
+  else
+    return;
+
+  /* Check Tag Mark */
+  if (is_motorola) {
+    if (data[2] != 0) return;
+    if (data[3] != 0x2A) return;
+  } else {
+    if (data[3] != 0) return;
+    if (data[2] != 0x2A) return;
+  }
+
+  /* Get first IFD offset (offset to IFD0) */
+  if (is_motorola) {
+    if (data[4] != 0) return;
+    if (data[5] != 0) return;
+    firstoffset = data[6];
+    firstoffset <<= 8;
+    firstoffset += data[7];
+  } else {
+    if (data[7] != 0) return;
+    if (data[6] != 0) return;
+    firstoffset = data[5];
+    firstoffset <<= 8;
+    firstoffset += data[4];
+  }
+  if (firstoffset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this IFD */
+  if (is_motorola) {
+    number_of_tags = data[firstoffset];
+    number_of_tags <<= 8;
+    number_of_tags += data[firstoffset + 1];
+  } else {
+    number_of_tags = data[firstoffset + 1];
+    number_of_tags <<= 8;
+    number_of_tags += data[firstoffset];
+  }
+  if (number_of_tags == 0) return;
+  firstoffset += 2;
+
+  /* Search for ExifSubIFD offset Tag in IFD0 */
+  for (;;) {
+    if (firstoffset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = data[firstoffset];
+      tagnum <<= 8;
+      tagnum += data[firstoffset + 1];
+    } else {
+      tagnum = data[firstoffset + 1];
+      tagnum <<= 8;
+      tagnum += data[firstoffset];
+    }
+    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
+    if (--number_of_tags == 0) return;
+    firstoffset += 12;
+  }
+
+  /* Get the ExifSubIFD offset */
+  if (is_motorola) {
+    if (data[firstoffset + 8] != 0) return;
+    if (data[firstoffset + 9] != 0) return;
+    offset = data[firstoffset + 10];
+    offset <<= 8;
+    offset += data[firstoffset + 11];
+  } else {
+    if (data[firstoffset + 11] != 0) return;
+    if (data[firstoffset + 10] != 0) return;
+    offset = data[firstoffset + 9];
+    offset <<= 8;
+    offset += data[firstoffset + 8];
+  }
+  if (offset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this SubIFD */
+  if (is_motorola) {
+    number_of_tags = data[offset];
+    number_of_tags <<= 8;
+    number_of_tags += data[offset + 1];
+  } else {
+    number_of_tags = data[offset + 1];
+    number_of_tags <<= 8;
+    number_of_tags += data[offset];
+  }
+  if (number_of_tags < 2) return;
+  offset += 2;
+
+  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
+  do {
+    if (offset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = data[offset];
+      tagnum <<= 8;
+      tagnum += data[offset + 1];
+    } else {
+      tagnum = data[offset + 1];
+      tagnum <<= 8;
+      tagnum += data[offset];
+    }
+    if (tagnum == 0xA002 || tagnum == 0xA003) {
+      if (tagnum == 0xA002)
+        new_value = new_width; /* ExifImageWidth Tag */
+      else
+        new_value = new_height; /* ExifImageHeight Tag */
+      if (is_motorola) {
+        data[offset + 2] = 0; /* Format = unsigned long (4 octets) */
+        data[offset + 3] = 4;
+        data[offset + 4] = 0; /* Number Of Components = 1 */
+        data[offset + 5] = 0;
+        data[offset + 6] = 0;
+        data[offset + 7] = 1;
+        data[offset + 8] = 0;
+        data[offset + 9] = 0;
+        data[offset + 10] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset + 11] = (JOCTET)(new_value & 0xFF);
+      } else {
+        data[offset + 2] = 4; /* Format = unsigned long (4 octets) */
+        data[offset + 3] = 0;
+        data[offset + 4] = 1; /* Number Of Components = 1 */
+        data[offset + 5] = 0;
+        data[offset + 6] = 0;
+        data[offset + 7] = 0;
+        data[offset + 8] = (JOCTET)(new_value & 0xFF);
+        data[offset + 9] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset + 10] = 0;
+        data[offset + 11] = 0;
+      }
+    }
+    offset += 12;
+  } while (--number_of_tags);
+}
+
+
+/* Adjust output image parameters as needed.
+ *
+ * This must be called after jpeg_copy_critical_parameters()
+ * and before jpeg_write_coefficients().
+ *
+ * The return value is the set of virtual coefficient arrays to be written
+ * (either the ones allocated by jtransform_request_workspace, or the
+ * original source data arrays).  The caller will need to pass this value
+ * to jpeg_write_coefficients().
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                             jvirt_barray_ptr *src_coef_arrays,
+                             jpeg_transform_info *info)
+{
+  /* If force-to-grayscale is requested, adjust destination parameters */
+  if (info->force_grayscale) {
+    /* First, ensure we have YCbCr or grayscale data, and that the source's
+     * Y channel is full resolution.  (No reasonable person would make Y
+     * be less than full resolution, so actually coping with that case
+     * isn't worth extra code space.  But we check it to avoid crashing.)
+     */
+    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+          dstinfo->num_components == 3) ||
+         (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+          dstinfo->num_components == 1)) &&
+        srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+        srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
+       * properly.  Among other things, it sets the target h_samp_factor &
+       * v_samp_factor to 1, which typically won't match the source.
+       * We have to preserve the source's quantization table number, however.
+       */
+      int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
+      jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
+      dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
+    } else {
+      /* Sorry, can't do it */
+      ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
+    }
+  } else if (info->num_components == 1) {
+    /* For a single-component source, we force the destination sampling factors
+     * to 1x1, with or without force_grayscale.  This is useful because some
+     * decoders choke on grayscale images with other sampling factors.
+     */
+    dstinfo->comp_info[0].h_samp_factor = 1;
+    dstinfo->comp_info[0].v_samp_factor = 1;
+  }
+
+  /* Correct the destination's image dimensions as necessary
+   * for rotate/flip, resize, and crop operations.
+   */
+#if JPEG_LIB_VERSION >= 80
+  dstinfo->jpeg_width = info->output_width;
+  dstinfo->jpeg_height = info->output_height;
+#endif
+
+  /* Transpose destination image parameters, adjust quantization */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+#if JPEG_LIB_VERSION < 80
+    dstinfo->image_width = info->output_height;
+    dstinfo->image_height = info->output_width;
+#endif
+    transpose_critical_parameters(dstinfo);
+    break;
+  case JXFORM_DROP:
+    if (info->drop_width != 0 && info->drop_height != 0)
+      adjust_quant(srcinfo, src_coef_arrays,
+                   info->drop_ptr, info->drop_coef_arrays,
+                   info->trim, dstinfo);
+    break;
+  default:
+#if JPEG_LIB_VERSION < 80
+    dstinfo->image_width = info->output_width;
+    dstinfo->image_height = info->output_height;
+#endif
+    break;
+  }
+
+  /* Adjust Exif properties */
+  if (srcinfo->marker_list != NULL &&
+      srcinfo->marker_list->marker == JPEG_APP0 + 1 &&
+      srcinfo->marker_list->data_length >= 6 &&
+      srcinfo->marker_list->data[0] == 0x45 &&
+      srcinfo->marker_list->data[1] == 0x78 &&
+      srcinfo->marker_list->data[2] == 0x69 &&
+      srcinfo->marker_list->data[3] == 0x66 &&
+      srcinfo->marker_list->data[4] == 0 &&
+      srcinfo->marker_list->data[5] == 0) {
+    /* Suppress output of JFIF marker */
+    dstinfo->write_JFIF_header = FALSE;
+    /* Adjust Exif image parameters */
+#if JPEG_LIB_VERSION >= 80
+    if (dstinfo->jpeg_width != srcinfo->image_width ||
+        dstinfo->jpeg_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+                             srcinfo->marker_list->data_length - 6,
+                             dstinfo->jpeg_width, dstinfo->jpeg_height);
+#else
+    if (dstinfo->image_width != srcinfo->image_width ||
+        dstinfo->image_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+                             srcinfo->marker_list->data_length - 6,
+                             dstinfo->image_width, dstinfo->image_height);
+#endif
+  }
+
+  /* Return the appropriate output data set */
+  if (info->workspace_coef_arrays != NULL)
+    return info->workspace_coef_arrays;
+  return src_coef_arrays;
+}
+
+
+/* Execute the actual transformation, if any.
+ *
+ * This must be called *after* jpeg_write_coefficients, because it depends
+ * on jpeg_write_coefficients to have computed subsidiary values such as
+ * the per-component width and height fields in the destination object.
+ *
+ * Note that some transformations will modify the source data arrays!
+ */
+
+GLOBAL(void)
+jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                             jvirt_barray_ptr *src_coef_arrays,
+                             jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
+
+  /* Note: conditions tested here should match those in switch statement
+   * in jtransform_request_workspace()
+   */
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->output_width > srcinfo->output_width ||
+        info->output_height > srcinfo->output_height) {
+      if (info->output_width > srcinfo->output_width &&
+          info->crop_width_set == JCROP_REFLECT)
+        do_crop_ext_reflect(srcinfo, dstinfo,
+                            info->x_crop_offset, info->y_crop_offset,
+                            src_coef_arrays, dst_coef_arrays);
+      else if (info->output_width > srcinfo->output_width &&
+               info->crop_width_set == JCROP_FORCE)
+        do_crop_ext_flat(srcinfo, dstinfo,
+                         info->x_crop_offset, info->y_crop_offset,
+                         src_coef_arrays, dst_coef_arrays);
+      else
+        do_crop_ext_zero(srcinfo, dstinfo,
+                         info->x_crop_offset, info->y_crop_offset,
+                         src_coef_arrays, dst_coef_arrays);
+    } else if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_FLIP_H:
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                src_coef_arrays, dst_coef_arrays);
+    else
+      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
+                        src_coef_arrays);
+    break;
+  case JXFORM_FLIP_V:
+    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSPOSE:
+    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                 src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSVERSE:
+    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                  src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_90:
+    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_180:
+    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_270:
+    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_WIPE:
+    if (info->crop_width_set == JCROP_REFLECT &&
+        info->y_crop_offset == 0 && info->drop_height ==
+        (JDIMENSION)jdiv_round_up
+          ((long)info->output_height, (long)info->iMCU_sample_height) &&
+        (info->x_crop_offset == 0 ||
+         info->x_crop_offset + info->drop_width ==
+         (JDIMENSION)jdiv_round_up
+           ((long)info->output_width, (long)info->iMCU_sample_width)))
+      do_reflect(srcinfo, dstinfo, info->x_crop_offset,
+                 src_coef_arrays, info->drop_width, info->drop_height);
+    else if (info->crop_width_set == JCROP_FORCE)
+      do_flatten(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                 src_coef_arrays, info->drop_width, info->drop_height);
+    else
+      do_wipe(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, info->drop_width, info->drop_height);
+    break;
+  case JXFORM_DROP:
+    if (info->drop_width != 0 && info->drop_height != 0)
+      do_drop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, info->drop_ptr, info->drop_coef_arrays,
+              info->drop_width, info->drop_height);
+    break;
+  }
+}
+
+/* jtransform_perfect_transform
+ *
+ * Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ *
+ * Inputs:
+ *   image_width, image_height: source image dimensions.
+ *   MCU_width, MCU_height: pixel dimensions of MCU.
+ *   transform: transformation identifier.
+ * Parameter sources from initialized jpeg_struct
+ * (after reading source header):
+ *   image_width = cinfo.image_width
+ *   image_height = cinfo.image_height
+ *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
+ *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
+ * Result:
+ *   TRUE = perfect transformation possible
+ *   FALSE = perfect transformation not possible
+ *           (may use custom action then)
+ */
+
+GLOBAL(boolean)
+jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
+                             int MCU_width, int MCU_height,
+                             JXFORM_CODE transform)
+{
+  boolean result = TRUE; /* initialize TRUE */
+
+  switch (transform) {
+  case JXFORM_FLIP_H:
+  case JXFORM_ROT_270:
+    if (image_width % (JDIMENSION)MCU_width)
+      result = FALSE;
+    break;
+  case JXFORM_FLIP_V:
+  case JXFORM_ROT_90:
+    if (image_height % (JDIMENSION)MCU_height)
+      result = FALSE;
+    break;
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_180:
+    if (image_width % (JDIMENSION)MCU_width)
+      result = FALSE;
+    if (image_height % (JDIMENSION)MCU_height)
+      result = FALSE;
+    break;
+  default:
+    break;
+  }
+
+  return result;
+}
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/* Setup decompression object to save desired markers in memory.
+ * This must be called before jpeg_read_header() to have the desired effect.
+ */
+
+GLOBAL(void)
+jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
+{
+#ifdef SAVE_MARKERS_SUPPORTED
+  int m;
+
+  /* Save comments except under NONE option */
+  if (option != JCOPYOPT_NONE && option != JCOPYOPT_ICC) {
+    jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
+  }
+  /* Save all types of APPn markers iff ALL option */
+  if (option == JCOPYOPT_ALL || option == JCOPYOPT_ALL_EXCEPT_ICC) {
+    for (m = 0; m < 16; m++) {
+      if (option == JCOPYOPT_ALL_EXCEPT_ICC && m == 2)
+        continue;
+      jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
+    }
+  }
+  /* Save only APP2 markers if ICC option selected */
+  if (option == JCOPYOPT_ICC) {
+    jpeg_save_markers(srcinfo, JPEG_APP0 + 2, 0xFFFF);
+  }
+#endif /* SAVE_MARKERS_SUPPORTED */
+}
+
+/* Copy markers saved in the given source object to the destination object.
+ * This should be called just after jpeg_start_compress() or
+ * jpeg_write_coefficients().
+ * Note that those routines will have written the SOI, and also the
+ * JFIF APP0 or Adobe APP14 markers if selected.
+ */
+
+GLOBAL(void)
+jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                      JCOPY_OPTION option)
+{
+  jpeg_saved_marker_ptr marker;
+
+  /* In the current implementation, we don't actually need to examine the
+   * option flag here; we just copy everything that got saved.
+   * But to avoid confusion, we do not output JFIF and Adobe APP14 markers
+   * if the encoder library already wrote one.
+   */
+  for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
+    if (dstinfo->write_JFIF_header &&
+        marker->marker == JPEG_APP0 &&
+        marker->data_length >= 5 &&
+        marker->data[0] == 0x4A &&
+        marker->data[1] == 0x46 &&
+        marker->data[2] == 0x49 &&
+        marker->data[3] == 0x46 &&
+        marker->data[4] == 0)
+      continue;                 /* reject duplicate JFIF */
+    if (dstinfo->write_Adobe_marker &&
+        marker->marker == JPEG_APP0 + 14 &&
+        marker->data_length >= 5 &&
+        marker->data[0] == 0x41 &&
+        marker->data[1] == 0x64 &&
+        marker->data[2] == 0x6F &&
+        marker->data[3] == 0x62 &&
+        marker->data[4] == 0x65)
+      continue;                 /* reject duplicate Adobe */
+    jpeg_write_marker(dstinfo, marker->marker,
+                      marker->data, marker->data_length);
+  }
+}
diff --git a/3rdparty/libjpeg-turbo/src/transupp.h b/3rdparty/libjpeg-turbo/src/transupp.h
new file mode 100644
index 0000000000..cea1f40921
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/transupp.h
@@ -0,0 +1,231 @@
+/*
+ * transupp.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2017, 2021, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains declarations for image transformation routines and
+ * other utility code used by the jpegtran sample application.  These are
+ * NOT part of the core JPEG library.  But we keep these routines separate
+ * from jpegtran.c to ease the task of maintaining jpegtran-like programs
+ * that have other user interfaces.
+ *
+ * NOTE: all the routines declared here have very specific requirements
+ * about when they are to be executed during the reading and writing of the
+ * source and destination files.  See the comments in transupp.c, or see
+ * jpegtran.c for an example of correct usage.
+ */
+
+/* If you happen not to want the image transform support, disable it here */
+#ifndef TRANSFORMS_SUPPORTED
+#define TRANSFORMS_SUPPORTED  1         /* 0 disables transform code */
+#endif
+
+/*
+ * Although rotating and flipping data expressed as DCT coefficients is not
+ * hard, there is an asymmetry in the JPEG format specification for images
+ * whose dimensions aren't multiples of the iMCU size.  The right and bottom
+ * image edges are padded out to the next iMCU boundary with junk data; but
+ * no padding is possible at the top and left edges.  If we were to flip
+ * the whole image including the pad data, then pad garbage would become
+ * visible at the top and/or left, and real pixels would disappear into the
+ * pad margins --- perhaps permanently, since encoders & decoders may not
+ * bother to preserve DCT blocks that appear to be completely outside the
+ * nominal image area.  So, we have to exclude any partial iMCUs from the
+ * basic transformation.
+ *
+ * Transpose is the only transformation that can handle partial iMCUs at the
+ * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
+ * at the bottom, but leaves any partial iMCUs at the right edge untouched.
+ * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
+ * The other transforms are defined as combinations of these basic transforms
+ * and process edge blocks in a way that preserves the equivalence.
+ *
+ * The "trim" option causes untransformable partial iMCUs to be dropped;
+ * this is not strictly lossless, but it usually gives the best-looking
+ * result for odd-size images.  Note that when this option is active,
+ * the expected mathematical equivalences between the transforms may not hold.
+ * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
+ * followed by -rot 180 -trim trims both edges.)
+ *
+ * We also offer a lossless-crop option, which discards data outside a given
+ * image region but losslessly preserves what is inside.  Like the rotate and
+ * flip transforms, lossless crop is restricted by the JPEG format: the upper
+ * left corner of the selected region must fall on an iMCU boundary.  If this
+ * does not hold for the given crop parameters, we silently move the upper left
+ * corner up and/or left to make it so, simultaneously increasing the region
+ * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * output image covers at least the requested region, but may cover more.)
+ * The adjustment of the region dimensions may be optionally disabled.
+ *
+ * A complementary lossless wipe option is provided to discard (gray out) data
+ * inside a given image region while losslessly preserving what is outside.
+ * A lossless drop option is also provided, which allows another JPEG image to
+ * be inserted ("dropped") into the source image data at a given position,
+ * replacing the existing image data at that position.  Both the source image
+ * and the drop image must have the same subsampling level.  It is best if they
+ * also have the same quantization (quality.)  Otherwise, the quantization of
+ * the output image will be adapted to accommodate the higher of the source
+ * image quality and the drop image quality.  The trim option can be used with
+ * the drop option to requantize the drop image to match the source image.
+ *
+ * We also provide a lossless-resize option, which is kind of a lossless-crop
+ * operation in the DCT coefficient block domain - it discards higher-order
+ * coefficients and losslessly preserves lower-order coefficients of a
+ * sub-block.
+ *
+ * Rotate/flip transform, resize, and crop can be requested together in a
+ * single invocation.  The crop is applied last --- that is, the crop region
+ * is specified in terms of the destination image after transform/resize.
+ *
+ * We also offer a "force to grayscale" option, which simply discards the
+ * chrominance channels of a YCbCr image.  This is lossless in the sense that
+ * the luminance channel is preserved exactly.  It's not the same kind of
+ * thing as the rotate/flip transformations, but it's convenient to handle it
+ * as part of this package, mainly because the transformation routines have to
+ * be aware of the option to know how many components to work on.
+ */
+
+
+/*
+ * Codes for supported types of image transformations.
+ */
+
+typedef enum {
+  JXFORM_NONE,            /* no transformation */
+  JXFORM_FLIP_H,          /* horizontal flip */
+  JXFORM_FLIP_V,          /* vertical flip */
+  JXFORM_TRANSPOSE,       /* transpose across UL-to-LR axis */
+  JXFORM_TRANSVERSE,      /* transpose across UR-to-LL axis */
+  JXFORM_ROT_90,          /* 90-degree clockwise rotation */
+  JXFORM_ROT_180,         /* 180-degree rotation */
+  JXFORM_ROT_270,         /* 270-degree clockwise (or 90 ccw) */
+  JXFORM_WIPE,            /* wipe */
+  JXFORM_DROP             /* drop */
+} JXFORM_CODE;
+
+/*
+ * Codes for crop parameters, which can individually be unspecified,
+ * positive or negative for xoffset or yoffset,
+ * positive or force or reflect for width or height.
+ */
+
+typedef enum {
+  JCROP_UNSET,
+  JCROP_POS,
+  JCROP_NEG,
+  JCROP_FORCE,
+  JCROP_REFLECT
+} JCROP_CODE;
+
+/*
+ * Transform parameters struct.
+ * NB: application must not change any elements of this struct after
+ * calling jtransform_request_workspace.
+ */
+
+typedef struct {
+  /* Options: set by caller */
+  JXFORM_CODE transform;        /* image transform operator */
+  boolean perfect;              /* if TRUE, fail if partial MCUs are requested */
+  boolean trim;                 /* if TRUE, trim partial MCUs as needed */
+  boolean force_grayscale;      /* if TRUE, convert color image to grayscale */
+  boolean crop;                 /* if TRUE, crop or wipe source image, or drop */
+  boolean slow_hflip;  /* For best performance, the JXFORM_FLIP_H transform
+                          normally modifies the source coefficients in place.
+                          Setting this to TRUE will instead use a slower,
+                          double-buffered algorithm, which leaves the source
+                          coefficients in tact (necessary if other transformed
+                          images must be generated from the same set of
+                          coefficients. */
+
+  /* Crop parameters: application need not set these unless crop is TRUE.
+   * These can be filled in by jtransform_parse_crop_spec().
+   */
+  JDIMENSION crop_width;        /* Width of selected region */
+  JCROP_CODE crop_width_set;    /* (force-disables adjustment) */
+  JDIMENSION crop_height;       /* Height of selected region */
+  JCROP_CODE crop_height_set;   /* (force-disables adjustment) */
+  JDIMENSION crop_xoffset;      /* X offset of selected region */
+  JCROP_CODE crop_xoffset_set;  /* (negative measures from right edge) */
+  JDIMENSION crop_yoffset;      /* Y offset of selected region */
+  JCROP_CODE crop_yoffset_set;  /* (negative measures from bottom edge) */
+
+  /* Drop parameters: set by caller for drop request */
+  j_decompress_ptr drop_ptr;
+  jvirt_barray_ptr *drop_coef_arrays;
+
+  /* Internal workspace: caller should not touch these */
+  int num_components;           /* # of components in workspace */
+  jvirt_barray_ptr *workspace_coef_arrays; /* workspace for transformations */
+  JDIMENSION output_width;      /* cropped destination dimensions */
+  JDIMENSION output_height;
+  JDIMENSION x_crop_offset;     /* destination crop offsets measured in iMCUs */
+  JDIMENSION y_crop_offset;
+  JDIMENSION drop_width;        /* drop/wipe dimensions measured in iMCUs */
+  JDIMENSION drop_height;
+  int iMCU_sample_width;        /* destination iMCU size */
+  int iMCU_sample_height;
+} jpeg_transform_info;
+
+
+#if TRANSFORMS_SUPPORTED
+
+/* Parse a crop specification (written in X11 geometry style) */
+EXTERN(boolean) jtransform_parse_crop_spec(jpeg_transform_info *info,
+                                           const char *spec);
+/* Request any required workspace */
+EXTERN(boolean) jtransform_request_workspace(j_decompress_ptr srcinfo,
+                                             jpeg_transform_info *info);
+/* Adjust output image parameters */
+EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
+  (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+   jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info);
+/* Execute the actual transformation, if any */
+EXTERN(void) jtransform_execute_transform(j_decompress_ptr srcinfo,
+                                          j_compress_ptr dstinfo,
+                                          jvirt_barray_ptr *src_coef_arrays,
+                                          jpeg_transform_info *info);
+/* Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ */
+EXTERN(boolean) jtransform_perfect_transform(JDIMENSION image_width,
+                                             JDIMENSION image_height,
+                                             int MCU_width, int MCU_height,
+                                             JXFORM_CODE transform);
+
+/* jtransform_execute_transform used to be called
+ * jtransform_execute_transformation, but some compilers complain about
+ * routine names that long.  This macro is here to avoid breaking any
+ * old source code that uses the original name...
+ */
+#define jtransform_execute_transformation       jtransform_execute_transform
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/*
+ * Support for copying optional markers from source to destination file.
+ */
+
+typedef enum {
+  JCOPYOPT_NONE,           /* copy no optional markers */
+  JCOPYOPT_COMMENTS,       /* copy only comment (COM) markers */
+  JCOPYOPT_ALL,            /* copy all optional markers */
+  JCOPYOPT_ALL_EXCEPT_ICC, /* copy all optional markers except APP2 */
+  JCOPYOPT_ICC             /* copy only ICC profile (APP2) markers */
+} JCOPY_OPTION;
+
+#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS     /* recommended default */
+
+/* Setup decompression object to save desired markers in memory */
+EXTERN(void) jcopy_markers_setup(j_decompress_ptr srcinfo,
+                                 JCOPY_OPTION option);
+/* Copy markers saved in the given source object to the destination object */
+EXTERN(void) jcopy_markers_execute(j_decompress_ptr srcinfo,
+                                   j_compress_ptr dstinfo,
+                                   JCOPY_OPTION option);
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg-jni.c b/3rdparty/libjpeg-turbo/src/turbojpeg-jni.c
new file mode 100644
index 0000000000..32186f3fa0
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg-jni.c
@@ -0,0 +1,1400 @@
+/*
+ * Copyright (C)2011-2023 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <limits.h>
+#include "turbojpeg.h"
+#include "jinclude.h"
+#include <jni.h>
+#include "java/org_libjpegturbo_turbojpeg_TJCompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJTransformer.h"
+#include "java/org_libjpegturbo_turbojpeg_TJ.h"
+
+#define BAILIF0(f) { \
+  if (!(f) || (*env)->ExceptionCheck(env)) { \
+    goto bailout; \
+  } \
+}
+
+#define BAILIF0NOEC(f) { \
+  if (!(f)) { \
+    goto bailout; \
+  } \
+}
+
+#define THROW(msg, exceptionClass) { \
+  jclass _exccls = (*env)->FindClass(env, exceptionClass); \
+  \
+  BAILIF0(_exccls); \
+  (*env)->ThrowNew(env, _exccls, msg); \
+  goto bailout; \
+}
+
+#define THROW_TJ() { \
+  jclass _exccls; \
+  jmethodID _excid; \
+  jobject _excobj; \
+  jstring _errstr; \
+  \
+  BAILIF0(_errstr = (*env)->NewStringUTF(env, tj3GetErrorStr(handle))); \
+  BAILIF0(_exccls = (*env)->FindClass(env, \
+    "org/libjpegturbo/turbojpeg/TJException")); \
+  BAILIF0(_excid = (*env)->GetMethodID(env, _exccls, "<init>", \
+                                       "(Ljava/lang/String;I)V")); \
+  BAILIF0(_excobj = (*env)->NewObject(env, _exccls, _excid, _errstr, \
+                                      tj3GetErrorCode(handle))); \
+  (*env)->Throw(env, _excobj); \
+  goto bailout; \
+}
+
+#define THROW_ARG(msg)  THROW(msg, "java/lang/IllegalArgumentException")
+
+#define THROW_MEM() \
+  THROW("Memory allocation failure", "java/lang/OutOfMemoryError");
+
+#define GET_HANDLE() \
+  jclass _cls = (*env)->GetObjectClass(env, obj); \
+  jfieldID _fid; \
+  \
+  BAILIF0(_cls); \
+  BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "handle", "J")); \
+  handle = (tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid);
+
+#define SAFE_RELEASE(javaArray, cArray) { \
+  if (javaArray && cArray) \
+    (*env)->ReleasePrimitiveArrayCritical(env, javaArray, (void *)cArray, 0); \
+  cArray = NULL; \
+}
+
+/* TurboJPEG 1.2.x: TJ::bufSize() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
+  (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp)
+{
+  size_t retval = tj3JPEGBufSize(width, height, jpegSubsamp);
+
+  if (retval == 0) THROW_ARG(tj3GetErrorStr(NULL));
+  if (retval > (size_t)INT_MAX)
+    THROW_ARG("Image is too large");
+
+bailout:
+  return (jint)retval;
+}
+
+/* TurboJPEG 1.4.x: TJ::bufSizeYUV() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
+  (JNIEnv *env, jclass cls, jint width, jint align, jint height, jint subsamp)
+{
+  size_t retval = tj3YUVBufSize(width, align, height, subsamp);
+
+  if (retval == 0) THROW_ARG(tj3GetErrorStr(NULL));
+  if (retval > (size_t)INT_MAX)
+    THROW_ARG("Image is too large");
+
+bailout:
+  return (jint)retval;
+}
+
+/* TurboJPEG 1.4.x: TJ::planeSizeYUV() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII
+  (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride,
+   jint height, jint subsamp)
+{
+  size_t retval = tj3YUVPlaneSize(componentID, width, stride, height, subsamp);
+
+  if (retval == 0) THROW_ARG(tj3GetErrorStr(NULL));
+  if (retval > (size_t)INT_MAX)
+    THROW_ARG("Image is too large");
+
+bailout:
+  return (jint)retval;
+}
+
+/* TurboJPEG 1.4.x: TJ::planeWidth() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III
+  (JNIEnv *env, jclass cls, jint componentID, jint width, jint subsamp)
+{
+  jint retval = (jint)tj3YUVPlaneWidth(componentID, width, subsamp);
+
+  if (retval == 0) THROW_ARG(tj3GetErrorStr(NULL));
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4.x: TJ::planeHeight() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III
+  (JNIEnv *env, jclass cls, jint componentID, jint height, jint subsamp)
+{
+  jint retval = (jint)tj3YUVPlaneHeight(componentID, height, subsamp);
+
+  if (retval == 0) THROW_ARG(tj3GetErrorStr(NULL));
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.2.x: TJCompressor::init() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
+  (JNIEnv *env, jobject obj)
+{
+  jclass cls;
+  jfieldID fid;
+  tjhandle handle;
+
+  if ((handle = tj3Init(TJINIT_COMPRESS)) == NULL)
+    THROW(tj3GetErrorStr(NULL), "org/libjpegturbo/turbojpeg/TJException");
+
+  BAILIF0(cls = (*env)->GetObjectClass(env, obj));
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "handle", "J"));
+  (*env)->SetLongField(env, obj, fid, (size_t)handle);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 3: TJCompressor::set() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_set
+  (JNIEnv *env, jobject obj, jint param, jint value)
+{
+  tjhandle handle = 0;
+
+  GET_HANDLE();
+
+  if (tj3Set(handle, param, value) == -1)
+    THROW_TJ();
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 3: TJCompressor::get() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_get
+  (JNIEnv *env, jobject obj, jint param)
+{
+  tjhandle handle = 0;
+
+  GET_HANDLE();
+
+  return tj3Get(handle, param);
+
+bailout:
+  return -1;
+}
+
+static jint TJCompressor_compress
+  (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint precision,
+   jint x, jint y, jint width, jint pitch, jint height, jint pf,
+   jbyteArray dst)
+{
+  tjhandle handle = 0;
+  size_t jpegSize = 0;
+  jsize arraySize = 0, actualPitch;
+  void *srcBuf = NULL;
+  unsigned char *jpegBuf = NULL;
+  int jpegSubsamp;
+
+  GET_HANDLE();
+
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || width < 1 ||
+      height < 1 || pitch < 0)
+    THROW_ARG("Invalid argument in compress*()");
+  if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF)
+    THROW_ARG("Mismatch between Java and C API");
+
+  actualPitch = (pitch == 0) ? width * tjPixelSize[pf] : pitch;
+  arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf];
+  if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize)
+    THROW_ARG("Source buffer is not large enough");
+  jpegSubsamp = tj3Get(handle, TJPARAM_SUBSAMP);
+  if (tj3Get(handle, TJPARAM_LOSSLESS) && jpegSubsamp != TJSAMP_GRAY)
+    jpegSubsamp = TJSAMP_444;
+  else if (jpegSubsamp == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+  jpegSize = tj3JPEGBufSize(width, height, jpegSubsamp);
+  if ((*env)->GetArrayLength(env, dst) < (jsize)jpegSize)
+    THROW_ARG("Destination buffer is not large enough");
+
+  if (tj3Set(handle, TJPARAM_NOREALLOC, 1) == -1)
+    THROW_TJ();
+
+  BAILIF0NOEC(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  if (precision == 8) {
+    if (tj3Compress8(handle, &((unsigned char *)srcBuf)[y * actualPitch +
+                                                        x * tjPixelSize[pf]],
+                     width, pitch, height, pf, &jpegBuf, &jpegSize) == -1) {
+      SAFE_RELEASE(dst, jpegBuf);
+      SAFE_RELEASE(src, srcBuf);
+      THROW_TJ();
+    }
+  } else if (precision == 12) {
+    if (tj3Compress12(handle, &((short *)srcBuf)[y * actualPitch +
+                                                 x * tjPixelSize[pf]],
+                      width, pitch, height, pf, &jpegBuf, &jpegSize) == -1) {
+      SAFE_RELEASE(dst, jpegBuf);
+      SAFE_RELEASE(src, srcBuf);
+      THROW_TJ();
+    }
+  } else {
+    if (tj3Compress16(handle, &((unsigned short *)srcBuf)[y * actualPitch +
+                                                          x * tjPixelSize[pf]],
+                      width, pitch, height, pf, &jpegBuf, &jpegSize) == -1) {
+      SAFE_RELEASE(dst, jpegBuf);
+      SAFE_RELEASE(src, srcBuf);
+      THROW_TJ();
+    }
+  }
+
+bailout:
+  SAFE_RELEASE(dst, jpegBuf);
+  SAFE_RELEASE(src, srcBuf);
+  return (jint)jpegSize;
+}
+
+/* TurboJPEG 3: TJCompressor::compress8() byte source */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress8___3BIIIIII_3B
+  (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width,
+   jint pitch, jint height, jint pf, jbyteArray dst)
+{
+  return TJCompressor_compress(env, obj, src, 1, 8, x, y, width, pitch, height,
+                               pf, dst);
+}
+
+/* TurboJPEG 3: TJCompressor::compress12() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress12
+  (JNIEnv *env, jobject obj, jshortArray src, jint x, jint y, jint width,
+   jint pitch, jint height, jint pf, jbyteArray dst)
+{
+  return TJCompressor_compress(env, obj, src, 1, 12, x, y, width, pitch,
+                               height, pf, dst);
+}
+
+/* TurboJPEG 3: TJCompressor::compress16() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress16
+  (JNIEnv *env, jobject obj, jshortArray src, jint x, jint y, jint width,
+   jint pitch, jint height, jint pf, jbyteArray dst)
+{
+  return TJCompressor_compress(env, obj, src, 1, 16, x, y, width, pitch,
+                               height, pf, dst);
+}
+
+/* TurboJPEG 3: TJCompressor::compress8() int source */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress8___3IIIIIII_3B
+  (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width,
+   jint stride, jint height, jint pf, jbyteArray dst)
+{
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in compress8()");
+  if (tjPixelSize[pf] != sizeof(jint))
+    THROW_ARG("Pixel format must be 32-bit when compressing from an integer buffer.");
+
+  return TJCompressor_compress(env, obj, src, sizeof(jint), 8, x, y, width,
+                               stride * sizeof(jint), height, pf, dst);
+
+bailout:
+  return 0;
+}
+
+/* TurboJPEG 3: TJCompressor::compressFromYUV8() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV8
+  (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets,
+   jint width, jintArray jSrcStrides, jint height, jbyteArray dst)
+{
+  tjhandle handle = 0;
+  size_t jpegSize = 0;
+  jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL };
+  const unsigned char *srcPlanesTmp[3] = { NULL, NULL, NULL };
+  const unsigned char *srcPlanes[3] = { NULL, NULL, NULL };
+  jint srcOffsetsTmp[3] = { 0, 0, 0 }, srcStridesTmp[3] = { 0, 0, 0 };
+  int srcOffsets[3] = { 0, 0, 0 }, srcStrides[3] = { 0, 0, 0 };
+  unsigned char *jpegBuf = NULL;
+  int nc = 0, i, subsamp;
+
+  GET_HANDLE();
+
+  if (org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP)
+    THROW_ARG("Mismatch between Java and C API");
+
+  if ((subsamp = tj3Get(handle, TJPARAM_SUBSAMP)) == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+  nc = subsamp == TJSAMP_GRAY ? 1 : 3;
+  if ((*env)->GetArrayLength(env, srcobjs) < nc)
+    THROW_ARG("Planes array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jSrcOffsets) < nc)
+    THROW_ARG("Offsets array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jSrcStrides) < nc)
+    THROW_ARG("Strides array is too small for the subsampling type");
+
+  jpegSize = tj3JPEGBufSize(width, height, subsamp);
+  if ((*env)->GetArrayLength(env, dst) < (jsize)jpegSize)
+    THROW_ARG("Destination buffer is not large enough");
+
+  if (tj3Set(handle, TJPARAM_NOREALLOC, 1) == -1)
+    THROW_TJ();
+
+  (*env)->GetIntArrayRegion(env, jSrcOffsets, 0, nc, srcOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcOffsets[i] = srcOffsetsTmp[i];
+
+  (*env)->GetIntArrayRegion(env, jSrcStrides, 0, nc, srcStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcStrides[i] = srcStridesTmp[i];
+
+  for (i = 0; i < nc; i++) {
+    size_t planeSize = tj3YUVPlaneSize(i, width, srcStrides[i], height,
+                                       subsamp);
+    int pw = tj3YUVPlaneWidth(i, width, subsamp);
+
+    if (planeSize == 0 || pw == 0)
+      THROW_ARG(tj3GetErrorStr(NULL));
+
+    if (planeSize > (size_t)INT_MAX)
+      THROW_ARG("Source plane is too large");
+    if (srcOffsets[i] < 0)
+      THROW_ARG("Invalid argument in compressFromYUV8()");
+    if (srcStrides[i] < 0 && srcOffsets[i] - (int)planeSize + pw < 0)
+      THROW_ARG("Negative plane stride would cause memory to be accessed below plane boundary");
+
+    BAILIF0(jSrcPlanes[i] = (*env)->GetObjectArrayElement(env, srcobjs, i));
+    if ((*env)->GetArrayLength(env, jSrcPlanes[i]) <
+        srcOffsets[i] + (int)planeSize)
+      THROW_ARG("Source plane is not large enough");
+  }
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(srcPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
+    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
+  }
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  if (tj3CompressFromYUVPlanes8(handle, srcPlanes, width, srcStrides, height,
+                                &jpegBuf, &jpegSize) == -1) {
+    SAFE_RELEASE(dst, jpegBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
+    THROW_TJ();
+  }
+
+bailout:
+  SAFE_RELEASE(dst, jpegBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
+  return (jint)jpegSize;
+}
+
+static void TJCompressor_encodeYUV8
+  (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y,
+   jint width, jint pitch, jint height, jint pf, jobjectArray dstobjs,
+   jintArray jDstOffsets, jintArray jDstStrides)
+{
+  tjhandle handle = 0;
+  jsize arraySize = 0, actualPitch;
+  unsigned char *srcBuf = NULL;
+  jbyteArray jDstPlanes[3] = { NULL, NULL, NULL };
+  unsigned char *dstPlanesTmp[3] = { NULL, NULL, NULL };
+  unsigned char *dstPlanes[3] = { NULL, NULL, NULL };
+  jint dstOffsetsTmp[3] = { 0, 0, 0 }, dstStridesTmp[3] = { 0, 0, 0 };
+  int dstOffsets[3] = { 0, 0, 0 }, dstStrides[3] = { 0, 0, 0 };
+  int nc = 0, i, subsamp;
+
+  GET_HANDLE();
+
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || width < 1 ||
+      height < 1 || pitch < 0)
+    THROW_ARG("Invalid argument in encodeYUV8()");
+  if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF ||
+      org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP)
+    THROW_ARG("Mismatch between Java and C API");
+
+  if ((subsamp = tj3Get(handle, TJPARAM_SUBSAMP)) == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+  nc = subsamp == TJSAMP_GRAY ? 1 : 3;
+  if ((*env)->GetArrayLength(env, dstobjs) < nc)
+    THROW_ARG("Planes array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jDstOffsets) < nc)
+    THROW_ARG("Offsets array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jDstStrides) < nc)
+    THROW_ARG("Strides array is too small for the subsampling type");
+
+  actualPitch = (pitch == 0) ? width * tjPixelSize[pf] : pitch;
+  arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf];
+  if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize)
+    THROW_ARG("Source buffer is not large enough");
+
+  (*env)->GetIntArrayRegion(env, jDstOffsets, 0, nc, dstOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstOffsets[i] = dstOffsetsTmp[i];
+
+  (*env)->GetIntArrayRegion(env, jDstStrides, 0, nc, dstStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstStrides[i] = dstStridesTmp[i];
+
+  for (i = 0; i < nc; i++) {
+    size_t planeSize = tj3YUVPlaneSize(i, width, dstStrides[i], height,
+                                       subsamp);
+    int pw = tj3YUVPlaneWidth(i, width, subsamp);
+
+    if (planeSize == 0 || pw == 0)
+      THROW_ARG(tj3GetErrorStr(NULL));
+
+    if (planeSize > (size_t)INT_MAX)
+      THROW_ARG("Destination plane is too large");
+    if (dstOffsets[i] < 0)
+      THROW_ARG("Invalid argument in encodeYUV8()");
+    if (dstStrides[i] < 0 && dstOffsets[i] - (int)planeSize + pw < 0)
+      THROW_ARG("Negative plane stride would cause memory to be accessed below plane boundary");
+
+    BAILIF0(jDstPlanes[i] = (*env)->GetObjectArrayElement(env, dstobjs, i));
+    if ((*env)->GetArrayLength(env, jDstPlanes[i]) <
+        dstOffsets[i] + (int)planeSize)
+      THROW_ARG("Destination plane is not large enough");
+  }
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(dstPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
+    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
+  }
+  BAILIF0NOEC(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+
+  if (tj3EncodeYUVPlanes8(handle,
+                          &srcBuf[y * actualPitch + x * tjPixelSize[pf]],
+                          width, pitch, height, pf, dstPlanes,
+                          dstStrides) == -1) {
+    SAFE_RELEASE(src, srcBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
+    THROW_TJ();
+  }
+
+bailout:
+  SAFE_RELEASE(src, srcBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
+}
+
+/* TurboJPEG 3: TJCompressor::encodeYUV8() byte source */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV8___3BIIIIII_3_3B_3I_3I
+  (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width,
+   jint pitch, jint height, jint pf, jobjectArray dstobjs,
+   jintArray jDstOffsets, jintArray jDstStrides)
+{
+  TJCompressor_encodeYUV8(env, obj, src, 1, x, y, width, pitch, height, pf,
+                          dstobjs, jDstOffsets, jDstStrides);
+}
+
+/* TurboJPEG 3: TJCompressor::encodeYUV8() int source */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV8___3IIIIIII_3_3B_3I_3I
+  (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width,
+   jint stride, jint height, jint pf, jobjectArray dstobjs,
+   jintArray jDstOffsets, jintArray jDstStrides)
+{
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in encodeYUV8()");
+  if (tjPixelSize[pf] != sizeof(jint))
+    THROW_ARG("Pixel format must be 32-bit when encoding from an integer buffer.");
+
+  TJCompressor_encodeYUV8(env, obj, src, sizeof(jint), x, y, width,
+                          stride * sizeof(jint), height, pf, dstobjs,
+                          jDstOffsets, jDstStrides);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 1.2.x: TJCompressor::destroy() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
+  (JNIEnv *env, jobject obj)
+{
+  tjhandle handle = 0;
+
+  GET_HANDLE();
+
+  tj3Destroy(handle);
+  (*env)->SetLongField(env, obj, _fid, 0);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::init() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
+  (JNIEnv *env, jobject obj)
+{
+  jclass cls;
+  jfieldID fid;
+  tjhandle handle;
+
+  if ((handle = tj3Init(TJINIT_DECOMPRESS)) == NULL)
+    THROW(tj3GetErrorStr(NULL), "org/libjpegturbo/turbojpeg/TJException");
+
+  BAILIF0(cls = (*env)->GetObjectClass(env, obj));
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "handle", "J"));
+  (*env)->SetLongField(env, obj, fid, (size_t)handle);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 3: TJDecompressor::set() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_set
+  (JNIEnv *env, jobject obj, jint param, jint value)
+{
+  Java_org_libjpegturbo_turbojpeg_TJCompressor_set(env, obj, param, value);
+}
+
+/* TurboJPEG 3: TJDecompressor::get() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_get
+  (JNIEnv *env, jobject obj, jint param)
+{
+  return Java_org_libjpegturbo_turbojpeg_TJCompressor_get(env, obj, param);
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::getScalingFactors() */
+JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors
+  (JNIEnv *env, jclass cls)
+{
+  jclass sfcls = NULL;
+  jfieldID fid = 0;
+  tjscalingfactor *sf = NULL;
+  int n = 0, i;
+  jobject sfobj = NULL;
+  jobjectArray sfjava = NULL;
+
+  if ((sf = tj3GetScalingFactors(&n)) == NULL || n == 0)
+    THROW_ARG(tj3GetErrorStr(NULL));
+
+  BAILIF0(sfcls = (*env)->FindClass(env,
+    "org/libjpegturbo/turbojpeg/TJScalingFactor"));
+  BAILIF0(sfjava = (jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0));
+
+  for (i = 0; i < n; i++) {
+    BAILIF0(sfobj = (*env)->AllocObject(env, sfcls));
+    BAILIF0(fid = (*env)->GetFieldID(env, sfcls, "num", "I"));
+    (*env)->SetIntField(env, sfobj, fid, sf[i].num);
+    BAILIF0(fid = (*env)->GetFieldID(env, sfcls, "denom", "I"));
+    (*env)->SetIntField(env, sfobj, fid, sf[i].denom);
+    (*env)->SetObjectArrayElement(env, sfjava, i, sfobj);
+  }
+
+bailout:
+  return sfjava;
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::decompressHeader() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize)
+{
+  tjhandle handle = 0;
+  unsigned char *jpegBuf = NULL;
+
+  GET_HANDLE();
+
+  if ((*env)->GetArrayLength(env, src) < jpegSize)
+    THROW_ARG("Source buffer is not large enough");
+
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+
+  if (tj3DecompressHeader(handle, jpegBuf, (size_t)jpegSize) == -1) {
+    SAFE_RELEASE(src, jpegBuf);
+    THROW_TJ();
+  }
+
+bailout:
+  SAFE_RELEASE(src, jpegBuf);
+}
+
+/* TurboJPEG 3: TJDecompressor::setCroppingRegion() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_setCroppingRegion
+  (JNIEnv *env, jobject obj)
+{
+  tjhandle handle = 0;
+  jclass sfcls, crcls;
+  jobject sfobj, crobj;
+  tjregion croppingRegion;
+  tjscalingfactor scalingFactor;
+
+  GET_HANDLE();
+
+  BAILIF0(sfcls = (*env)->FindClass(env,
+    "org/libjpegturbo/turbojpeg/TJScalingFactor"));
+  BAILIF0(_fid =
+          (*env)->GetFieldID(env, _cls, "scalingFactor",
+                             "Lorg/libjpegturbo/turbojpeg/TJScalingFactor;"));
+  BAILIF0(sfobj = (*env)->GetObjectField(env, obj, _fid));
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "num", "I"));
+  scalingFactor.num = (*env)->GetIntField(env, sfobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "denom", "I"));
+  scalingFactor.denom = (*env)->GetIntField(env, sfobj, _fid);
+
+  if (tj3SetScalingFactor(handle, scalingFactor) == -1)
+    THROW_TJ();
+
+  BAILIF0(crcls = (*env)->FindClass(env, "java/awt/Rectangle"));
+  BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "croppingRegion",
+                                    "Ljava/awt/Rectangle;"));
+  BAILIF0(crobj = (*env)->GetObjectField(env, obj, _fid));
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "x", "I"));
+  croppingRegion.x = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "y", "I"));
+  croppingRegion.y = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "width", "I"));
+  croppingRegion.w = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "height", "I"));
+  croppingRegion.h = (*env)->GetIntField(env, crobj, _fid);
+
+  if (tj3SetCroppingRegion(handle, croppingRegion) == -1)
+    THROW_TJ();
+
+bailout:
+  return;
+}
+
+static void TJDecompressor_decompress
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst,
+   jint dstElementSize, int precision, jint x, jint y, jint pitch, jint pf)
+{
+  tjhandle handle = 0;
+  jsize arraySize = 0, actualPitch;
+  unsigned char *jpegBuf = NULL;
+  void *dstBuf = NULL;
+  jclass sfcls, crcls;
+  jobject sfobj, crobj;
+  tjscalingfactor scalingFactor;
+  tjregion cr;
+  int jpegWidth, jpegHeight, scaledWidth, scaledHeight;
+
+  GET_HANDLE();
+
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in decompress*()");
+  if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF)
+    THROW_ARG("Mismatch between Java and C API");
+
+  if ((*env)->GetArrayLength(env, src) < jpegSize)
+    THROW_ARG("Source buffer is not large enough");
+  if ((jpegWidth = tj3Get(handle, TJPARAM_JPEGWIDTH)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+  if ((jpegHeight = tj3Get(handle, TJPARAM_JPEGHEIGHT)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+
+  BAILIF0(sfcls = (*env)->FindClass(env,
+    "org/libjpegturbo/turbojpeg/TJScalingFactor"));
+  BAILIF0(_fid =
+          (*env)->GetFieldID(env, _cls, "scalingFactor",
+                             "Lorg/libjpegturbo/turbojpeg/TJScalingFactor;"));
+  BAILIF0(sfobj = (*env)->GetObjectField(env, obj, _fid));
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "num", "I"));
+  scalingFactor.num = (*env)->GetIntField(env, sfobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "denom", "I"));
+  scalingFactor.denom = (*env)->GetIntField(env, sfobj, _fid);
+
+  if (tj3SetScalingFactor(handle, scalingFactor) == -1)
+    THROW_TJ();
+  scaledWidth = TJSCALED(jpegWidth, scalingFactor);
+  scaledHeight = TJSCALED(jpegHeight, scalingFactor);
+
+  BAILIF0(crcls = (*env)->FindClass(env, "java/awt/Rectangle"));
+  BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "croppingRegion",
+                                    "Ljava/awt/Rectangle;"));
+  BAILIF0(crobj = (*env)->GetObjectField(env, obj, _fid));
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "x", "I"));
+  cr.x = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "y", "I"));
+  cr.y = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "width", "I"));
+  cr.w = (*env)->GetIntField(env, crobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, crcls, "height", "I"));
+  cr.h = (*env)->GetIntField(env, crobj, _fid);
+  if (cr.x != 0 || cr.y != 0 || cr.w != 0 || cr.h != 0) {
+    scaledWidth = cr.w ? cr.w : scaledWidth - cr.x;
+    scaledHeight = cr.h ? cr.h : scaledHeight - cr.y;
+  }
+
+  actualPitch = (pitch == 0) ? scaledWidth * tjPixelSize[pf] : pitch;
+  arraySize = (y + scaledHeight - 1) * actualPitch +
+              (x + scaledWidth) * tjPixelSize[pf];
+  if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize)
+    THROW_ARG("Destination buffer is not large enough");
+
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  if (precision == 8) {
+    if (tj3Decompress8(handle, jpegBuf, (size_t)jpegSize,
+                       &((unsigned char *)dstBuf)[y * actualPitch +
+                                                  x * tjPixelSize[pf]],
+                       pitch, pf) == -1) {
+      SAFE_RELEASE(dst, dstBuf);
+      SAFE_RELEASE(src, jpegBuf);
+      THROW_TJ();
+    }
+  } else if (precision == 12) {
+    if (tj3Decompress12(handle, jpegBuf, (size_t)jpegSize,
+                        &((short *)dstBuf)[y * actualPitch +
+                                           x * tjPixelSize[pf]],
+                        pitch, pf) == -1) {
+      SAFE_RELEASE(dst, dstBuf);
+      SAFE_RELEASE(src, jpegBuf);
+      THROW_TJ();
+    }
+  } else {
+    if (tj3Decompress16(handle, jpegBuf, (size_t)jpegSize,
+                        &((unsigned short *)dstBuf)[y * actualPitch +
+                                                    x * tjPixelSize[pf]],
+                        pitch, pf) == -1) {
+      SAFE_RELEASE(dst, dstBuf);
+      SAFE_RELEASE(src, jpegBuf);
+      THROW_TJ();
+    }
+  }
+
+bailout:
+  SAFE_RELEASE(dst, dstBuf);
+  SAFE_RELEASE(src, jpegBuf);
+}
+
+/* TurboJPEG 3: TJDecompressor::decompress8() byte destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress8___3BI_3BIIII
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+   jint x, jint y, jint pitch, jint pf)
+{
+  TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 8, x, y, pitch,
+                            pf);
+}
+
+/* TurboJPEG 3: TJDecompressor::decompress12() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress12
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jshortArray dst,
+   jint x, jint y, jint pitch, jint pf)
+{
+  TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 12, x, y, pitch,
+                            pf);
+}
+
+/* TurboJPEG 3: TJDecompressor::decompress16() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress16
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jshortArray dst,
+   jint x, jint y, jint pitch, jint pf)
+{
+  TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 16, x, y, pitch,
+                            pf);
+}
+
+/* TurboJPEG 3: TJDecompressor::decompress8() int destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress8___3BI_3IIIII
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst,
+   jint x, jint y, jint stride, jint pf)
+{
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in decompress8()");
+  if (tjPixelSize[pf] != sizeof(jint))
+    THROW_ARG("Pixel format must be 32-bit when decompressing to an integer buffer.");
+
+  TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 8, x,
+                            y, stride * sizeof(jint), pf);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 3: TJDecompressor::decompressToYUV8() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV8
+  (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize,
+   jobjectArray dstobjs, jintArray jDstOffsets, jintArray jDstStrides)
+{
+  tjhandle handle = 0;
+  unsigned char *jpegBuf = NULL;
+  jbyteArray jDstPlanes[3] = { NULL, NULL, NULL };
+  unsigned char *dstPlanesTmp[3] = { NULL, NULL, NULL };
+  unsigned char *dstPlanes[3] = { NULL, NULL, NULL };
+  jint dstOffsetsTmp[3] = { 0, 0, 0 }, dstStridesTmp[3] = { 0, 0, 0 };
+  int dstOffsets[3] = { 0, 0, 0 }, dstStrides[3] = { 0, 0, 0 };
+  jclass sfcls;
+  jobject sfobj;
+  int jpegSubsamp, jpegWidth = 0, jpegHeight = 0;
+  int nc = 0, i, scaledWidth, scaledHeight;
+  tjscalingfactor scalingFactor;
+
+  GET_HANDLE();
+
+  if ((*env)->GetArrayLength(env, src) < jpegSize)
+    THROW_ARG("Source buffer is not large enough");
+  if ((jpegWidth = tj3Get(handle, TJPARAM_JPEGWIDTH)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+  if ((jpegHeight = tj3Get(handle, TJPARAM_JPEGHEIGHT)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+
+  BAILIF0(sfcls = (*env)->FindClass(env,
+    "org/libjpegturbo/turbojpeg/TJScalingFactor"));
+  BAILIF0(_fid =
+          (*env)->GetFieldID(env, _cls, "scalingFactor",
+                             "Lorg/libjpegturbo/turbojpeg/TJScalingFactor;"));
+  BAILIF0(sfobj = (*env)->GetObjectField(env, obj, _fid));
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "num", "I"));
+  scalingFactor.num = (*env)->GetIntField(env, sfobj, _fid);
+  BAILIF0(_fid = (*env)->GetFieldID(env, sfcls, "denom", "I"));
+  scalingFactor.denom = (*env)->GetIntField(env, sfobj, _fid);
+
+  if (tj3SetScalingFactor(handle, scalingFactor) == -1)
+    THROW_TJ();
+  scaledWidth = TJSCALED(jpegWidth, scalingFactor);
+  scaledHeight = TJSCALED(jpegHeight, scalingFactor);
+
+  if ((jpegSubsamp = tj3Get(handle, TJPARAM_SUBSAMP)) == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+  nc = jpegSubsamp == TJSAMP_GRAY ? 1 : 3;
+
+  (*env)->GetIntArrayRegion(env, jDstOffsets, 0, nc, dstOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstOffsets[i] = dstOffsetsTmp[i];
+
+  (*env)->GetIntArrayRegion(env, jDstStrides, 0, nc, dstStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    dstStrides[i] = dstStridesTmp[i];
+
+  for (i = 0; i < nc; i++) {
+    size_t planeSize = tj3YUVPlaneSize(i, scaledWidth, dstStrides[i],
+                                       scaledHeight, jpegSubsamp);
+    int pw = tj3YUVPlaneWidth(i, scaledWidth, jpegSubsamp);
+
+    if (planeSize == 0 || pw == 0)
+      THROW_ARG(tj3GetErrorStr(NULL));
+
+    if (planeSize > (size_t)INT_MAX)
+      THROW_ARG("Destination plane is too large");
+    if (dstOffsets[i] < 0)
+      THROW_ARG("Invalid argument in decompressToYUV8()");
+    if (dstStrides[i] < 0 && dstOffsets[i] - (int)planeSize + pw < 0)
+      THROW_ARG("Negative plane stride would cause memory to be accessed below plane boundary");
+
+    BAILIF0(jDstPlanes[i] = (*env)->GetObjectArrayElement(env, dstobjs, i));
+    if ((*env)->GetArrayLength(env, jDstPlanes[i]) <
+        dstOffsets[i] + (int)planeSize)
+      THROW_ARG("Destination plane is not large enough");
+  }
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(dstPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0));
+    dstPlanes[i] = &dstPlanesTmp[i][dstOffsets[i]];
+  }
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0));
+
+  if (tj3DecompressToYUVPlanes8(handle, jpegBuf, (size_t)jpegSize, dstPlanes,
+                                dstStrides) == -1) {
+    SAFE_RELEASE(src, jpegBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
+    THROW_TJ();
+  }
+
+bailout:
+  SAFE_RELEASE(src, jpegBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jDstPlanes[i], dstPlanesTmp[i]);
+}
+
+static void TJDecompressor_decodeYUV8
+  (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets,
+   jintArray jSrcStrides, jarray dst, jint dstElementSize, jint x, jint y,
+   jint width, jint pitch, jint height, jint pf)
+{
+  tjhandle handle = 0;
+  jsize arraySize = 0, actualPitch;
+  jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL };
+  const unsigned char *srcPlanesTmp[3] = { NULL, NULL, NULL };
+  const unsigned char *srcPlanes[3] = { NULL, NULL, NULL };
+  jint srcOffsetsTmp[3] = { 0, 0, 0 }, srcStridesTmp[3] = { 0, 0, 0 };
+  int srcOffsets[3] = { 0, 0, 0 }, srcStrides[3] = { 0, 0, 0 };
+  unsigned char *dstBuf = NULL;
+  int nc = 0, i, subsamp;
+
+  GET_HANDLE();
+
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in decodeYUV8()");
+  if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF ||
+      org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP)
+    THROW_ARG("Mismatch between Java and C API");
+
+  if ((subsamp = tj3Get(handle, TJPARAM_SUBSAMP)) == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+  nc = subsamp == TJSAMP_GRAY ? 1 : 3;
+  if ((*env)->GetArrayLength(env, srcobjs) < nc)
+    THROW_ARG("Planes array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jSrcOffsets) < nc)
+    THROW_ARG("Offsets array is too small for the subsampling type");
+  if ((*env)->GetArrayLength(env, jSrcStrides) < nc)
+    THROW_ARG("Strides array is too small for the subsampling type");
+
+  actualPitch = (pitch == 0) ? width * tjPixelSize[pf] : pitch;
+  arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf];
+  if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize)
+    THROW_ARG("Destination buffer is not large enough");
+
+  (*env)->GetIntArrayRegion(env, jSrcOffsets, 0, nc, srcOffsetsTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcOffsets[i] = srcOffsetsTmp[i];
+
+  (*env)->GetIntArrayRegion(env, jSrcStrides, 0, nc, srcStridesTmp);
+  if ((*env)->ExceptionCheck(env)) goto bailout;
+  for (i = 0; i < 3; i++)
+    srcStrides[i] = srcStridesTmp[i];
+
+  for (i = 0; i < nc; i++) {
+    size_t planeSize = tj3YUVPlaneSize(i, width, srcStrides[i], height,
+                                       subsamp);
+    int pw = tj3YUVPlaneWidth(i, width, subsamp);
+
+    if (planeSize == 0 || pw == 0)
+      THROW_ARG(tj3GetErrorStr(NULL));
+
+    if (planeSize > (size_t)INT_MAX)
+      THROW_ARG("Source plane is too large");
+    if (srcOffsets[i] < 0)
+      THROW_ARG("Invalid argument in decodeYUV8()");
+    if (srcStrides[i] < 0 && srcOffsets[i] - (int)planeSize + pw < 0)
+      THROW_ARG("Negative plane stride would cause memory to be accessed below plane boundary");
+
+    BAILIF0(jSrcPlanes[i] = (*env)->GetObjectArrayElement(env, srcobjs, i));
+    if ((*env)->GetArrayLength(env, jSrcPlanes[i]) <
+        srcOffsets[i] + (int)planeSize)
+      THROW_ARG("Source plane is not large enough");
+  }
+  for (i = 0; i < nc; i++) {
+    BAILIF0NOEC(srcPlanesTmp[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0));
+    srcPlanes[i] = &srcPlanesTmp[i][srcOffsets[i]];
+  }
+  BAILIF0NOEC(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  if (tj3DecodeYUVPlanes8(handle, srcPlanes, srcStrides,
+                          &dstBuf[y * actualPitch + x * tjPixelSize[pf]],
+                          width, pitch, height, pf) == -1) {
+    SAFE_RELEASE(dst, dstBuf);
+    for (i = 0; i < nc; i++)
+      SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
+    THROW_TJ();
+  }
+
+bailout:
+  SAFE_RELEASE(dst, dstBuf);
+  for (i = 0; i < nc; i++)
+    SAFE_RELEASE(jSrcPlanes[i], srcPlanesTmp[i]);
+}
+
+/* TurboJPEG 3: TJDecompressor::decodeYUV8() byte destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV8___3_3B_3I_3I_3BIIIIII
+  (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets,
+   jintArray jSrcStrides, jbyteArray dst, jint x, jint y, jint width,
+   jint pitch, jint height, jint pf)
+{
+  TJDecompressor_decodeYUV8(env, obj, srcobjs, jSrcOffsets, jSrcStrides, dst,
+                            1, x, y, width, pitch, height, pf);
+}
+
+/* TurboJPEG 3: TJDecompressor::decodeYUV8() int destination */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV8___3_3B_3I_3I_3IIIIIII
+  (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets,
+   jintArray jSrcStrides, jintArray dst, jint x, jint y, jint width,
+   jint stride, jint height, jint pf)
+{
+  if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF)
+    THROW_ARG("Invalid argument in decodeYUV8()");
+  if (tjPixelSize[pf] != sizeof(jint))
+    THROW_ARG("Pixel format must be 32-bit when decoding to an integer buffer.");
+
+  TJDecompressor_decodeYUV8(env, obj, srcobjs, jSrcOffsets, jSrcStrides, dst,
+                            sizeof(jint), x, y, width, stride * sizeof(jint),
+                            height, pf);
+
+bailout:
+  return;
+}
+
+/* TurboJPEG 1.2.x: TJTransformer::init() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init
+  (JNIEnv *env, jobject obj)
+{
+  jclass cls;
+  jfieldID fid;
+  tjhandle handle;
+
+  if ((handle = tj3Init(TJINIT_TRANSFORM)) == NULL)
+    THROW(tj3GetErrorStr(NULL), "org/libjpegturbo/turbojpeg/TJException");
+
+  BAILIF0(cls = (*env)->GetObjectClass(env, obj));
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "handle", "J"));
+  (*env)->SetLongField(env, obj, fid, (size_t)handle);
+
+bailout:
+  return;
+}
+
+typedef struct _JNICustomFilterParams {
+  JNIEnv *env;
+  jobject tobj;
+  jobject cfobj;
+} JNICustomFilterParams;
+
+static int JNICustomFilter(short *coeffs, tjregion arrayRegion,
+                           tjregion planeRegion, int componentIndex,
+                           int transformIndex, tjtransform *transform)
+{
+  JNICustomFilterParams *params = (JNICustomFilterParams *)transform->data;
+  JNIEnv *env = params->env;
+  jobject tobj = params->tobj, cfobj = params->cfobj;
+  jobject arrayRegionObj, planeRegionObj, bufobj, borobj;
+  jclass cls;
+  jmethodID mid;
+  jfieldID fid;
+
+  BAILIF0(bufobj = (*env)->NewDirectByteBuffer(env, coeffs,
+    sizeof(short) * arrayRegion.w * arrayRegion.h));
+  BAILIF0(cls = (*env)->FindClass(env, "java/nio/ByteOrder"));
+  BAILIF0(mid = (*env)->GetStaticMethodID(env, cls, "nativeOrder",
+                                          "()Ljava/nio/ByteOrder;"));
+  BAILIF0(borobj = (*env)->CallStaticObjectMethod(env, cls, mid));
+  BAILIF0(cls = (*env)->GetObjectClass(env, bufobj));
+  BAILIF0(mid = (*env)->GetMethodID(env, cls, "order",
+    "(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;"));
+  (*env)->CallObjectMethod(env, bufobj, mid, borobj);
+  BAILIF0(mid = (*env)->GetMethodID(env, cls, "asShortBuffer",
+                                    "()Ljava/nio/ShortBuffer;"));
+  BAILIF0(bufobj = (*env)->CallObjectMethod(env, bufobj, mid));
+
+  BAILIF0(cls = (*env)->FindClass(env, "java/awt/Rectangle"));
+  BAILIF0(arrayRegionObj = (*env)->AllocObject(env, cls));
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "x", "I"));
+  (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "y", "I"));
+  (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "width", "I"));
+  (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "height", "I"));
+  (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h);
+
+  BAILIF0(planeRegionObj = (*env)->AllocObject(env, cls));
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "x", "I"));
+  (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "y", "I"));
+  (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "width", "I"));
+  (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w);
+  BAILIF0(fid = (*env)->GetFieldID(env, cls, "height", "I"));
+  (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h);
+
+  BAILIF0(cls = (*env)->GetObjectClass(env, cfobj));
+  BAILIF0(mid = (*env)->GetMethodID(env, cls, "customFilter",
+    "(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V"));
+  (*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj,
+                         planeRegionObj, componentIndex, transformIndex, tobj);
+
+  return 0;
+
+bailout:
+  return -1;
+}
+
+/* TurboJPEG 1.2.x: TJTransformer::transform() */
+JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform
+  (JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize,
+   jobjectArray dstobjs, jobjectArray tobjs)
+{
+  tjhandle handle = 0;
+  unsigned char *jpegBuf = NULL, **dstBufs = NULL;
+  jsize n = 0;
+  size_t *dstSizes = NULL;
+  tjtransform *t = NULL;
+  jbyteArray *jdstBufs = NULL;
+  int i, jpegWidth = 0, jpegHeight = 0, jpegSubsamp;
+  jintArray jdstSizes = 0;
+  jint *dstSizesi = NULL;
+  JNICustomFilterParams *params = NULL;
+
+  GET_HANDLE();
+
+  if ((*env)->GetArrayLength(env, jsrcBuf) < jpegSize)
+    THROW_ARG("Source buffer is not large enough");
+  if ((jpegWidth = tj3Get(handle, TJPARAM_JPEGWIDTH)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+  if ((jpegHeight = tj3Get(handle, TJPARAM_JPEGHEIGHT)) == -1)
+    THROW_ARG("JPEG header has not yet been read");
+  if ((jpegSubsamp = tj3Get(handle, TJPARAM_SUBSAMP)) == TJSAMP_UNKNOWN)
+    THROW_ARG("TJPARAM_SUBSAMP must be specified");
+
+  n = (*env)->GetArrayLength(env, dstobjs);
+  if (n != (*env)->GetArrayLength(env, tobjs))
+    THROW_ARG("Mismatch between size of transforms array and destination buffers array");
+
+  if ((dstBufs =
+       (unsigned char **)malloc(sizeof(unsigned char *) * n)) == NULL)
+    THROW_MEM();
+  if ((jdstBufs = (jbyteArray *)malloc(sizeof(jbyteArray) * n)) == NULL)
+    THROW_MEM();
+  if ((dstSizes = (size_t *)malloc(sizeof(size_t) * n)) == NULL)
+    THROW_MEM();
+  if ((t = (tjtransform *)malloc(sizeof(tjtransform) * n)) == NULL)
+    THROW_MEM();
+  if ((params = (JNICustomFilterParams *)malloc(sizeof(JNICustomFilterParams) *
+                                                n)) == NULL)
+    THROW_MEM();
+  for (i = 0; i < n; i++) {
+    dstBufs[i] = NULL;  jdstBufs[i] = NULL;  dstSizes[i] = 0;
+    memset(&t[i], 0, sizeof(tjtransform));
+    memset(&params[i], 0, sizeof(JNICustomFilterParams));
+  }
+
+  for (i = 0; i < n; i++) {
+    jobject tobj, cfobj;
+
+    BAILIF0(tobj = (*env)->GetObjectArrayElement(env, tobjs, i));
+    BAILIF0(_cls = (*env)->GetObjectClass(env, tobj));
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "op", "I"));
+    t[i].op = (*env)->GetIntField(env, tobj, _fid);
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "options", "I"));
+    t[i].options = (*env)->GetIntField(env, tobj, _fid);
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "x", "I"));
+    t[i].r.x = (*env)->GetIntField(env, tobj, _fid);
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "y", "I"));
+    t[i].r.y = (*env)->GetIntField(env, tobj, _fid);
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "width", "I"));
+    t[i].r.w = (*env)->GetIntField(env, tobj, _fid);
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "height", "I"));
+    t[i].r.h = (*env)->GetIntField(env, tobj, _fid);
+
+    BAILIF0(_fid = (*env)->GetFieldID(env, _cls, "cf",
+      "Lorg/libjpegturbo/turbojpeg/TJCustomFilter;"));
+    cfobj = (*env)->GetObjectField(env, tobj, _fid);
+    if (cfobj) {
+      params[i].env = env;
+      params[i].tobj = tobj;
+      params[i].cfobj = cfobj;
+      t[i].customFilter = JNICustomFilter;
+      t[i].data = (void *)&params[i];
+    }
+  }
+
+  if (tj3Set(handle, TJPARAM_NOREALLOC, 1) == -1)
+    THROW_TJ();
+
+  for (i = 0; i < n; i++) {
+    int w = jpegWidth, h = jpegHeight;
+
+    if (t[i].op == TJXOP_TRANSPOSE || t[i].op == TJXOP_TRANSVERSE ||
+        t[i].op == TJXOP_ROT90 || t[i].op == TJXOP_ROT270) {
+      w = jpegHeight;  h = jpegWidth;
+    }
+    if (t[i].r.w != 0) w = t[i].r.w;
+    if (t[i].r.h != 0) h = t[i].r.h;
+    BAILIF0(jdstBufs[i] = (*env)->GetObjectArrayElement(env, dstobjs, i));
+    if ((size_t)(*env)->GetArrayLength(env, jdstBufs[i]) <
+        tj3JPEGBufSize(w, h, jpegSubsamp))
+      THROW_ARG("Destination buffer is not large enough");
+  }
+  BAILIF0NOEC(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
+  for (i = 0; i < n; i++)
+    BAILIF0NOEC(dstBufs[i] =
+                (*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
+
+  if (tj3Transform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t) == -1) {
+    for (i = 0; i < n; i++)
+      SAFE_RELEASE(jdstBufs[i], dstBufs[i]);
+    SAFE_RELEASE(jsrcBuf, jpegBuf);
+    THROW_TJ();
+  }
+
+  for (i = 0; i < n; i++)
+    SAFE_RELEASE(jdstBufs[i], dstBufs[i]);
+  SAFE_RELEASE(jsrcBuf, jpegBuf);
+
+  jdstSizes = (*env)->NewIntArray(env, n);
+  BAILIF0(dstSizesi = (*env)->GetIntArrayElements(env, jdstSizes, 0));
+  for (i = 0; i < n; i++) dstSizesi[i] = (int)dstSizes[i];
+
+bailout:
+  if (dstSizesi) (*env)->ReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0);
+  if (dstBufs) {
+    for (i = 0; i < n; i++) {
+      if (dstBufs[i] && jdstBufs && jdstBufs[i])
+        (*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0);
+    }
+    free(dstBufs);
+  }
+  SAFE_RELEASE(jsrcBuf, jpegBuf);
+  free(jdstBufs);
+  free(dstSizes);
+  free(t);
+  return jdstSizes;
+}
+
+/* TurboJPEG 1.2.x: TJDecompressor::destroy() */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
+  (JNIEnv *env, jobject obj)
+{
+  Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj);
+}
+
+/* Private image I/O routines (used only by TJBench) */
+JNIEXPORT jobject JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_loadImage
+  (JNIEnv *env, jobject obj, jint precision, jstring jfilename,
+   jintArray jwidth, jint align, jintArray jheight, jintArray jpixelFormat)
+{
+  tjhandle handle = NULL;
+  void *dstBuf = NULL, *jdstPtr;
+  int width, *warr, height, *harr, pixelFormat, *pfarr, n;
+  const char *filename = NULL;
+  jboolean isCopy;
+  jobject jdstBuf = NULL;
+
+  GET_HANDLE();
+
+  if ((precision != 8 && precision != 12 && precision != 16) ||
+      jfilename == NULL || jwidth == NULL ||
+      (*env)->GetArrayLength(env, jwidth) < 1 || jheight == NULL ||
+      (*env)->GetArrayLength(env, jheight) < 1 || jpixelFormat == NULL ||
+      (*env)->GetArrayLength(env, jpixelFormat) < 1)
+    THROW_ARG("Invalid argument in loadImage()");
+
+  BAILIF0NOEC(warr = (*env)->GetPrimitiveArrayCritical(env, jwidth, 0));
+  width = warr[0];
+  (*env)->ReleasePrimitiveArrayCritical(env, jwidth, warr, 0);
+  BAILIF0NOEC(harr = (*env)->GetPrimitiveArrayCritical(env, jheight, 0));
+  height = harr[0];
+  (*env)->ReleasePrimitiveArrayCritical(env, jheight, harr, 0);
+  BAILIF0NOEC(pfarr = (*env)->GetPrimitiveArrayCritical(env, jpixelFormat, 0));
+  pixelFormat = pfarr[0];
+  (*env)->ReleasePrimitiveArrayCritical(env, jpixelFormat, pfarr, 0);
+  BAILIF0(filename = (*env)->GetStringUTFChars(env, jfilename, &isCopy));
+
+  if (precision == 8) {
+    if ((dstBuf = tj3LoadImage8(handle, filename, &width, align, &height,
+                                &pixelFormat)) == NULL)
+      THROW_TJ();
+  } else if (precision == 12) {
+    if ((dstBuf = tj3LoadImage12(handle, filename, &width, align, &height,
+                                 &pixelFormat)) == NULL)
+      THROW_TJ();
+  } else {
+    if ((dstBuf = tj3LoadImage16(handle, filename, &width, align, &height,
+                                 &pixelFormat)) == NULL)
+      THROW_TJ();
+  }
+
+  (*env)->ReleaseStringUTFChars(env, jfilename, filename);
+  filename = NULL;
+
+  if ((unsigned long long)width * (unsigned long long)height *
+      (unsigned long long)tjPixelSize[pixelFormat] >
+      (unsigned long long)((unsigned int)-1))
+    THROW_ARG("Image is too large");
+
+  BAILIF0NOEC(warr = (*env)->GetPrimitiveArrayCritical(env, jwidth, 0));
+  warr[0] = width;
+  (*env)->ReleasePrimitiveArrayCritical(env, jwidth, warr, 0);
+  BAILIF0NOEC(harr = (*env)->GetPrimitiveArrayCritical(env, jheight, 0));
+  harr[0] = height;
+  (*env)->ReleasePrimitiveArrayCritical(env, jheight, harr, 0);
+  BAILIF0NOEC(pfarr = (*env)->GetPrimitiveArrayCritical(env, jpixelFormat, 0));
+  pfarr[0] = pixelFormat;
+  (*env)->ReleasePrimitiveArrayCritical(env, jpixelFormat, pfarr, 0);
+
+  n = width * height * tjPixelSize[pixelFormat];
+  if (precision == 8)
+    jdstBuf = (*env)->NewByteArray(env, n);
+  else
+    jdstBuf = (*env)->NewShortArray(env, n);
+  BAILIF0NOEC(jdstPtr = (*env)->GetPrimitiveArrayCritical(env, jdstBuf, 0));
+  memcpy(jdstPtr, dstBuf, n * (precision > 8 ? 2 : 1));
+  (*env)->ReleasePrimitiveArrayCritical(env, jdstBuf, jdstPtr, 0);
+
+bailout:
+  if (filename) (*env)->ReleaseStringUTFChars(env, jfilename, filename);
+  tj3Free(dstBuf);
+  return jdstBuf;
+}
+
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_saveImage
+  (JNIEnv *env, jobject obj, jint precision, jstring jfilename,
+   jobject jsrcBuf, jint width, jint pitch, jint height, jint pixelFormat)
+{
+  tjhandle handle = NULL;
+  void *srcBuf = NULL, *jsrcPtr;
+  const char *filename = NULL;
+  int n;
+  jboolean isCopy;
+
+  GET_HANDLE();
+
+  if ((precision != 8 && precision != 12 && precision != 16) ||
+      jfilename == NULL || jsrcBuf == NULL || width < 1 || height < 1 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW_ARG("Invalid argument in saveImage()");
+
+  if ((unsigned long long)width * (unsigned long long)height *
+      (unsigned long long)tjPixelSize[pixelFormat] >
+      (unsigned long long)((unsigned int)-1))
+    THROW_ARG("Image is too large");
+  n = width * height * tjPixelSize[pixelFormat];
+  if ((*env)->GetArrayLength(env, jsrcBuf) < n)
+    THROW_ARG("Source buffer is not large enough");
+
+  if ((srcBuf = malloc(n * (precision > 8 ? 2 : 1))) == NULL)
+    THROW_MEM();
+
+  BAILIF0NOEC(jsrcPtr = (*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
+  memcpy(srcBuf, jsrcPtr, n * (precision > 8 ? 2 : 1));
+  (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jsrcPtr, 0);
+  BAILIF0(filename = (*env)->GetStringUTFChars(env, jfilename, &isCopy));
+
+  if (precision == 8) {
+    if (tj3SaveImage8(handle, filename, srcBuf, width, pitch, height,
+                      pixelFormat) == -1)
+      THROW_TJ();
+  } else if (precision == 12) {
+    if (tj3SaveImage12(handle, filename, srcBuf, width, pitch, height,
+                       pixelFormat) == -1)
+      THROW_TJ();
+  } else {
+    if (tj3SaveImage16(handle, filename, srcBuf, width, pitch, height,
+                       pixelFormat) == -1)
+      THROW_TJ();
+  }
+
+bailout:
+  if (filename) (*env)->ReleaseStringUTFChars(env, jfilename, filename);
+  free(srcBuf);
+}
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile b/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile
new file mode 100644
index 0000000000..6aab87132a
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile
@@ -0,0 +1,108 @@
+TURBOJPEG_1.0
+{
+  global:
+    TJBUFSIZE;
+    tjCompress;
+    tjDecompress;
+    tjDecompressHeader;
+    tjDestroy;
+    tjGetErrorStr;
+    tjInitCompress;
+    tjInitDecompress;
+  local:
+    *;
+};
+
+TURBOJPEG_1.1
+{
+  global:
+    TJBUFSIZEYUV;
+    tjDecompressHeader2;
+    tjDecompressToYUV;
+    tjEncodeYUV;
+} TURBOJPEG_1.0;
+
+TURBOJPEG_1.2
+{
+  global:
+    tjAlloc;
+    tjBufSize;
+    tjBufSizeYUV;
+    tjCompress2;
+    tjDecompress2;
+    tjEncodeYUV2;
+    tjFree;
+    tjGetScalingFactors;
+    tjInitTransform;
+    tjTransform;
+} TURBOJPEG_1.1;
+
+TURBOJPEG_1.4
+{
+  global:
+    tjBufSizeYUV2;
+    tjCompressFromYUV;
+    tjCompressFromYUVPlanes;
+    tjDecodeYUV;
+    tjDecodeYUVPlanes;
+    tjDecompressHeader3;
+    tjDecompressToYUV2;
+    tjDecompressToYUVPlanes;
+    tjEncodeYUV3;
+    tjEncodeYUVPlanes;
+    tjPlaneHeight;
+    tjPlaneSizeYUV;
+    tjPlaneWidth;
+} TURBOJPEG_1.2;
+
+TURBOJPEG_2.0
+{
+  global:
+    tjGetErrorCode;
+    tjGetErrorStr2;
+    tjLoadImage;
+    tjSaveImage;
+} TURBOJPEG_1.4;
+
+TURBOJPEG_3
+{
+  global:
+    tj3Alloc;
+    tj3Compress8;
+    tj3Compress12;
+    tj3Compress16;
+    tj3CompressFromYUV8;
+    tj3CompressFromYUVPlanes8;
+    tj3DecodeYUV8;
+    tj3DecodeYUVPlanes8;
+    tj3Decompress8;
+    tj3Decompress12;
+    tj3Decompress16;
+    tj3DecompressHeader;
+    tj3DecompressToYUV8;
+    tj3DecompressToYUVPlanes8;
+    tj3Destroy;
+    tj3EncodeYUV8;
+    tj3EncodeYUVPlanes8;
+    tj3Free;
+    tj3Get;
+    tj3GetErrorCode;
+    tj3GetErrorStr;
+    tj3GetScalingFactors;
+    tj3Init;
+    tj3JPEGBufSize;
+    tj3LoadImage8;
+    tj3LoadImage12;
+    tj3LoadImage16;
+    tj3SaveImage8;
+    tj3SaveImage12;
+    tj3SaveImage16;
+    tj3Set;
+    tj3SetCroppingRegion;
+    tj3SetScalingFactor;
+    tj3Transform;
+    tj3YUVBufSize;
+    tj3YUVPlaneHeight;
+    tj3YUVPlaneSize;
+    tj3YUVPlaneWidth;
+} TURBOJPEG_2.0;
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile.jni b/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile.jni
new file mode 100644
index 0000000000..31be750858
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg-mapfile.jni
@@ -0,0 +1,142 @@
+TURBOJPEG_1.0
+{
+  global:
+    TJBUFSIZE;
+    tjCompress;
+    tjDecompress;
+    tjDecompressHeader;
+    tjDestroy;
+    tjGetErrorStr;
+    tjInitCompress;
+    tjInitDecompress;
+  local:
+    *;
+};
+
+TURBOJPEG_1.1
+{
+  global:
+    TJBUFSIZEYUV;
+    tjDecompressHeader2;
+    tjDecompressToYUV;
+    tjEncodeYUV;
+} TURBOJPEG_1.0;
+
+TURBOJPEG_1.2
+{
+  global:
+    tjAlloc;
+    tjBufSize;
+    tjBufSizeYUV;
+    tjCompress2;
+    tjDecompress2;
+    tjEncodeYUV2;
+    tjFree;
+    tjGetScalingFactors;
+    tjInitTransform;
+    tjTransform;
+    Java_org_libjpegturbo_turbojpeg_TJ_bufSize;
+    Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_init;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_init;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;
+    Java_org_libjpegturbo_turbojpeg_TJTransformer_init;
+    Java_org_libjpegturbo_turbojpeg_TJTransformer_transform;
+} TURBOJPEG_1.1;
+
+TURBOJPEG_1.4
+{
+  global:
+    tjBufSizeYUV2;
+    tjCompressFromYUV;
+    tjCompressFromYUVPlanes;
+    tjDecodeYUV;
+    tjDecodeYUVPlanes;
+    tjDecompressHeader3;
+    tjDecompressToYUV2;
+    tjDecompressToYUVPlanes;
+    tjEncodeYUV3;
+    tjEncodeYUVPlanes;
+    tjPlaneHeight;
+    tjPlaneSizeYUV;
+    tjPlaneWidth;
+    Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII;
+    Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III;
+    Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII;
+    Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III;
+} TURBOJPEG_1.2;
+
+TURBOJPEG_2.0
+{
+  global:
+    tjGetErrorCode;
+    tjGetErrorStr2;
+    tjLoadImage;
+    tjSaveImage;
+} TURBOJPEG_1.4;
+
+TURBOJPEG_3
+{
+  global:
+    tj3Alloc;
+    tj3Compress8;
+    tj3Compress12;
+    tj3Compress16;
+    tj3CompressFromYUV8;
+    tj3CompressFromYUVPlanes8;
+    tj3DecodeYUV8;
+    tj3DecodeYUVPlanes8;
+    tj3Decompress8;
+    tj3Decompress12;
+    tj3Decompress16;
+    tj3DecompressHeader;
+    tj3DecompressToYUV8;
+    tj3DecompressToYUVPlanes8;
+    tj3Destroy;
+    tj3EncodeYUV8;
+    tj3EncodeYUVPlanes8;
+    tj3Free;
+    tj3Get;
+    tj3GetErrorCode;
+    tj3GetErrorStr;
+    tj3GetScalingFactors;
+    tj3Init;
+    tj3JPEGBufSize;
+    tj3LoadImage8;
+    tj3LoadImage12;
+    tj3LoadImage16;
+    tj3SaveImage8;
+    tj3SaveImage12;
+    tj3SaveImage16;
+    tj3Set;
+    tj3SetCroppingRegion;
+    tj3SetScalingFactor;
+    tj3Transform;
+    tj3YUVBufSize;
+    tj3YUVPlaneHeight;
+    tj3YUVPlaneSize;
+    tj3YUVPlaneWidth;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_compress8___3BIIIIII_3B;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_compress8___3IIIIIII_3B;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_compress12;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_compress16;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV8;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV8___3BIIIIII_3_3B_3I_3I;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV8___3IIIIIII_3_3B_3I_3I;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_get;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_loadImage;
+    Java_org_libjpegturbo_turbojpeg_TJCompressor_set;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV8___3_3B_3I_3I_3BIIIIII;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV8___3_3B_3I_3I_3IIIIIII;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress8___3BI_3BIIII;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress8___3BI_3IIIII;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress12;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress16;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV8;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_get;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_saveImage;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_set;
+    Java_org_libjpegturbo_turbojpeg_TJDecompressor_setCroppingRegion;
+} TURBOJPEG_2.0;
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg-mp.c b/3rdparty/libjpeg-turbo/src/turbojpeg-mp.c
new file mode 100644
index 0000000000..d4b3c74c39
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg-mp.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TurboJPEG API functions that must be compiled for multiple data
+   precisions */
+
+#if BITS_IN_JSAMPLE == 8
+#define _JSAMPLE  JSAMPLE
+#define _JSAMPROW  JSAMPROW
+#define _buffer  buffer
+#define _jinit_read_ppm  jinit_read_ppm
+#define _jinit_write_ppm  jinit_write_ppm
+#define _jpeg_crop_scanline  jpeg_crop_scanline
+#define _jpeg_read_scanlines  jpeg_read_scanlines
+#define _jpeg_skip_scanlines  jpeg_skip_scanlines
+#define _jpeg_write_scanlines  jpeg_write_scanlines
+#elif BITS_IN_JSAMPLE == 12
+#define _JSAMPLE  J12SAMPLE
+#define _JSAMPROW  J12SAMPROW
+#define _buffer  buffer12
+#define _jinit_read_ppm  j12init_read_ppm
+#define _jinit_write_ppm  j12init_write_ppm
+#define _jpeg_crop_scanline  jpeg12_crop_scanline
+#define _jpeg_read_scanlines  jpeg12_read_scanlines
+#define _jpeg_skip_scanlines  jpeg12_skip_scanlines
+#define _jpeg_write_scanlines  jpeg12_write_scanlines
+#elif BITS_IN_JSAMPLE == 16
+#define _JSAMPLE  J16SAMPLE
+#define _JSAMPROW  J16SAMPROW
+#define _buffer  buffer16
+#define _jinit_read_ppm  j16init_read_ppm
+#define _jinit_write_ppm  j16init_write_ppm
+#define _jpeg_read_scanlines  jpeg16_read_scanlines
+#define _jpeg_write_scanlines  jpeg16_write_scanlines
+#endif
+
+#define _GET_NAME(name, suffix)  name##suffix
+#define GET_NAME(name, suffix)  _GET_NAME(name, suffix)
+#define _GET_STRING(name, suffix)  #name #suffix
+#define GET_STRING(name, suffix)  _GET_STRING(name, suffix)
+
+
+/******************************** Compressor *********************************/
+
+/* TurboJPEG 3+ */
+DLLEXPORT int GET_NAME(tj3Compress, BITS_IN_JSAMPLE)
+  (tjhandle handle, const _JSAMPLE *srcBuf, int width, int pitch, int height,
+   int pixelFormat, unsigned char **jpegBuf, size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = GET_STRING(tj3Compress, BITS_IN_JSAMPLE);
+  int i, retval = 0;
+  boolean alloc = TRUE;
+  _JSAMPROW *row_pointer = NULL;
+
+  GET_CINSTANCE(handle)
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL ||
+      jpegSize == NULL)
+    THROW("Invalid argument");
+
+  if (!this->lossless && this->quality == -1)
+    THROW("TJPARAM_QUALITY must be specified");
+  if (!this->lossless && this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+
+  if ((row_pointer = (_JSAMPROW *)malloc(sizeof(_JSAMPROW) * height)) == NULL)
+    THROW("Memory allocation failure");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+
+  setCompDefaults(this, pixelFormat);
+  if (this->noRealloc) {
+    alloc = FALSE;
+    *jpegSize = tj3JPEGBufSize(width, height, this->subsamp);
+  }
+  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+
+  jpeg_start_compress(cinfo, TRUE);
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = (_JSAMPROW)&srcBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = (_JSAMPROW)&srcBuf[i * (size_t)pitch];
+  }
+  while (cinfo->next_scanline < cinfo->image_height)
+    _jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline],
+                          cinfo->image_height - cinfo->next_scanline);
+  jpeg_finish_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START && alloc)
+    (*cinfo->dest->term_destination) (cinfo);
+  if (cinfo->global_state > CSTATE_START || retval == -1)
+    jpeg_abort_compress(cinfo);
+  free(row_pointer);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+
+/******************************* Decompressor ********************************/
+
+/* TurboJPEG 3+ */
+DLLEXPORT int GET_NAME(tj3Decompress, BITS_IN_JSAMPLE)
+  (tjhandle handle, const unsigned char *jpegBuf, size_t jpegSize,
+   _JSAMPLE *dstBuf, int pitch, int pixelFormat)
+{
+  static const char FUNCTION_NAME[] =
+    GET_STRING(tj3Decompress, BITS_IN_JSAMPLE);
+  _JSAMPROW *row_pointer = NULL;
+  int croppedHeight, i, retval = 0;
+#if BITS_IN_JSAMPLE != 16
+  int scaledWidth;
+#endif
+  struct my_progress_mgr progress;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || pitch < 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->maxPixels &&
+      (unsigned long long)this->jpegWidth * this->jpegHeight >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  this->dinfo.out_color_space = pf2cs[pixelFormat];
+#if BITS_IN_JSAMPLE != 16
+  scaledWidth = TJSCALED(dinfo->image_width, this->scalingFactor);
+#endif
+  dinfo->do_fancy_upsampling = !this->fastUpsample;
+  this->dinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+
+  dinfo->scale_num = this->scalingFactor.num;
+  dinfo->scale_denom = this->scalingFactor.denom;
+
+  jpeg_start_decompress(dinfo);
+
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.x != 0 ||
+      (this->croppingRegion.w != 0 && this->croppingRegion.w != scaledWidth)) {
+    JDIMENSION crop_x = this->croppingRegion.x;
+    JDIMENSION crop_w = this->croppingRegion.w;
+
+    _jpeg_crop_scanline(dinfo, &crop_x, &crop_w);
+    if ((int)crop_x != this->croppingRegion.x)
+      THROWI("Unexplained mismatch between specified (%d) and\n"
+             "actual (%d) cropping region left boundary",
+             this->croppingRegion.x, (int)crop_x);
+    if ((int)crop_w != this->croppingRegion.w)
+      THROWI("Unexplained mismatch between specified (%d) and\n"
+             "actual (%d) cropping region width",
+             this->croppingRegion.w, (int)crop_w);
+  }
+#endif
+
+  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
+
+  croppedHeight = dinfo->output_height;
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.y != 0 || this->croppingRegion.h != 0)
+    croppedHeight = this->croppingRegion.h;
+#endif
+  if ((row_pointer =
+       (_JSAMPROW *)malloc(sizeof(_JSAMPROW) * croppedHeight)) == NULL)
+    THROW("Memory allocation failure");
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+  for (i = 0; i < (int)croppedHeight; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = &dstBuf[(croppedHeight - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = &dstBuf[i * (size_t)pitch];
+  }
+
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.y != 0 || this->croppingRegion.h != 0) {
+    if (this->croppingRegion.y != 0) {
+      JDIMENSION lines = _jpeg_skip_scanlines(dinfo, this->croppingRegion.y);
+
+      if ((int)lines != this->croppingRegion.y)
+        THROWI("Unexplained mismatch between specified (%d) and\n"
+               "actual (%d) cropping region upper boundary",
+               this->croppingRegion.y, (int)lines);
+    }
+    while ((int)dinfo->output_scanline <
+           this->croppingRegion.y + this->croppingRegion.h)
+      _jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline -
+                                               this->croppingRegion.y],
+                           this->croppingRegion.y + this->croppingRegion.h -
+                           dinfo->output_scanline);
+    if (this->croppingRegion.y + this->croppingRegion.h !=
+        (int)dinfo->output_height) {
+      JDIMENSION lines = _jpeg_skip_scanlines(dinfo, dinfo->output_height -
+                                                     this->croppingRegion.y -
+                                                     this->croppingRegion.h);
+
+      if (lines != dinfo->output_height - this->croppingRegion.y -
+                   this->croppingRegion.h)
+        THROWI("Unexplained mismatch between specified (%d) and\n"
+               "actual (%d) cropping region lower boundary",
+               this->croppingRegion.y + this->croppingRegion.h,
+               (int)(dinfo->output_height - lines));
+    }
+  } else
+#endif
+  {
+    while (dinfo->output_scanline < dinfo->output_height)
+      _jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline],
+                           dinfo->output_height - dinfo->output_scanline);
+  }
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(row_pointer);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+
+/*************************** Packed-Pixel Image I/O **************************/
+
+/* TurboJPEG 3+ */
+DLLEXPORT _JSAMPLE *GET_NAME(tj3LoadImage, BITS_IN_JSAMPLE)
+  (tjhandle handle, const char *filename, int *width, int align, int *height,
+   int *pixelFormat)
+{
+  static const char FUNCTION_NAME[] =
+    GET_STRING(tj3LoadImage, BITS_IN_JSAMPLE);
+
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
+  int retval = 0, tempc;
+  size_t pitch;
+  tjhandle handle2 = NULL;
+  tjinstance *this2;
+  j_compress_ptr cinfo = NULL;
+  cjpeg_source_ptr src;
+  _JSAMPLE *dstBuf = NULL;
+  FILE *file = NULL;
+  boolean invert;
+
+  GET_TJINSTANCE(handle, NULL)
+
+  if (!filename || !width || align < 1 || !height || !pixelFormat ||
+      *pixelFormat < TJPF_UNKNOWN || *pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+  if ((align & (align - 1)) != 0)
+    THROW("Alignment must be a power of 2");
+
+  /* The instance handle passed to this function is used only for parameter
+     retrieval.  Create a new temporary instance to avoid interfering with the
+     libjpeg state of the primary instance. */
+  if ((handle2 = tj3Init(TJINIT_COMPRESS)) == NULL) return NULL;
+  this2 = (tjinstance *)handle2;
+  cinfo = &this2->cinfo;
+
+#ifdef _MSC_VER
+  if (fopen_s(&file, filename, "rb") || file == NULL)
+#else
+  if ((file = fopen(filename, "rb")) == NULL)
+#endif
+    THROW_UNIX("Cannot open input file");
+
+  if ((tempc = getc(file)) < 0 || ungetc(tempc, file) == EOF)
+    THROW_UNIX("Could not read input file")
+  else if (tempc == EOF)
+    THROW("Input file contains no data");
+
+  if (setjmp(this2->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  if (*pixelFormat == TJPF_UNKNOWN) cinfo->in_color_space = JCS_UNKNOWN;
+  else cinfo->in_color_space = pf2cs[*pixelFormat];
+  if (tempc == 'B') {
+    if ((src = jinit_read_bmp(cinfo, FALSE)) == NULL)
+      THROW("Could not initialize bitmap loader");
+    invert = !this->bottomUp;
+  } else if (tempc == 'P') {
+    if ((src = _jinit_read_ppm(cinfo)) == NULL)
+      THROW("Could not initialize PPM loader");
+    invert = this->bottomUp;
+  } else
+    THROW("Unsupported file type");
+
+  cinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  src->input_file = file;
+  /* Refuse to load images larger than the specified size. */
+  src->max_pixels = this->maxPixels;
+  (*src->start_input) (cinfo, src);
+  if (tempc == 'B') {
+    if (cinfo->X_density && cinfo->Y_density) {
+      this->xDensity = cinfo->X_density;
+      this->yDensity = cinfo->Y_density;
+      this->densityUnits = cinfo->density_unit;
+    }
+  }
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+
+  *width = cinfo->image_width;  *height = cinfo->image_height;
+  *pixelFormat = cs2pf[cinfo->in_color_space];
+
+  pitch = PAD((*width) * tjPixelSize[*pixelFormat], align);
+  if (
+#if ULLONG_MAX > SIZE_MAX
+      (unsigned long long)pitch * (unsigned long long)(*height) >
+      (unsigned long long)((size_t)-1) ||
+#endif
+      (dstBuf = (_JSAMPLE *)malloc(pitch * (*height) *
+                                   sizeof(_JSAMPLE))) == NULL)
+    THROW("Memory allocation failure");
+
+  if (setjmp(this2->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  while (cinfo->next_scanline < cinfo->image_height) {
+    int i, nlines = (*src->get_pixel_rows) (cinfo, src);
+
+    for (i = 0; i < nlines; i++) {
+      _JSAMPLE *dstptr;
+      int row;
+
+      row = cinfo->next_scanline + i;
+      if (invert) dstptr = &dstBuf[((*height) - row - 1) * pitch];
+      else dstptr = &dstBuf[row * pitch];
+      memcpy(dstptr, src->_buffer[i],
+             (*width) * tjPixelSize[*pixelFormat] * sizeof(_JSAMPLE));
+    }
+    cinfo->next_scanline += nlines;
+  }
+
+  (*src->finish_input) (cinfo, src);
+
+bailout:
+  tj3Destroy(handle2);
+  if (file) fclose(file);
+  if (retval < 0) { free(dstBuf);  dstBuf = NULL; }
+  return dstBuf;
+
+#else /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
+
+  static const char ERROR_MSG[] =
+    "16-bit data precision requires lossless JPEG,\n"
+    "which was disabled at build time.";
+  _JSAMPLE *retval = NULL;
+
+  GET_TJINSTANCE(handle, NULL)
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME,
+           ERROR_MSG);
+  this->isInstanceError = TRUE;  THROWG(ERROR_MSG, NULL)
+
+bailout:
+  return retval;
+
+#endif
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int GET_NAME(tj3SaveImage, BITS_IN_JSAMPLE)
+  (tjhandle handle, const char *filename, const _JSAMPLE *buffer, int width,
+   int pitch, int height, int pixelFormat)
+{
+  static const char FUNCTION_NAME[] =
+    GET_STRING(tj3SaveImage, BITS_IN_JSAMPLE);
+  int retval = 0;
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+  tjhandle handle2 = NULL;
+  tjinstance *this2;
+  j_decompress_ptr dinfo = NULL;
+  djpeg_dest_ptr dst;
+  FILE *file = NULL;
+  char *ptr = NULL;
+  boolean invert;
+
+  GET_TJINSTANCE(handle, -1)
+
+  if (!filename || !buffer || width < 1 || pitch < 0 || height < 1 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+
+  /* The instance handle passed to this function is used only for parameter
+     retrieval.  Create a new temporary instance to avoid interfering with the
+     libjpeg state of the primary instance. */
+  if ((handle2 = tj3Init(TJINIT_DECOMPRESS)) == NULL)
+    return -1;
+  this2 = (tjinstance *)handle2;
+  dinfo = &this2->dinfo;
+
+#ifdef _MSC_VER
+  if (fopen_s(&file, filename, "wb") || file == NULL)
+#else
+  if ((file = fopen(filename, "wb")) == NULL)
+#endif
+    THROW_UNIX("Cannot open output file");
+
+  if (setjmp(this2->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  this2->dinfo.out_color_space = pf2cs[pixelFormat];
+  dinfo->image_width = width;  dinfo->image_height = height;
+  dinfo->global_state = DSTATE_READY;
+  dinfo->scale_num = dinfo->scale_denom = 1;
+  dinfo->data_precision = BITS_IN_JSAMPLE;
+
+  ptr = strrchr(filename, '.');
+  if (ptr && !strcasecmp(ptr, ".bmp")) {
+    if ((dst = jinit_write_bmp(dinfo, FALSE, FALSE)) == NULL)
+      THROW("Could not initialize bitmap writer");
+    invert = !this->bottomUp;
+    dinfo->X_density = (UINT16)this->xDensity;
+    dinfo->Y_density = (UINT16)this->yDensity;
+    dinfo->density_unit = (UINT8)this->densityUnits;
+  } else {
+    if ((dst = _jinit_write_ppm(dinfo)) == NULL)
+      THROW("Could not initialize PPM writer");
+    invert = this->bottomUp;
+  }
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  dst->output_file = file;
+  (*dst->start_output) (dinfo, dst);
+  (*dinfo->mem->realize_virt_arrays) ((j_common_ptr)dinfo);
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+
+  while (dinfo->output_scanline < dinfo->output_height) {
+    _JSAMPLE *rowptr;
+
+    if (invert)
+      rowptr =
+        (_JSAMPLE *)&buffer[(height - dinfo->output_scanline - 1) * pitch];
+    else
+      rowptr = (_JSAMPLE *)&buffer[dinfo->output_scanline * pitch];
+    memcpy(dst->_buffer[0], rowptr,
+           width * tjPixelSize[pixelFormat] * sizeof(_JSAMPLE));
+    (*dst->put_pixel_rows) (dinfo, dst, 1);
+    dinfo->output_scanline++;
+  }
+
+  (*dst->finish_output) (dinfo, dst);
+
+bailout:
+  tj3Destroy(handle2);
+  if (file) fclose(file);
+  return retval;
+
+#else /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
+
+  GET_TJINSTANCE(handle, -1)
+  THROW("16-bit data precision requires lossless JPEG,\n"
+        "which was disabled at build time.")
+bailout:
+  return retval;
+
+#endif
+}
+
+
+#undef _JSAMPLE
+#undef _JSAMPROW
+#undef _buffer
+#undef _jinit_read_ppm
+#undef _jinit_write_ppm
+#undef _jpeg_crop_scanline
+#undef _jpeg_read_scanlines
+#undef _jpeg_skip_scanlines
+#undef _jpeg_write_scanlines
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg.c b/3rdparty/libjpeg-turbo/src/turbojpeg.c
new file mode 100644
index 0000000000..3c936160df
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg.c
@@ -0,0 +1,2921 @@
+/*
+ * Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2021 Alex Richardson.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TurboJPEG/LJT:  this implements the TurboJPEG API using libjpeg or
+   libjpeg-turbo */
+
+#include <ctype.h>
+#include <limits.h>
+#if !defined(_MSC_VER) || _MSC_VER > 1600
+#include <stdint.h>
+#endif
+#include <jinclude.h>
+#define JPEG_INTERNALS
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+#include <errno.h>
+#include "./turbojpeg.h"
+#include "./tjutil.h"
+#include "transupp.h"
+#include "./jpegapicomp.h"
+#include "./cdjpeg.h"
+
+extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, size_t *,
+                             boolean);
+extern void jpeg_mem_src_tj(j_decompress_ptr, const unsigned char *, size_t);
+
+#define PAD(v, p)  ((v + (p) - 1) & (~((p) - 1)))
+#define IS_POW2(x)  (((x) & (x - 1)) == 0)
+
+
+/* Error handling (based on example in example.c) */
+
+static THREAD_LOCAL char errStr[JMSG_LENGTH_MAX] = "No error";
+
+struct my_error_mgr {
+  struct jpeg_error_mgr pub;
+  jmp_buf setjmp_buffer;
+  void (*emit_message) (j_common_ptr, int);
+  boolean warning, stopOnWarning;
+};
+typedef struct my_error_mgr *my_error_ptr;
+
+#define JMESSAGE(code, string)  string,
+static const char *turbojpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+static void my_error_exit(j_common_ptr cinfo)
+{
+  my_error_ptr myerr = (my_error_ptr)cinfo->err;
+
+  (*cinfo->err->output_message) (cinfo);
+  longjmp(myerr->setjmp_buffer, 1);
+}
+
+/* Based on output_message() in jerror.c */
+
+static void my_output_message(j_common_ptr cinfo)
+{
+  (*cinfo->err->format_message) (cinfo, errStr);
+}
+
+static void my_emit_message(j_common_ptr cinfo, int msg_level)
+{
+  my_error_ptr myerr = (my_error_ptr)cinfo->err;
+
+  myerr->emit_message(cinfo, msg_level);
+  if (msg_level < 0) {
+    myerr->warning = TRUE;
+    if (myerr->stopOnWarning) longjmp(myerr->setjmp_buffer, 1);
+  }
+}
+
+
+/********************** Global structures, macros, etc. **********************/
+
+enum { COMPRESS = 1, DECOMPRESS = 2 };
+
+typedef struct _tjinstance {
+  struct jpeg_compress_struct cinfo;
+  struct jpeg_decompress_struct dinfo;
+  struct my_error_mgr jerr;
+  int init;
+  char errStr[JMSG_LENGTH_MAX];
+  boolean isInstanceError;
+  /* Parameters */
+  boolean bottomUp;
+  boolean noRealloc;
+  int quality;
+  int subsamp;
+  int jpegWidth;
+  int jpegHeight;
+  int precision;
+  int colorspace;
+  boolean fastUpsample;
+  boolean fastDCT;
+  boolean optimize;
+  boolean progressive;
+  int scanLimit;
+  boolean arithmetic;
+  boolean lossless;
+  int losslessPSV;
+  int losslessPt;
+  int restartIntervalBlocks;
+  int restartIntervalRows;
+  int xDensity;
+  int yDensity;
+  int densityUnits;
+  tjscalingfactor scalingFactor;
+  tjregion croppingRegion;
+  int maxMemory;
+  int maxPixels;
+} tjinstance;
+
+static tjhandle _tjInitCompress(tjinstance *this);
+static tjhandle _tjInitDecompress(tjinstance *this);
+
+struct my_progress_mgr {
+  struct jpeg_progress_mgr pub;
+  tjinstance *this;
+};
+typedef struct my_progress_mgr *my_progress_ptr;
+
+static void my_progress_monitor(j_common_ptr dinfo)
+{
+  my_error_ptr myerr = (my_error_ptr)dinfo->err;
+  my_progress_ptr myprog = (my_progress_ptr)dinfo->progress;
+
+  if (dinfo->is_decompressor) {
+    int scan_no = ((j_decompress_ptr)dinfo)->input_scan_number;
+
+    if (scan_no > myprog->this->scanLimit) {
+      SNPRINTF(myprog->this->errStr, JMSG_LENGTH_MAX,
+               "Progressive JPEG image has more than %d scans",
+               myprog->this->scanLimit);
+      SNPRINTF(errStr, JMSG_LENGTH_MAX,
+               "Progressive JPEG image has more than %d scans",
+               myprog->this->scanLimit);
+      myprog->this->isInstanceError = TRUE;
+      myerr->warning = FALSE;
+      longjmp(myerr->setjmp_buffer, 1);
+    }
+  }
+}
+
+static const JXFORM_CODE xformtypes[TJ_NUMXOP] = {
+  JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE,
+  JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270
+};
+
+#define NUMSF  16
+static const tjscalingfactor sf[NUMSF] = {
+  { 2, 1 },
+  { 15, 8 },
+  { 7, 4 },
+  { 13, 8 },
+  { 3, 2 },
+  { 11, 8 },
+  { 5, 4 },
+  { 9, 8 },
+  { 1, 1 },
+  { 7, 8 },
+  { 3, 4 },
+  { 5, 8 },
+  { 1, 2 },
+  { 3, 8 },
+  { 1, 4 },
+  { 1, 8 }
+};
+
+static J_COLOR_SPACE pf2cs[TJ_NUMPF] = {
+  JCS_EXT_RGB, JCS_EXT_BGR, JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR,
+  JCS_EXT_XRGB, JCS_GRAYSCALE, JCS_EXT_RGBA, JCS_EXT_BGRA, JCS_EXT_ABGR,
+  JCS_EXT_ARGB, JCS_CMYK
+};
+
+static int cs2pf[JPEG_NUMCS] = {
+  TJPF_UNKNOWN, TJPF_GRAY,
+#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
+  TJPF_RGB,
+#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 3
+  TJPF_BGR,
+#elif RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 4
+  TJPF_RGBX,
+#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 4
+  TJPF_BGRX,
+#elif RGB_RED == 3 && RGB_GREEN == 2 && RGB_BLUE == 1 && RGB_PIXELSIZE == 4
+  TJPF_XBGR,
+#elif RGB_RED == 1 && RGB_GREEN == 2 && RGB_BLUE == 3 && RGB_PIXELSIZE == 4
+  TJPF_XRGB,
+#endif
+  TJPF_UNKNOWN, TJPF_CMYK, TJPF_UNKNOWN, TJPF_RGB, TJPF_RGBX, TJPF_BGR,
+  TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, TJPF_RGBA, TJPF_BGRA, TJPF_ABGR, TJPF_ARGB,
+  TJPF_UNKNOWN
+};
+
+#define THROWG(m, rv) { \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME, m); \
+  retval = rv;  goto bailout; \
+}
+#ifdef _MSC_VER
+#define THROW_UNIX(m) { \
+  char strerrorBuf[80] = { 0 }; \
+  strerror_s(strerrorBuf, 80, errno); \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerrorBuf); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerrorBuf); \
+  retval = -1;  goto bailout; \
+}
+#else
+#define THROW_UNIX(m) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerror(errno)); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerror(errno)); \
+  retval = -1;  goto bailout; \
+}
+#endif
+#define THROW(m) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME, m); \
+  this->isInstanceError = TRUE;  THROWG(m, -1) \
+}
+#define THROWI(format, val1, val2) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): " format, FUNCTION_NAME, \
+           val1, val2); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): " format, FUNCTION_NAME, val1, \
+           val2); \
+  retval = -1;  goto bailout; \
+}
+
+#define GET_INSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_compress_ptr cinfo = NULL; \
+  j_decompress_ptr dinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  cinfo = &this->cinfo;  dinfo = &this->dinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_CINSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_compress_ptr cinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  cinfo = &this->cinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_DINSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_decompress_ptr dinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  dinfo = &this->dinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_TJINSTANCE(handle, errorReturn) \
+  tjinstance *this = (tjinstance *)handle; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return errorReturn; \
+  } \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+static int getPixelFormat(int pixelSize, int flags)
+{
+  if (pixelSize == 1) return TJPF_GRAY;
+  if (pixelSize == 3) {
+    if (flags & TJ_BGR) return TJPF_BGR;
+    else return TJPF_RGB;
+  }
+  if (pixelSize == 4) {
+    if (flags & TJ_ALPHAFIRST) {
+      if (flags & TJ_BGR) return TJPF_XBGR;
+      else return TJPF_XRGB;
+    } else {
+      if (flags & TJ_BGR) return TJPF_BGRX;
+      else return TJPF_RGBX;
+    }
+  }
+  return -1;
+}
+
+static void setCompDefaults(tjinstance *this, int pixelFormat)
+{
+  this->cinfo.in_color_space = pf2cs[pixelFormat];
+  this->cinfo.input_components = tjPixelSize[pixelFormat];
+  jpeg_set_defaults(&this->cinfo);
+
+  this->cinfo.restart_interval = this->restartIntervalBlocks;
+  this->cinfo.restart_in_rows = this->restartIntervalRows;
+  this->cinfo.X_density = (UINT16)this->xDensity;
+  this->cinfo.Y_density = (UINT16)this->yDensity;
+  this->cinfo.density_unit = (UINT8)this->densityUnits;
+  this->cinfo.mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (this->lossless) {
+#ifdef C_LOSSLESS_SUPPORTED
+    jpeg_enable_lossless(&this->cinfo, this->losslessPSV, this->losslessPt);
+#endif
+    if (pixelFormat == TJPF_GRAY)
+      this->subsamp = TJSAMP_GRAY;
+    else if (this->subsamp != TJSAMP_GRAY)
+      this->subsamp = TJSAMP_444;
+    return;
+  }
+
+  jpeg_set_quality(&this->cinfo, this->quality, TRUE);
+  this->cinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+
+  switch (this->colorspace) {
+  case TJCS_RGB:
+    jpeg_set_colorspace(&this->cinfo, JCS_RGB);  break;
+  case TJCS_YCbCr:
+    jpeg_set_colorspace(&this->cinfo, JCS_YCbCr);  break;
+  case TJCS_GRAY:
+    jpeg_set_colorspace(&this->cinfo, JCS_GRAYSCALE);  break;
+  case TJCS_CMYK:
+    jpeg_set_colorspace(&this->cinfo, JCS_CMYK);  break;
+  case TJCS_YCCK:
+    jpeg_set_colorspace(&this->cinfo, JCS_YCCK);  break;
+  default:
+    if (this->subsamp == TJSAMP_GRAY)
+      jpeg_set_colorspace(&this->cinfo, JCS_GRAYSCALE);
+    else if (pixelFormat == TJPF_CMYK)
+      jpeg_set_colorspace(&this->cinfo, JCS_YCCK);
+    else
+      jpeg_set_colorspace(&this->cinfo, JCS_YCbCr);
+  }
+
+  if (this->cinfo.data_precision == 8)
+    this->cinfo.optimize_coding = this->optimize;
+#ifdef C_PROGRESSIVE_SUPPORTED
+  if (this->progressive) jpeg_simple_progression(&this->cinfo);
+#endif
+  this->cinfo.arith_code = this->arithmetic;
+
+  this->cinfo.comp_info[0].h_samp_factor = tjMCUWidth[this->subsamp] / 8;
+  this->cinfo.comp_info[1].h_samp_factor = 1;
+  this->cinfo.comp_info[2].h_samp_factor = 1;
+  if (this->cinfo.num_components > 3)
+    this->cinfo.comp_info[3].h_samp_factor = tjMCUWidth[this->subsamp] / 8;
+  this->cinfo.comp_info[0].v_samp_factor = tjMCUHeight[this->subsamp] / 8;
+  this->cinfo.comp_info[1].v_samp_factor = 1;
+  this->cinfo.comp_info[2].v_samp_factor = 1;
+  if (this->cinfo.num_components > 3)
+    this->cinfo.comp_info[3].v_samp_factor = tjMCUHeight[this->subsamp] / 8;
+}
+
+
+static int getSubsamp(j_decompress_ptr dinfo)
+{
+  int retval = TJSAMP_UNKNOWN, i, k;
+
+  /* The sampling factors actually have no meaning with grayscale JPEG files,
+     and in fact it's possible to generate grayscale JPEGs with sampling
+     factors > 1 (even though those sampling factors are ignored by the
+     decompressor.)  Thus, we need to treat grayscale as a special case. */
+  if (dinfo->num_components == 1 && dinfo->jpeg_color_space == JCS_GRAYSCALE)
+    return TJSAMP_GRAY;
+
+  for (i = 0; i < TJ_NUMSAMP; i++) {
+    if (i == TJSAMP_GRAY) continue;
+
+    if (dinfo->num_components == 3 ||
+        ((dinfo->jpeg_color_space == JCS_YCCK ||
+          dinfo->jpeg_color_space == JCS_CMYK) &&
+         dinfo->num_components == 4)) {
+      if (dinfo->comp_info[0].h_samp_factor == tjMCUWidth[i] / 8 &&
+          dinfo->comp_info[0].v_samp_factor == tjMCUHeight[i] / 8) {
+        int match = 0;
+
+        for (k = 1; k < dinfo->num_components; k++) {
+          int href = 1, vref = 1;
+
+          if ((dinfo->jpeg_color_space == JCS_YCCK ||
+               dinfo->jpeg_color_space == JCS_CMYK) && k == 3) {
+            href = tjMCUWidth[i] / 8;  vref = tjMCUHeight[i] / 8;
+          }
+          if (dinfo->comp_info[k].h_samp_factor == href &&
+              dinfo->comp_info[k].v_samp_factor == vref)
+            match++;
+        }
+        if (match == dinfo->num_components - 1) {
+          retval = i;  break;
+        }
+      }
+      /* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified
+         in non-standard ways. */
+      if (dinfo->comp_info[0].h_samp_factor == 2 &&
+          dinfo->comp_info[0].v_samp_factor == 2 &&
+          (i == TJSAMP_422 || i == TJSAMP_440)) {
+        int match = 0;
+
+        for (k = 1; k < dinfo->num_components; k++) {
+          int href = tjMCUHeight[i] / 8, vref = tjMCUWidth[i] / 8;
+
+          if ((dinfo->jpeg_color_space == JCS_YCCK ||
+               dinfo->jpeg_color_space == JCS_CMYK) && k == 3) {
+            href = vref = 2;
+          }
+          if (dinfo->comp_info[k].h_samp_factor == href &&
+              dinfo->comp_info[k].v_samp_factor == vref)
+            match++;
+        }
+        if (match == dinfo->num_components - 1) {
+          retval = i;  break;
+        }
+      }
+      /* Handle 4:4:4 images whose sampling factors are specified in
+         non-standard ways. */
+      if (dinfo->comp_info[0].h_samp_factor *
+          dinfo->comp_info[0].v_samp_factor <=
+          D_MAX_BLOCKS_IN_MCU / 3 && i == TJSAMP_444) {
+        int match = 0;
+        for (k = 1; k < dinfo->num_components; k++) {
+          if (dinfo->comp_info[k].h_samp_factor ==
+              dinfo->comp_info[0].h_samp_factor &&
+              dinfo->comp_info[k].v_samp_factor ==
+              dinfo->comp_info[0].v_samp_factor)
+            match++;
+          if (match == dinfo->num_components - 1) {
+            retval = i;  break;
+          }
+        }
+      }
+    }
+  }
+  return retval;
+}
+
+
+static void setDecompParameters(tjinstance *this)
+{
+  this->subsamp = getSubsamp(&this->dinfo);
+  this->jpegWidth = this->dinfo.image_width;
+  this->jpegHeight = this->dinfo.image_height;
+  this->precision = this->dinfo.data_precision;
+  switch (this->dinfo.jpeg_color_space) {
+  case JCS_GRAYSCALE:  this->colorspace = TJCS_GRAY;  break;
+  case JCS_RGB:        this->colorspace = TJCS_RGB;  break;
+  case JCS_YCbCr:      this->colorspace = TJCS_YCbCr;  break;
+  case JCS_CMYK:       this->colorspace = TJCS_CMYK;  break;
+  case JCS_YCCK:       this->colorspace = TJCS_YCCK;  break;
+  default:             this->colorspace = -1;  break;
+  }
+  this->progressive = this->dinfo.progressive_mode;
+  this->arithmetic = this->dinfo.arith_code;
+  this->lossless = this->dinfo.master->lossless;
+  this->losslessPSV = this->dinfo.Ss;
+  this->losslessPt = this->dinfo.Al;
+  this->xDensity = this->dinfo.X_density;
+  this->yDensity = this->dinfo.Y_density;
+  this->densityUnits = this->dinfo.density_unit;
+}
+
+
+static void processFlags(tjhandle handle, int flags, int operation)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  this->bottomUp = !!(flags & TJFLAG_BOTTOMUP);
+
+#ifndef NO_PUTENV
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
+#endif
+
+  this->fastUpsample = !!(flags & TJFLAG_FASTUPSAMPLE);
+  this->noRealloc = !!(flags & TJFLAG_NOREALLOC);
+
+  if (operation == COMPRESS) {
+    if (this->quality >= 96 || flags & TJFLAG_ACCURATEDCT)
+      this->fastDCT = FALSE;
+    else
+      this->fastDCT = TRUE;
+  } else
+    this->fastDCT = !!(flags & TJFLAG_FASTDCT);
+
+  this->jerr.stopOnWarning = !!(flags & TJFLAG_STOPONWARNING);
+  this->progressive = !!(flags & TJFLAG_PROGRESSIVE);
+
+  if (flags & TJFLAG_LIMITSCANS) this->scanLimit = 500;
+}
+
+
+/*************************** General API functions ***************************/
+
+/* TurboJPEG 3+ */
+DLLEXPORT tjhandle tj3Init(int initType)
+{
+  static const char FUNCTION_NAME[] = "tj3Init";
+  tjinstance *this = NULL;
+  tjhandle retval = NULL;
+
+  if (initType < 0 || initType >= TJ_NUMINIT)
+    THROWG("Invalid argument", NULL);
+
+  if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL)
+    THROWG("Memory allocation failure", NULL);
+  memset(this, 0, sizeof(tjinstance));
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "No error");
+
+  this->quality = -1;
+  this->subsamp = TJSAMP_UNKNOWN;
+  this->jpegWidth = -1;
+  this->jpegHeight = -1;
+  this->precision = 8;
+  this->colorspace = -1;
+  this->losslessPSV = 1;
+  this->xDensity = 1;
+  this->yDensity = 1;
+  this->scalingFactor = TJUNSCALED;
+
+  switch (initType) {
+  case TJINIT_COMPRESS:  return _tjInitCompress(this);
+  case TJINIT_DECOMPRESS:  return _tjInitDecompress(this);
+  case TJINIT_TRANSFORM:
+    retval = _tjInitCompress(this);
+    if (!retval) return NULL;
+    retval = _tjInitDecompress(this);
+    return retval;
+  }
+
+bailout:
+  return retval;
+}
+
+
+#define SET_PARAM(field, minValue, maxValue) { \
+  if (value < minValue || (maxValue > 0 && value > maxValue)) \
+    THROW("Parameter value out of range"); \
+  this->field = value; \
+}
+
+#define SET_BOOL_PARAM(field) { \
+  if (value < 0 || value > 1) \
+    THROW("Parameter value out of range"); \
+  this->field = (boolean)value; \
+}
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3Set(tjhandle handle, int param, int value)
+{
+  static const char FUNCTION_NAME[] = "tj3Set";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  switch (param) {
+  case TJPARAM_STOPONWARNING:
+    SET_BOOL_PARAM(jerr.stopOnWarning);
+    break;
+  case TJPARAM_BOTTOMUP:
+    SET_BOOL_PARAM(bottomUp);
+    break;
+  case TJPARAM_NOREALLOC:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_NOREALLOC is not applicable to decompression instances.");
+    SET_BOOL_PARAM(noRealloc);
+    break;
+  case TJPARAM_QUALITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_QUALITY is not applicable to decompression instances.");
+    SET_PARAM(quality, 1, 100);
+    break;
+  case TJPARAM_SUBSAMP:
+    SET_PARAM(subsamp, 0, TJ_NUMSAMP - 1);
+    break;
+  case TJPARAM_JPEGWIDTH:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_JPEGWIDTH is not applicable to compression instances.");
+    THROW("TJPARAM_JPEGWIDTH is read-only in decompression instances.");
+    break;
+  case TJPARAM_JPEGHEIGHT:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_JPEGHEIGHT is not applicable to compression instances.");
+    THROW("TJPARAM_JPEGHEIGHT is read-only in decompression instances.");
+    break;
+  case TJPARAM_PRECISION:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_PRECISION is not applicable to compression instances.");
+    THROW("TJPARAM_PRECISION is read-only in decompression instances.");
+    break;
+  case TJPARAM_COLORSPACE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_COLORSPACE is read-only in decompression instances.");
+    SET_PARAM(colorspace, 0, TJ_NUMCS - 1);
+    break;
+  case TJPARAM_FASTUPSAMPLE:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_FASTUPSAMPLE is not applicable to compression instances.");
+    SET_BOOL_PARAM(fastUpsample);
+    break;
+  case TJPARAM_FASTDCT:
+    SET_BOOL_PARAM(fastDCT);
+    break;
+  case TJPARAM_OPTIMIZE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_OPTIMIZE is not applicable to decompression instances.");
+    SET_BOOL_PARAM(optimize);
+    break;
+  case TJPARAM_PROGRESSIVE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_PROGRESSIVE is read-only in decompression instances.");
+    SET_BOOL_PARAM(progressive);
+    break;
+  case TJPARAM_SCANLIMIT:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_SCANLIMIT is not applicable to compression instances.");
+    SET_PARAM(scanLimit, 0, -1);
+    break;
+  case TJPARAM_ARITHMETIC:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_ARITHMETIC is read-only in decompression instances.");
+    SET_BOOL_PARAM(arithmetic);
+    break;
+  case TJPARAM_LOSSLESS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESS is read-only in decompression instances.");
+    SET_BOOL_PARAM(lossless);
+    break;
+  case TJPARAM_LOSSLESSPSV:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESSPSV is read-only in decompression instances.");
+    SET_PARAM(losslessPSV, 1, 7);
+    break;
+  case TJPARAM_LOSSLESSPT:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESSPT is read-only in decompression instances.");
+    SET_PARAM(losslessPt, 0, this->precision - 1);
+    break;
+  case TJPARAM_RESTARTBLOCKS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_RESTARTBLOCKS is not applicable to decompression instances.");
+    SET_PARAM(restartIntervalBlocks, 0, 65535);
+    if (value != 0) this->restartIntervalRows = 0;
+    break;
+  case TJPARAM_RESTARTROWS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_RESTARTROWS is not applicable to decompression instances.");
+    SET_PARAM(restartIntervalRows, 0, 65535);
+    if (value != 0) this->restartIntervalBlocks = 0;
+    break;
+  case TJPARAM_XDENSITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_XDENSITY is read-only in decompression instances.");
+    SET_PARAM(xDensity, 1, 65535);
+    break;
+  case TJPARAM_YDENSITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_YDENSITY is read-only in decompression instances.");
+    SET_PARAM(yDensity, 1, 65535);
+    break;
+  case TJPARAM_DENSITYUNITS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_DENSITYUNITS is read-only in decompression instances.");
+    SET_PARAM(densityUnits, 0, 2);
+    break;
+  case TJPARAM_MAXMEMORY:
+    SET_PARAM(maxMemory, 0, (int)(min(LONG_MAX / 1048576L, (long)INT_MAX)));
+    break;
+  case TJPARAM_MAXPIXELS:
+    SET_PARAM(maxPixels, 0, -1);
+    break;
+  default:
+    THROW("Invalid parameter");
+  }
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3Get(tjhandle handle, int param)
+{
+  tjinstance *this = (tjinstance *)handle;
+  if (!this) return -1;
+
+  switch (param) {
+  case TJPARAM_STOPONWARNING:
+    return this->jerr.stopOnWarning;
+  case TJPARAM_BOTTOMUP:
+    return this->bottomUp;
+  case TJPARAM_NOREALLOC:
+    return this->noRealloc;
+  case TJPARAM_QUALITY:
+    return this->quality;
+  case TJPARAM_SUBSAMP:
+    return this->subsamp;
+  case TJPARAM_JPEGWIDTH:
+    return this->jpegWidth;
+  case TJPARAM_JPEGHEIGHT:
+    return this->jpegHeight;
+  case TJPARAM_PRECISION:
+    return this->precision;
+  case TJPARAM_COLORSPACE:
+    return this->colorspace;
+  case TJPARAM_FASTUPSAMPLE:
+    return this->fastUpsample;
+  case TJPARAM_FASTDCT:
+    return this->fastDCT;
+  case TJPARAM_OPTIMIZE:
+    return this->optimize;
+  case TJPARAM_PROGRESSIVE:
+    return this->progressive;
+  case TJPARAM_SCANLIMIT:
+    return this->scanLimit;
+  case TJPARAM_ARITHMETIC:
+    return this->arithmetic;
+  case TJPARAM_LOSSLESS:
+    return this->lossless;
+  case TJPARAM_LOSSLESSPSV:
+    return this->losslessPSV;
+  case TJPARAM_LOSSLESSPT:
+    return this->losslessPt;
+  case TJPARAM_RESTARTBLOCKS:
+    return this->restartIntervalBlocks;
+  case TJPARAM_RESTARTROWS:
+    return this->restartIntervalRows;
+  case TJPARAM_XDENSITY:
+    return this->xDensity;
+  case TJPARAM_YDENSITY:
+    return this->yDensity;
+  case TJPARAM_DENSITYUNITS:
+    return this->densityUnits;
+  case TJPARAM_MAXMEMORY:
+    return this->maxMemory;
+  case TJPARAM_MAXPIXELS:
+    return this->maxPixels;
+  }
+
+  return -1;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT char *tj3GetErrorStr(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  if (this && this->isInstanceError) {
+    this->isInstanceError = FALSE;
+    return this->errStr;
+  } else
+    return errStr;
+}
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT char *tjGetErrorStr2(tjhandle handle)
+{
+  return tj3GetErrorStr(handle);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT char *tjGetErrorStr(void)
+{
+  return errStr;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3GetErrorCode(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  if (this && this->jerr.warning) return TJERR_WARNING;
+  else return TJERR_FATAL;
+}
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT int tjGetErrorCode(tjhandle handle)
+{
+  return tj3GetErrorCode(handle);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT void tj3Destroy(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+  j_compress_ptr cinfo = NULL;
+  j_decompress_ptr dinfo = NULL;
+
+  if (!this) return;
+
+  cinfo = &this->cinfo;  dinfo = &this->dinfo;
+  this->jerr.warning = FALSE;
+  this->isInstanceError = FALSE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) return;
+  if (this->init & COMPRESS) jpeg_destroy_compress(cinfo);
+  if (this->init & DECOMPRESS) jpeg_destroy_decompress(dinfo);
+  free(this);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDestroy(tjhandle handle)
+{
+  static const char FUNCTION_NAME[] = "tjDestroy";
+  int retval = 0;
+
+  if (!handle) THROWG("Invalid handle", -1);
+
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "No error");
+  tj3Destroy(handle);
+  if (strcmp(errStr, "No error")) retval = -1;
+
+bailout:
+  return retval;
+}
+
+
+/* These are exposed mainly because Windows can't malloc() and free() across
+   DLL boundaries except when the CRT DLL is used, and we don't use the CRT DLL
+   with turbojpeg.dll for compatibility reasons.  However, these functions
+   can potentially be used for other purposes by different implementations. */
+
+/* TurboJPEG 3+ */
+DLLEXPORT void tj3Free(void *buf)
+{
+  free(buf);
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT void tjFree(unsigned char *buf)
+{
+  tj3Free(buf);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT void *tj3Alloc(size_t bytes)
+{
+  return malloc(bytes);
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned char *tjAlloc(int bytes)
+{
+  return (unsigned char *)tj3Alloc((size_t)bytes);
+}
+
+
+/******************************** Compressor *********************************/
+
+static tjhandle _tjInitCompress(tjinstance *this)
+{
+  static unsigned char buffer[1];
+  unsigned char *buf = buffer;
+  size_t size = 1;
+
+  /* This is also straight out of example.c */
+  this->cinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = my_error_exit;
+  this->jerr.pub.output_message = my_output_message;
+  this->jerr.emit_message = this->jerr.pub.emit_message;
+  this->jerr.pub.emit_message = my_emit_message;
+  this->jerr.pub.addon_message_table = turbojpeg_message_table;
+  this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE;
+  this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    free(this);
+    return NULL;
+  }
+
+  jpeg_create_compress(&this->cinfo);
+  /* Make an initial call so it will create the destination manager */
+  jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0);
+
+  this->init |= COMPRESS;
+  return (tjhandle)this;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT tjhandle tjInitCompress(void)
+{
+  return tj3Init(TJINIT_COMPRESS);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT size_t tj3JPEGBufSize(int width, int height, int jpegSubsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3JPEGBufSize";
+  unsigned long long retval = 0;
+  int mcuw, mcuh, chromasf;
+
+  if (width < 1 || height < 1 || jpegSubsamp < TJSAMP_UNKNOWN ||
+      jpegSubsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  if (jpegSubsamp == TJSAMP_UNKNOWN)
+    jpegSubsamp = TJSAMP_444;
+
+  /* This allows for rare corner cases in which a JPEG image can actually be
+     larger than the uncompressed input (we wouldn't mention it if it hadn't
+     happened before.) */
+  mcuw = tjMCUWidth[jpegSubsamp];
+  mcuh = tjMCUHeight[jpegSubsamp];
+  chromasf = jpegSubsamp == TJSAMP_GRAY ? 0 : 4 * 64 / (mcuw * mcuh);
+  retval = PAD(width, mcuw) * PAD(height, mcuh) * (2ULL + chromasf) + 2048ULL;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp)
+{
+  static const char FUNCTION_NAME[] = "tjBufSize";
+  size_t retval;
+
+  if (jpegSubsamp < 0)
+    THROWG("Invalid argument", 0);
+
+  retval = tj3JPEGBufSize(width, height, jpegSubsamp);
+
+bailout:
+  return (retval == 0) ? (unsigned long)-1 : (unsigned long)retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT unsigned long TJBUFSIZE(int width, int height)
+{
+  static const char FUNCTION_NAME[] = "TJBUFSIZE";
+  unsigned long long retval = 0;
+
+  if (width < 1 || height < 1)
+    THROWG("Invalid argument", (unsigned long)-1);
+
+  /* This allows for rare corner cases in which a JPEG image can actually be
+     larger than the uncompressed input (we wouldn't mention it if it hadn't
+     happened before.) */
+  retval = PAD(width, 16) * PAD(height, 16) * 6ULL + 2048ULL;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", (unsigned long)-1);
+#endif
+
+bailout:
+  return (unsigned long)retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT size_t tj3YUVBufSize(int width, int align, int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVBufSize";
+  unsigned long long retval = 0;
+  int nc, i;
+
+  if (align < 1 || !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  for (i = 0; i < nc; i++) {
+    int pw = tj3YUVPlaneWidth(i, width, subsamp);
+    int stride = PAD(pw, align);
+    int ph = tj3YUVPlaneHeight(i, height, subsamp);
+
+    if (pw == 0 || ph == 0) return 0;
+    else retval += (unsigned long long)stride * ph;
+  }
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
+                                      int subsamp)
+{
+  size_t retval = tj3YUVBufSize(width, align, height, subsamp);
+  return (retval == 0) ? (unsigned long)-1 : (unsigned long)retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp)
+{
+  return tjBufSizeYUV2(width, 4, height, subsamp);
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int subsamp)
+{
+  return tjBufSizeYUV(width, height, subsamp);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3YUVPlaneWidth(int componentID, int width, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneWidth";
+  unsigned long long pw, retval = 0;
+  int nc;
+
+  if (width < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  if (componentID < 0 || componentID >= nc)
+    THROWG("Invalid argument", 0);
+
+  pw = PAD((unsigned long long)width, tjMCUWidth[subsamp] / 8);
+  if (componentID == 0)
+    retval = pw;
+  else
+    retval = pw * 8 / tjMCUWidth[subsamp];
+
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("Width is too large", 0);
+
+bailout:
+  return (int)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp)
+{
+  int retval = tj3YUVPlaneWidth(componentID, width, subsamp);
+  return (retval == 0) ? -1 : retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3YUVPlaneHeight(int componentID, int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneHeight";
+  unsigned long long ph, retval = 0;
+  int nc;
+
+  if (height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  if (componentID < 0 || componentID >= nc)
+    THROWG("Invalid argument", 0);
+
+  ph = PAD((unsigned long long)height, tjMCUHeight[subsamp] / 8);
+  if (componentID == 0)
+    retval = ph;
+  else
+    retval = ph * 8 / tjMCUHeight[subsamp];
+
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("Height is too large", 0);
+
+bailout:
+  return (int)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp)
+{
+  int retval = tj3YUVPlaneHeight(componentID, height, subsamp);
+  return (retval == 0) ? -1 : retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT size_t tj3YUVPlaneSize(int componentID, int width, int stride,
+                                 int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneSize";
+  unsigned long long retval = 0;
+  int pw, ph;
+
+  if (width < 1 || height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  pw = tj3YUVPlaneWidth(componentID, width, subsamp);
+  ph = tj3YUVPlaneHeight(componentID, height, subsamp);
+  if (pw == 0 || ph == 0) return 0;
+
+  if (stride == 0) stride = pw;
+  else stride = abs(stride);
+
+  retval = (unsigned long long)stride * (ph - 1) + pw;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride,
+                                       int height, int subsamp)
+{
+  size_t retval = tj3YUVPlaneSize(componentID, width, stride, height, subsamp);
+  return (retval == 0) ? -1 : (unsigned long)retval;
+}
+
+
+/* tj3Compress*() is implemented in turbojpeg-mp.c */
+#define BITS_IN_JSAMPLE  8
+#include "turbojpeg-mp.c"
+#undef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  12
+#include "turbojpeg-mp.c"
+#undef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  16
+#include "turbojpeg-mp.c"
+#undef BITS_IN_JSAMPLE
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          unsigned char **jpegBuf, unsigned long *jpegSize,
+                          int jpegSubsamp, int jpegQual, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompress2";
+  int retval = 0;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP ||
+      jpegQual < 0 || jpegQual > 100)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = jpegSubsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  retval = tj3Compress8(handle, srcBuf, width, pitch, height, pixelFormat,
+                        jpegBuf, &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
+                         int pitch, int height, int pixelSize,
+                         unsigned char *jpegBuf, unsigned long *jpegSize,
+                         int jpegSubsamp, int jpegQual, int flags)
+{
+  int retval = 0;
+  unsigned long size = jpegSize ? *jpegSize : 0;
+
+  if (flags & TJ_YUV) {
+    size = tjBufSizeYUV(width, height, jpegSubsamp);
+    retval = tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+                          getPixelFormat(pixelSize, flags), jpegBuf,
+                          jpegSubsamp, flags);
+  } else {
+    retval = tjCompress2(handle, srcBuf, width, pitch, height,
+                         getPixelFormat(pixelSize, flags), &jpegBuf, &size,
+                         jpegSubsamp, jpegQual, flags | TJFLAG_NOREALLOC);
+  }
+  *jpegSize = size;
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3EncodeYUVPlanes8(tjhandle handle, const unsigned char *srcBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat, unsigned char **dstPlanes,
+                                  int *strides)
+{
+  static const char FUNCTION_NAME[] = "tj3EncodeYUVPlanes8";
+  JSAMPROW *row_pointer = NULL;
+  JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
+  JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS];
+  JSAMPROW *outbuf[MAX_COMPONENTS];
+  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
+  JSAMPLE *ptr;
+  jpeg_component_info *compptr;
+
+  GET_CINSTANCE(handle)
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;
+    tmpbuf2[i] = NULL;  _tmpbuf2[i] = NULL;  outbuf[i] = NULL;
+  }
+
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes ||
+      !dstPlanes[0])
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+  if (pixelFormat == TJPF_CMYK)
+    THROW("Cannot generate YUV images from packed-pixel CMYK images");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = 8;
+
+  setCompDefaults(this, pixelFormat);
+
+  /* Execute only the parts of jpeg_start_compress() that we need.  If we
+     were to call the whole jpeg_start_compress() function, then it would try
+     to write the file headers, which could overflow the output buffer if the
+     YUV image were very small. */
+  if (cinfo->global_state != CSTATE_START)
+    THROW("libjpeg API is in the wrong state");
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  jinit_c_master_control(cinfo, FALSE);
+  jinit_color_converter(cinfo);
+  jinit_downsampler(cinfo);
+  (*cinfo->cconvert->start_pass) (cinfo);
+
+  pw0 = PAD(width, cinfo->max_h_samp_factor);
+  ph0 = PAD(height, cinfo->max_v_samp_factor);
+
+  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = (JSAMPROW)&srcBuf[i * (size_t)pitch];
+  }
+  if (height < ph0)
+    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
+
+  for (i = 0; i < cinfo->num_components; i++) {
+    compptr = &cinfo->comp_info[i];
+    _tmpbuf[i] = (JSAMPLE *)malloc(
+      PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
+          compptr->h_samp_factor, 32) *
+      cinfo->max_v_samp_factor + 32);
+    if (!_tmpbuf[i])
+      THROW("Memory allocation failure");
+    tmpbuf[i] =
+      (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo->max_v_samp_factor);
+    if (!tmpbuf[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < cinfo->max_v_samp_factor; row++) {
+      unsigned char *_tmpbuf_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
+
+      tmpbuf[i][row] = &_tmpbuf_aligned[
+        PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
+            compptr->h_samp_factor, 32) * row];
+    }
+    _tmpbuf2[i] =
+      (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
+                        compptr->v_samp_factor + 32);
+    if (!_tmpbuf2[i])
+      THROW("Memory allocation failure");
+    tmpbuf2[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
+    if (!tmpbuf2[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < compptr->v_samp_factor; row++) {
+      unsigned char *_tmpbuf2_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf2[i], 32);
+
+      tmpbuf2[i][row] =
+        &_tmpbuf2_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
+    }
+    pw[i] = pw0 * compptr->h_samp_factor / cinfo->max_h_samp_factor;
+    ph[i] = ph0 * compptr->v_samp_factor / cinfo->max_v_samp_factor;
+    outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
+    if (!outbuf[i])
+      THROW("Memory allocation failure");
+    ptr = dstPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      outbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < ph0; row += cinfo->max_v_samp_factor) {
+    (*cinfo->cconvert->color_convert) (cinfo, &row_pointer[row], tmpbuf, 0,
+                                       cinfo->max_v_samp_factor);
+    (cinfo->downsample->downsample) (cinfo, tmpbuf, 0, tmpbuf2, 0);
+    for (i = 0, compptr = cinfo->comp_info; i < cinfo->num_components;
+         i++, compptr++)
+      jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i],
+        row * compptr->v_samp_factor / cinfo->max_v_samp_factor,
+        compptr->v_samp_factor, pw[i]);
+  }
+  cinfo->next_scanline += height;
+  jpeg_abort_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
+  free(row_pointer);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(_tmpbuf[i]);
+    free(tmpbuf2[i]);
+    free(_tmpbuf2[i]);
+    free(outbuf[i]);
+  }
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int pitch, int height,
+                                int pixelFormat, unsigned char **dstPlanes,
+                                int *strides, int subsamp, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjEncodeYUVPlanes";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  return tj3EncodeYUVPlanes8(handle, srcBuf, width, pitch, height, pixelFormat,
+                             dstPlanes, strides);
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3EncodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char *dstBuf, int align)
+{
+  static const char FUNCTION_NAME[] = "tj3EncodeYUV8";
+  unsigned char *dstPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (width <= 0 || height <= 0 || dstBuf == NULL || align < 1 ||
+      !IS_POW2(align))
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  dstPlanes[0] = dstBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    dstPlanes[1] = dstPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
+    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3EncodeYUVPlanes8(handle, srcBuf, width, pitch, height, pixelFormat,
+                             dstPlanes, strides);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int align, int subsamp,
+                           int flags)
+{
+  static const char FUNCTION_NAME[] = "tjEncodeYUV3";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  return tj3EncodeYUV8(handle, srcBuf, width, pitch, height, pixelFormat,
+                       dstBuf, align);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
+                           int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int subsamp, int flags)
+{
+  return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
+                      dstBuf, 4, subsamp, flags);
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
+                          int pitch, int height, int pixelSize,
+                          unsigned char *dstBuf, int subsamp, int flags)
+{
+  return tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+                      getPixelFormat(pixelSize, flags), dstBuf, subsamp,
+                      flags);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3CompressFromYUVPlanes8(tjhandle handle,
+                                        const unsigned char * const *srcPlanes,
+                                        int width, const int *strides,
+                                        int height, unsigned char **jpegBuf,
+                                        size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3CompressFromYUVPlanes8";
+  int i, row, retval = 0;
+  boolean alloc = TRUE;
+  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
+  JSAMPLE *_tmpbuf = NULL, *ptr;
+  JSAMPROW *inbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
+
+  GET_CINSTANCE(handle)
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  inbuf[i] = NULL;
+  }
+
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 ||
+      jpegBuf == NULL || jpegSize == NULL)
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
+    THROW("Invalid argument");
+
+  if (this->quality == -1)
+    THROW("TJPARAM_QUALITY must be specified");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = 8;
+
+  if (this->noRealloc) {
+    alloc = FALSE;  *jpegSize = tj3JPEGBufSize(width, height, this->subsamp);
+  }
+  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+  setCompDefaults(this, TJPF_RGB);
+  cinfo->raw_data_in = TRUE;
+
+  jpeg_start_compress(cinfo, TRUE);
+  for (i = 0; i < cinfo->num_components; i++) {
+    jpeg_component_info *compptr = &cinfo->comp_info[i];
+    int ih;
+
+    iw[i] = compptr->width_in_blocks * DCTSIZE;
+    ih = compptr->height_in_blocks * DCTSIZE;
+    pw[i] = PAD(cinfo->image_width, cinfo->max_h_samp_factor) *
+            compptr->h_samp_factor / cinfo->max_h_samp_factor;
+    ph[i] = PAD(cinfo->image_height, cinfo->max_v_samp_factor) *
+            compptr->v_samp_factor / cinfo->max_v_samp_factor;
+    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
+    th[i] = compptr->v_samp_factor * DCTSIZE;
+    tmpbufsize += iw[i] * th[i];
+    if ((inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
+      THROW("Memory allocation failure");
+    ptr = (JSAMPLE *)srcPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      inbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+  if (usetmpbuf) {
+    if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
+      THROW("Memory allocation failure");
+    ptr = _tmpbuf;
+    for (i = 0; i < cinfo->num_components; i++) {
+      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
+        THROW("Memory allocation failure");
+      for (row = 0; row < th[i]; row++) {
+        tmpbuf[i][row] = ptr;
+        ptr += iw[i];
+      }
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < (int)cinfo->image_height;
+       row += cinfo->max_v_samp_factor * DCTSIZE) {
+    JSAMPARRAY yuvptr[MAX_COMPONENTS];
+    int crow[MAX_COMPONENTS];
+
+    for (i = 0; i < cinfo->num_components; i++) {
+      jpeg_component_info *compptr = &cinfo->comp_info[i];
+
+      crow[i] = row * compptr->v_samp_factor / cinfo->max_v_samp_factor;
+      if (usetmpbuf) {
+        int j, k;
+
+        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
+          memcpy(tmpbuf[i][j], inbuf[i][crow[i] + j], pw[i]);
+          /* Duplicate last sample in row to fill out MCU */
+          for (k = pw[i]; k < iw[i]; k++)
+            tmpbuf[i][j][k] = tmpbuf[i][j][pw[i] - 1];
+        }
+        /* Duplicate last row to fill out MCU */
+        for (j = ph[i] - crow[i]; j < th[i]; j++)
+          memcpy(tmpbuf[i][j], tmpbuf[i][ph[i] - crow[i] - 1], iw[i]);
+        yuvptr[i] = tmpbuf[i];
+      } else
+        yuvptr[i] = &inbuf[i][crow[i]];
+    }
+    jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor * DCTSIZE);
+  }
+  jpeg_finish_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START && alloc)
+    (*cinfo->dest->term_destination) (cinfo);
+  if (cinfo->global_state > CSTATE_START || retval == -1)
+    jpeg_abort_compress(cinfo);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(inbuf[i]);
+  }
+  free(_tmpbuf);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
+                                      const unsigned char **srcPlanes,
+                                      int width, const int *strides,
+                                      int height, int subsamp,
+                                      unsigned char **jpegBuf,
+                                      unsigned long *jpegSize, int jpegQual,
+                                      int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompressFromYUVPlanes";
+  int retval = 0;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP || jpegSize == NULL ||
+      jpegQual < 0 || jpegQual > 100)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  retval = tj3CompressFromYUVPlanes8(handle, srcPlanes, width, strides, height,
+                                     jpegBuf, &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3CompressFromYUV8(tjhandle handle,
+                                  const unsigned char *srcBuf, int width,
+                                  int align, int height,
+                                  unsigned char **jpegBuf, size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3CompressFromYUV8";
+  const unsigned char *srcPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (srcBuf == NULL || width <= 0 || align < 1 || !IS_POW2(align) ||
+      height <= 0)
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  srcPlanes[0] = srcBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    srcPlanes[1] = srcPlanes[2] = NULL;
+  } else {
+    int pw1 = tjPlaneWidth(1, width, this->subsamp);
+    int ph1 = tjPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
+    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3CompressFromYUVPlanes8(handle, srcPlanes, width, strides, height,
+                                   jpegBuf, jpegSize);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int align, int height, int subsamp,
+                                unsigned char **jpegBuf,
+                                unsigned long *jpegSize, int jpegQual,
+                                int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompressFromYUV";
+  int retval = -1;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  retval = tj3CompressFromYUV8(handle, srcBuf, width, align, height, jpegBuf,
+                               &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+
+/******************************* Decompressor ********************************/
+
+static tjhandle _tjInitDecompress(tjinstance *this)
+{
+  static unsigned char buffer[1];
+
+  /* This is also straight out of example.c */
+  this->dinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = my_error_exit;
+  this->jerr.pub.output_message = my_output_message;
+  this->jerr.emit_message = this->jerr.pub.emit_message;
+  this->jerr.pub.emit_message = my_emit_message;
+  this->jerr.pub.addon_message_table = turbojpeg_message_table;
+  this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE;
+  this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    free(this);
+    return NULL;
+  }
+
+  jpeg_create_decompress(&this->dinfo);
+  /* Make an initial call so it will create the source manager */
+  jpeg_mem_src_tj(&this->dinfo, buffer, 1);
+
+  this->init |= DECOMPRESS;
+  return (tjhandle)this;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT tjhandle tjInitDecompress(void)
+{
+  return tj3Init(TJINIT_DECOMPRESS);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3DecompressHeader(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressHeader";
+  int retval = 0;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    return -1;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+
+  /* jpeg_read_header() calls jpeg_abort() and returns JPEG_HEADER_TABLES_ONLY
+     if the datastream is a tables-only datastream.  Since we aren't using a
+     suspending data source, the only other value it can return is
+     JPEG_HEADER_OK. */
+  if (jpeg_read_header(dinfo, FALSE) == JPEG_HEADER_TABLES_ONLY)
+    return 0;
+
+  setDecompParameters(this);
+
+  jpeg_abort_decompress(dinfo);
+
+  if (this->colorspace < 0)
+    THROW("Could not determine colorspace of JPEG image");
+  if (this->jpegWidth < 1 || this->jpegHeight < 1)
+    THROW("Invalid data returned in header");
+
+bailout:
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressHeader3(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp,
+                                  int *jpegColorspace)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressHeader3";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (width == NULL || height == NULL || jpegSubsamp == NULL ||
+      jpegColorspace == NULL)
+    THROW("Invalid argument");
+
+  retval = tj3DecompressHeader(handle, jpegBuf, jpegSize);
+
+  *width = tj3Get(handle, TJPARAM_JPEGWIDTH);
+  *height = tj3Get(handle, TJPARAM_JPEGHEIGHT);
+  *jpegSubsamp = tj3Get(handle, TJPARAM_SUBSAMP);
+  if (*jpegSubsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+  *jpegColorspace = tj3Get(handle, TJPARAM_COLORSPACE);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp)
+{
+  int jpegColorspace;
+
+  return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height,
+                             jpegSubsamp, &jpegColorspace);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
+                                 unsigned long jpegSize, int *width,
+                                 int *height)
+{
+  int jpegSubsamp;
+
+  return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height,
+                             &jpegSubsamp);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT tjscalingfactor *tj3GetScalingFactors(int *numScalingFactors)
+{
+  static const char FUNCTION_NAME[] = "tj3GetScalingFactors";
+  tjscalingfactor *retval = (tjscalingfactor *)sf;
+
+  if (numScalingFactors == NULL)
+    THROWG("Invalid argument", NULL);
+
+  *numScalingFactors = NUMSF;
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors)
+{
+  return tj3GetScalingFactors(numScalingFactors);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3SetScalingFactor(tjhandle handle,
+                                  tjscalingfactor scalingFactor)
+{
+  static const char FUNCTION_NAME[] = "tj3SetScalingFactor";
+  int i, retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  for (i = 0; i < NUMSF; i++) {
+    if (scalingFactor.num == sf[i].num && scalingFactor.denom == sf[i].denom)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Unsupported scaling factor");
+
+  this->scalingFactor = scalingFactor;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3SetCroppingRegion(tjhandle handle, tjregion croppingRegion)
+{
+  static const char FUNCTION_NAME[] = "tj3SetCroppingRegion";
+  int retval = 0, scaledWidth, scaledHeight;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (croppingRegion.x == 0 && croppingRegion.y == 0 &&
+      croppingRegion.w == 0 && croppingRegion.h == 0) {
+    this->croppingRegion = croppingRegion;
+    return 0;
+  }
+
+  if (croppingRegion.x < 0 || croppingRegion.y < 0 || croppingRegion.w < 0 ||
+      croppingRegion.h < 0)
+    THROW("Invalid cropping region");
+  if (this->jpegWidth < 0 || this->jpegHeight < 0)
+    THROW("JPEG header has not yet been read");
+  if (this->precision == 16 || this->lossless)
+    THROW("Cannot partially decompress lossless JPEG images");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  scaledWidth = TJSCALED(this->jpegWidth, this->scalingFactor);
+  scaledHeight = TJSCALED(this->jpegHeight, this->scalingFactor);
+
+  if (croppingRegion.x %
+      TJSCALED(tjMCUWidth[this->subsamp], this->scalingFactor) != 0)
+    THROWI("The left boundary of the cropping region (%d) is not\n"
+           "divisible by the scaled MCU width (%d)",
+           croppingRegion.x,
+           TJSCALED(tjMCUWidth[this->subsamp], this->scalingFactor));
+  if (croppingRegion.w == 0)
+    croppingRegion.w = scaledWidth - croppingRegion.x;
+  if (croppingRegion.h == 0)
+    croppingRegion.h = scaledHeight - croppingRegion.y;
+  if (croppingRegion.w < 0 || croppingRegion.h < 0 ||
+      croppingRegion.x + croppingRegion.w > scaledWidth ||
+      croppingRegion.y + croppingRegion.h > scaledHeight)
+    THROW("The cropping region exceeds the scaled image dimensions");
+
+  this->croppingRegion = croppingRegion;
+
+bailout:
+  return retval;
+}
+
+
+/* tj3Decompress*() is implemented in turbojpeg-mp.c */
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, unsigned char *dstBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompress2";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  if (tj3SetCroppingRegion(handle, TJUNCROPPED) == -1)
+    return -1;
+  return tj3Decompress8(handle, jpegBuf, jpegSize, dstBuf, pitch, pixelFormat);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
+                           unsigned long jpegSize, unsigned char *dstBuf,
+                           int width, int pitch, int height, int pixelSize,
+                           int flags)
+{
+  if (flags & TJ_YUV)
+    return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags);
+  else
+    return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch,
+                         height, getPixelFormat(pixelSize, flags), flags);
+}
+
+
+static void setDecodeDefaults(tjinstance *this, int pixelFormat)
+{
+  int i;
+
+  this->dinfo.scale_num = this->dinfo.scale_denom = 1;
+
+  if (this->subsamp == TJSAMP_GRAY) {
+    this->dinfo.num_components = this->dinfo.comps_in_scan = 1;
+    this->dinfo.jpeg_color_space = JCS_GRAYSCALE;
+  } else {
+    this->dinfo.num_components = this->dinfo.comps_in_scan = 3;
+    this->dinfo.jpeg_color_space = JCS_YCbCr;
+  }
+
+  this->dinfo.comp_info = (jpeg_component_info *)
+    (*this->dinfo.mem->alloc_small) ((j_common_ptr)&this->dinfo, JPOOL_IMAGE,
+                                     this->dinfo.num_components *
+                                     sizeof(jpeg_component_info));
+
+  for (i = 0; i < this->dinfo.num_components; i++) {
+    jpeg_component_info *compptr = &this->dinfo.comp_info[i];
+
+    compptr->h_samp_factor = (i == 0) ? tjMCUWidth[this->subsamp] / 8 : 1;
+    compptr->v_samp_factor = (i == 0) ? tjMCUHeight[this->subsamp] / 8 : 1;
+    compptr->component_index = i;
+    compptr->component_id = i + 1;
+    compptr->quant_tbl_no = compptr->dc_tbl_no =
+      compptr->ac_tbl_no = (i == 0) ? 0 : 1;
+    this->dinfo.cur_comp_info[i] = compptr;
+  }
+  this->dinfo.data_precision = 8;
+  for (i = 0; i < 2; i++) {
+    if (this->dinfo.quant_tbl_ptrs[i] == NULL)
+      this->dinfo.quant_tbl_ptrs[i] =
+        jpeg_alloc_quant_table((j_common_ptr)&this->dinfo);
+  }
+
+  this->dinfo.mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+}
+
+
+static int my_read_markers(j_decompress_ptr dinfo)
+{
+  return JPEG_REACHED_SOS;
+}
+
+static void my_reset_marker_reader(j_decompress_ptr dinfo)
+{
+}
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3DecodeYUVPlanes8(tjhandle handle,
+                                  const unsigned char * const *srcPlanes,
+                                  const int *strides, unsigned char *dstBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat)
+{
+  static const char FUNCTION_NAME[] = "tj3DecodeYUVPlanes8";
+  JSAMPROW *row_pointer = NULL;
+  JSAMPLE *_tmpbuf[MAX_COMPONENTS];
+  JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
+  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
+  JSAMPLE *ptr;
+  jpeg_component_info *compptr;
+  int (*old_read_markers) (j_decompress_ptr);
+  void (*old_reset_marker_reader) (j_decompress_ptr);
+
+  GET_DINSTANCE(handle);
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;  inbuf[i] = NULL;
+  }
+
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (!srcPlanes || !srcPlanes[0] || dstBuf == NULL || width <= 0 ||
+      pitch < 0 || height <= 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+  if (pixelFormat == TJPF_CMYK)
+    THROW("Cannot decode YUV images into packed-pixel CMYK images.");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+  dinfo->image_width = width;
+  dinfo->image_height = height;
+
+  dinfo->progressive_mode = dinfo->inputctl->has_multiple_scans = FALSE;
+  dinfo->Ss = dinfo->Ah = dinfo->Al = 0;
+  dinfo->Se = DCTSIZE2 - 1;
+  setDecodeDefaults(this, pixelFormat);
+  old_read_markers = dinfo->marker->read_markers;
+  dinfo->marker->read_markers = my_read_markers;
+  old_reset_marker_reader = dinfo->marker->reset_marker_reader;
+  dinfo->marker->reset_marker_reader = my_reset_marker_reader;
+  jpeg_read_header(dinfo, TRUE);
+  dinfo->marker->read_markers = old_read_markers;
+  dinfo->marker->reset_marker_reader = old_reset_marker_reader;
+
+  this->dinfo.out_color_space = pf2cs[pixelFormat];
+  this->dinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+  dinfo->do_fancy_upsampling = FALSE;
+  dinfo->Se = DCTSIZE2 - 1;
+  jinit_master_decompress(dinfo);
+  (*dinfo->upsample->start_pass) (dinfo);
+
+  pw0 = PAD(width, dinfo->max_h_samp_factor);
+  ph0 = PAD(height, dinfo->max_v_samp_factor);
+
+  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
+
+  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = &dstBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = &dstBuf[i * (size_t)pitch];
+  }
+  if (height < ph0)
+    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
+
+  for (i = 0; i < dinfo->num_components; i++) {
+    compptr = &dinfo->comp_info[i];
+    _tmpbuf[i] =
+      (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
+                        compptr->v_samp_factor + 32);
+    if (!_tmpbuf[i])
+      THROW("Memory allocation failure");
+    tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
+    if (!tmpbuf[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < compptr->v_samp_factor; row++) {
+      unsigned char *_tmpbuf_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
+
+      tmpbuf[i][row] =
+        &_tmpbuf_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
+    }
+    pw[i] = pw0 * compptr->h_samp_factor / dinfo->max_h_samp_factor;
+    ph[i] = ph0 * compptr->v_samp_factor / dinfo->max_v_samp_factor;
+    inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
+    if (!inbuf[i])
+      THROW("Memory allocation failure");
+    ptr = (JSAMPLE *)srcPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      inbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < ph0; row += dinfo->max_v_samp_factor) {
+    JDIMENSION inrow = 0, outrow = 0;
+
+    for (i = 0, compptr = dinfo->comp_info; i < dinfo->num_components;
+         i++, compptr++)
+      jcopy_sample_rows(inbuf[i],
+        row * compptr->v_samp_factor / dinfo->max_v_samp_factor, tmpbuf[i], 0,
+        compptr->v_samp_factor, pw[i]);
+    (dinfo->upsample->upsample) (dinfo, tmpbuf, &inrow,
+                                 dinfo->max_v_samp_factor, &row_pointer[row],
+                                 &outrow, dinfo->max_v_samp_factor);
+  }
+  jpeg_abort_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(row_pointer);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(_tmpbuf[i]);
+    free(inbuf[i]);
+  }
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
+                                const unsigned char **srcPlanes,
+                                const int *strides, int subsamp,
+                                unsigned char *dstBuf, int width, int pitch,
+                                int height, int pixelFormat, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecodeYUVPlanes";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, DECOMPRESS);
+
+  return tj3DecodeYUVPlanes8(handle, srcPlanes, strides, dstBuf, width, pitch,
+                             height, pixelFormat);
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3DecodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int align, unsigned char *dstBuf, int width,
+                            int pitch, int height, int pixelFormat)
+{
+  static const char FUNCTION_NAME[] = "tj3DecodeYUV8";
+  const unsigned char *srcPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (srcBuf == NULL || align < 1 || !IS_POW2(align) || width <= 0 ||
+      height <= 0)
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  srcPlanes[0] = srcBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    srcPlanes[1] = srcPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
+    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3DecodeYUVPlanes8(handle, srcPlanes, strides, dstBuf, width, pitch,
+                             height, pixelFormat);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecodeYUV";
+  int retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, DECOMPRESS);
+
+  return tj3DecodeYUV8(handle, srcBuf, align, dstBuf, width, pitch, height,
+                       pixelFormat);
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3DecompressToYUVPlanes8(tjhandle handle,
+                                        const unsigned char *jpegBuf,
+                                        size_t jpegSize,
+                                        unsigned char **dstPlanes,
+                                        int *strides)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressToYUVPlanes8";
+  int i, row, retval = 0;
+  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
+  JSAMPLE *_tmpbuf = NULL, *ptr;
+  JSAMPROW *outbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
+  int dctsize;
+  struct my_progress_mgr progress;
+
+  GET_DINSTANCE(handle);
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  outbuf[i] = NULL;
+  }
+
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || !dstPlanes || !dstPlanes[0])
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->maxPixels &&
+      (unsigned long long)this->jpegWidth * this->jpegHeight >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  if (this->subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
+    THROW("Invalid argument");
+
+  if (dinfo->num_components > 3)
+    THROW("JPEG image must have 3 or fewer components");
+
+  dinfo->scale_num = this->scalingFactor.num;
+  dinfo->scale_denom = this->scalingFactor.denom;
+  jpeg_calc_output_dimensions(dinfo);
+
+  dctsize = DCTSIZE * this->scalingFactor.num / this->scalingFactor.denom;
+
+  for (i = 0; i < dinfo->num_components; i++) {
+    jpeg_component_info *compptr = &dinfo->comp_info[i];
+    int ih;
+
+    iw[i] = compptr->width_in_blocks * dctsize;
+    ih = compptr->height_in_blocks * dctsize;
+    pw[i] = tj3YUVPlaneWidth(i, dinfo->output_width, this->subsamp);
+    ph[i] = tj3YUVPlaneHeight(i, dinfo->output_height, this->subsamp);
+    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
+    th[i] = compptr->v_samp_factor * dctsize;
+    tmpbufsize += iw[i] * th[i];
+    if ((outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
+      THROW("Memory allocation failure");
+    ptr = dstPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      outbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+  if (usetmpbuf) {
+    if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
+      THROW("Memory allocation failure");
+    ptr = _tmpbuf;
+    for (i = 0; i < dinfo->num_components; i++) {
+      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
+        THROW("Memory allocation failure");
+      for (row = 0; row < th[i]; row++) {
+        tmpbuf[i][row] = ptr;
+        ptr += iw[i];
+      }
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  dinfo->do_fancy_upsampling = !this->fastUpsample;
+  dinfo->dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+  dinfo->raw_data_out = TRUE;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  jpeg_start_decompress(dinfo);
+  for (row = 0; row < (int)dinfo->output_height;
+       row += dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size) {
+    JSAMPARRAY yuvptr[MAX_COMPONENTS];
+    int crow[MAX_COMPONENTS];
+
+    for (i = 0; i < dinfo->num_components; i++) {
+      jpeg_component_info *compptr = &dinfo->comp_info[i];
+
+      if (this->subsamp == TJSAMP_420) {
+        /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
+           to be clever and use the IDCT to perform upsampling on the U and V
+           planes.  For instance, if the output image is to be scaled by 1/2
+           relative to the JPEG image, then the scaling factor and upsampling
+           effectively cancel each other, so a normal 8x8 IDCT can be used.
+           However, this is not desirable when using the decompress-to-YUV
+           functionality in TurboJPEG, since we want to output the U and V
+           planes in their subsampled form.  Thus, we have to override some
+           internal libjpeg parameters to force it to use the "scaled" IDCT
+           functions on the U and V planes. */
+        compptr->_DCT_scaled_size = dctsize;
+        compptr->MCU_sample_width = tjMCUWidth[this->subsamp] *
+          this->scalingFactor.num / this->scalingFactor.denom *
+          compptr->v_samp_factor / dinfo->max_v_samp_factor;
+        dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
+      }
+      crow[i] = row * compptr->v_samp_factor / dinfo->max_v_samp_factor;
+      if (usetmpbuf) yuvptr[i] = tmpbuf[i];
+      else yuvptr[i] = &outbuf[i][crow[i]];
+    }
+    jpeg_read_raw_data(dinfo, yuvptr,
+                       dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size);
+    if (usetmpbuf) {
+      int j;
+
+      for (i = 0; i < dinfo->num_components; i++) {
+        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
+          memcpy(outbuf[i][crow[i] + j], tmpbuf[i][j], pw[i]);
+        }
+      }
+    }
+  }
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(outbuf[i]);
+  }
+  free(_tmpbuf);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
+                                      const unsigned char *jpegBuf,
+                                      unsigned long jpegSize,
+                                      unsigned char **dstPlanes, int width,
+                                      int *strides, int height, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressToYUVPlanes";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  return tj3DecompressToYUVPlanes8(handle, jpegBuf, jpegSize, dstPlanes,
+                                   strides);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3DecompressToYUV8(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize,
+                                  unsigned char *dstBuf, int align)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressToYUV8";
+  unsigned char *dstPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+  int width, height;
+
+  GET_DINSTANCE(handle);
+
+  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || align < 1 ||
+      !IS_POW2(align))
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  width = TJSCALED(dinfo->image_width, this->scalingFactor);
+  height = TJSCALED(dinfo->image_height, this->scalingFactor);
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  dstPlanes[0] = dstBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    dstPlanes[1] = dstPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
+    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3DecompressToYUVPlanes8(handle, jpegBuf, jpegSize, dstPlanes,
+                                   strides);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
+                                 unsigned long jpegSize, unsigned char *dstBuf,
+                                 int width, int align, int height, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressToYUV2";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  width = scaledw;  height = scaledh;
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  return tj3DecompressToYUV8(handle, jpegBuf, (size_t)jpegSize, dstBuf, align);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
+                                unsigned long jpegSize, unsigned char *dstBuf,
+                                int flags)
+{
+  return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
+}
+
+
+/******************************** Transformer ********************************/
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT tjhandle tjInitTransform(void)
+{
+  return tj3Init(TJINIT_TRANSFORM);
+}
+
+
+/* TurboJPEG 3+ */
+DLLEXPORT int tj3Transform(tjhandle handle, const unsigned char *jpegBuf,
+                           size_t jpegSize, int n, unsigned char **dstBufs,
+                           size_t *dstSizes, const tjtransform *t)
+{
+  static const char FUNCTION_NAME[] = "tj3Transform";
+  jpeg_transform_info *xinfo = NULL;
+  jvirt_barray_ptr *srccoefs, *dstcoefs;
+  int retval = 0, i, saveMarkers = 0;
+  boolean alloc = TRUE;
+  struct my_progress_mgr progress;
+
+  GET_INSTANCE(handle);
+  if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for transformation");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || n < 1 || dstBufs == NULL ||
+      dstSizes == NULL || t == NULL)
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if ((xinfo =
+       (jpeg_transform_info *)malloc(sizeof(jpeg_transform_info) * n)) == NULL)
+    THROW("Memory allocation failure");
+  memset(xinfo, 0, sizeof(jpeg_transform_info) * n);
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER)
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+
+  for (i = 0; i < n; i++) {
+    if (t[i].op < 0 || t[i].op >= TJ_NUMXOP)
+      THROW("Invalid transform operation");
+    xinfo[i].transform = xformtypes[t[i].op];
+    xinfo[i].perfect = (t[i].options & TJXOPT_PERFECT) ? 1 : 0;
+    xinfo[i].trim = (t[i].options & TJXOPT_TRIM) ? 1 : 0;
+    xinfo[i].force_grayscale = (t[i].options & TJXOPT_GRAY) ? 1 : 0;
+    xinfo[i].crop = (t[i].options & TJXOPT_CROP) ? 1 : 0;
+    if (n != 1 && t[i].op == TJXOP_HFLIP) xinfo[i].slow_hflip = 1;
+    else xinfo[i].slow_hflip = 0;
+
+    if (xinfo[i].crop) {
+      xinfo[i].crop_xoffset = t[i].r.x;  xinfo[i].crop_xoffset_set = JCROP_POS;
+      xinfo[i].crop_yoffset = t[i].r.y;  xinfo[i].crop_yoffset_set = JCROP_POS;
+      if (t[i].r.w != 0) {
+        xinfo[i].crop_width = t[i].r.w;  xinfo[i].crop_width_set = JCROP_POS;
+      } else
+        xinfo[i].crop_width = JCROP_UNSET;
+      if (t[i].r.h != 0) {
+        xinfo[i].crop_height = t[i].r.h;  xinfo[i].crop_height_set = JCROP_POS;
+      } else
+        xinfo[i].crop_height = JCROP_UNSET;
+    }
+    if (!(t[i].options & TJXOPT_COPYNONE)) saveMarkers = 1;
+  }
+
+  jcopy_markers_setup(dinfo, saveMarkers ? JCOPYOPT_ALL : JCOPYOPT_NONE);
+  if (dinfo->global_state <= DSTATE_INHEADER)
+    jpeg_read_header(dinfo, TRUE);
+  if (this->maxPixels &&
+      (unsigned long long)dinfo->image_width * dinfo->image_height >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  this->subsamp = getSubsamp(&this->dinfo);
+
+  for (i = 0; i < n; i++) {
+    if (!jtransform_request_workspace(dinfo, &xinfo[i]))
+      THROW("Transform is not perfect");
+
+    if (xinfo[i].crop) {
+      if (this->subsamp == TJSAMP_UNKNOWN)
+        THROW("Could not determine subsampling level of JPEG image");
+      if ((t[i].r.x % tjMCUWidth[this->subsamp]) != 0 ||
+          (t[i].r.y % tjMCUHeight[this->subsamp]) != 0)
+        THROWI("To crop this JPEG image, x must be a multiple of %d\n"
+               "and y must be a multiple of %d.", tjMCUWidth[this->subsamp],
+               tjMCUHeight[this->subsamp]);
+    }
+  }
+
+  srccoefs = jpeg_read_coefficients(dinfo);
+
+  for (i = 0; i < n; i++) {
+    int w, h;
+
+    if (!xinfo[i].crop) {
+      w = dinfo->image_width;  h = dinfo->image_height;
+      if (t[i].op == TJXOP_TRANSPOSE || t[i].op == TJXOP_TRANSVERSE ||
+          t[i].op == TJXOP_ROT90 || t[i].op == TJXOP_ROT270) {
+        w = dinfo->image_height;  h = dinfo->image_width;
+      }
+    } else {
+      w = xinfo[i].crop_width;  h = xinfo[i].crop_height;
+    }
+    if (this->noRealloc) {
+      alloc = FALSE;  dstSizes[i] = tj3JPEGBufSize(w, h, this->subsamp);
+    }
+    if (!(t[i].options & TJXOPT_NOOUTPUT))
+      jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
+    jpeg_copy_critical_parameters(dinfo, cinfo);
+    dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]);
+    if (this->optimize || t[i].options & TJXOPT_OPTIMIZE)
+      cinfo->optimize_coding = TRUE;
+#ifdef C_PROGRESSIVE_SUPPORTED
+    if (this->progressive || t[i].options & TJXOPT_PROGRESSIVE)
+      jpeg_simple_progression(cinfo);
+#endif
+    if (this->arithmetic || t[i].options & TJXOPT_ARITHMETIC) {
+      cinfo->arith_code = TRUE;
+      cinfo->optimize_coding = FALSE;
+    }
+    if (!(t[i].options & TJXOPT_NOOUTPUT)) {
+      jpeg_write_coefficients(cinfo, dstcoefs);
+      jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ?
+                                          JCOPYOPT_NONE : JCOPYOPT_ALL);
+    } else
+      jinit_c_master_control(cinfo, TRUE);
+    jtransform_execute_transformation(dinfo, cinfo, srccoefs, &xinfo[i]);
+    if (t[i].customFilter) {
+      int ci, y;
+      JDIMENSION by;
+
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+        jpeg_component_info *compptr = &cinfo->comp_info[ci];
+        tjregion arrayRegion = { 0, 0, 0, 0 };
+        tjregion planeRegion = { 0, 0, 0, 0 };
+
+        arrayRegion.w = compptr->width_in_blocks * DCTSIZE;
+        arrayRegion.h = DCTSIZE;
+        planeRegion.w = compptr->width_in_blocks * DCTSIZE;
+        planeRegion.h = compptr->height_in_blocks * DCTSIZE;
+
+        for (by = 0; by < compptr->height_in_blocks;
+             by += compptr->v_samp_factor) {
+          JBLOCKARRAY barray = (dinfo->mem->access_virt_barray)
+            ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor,
+             TRUE);
+
+          for (y = 0; y < compptr->v_samp_factor; y++) {
+            if (t[i].customFilter(barray[y][0], arrayRegion, planeRegion, ci,
+                                  i, (tjtransform *)&t[i]) == -1)
+              THROW("Error in custom filter");
+            arrayRegion.y += DCTSIZE;
+          }
+        }
+      }
+    }
+    if (!(t[i].options & TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo);
+  }
+
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START) {
+    if (alloc) (*cinfo->dest->term_destination) (cinfo);
+    jpeg_abort_compress(cinfo);
+  }
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(xinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
+                          unsigned long jpegSize, int n,
+                          unsigned char **dstBufs, unsigned long *dstSizes,
+                          tjtransform *t, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjTransform";
+  int i, retval = 0;
+  size_t *sizes = NULL;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (n < 1 || dstSizes == NULL)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  if (getSubsamp(dinfo) == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+  processFlags(handle, flags, COMPRESS);
+
+  if ((sizes = (size_t *)malloc(n * sizeof(size_t))) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < n; i++)
+    sizes[i] = (size_t)dstSizes[i];
+  retval = tj3Transform(handle, jpegBuf, (size_t)jpegSize, n, dstBufs, sizes,
+                        t);
+  for (i = 0; i < n; i++)
+    dstSizes[i] = (unsigned long)sizes[i];
+
+bailout:
+  free(sizes);
+  return retval;
+}
+
+
+/*************************** Packed-Pixel Image I/O **************************/
+
+/* tj3LoadImage*() is implemented in turbojpeg-mp.c */
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
+                                     int align, int *height,
+                                     int *pixelFormat, int flags)
+{
+  tjhandle handle = NULL;
+  unsigned char *dstBuf = NULL;
+
+  if ((handle = tj3Init(TJINIT_COMPRESS)) == NULL) return NULL;
+
+  processFlags(handle, flags, COMPRESS);
+
+  dstBuf = tj3LoadImage8(handle, filename, width, align, height, pixelFormat);
+
+  tj3Destroy(handle);
+  return dstBuf;
+}
+
+
+/* tj3SaveImage*() is implemented in turbojpeg-mp.c */
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags)
+{
+  tjhandle handle = NULL;
+  int retval = -1;
+
+  if ((handle = tj3Init(TJINIT_DECOMPRESS)) == NULL) return -1;
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  retval = tj3SaveImage8(handle, filename, buffer, width, pitch, height,
+                         pixelFormat);
+
+  tj3Destroy(handle);
+  return retval;
+}
diff --git a/3rdparty/libjpeg-turbo/src/turbojpeg.h b/3rdparty/libjpeg-turbo/src/turbojpeg.h
new file mode 100644
index 0000000000..68b88a4104
--- /dev/null
+++ b/3rdparty/libjpeg-turbo/src/turbojpeg.h
@@ -0,0 +1,2328 @@
+/*
+ * Copyright (C)2009-2015, 2017, 2020-2023 D. R. Commander.
+ *                                         All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TURBOJPEG_H__
+#define __TURBOJPEG_H__
+
+#include <stddef.h>
+
+#if defined(_WIN32) && defined(DLLDEFINE)
+#define DLLEXPORT  __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+#define DLLCALL
+
+
+/**
+ * @addtogroup TurboJPEG
+ * TurboJPEG API.  This API provides an interface for generating, decoding, and
+ * transforming planar YUV and JPEG images in memory.
+ *
+ * @anchor YUVnotes
+ * YUV Image Format Notes
+ * ----------------------
+ * Technically, the JPEG format uses the YCbCr colorspace (which is technically
+ * not a colorspace but a color transform), but per the convention of the
+ * digital video community, the TurboJPEG API uses "YUV" to refer to an image
+ * format consisting of Y, Cb, and Cr image planes.
+ *
+ * Each plane is simply a 2D array of bytes, each byte representing the value
+ * of one of the components (Y, Cb, or Cr) at a particular location in the
+ * image.  The width and height of each plane are determined by the image
+ * width, height, and level of chrominance subsampling.  The luminance plane
+ * width is the image width padded to the nearest multiple of the horizontal
+ * subsampling factor (1 in the case of 4:4:4, grayscale, 4:4:0, or 4:4:1; 2 in
+ * the case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.)  Similarly, the
+ * luminance plane height is the image height padded to the nearest multiple of
+ * the vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale,
+ * or 4:1:1; 2 in the case of 4:2:0 or 4:4:0; 4 in the case of 4:4:1.)  This is
+ * irrespective of any additional padding that may be specified as an argument
+ * to the various YUV functions.  The chrominance plane width is equal to the
+ * luminance plane width divided by the horizontal subsampling factor, and the
+ * chrominance plane height is equal to the luminance plane height divided by
+ * the vertical subsampling factor.
+ *
+ * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is
+ * used, then the luminance plane would be 36 x 35 bytes, and each of the
+ * chrominance planes would be 18 x 35 bytes.  If you specify a row alignment
+ * of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes,
+ * and each of the chrominance planes would be 20 x 35 bytes.
+ *
+ * @{
+ */
+
+
+/**
+ * The number of initialization options
+ */
+#define TJ_NUMINIT  3
+
+/**
+ * Initialization options.
+ */
+enum TJINIT {
+  /**
+   * Initialize the TurboJPEG instance for compression.
+   */
+  TJINIT_COMPRESS,
+  /**
+   * Initialize the TurboJPEG instance for decompression.
+   */
+  TJINIT_DECOMPRESS,
+  /**
+   * Initialize the TurboJPEG instance for lossless transformation (both
+   * compression and decompression.)
+   */
+  TJINIT_TRANSFORM
+};
+
+
+/**
+ * The number of chrominance subsampling options
+ */
+#define TJ_NUMSAMP  7
+
+/**
+ * Chrominance subsampling options.
+ * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK
+ * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of
+ * the Cb and Cr (chrominance) components can be discarded or averaged together
+ * to produce a smaller image with little perceptible loss of image clarity.
+ * (The human eye is more sensitive to small changes in brightness than to
+ * small changes in color.)  This is called "chrominance subsampling".
+ */
+enum TJSAMP {
+  /**
+   * 4:4:4 chrominance subsampling (no chrominance subsampling).  The JPEG or
+   * YUV image will contain one chrominance component for every pixel in the
+   * source image.
+   */
+  TJSAMP_444,
+  /**
+   * 4:2:2 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x1 block of pixels in the source image.
+   */
+  TJSAMP_422,
+  /**
+   * 4:2:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x2 block of pixels in the source image.
+   */
+  TJSAMP_420,
+  /**
+   * Grayscale.  The JPEG or YUV image will contain no chrominance components.
+   */
+  TJSAMP_GRAY,
+  /**
+   * 4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 1x2 block of pixels in the source image.
+   *
+   * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_440,
+  /**
+   * 4:1:1 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 4x1 block of pixels in the source image.
+   * JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+   * same size as those compressed with 4:2:0 subsampling, and in the
+   * aggregate, both subsampling methods produce approximately the same
+   * perceptual quality.  However, 4:1:1 is better able to reproduce sharp
+   * horizontal features.
+   *
+   * @note 4:1:1 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_411,
+  /**
+   * 4:4:1 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 1x4 block of pixels in the source image.
+   * JPEG images compressed with 4:4:1 subsampling will be almost exactly the
+   * same size as those compressed with 4:2:0 subsampling, and in the
+   * aggregate, both subsampling methods produce approximately the same
+   * perceptual quality.  However, 4:4:1 is better able to reproduce sharp
+   * vertical features.
+   *
+   * @note 4:4:1 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_441,
+  /**
+   * Unknown subsampling.  The JPEG image uses an unusual type of chrominance
+   * subsampling.  Such images can be decompressed into packed-pixel images,
+   * but they cannot be
+   * - decompressed into planar YUV images,
+   * - losslessly transformed if #TJXOPT_CROP is specified, or
+   * - partially decompressed using a cropping region.
+   */
+  TJSAMP_UNKNOWN = -1
+};
+
+/**
+ * MCU block width (in pixels) for a given level of chrominance subsampling.
+ * MCU block sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ * - 8x32 for 4:4:1
+ */
+static const int tjMCUWidth[TJ_NUMSAMP]  = { 8, 16, 16, 8, 8, 32, 8 };
+
+/**
+ * MCU block height (in pixels) for a given level of chrominance subsampling.
+ * MCU block sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ * - 8x32 for 4:4:1
+ */
+static const int tjMCUHeight[TJ_NUMSAMP] = { 8, 8, 16, 8, 16, 8, 32 };
+
+
+/**
+ * The number of pixel formats
+ */
+#define TJ_NUMPF  12
+
+/**
+ * Pixel formats
+ */
+enum TJPF {
+  /**
+   * RGB pixel format.  The red, green, and blue components in the image are
+   * stored in 3-sample pixels in the order R, G, B from lowest to highest
+   * memory address within each pixel.
+   */
+  TJPF_RGB,
+  /**
+   * BGR pixel format.  The red, green, and blue components in the image are
+   * stored in 3-sample pixels in the order B, G, R from lowest to highest
+   * memory address within each pixel.
+   */
+  TJPF_BGR,
+  /**
+   * RGBX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-sample pixels in the order R, G, B from lowest to highest
+   * memory address within each pixel.  The X component is ignored when
+   * compressing and undefined when decompressing.
+   */
+  TJPF_RGBX,
+  /**
+   * BGRX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-sample pixels in the order B, G, R from lowest to highest
+   * memory address within each pixel.  The X component is ignored when
+   * compressing and undefined when decompressing.
+   */
+  TJPF_BGRX,
+  /**
+   * XBGR pixel format.  The red, green, and blue components in the image are
+   * stored in 4-sample pixels in the order R, G, B from highest to lowest
+   * memory address within each pixel.  The X component is ignored when
+   * compressing and undefined when decompressing.
+   */
+  TJPF_XBGR,
+  /**
+   * XRGB pixel format.  The red, green, and blue components in the image are
+   * stored in 4-sample pixels in the order B, G, R from highest to lowest
+   * memory address within each pixel.  The X component is ignored when
+   * compressing and undefined when decompressing.
+   */
+  TJPF_XRGB,
+  /**
+   * Grayscale pixel format.  Each 1-sample pixel represents a luminance
+   * (brightness) level from 0 to the maximum sample value (255 for 8-bit
+   * samples, 4095 for 12-bit samples, and 65535 for 16-bit samples.)
+   */
+  TJPF_GRAY,
+  /**
+   * RGBA pixel format.  This is the same as @ref TJPF_RGBX, except that when
+   * decompressing, the X component is guaranteed to be equal to the maximum
+   * sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_RGBA,
+  /**
+   * BGRA pixel format.  This is the same as @ref TJPF_BGRX, except that when
+   * decompressing, the X component is guaranteed to be equal to the maximum
+   * sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_BGRA,
+  /**
+   * ABGR pixel format.  This is the same as @ref TJPF_XBGR, except that when
+   * decompressing, the X component is guaranteed to be equal to the maximum
+   * sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_ABGR,
+  /**
+   * ARGB pixel format.  This is the same as @ref TJPF_XRGB, except that when
+   * decompressing, the X component is guaranteed to be equal to the maximum
+   * sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_ARGB,
+  /**
+   * CMYK pixel format.  Unlike RGB, which is an additive color model used
+   * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+   * color model used primarily for printing.  In the CMYK color model, the
+   * value of each color component typically corresponds to an amount of cyan,
+   * magenta, yellow, or black ink that is applied to a white background.  In
+   * order to convert between CMYK and RGB, it is necessary to use a color
+   * management system (CMS.)  A CMS will attempt to map colors within the
+   * printer's gamut to perceptually similar colors in the display's gamut and
+   * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+   * be defined with a simple formula.  Thus, such a conversion is out of scope
+   * for a codec library.  However, the TurboJPEG API allows for compressing
+   * packed-pixel CMYK images into YCCK JPEG images (see #TJCS_YCCK) and
+   * decompressing YCCK JPEG images into packed-pixel CMYK images.
+   */
+  TJPF_CMYK,
+  /**
+   * Unknown pixel format.  Currently this is only used by #tj3LoadImage8(),
+   * #tj3LoadImage12(), and #tj3LoadImage16().
+   */
+  TJPF_UNKNOWN = -1
+};
+
+/**
+ * Red offset (in samples) for a given pixel format.  This specifies the number
+ * of samples that the red component is offset from the start of the pixel.
+ * For instance, if an 8-bit-per-component pixel of format TJPF_BGRX is stored
+ * in `unsigned char pixel[]`, then the red component will be
+ * `pixel[tjRedOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a red component.
+ */
+static const int tjRedOffset[TJ_NUMPF] = {
+  0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1
+};
+/**
+ * Green offset (in samples) for a given pixel format.  This specifies the
+ * number of samples that the green component is offset from the start of the
+ * pixel.  For instance, if an 8-bit-per-component pixel of format TJPF_BGRX is
+ * stored in `unsigned char pixel[]`, then the green component will be
+ * `pixel[tjGreenOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a green component.
+ */
+static const int tjGreenOffset[TJ_NUMPF] = {
+  1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1
+};
+/**
+ * Blue offset (in samples) for a given pixel format.  This specifies the
+ * number of samples that the blue component is offset from the start of the
+ * pixel.  For instance, if an 8-bit-per-component pixel of format TJPF_BGRX is
+ * stored in `unsigned char pixel[]`, then the blue component will be
+ * `pixel[tjBlueOffset[TJPF_BGRX]]`.  This will be -1 if the pixel format does
+ * not have a blue component.
+ */
+static const int tjBlueOffset[TJ_NUMPF] = {
+  2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1
+};
+/**
+ * Alpha offset (in samples) for a given pixel format.  This specifies the
+ * number of samples that the alpha component is offset from the start of the
+ * pixel.  For instance, if an 8-bit-per-component pixel of format TJPF_BGRA is
+ * stored in `unsigned char pixel[]`, then the alpha component will be
+ * `pixel[tjAlphaOffset[TJPF_BGRA]]`.  This will be -1 if the pixel format does
+ * not have an alpha component.
+ */
+static const int tjAlphaOffset[TJ_NUMPF] = {
+  -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
+};
+/**
+ * Pixel size (in samples) for a given pixel format
+ */
+static const int tjPixelSize[TJ_NUMPF] = {
+  3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4
+};
+
+
+/**
+ * The number of JPEG colorspaces
+ */
+#define TJ_NUMCS  5
+
+/**
+ * JPEG colorspaces
+ */
+enum TJCS {
+  /**
+   * RGB colorspace.  When compressing the JPEG image, the R, G, and B
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  RGB JPEG images can be
+   * compressed from and decompressed to packed-pixel images with any of the
+   * extended RGB or grayscale pixel formats, but they cannot be compressed
+   * from or decompressed to planar YUV images.
+   */
+  TJCS_RGB,
+  /**
+   * YCbCr colorspace.  YCbCr is not an absolute colorspace but rather a
+   * mathematical transformation of RGB designed solely for storage and
+   * transmission.  YCbCr images must be converted to RGB before they can
+   * actually be displayed.  In the YCbCr colorspace, the Y (luminance)
+   * component represents the black & white portion of the original image, and
+   * the Cb and Cr (chrominance) components represent the color portion of the
+   * original image.  Originally, the analog equivalent of this transformation
+   * allowed the same signal to drive both black & white and color televisions,
+   * but JPEG images use YCbCr primarily because it allows the color data to be
+   * optionally subsampled for the purposes of reducing network or disk usage.
+   * YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be
+   * compressed from and decompressed to packed-pixel images with any of the
+   * extended RGB or grayscale pixel formats.  YCbCr JPEG images can also be
+   * compressed from and decompressed to planar YUV images.
+   */
+  TJCS_YCbCr,
+  /**
+   * Grayscale colorspace.  The JPEG image retains only the luminance data (Y
+   * component), and any color data from the source image is discarded.
+   * Grayscale JPEG images can be compressed from and decompressed to
+   * packed-pixel images with any of the extended RGB or grayscale pixel
+   * formats, or they can be compressed from and decompressed to planar YUV
+   * images.
+   */
+  TJCS_GRAY,
+  /**
+   * CMYK colorspace.  When compressing the JPEG image, the C, M, Y, and K
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  CMYK JPEG images can
+   * only be compressed from and decompressed to packed-pixel images with the
+   * CMYK pixel format.
+   */
+  TJCS_CMYK,
+  /**
+   * YCCK colorspace.  YCCK (AKA "YCbCrK") is not an absolute colorspace but
+   * rather a mathematical transformation of CMYK designed solely for storage
+   * and transmission.  It is to CMYK as YCbCr is to RGB.  CMYK pixels can be
+   * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+   * components in the YCCK pixels can be subsampled without incurring major
+   * perceptual loss.  YCCK JPEG images can only be compressed from and
+   * decompressed to packed-pixel images with the CMYK pixel format.
+   */
+  TJCS_YCCK
+};
+
+
+/**
+ * Parameters
+ */
+enum TJPARAM {
+  /**
+   * Error handling behavior
+   *
+   * **Value**
+   * - `0` *[default]* Allow the current compression/decompression/transform
+   * operation to complete unless a fatal error is encountered.
+   * - `1` Immediately discontinue the current
+   * compression/decompression/transform operation if a warning (non-fatal
+   * error) occurs.
+   */
+  TJPARAM_STOPONWARNING,
+  /**
+   * Row order in packed-pixel source/destination images
+   *
+   * **Value**
+   * - `0` *[default]* top-down (X11) order
+   * - `1` bottom-up (Windows, OpenGL) order
+   */
+  TJPARAM_BOTTOMUP,
+  /**
+   * JPEG destination buffer (re)allocation [compression, lossless
+   * transformation]
+   *
+   * **Value**
+   * - `0` *[default]* Attempt to allocate or reallocate the JPEG destination
+   * buffer as needed.
+   * - `1` Generate an error if the JPEG destination buffer is invalid or too
+   * small.
+   */
+  TJPARAM_NOREALLOC,
+  /**
+   * Perceptual quality of lossy JPEG images [compression only]
+   *
+   * **Value**
+   * - `1`-`100` (`1` = worst quality but best compression, `100` = best
+   * quality but worst compression) *[no default; must be explicitly
+   * specified]*
+   */
+  TJPARAM_QUALITY,
+  /**
+   * Chrominance subsampling level
+   *
+   * The JPEG or YUV image uses (decompression, decoding) or will use (lossy
+   * compression, encoding) the specified level of chrominance subsampling.
+   *
+   * **Value**
+   * - One of the @ref TJSAMP "chrominance subsampling options" *[no default;
+   * must be explicitly specified for lossy compression, encoding, and
+   * decoding]*
+   */
+  TJPARAM_SUBSAMP,
+  /**
+   * JPEG width (in pixels) [decompression only, read-only]
+   */
+  TJPARAM_JPEGWIDTH,
+  /**
+   * JPEG height (in pixels) [decompression only, read-only]
+   */
+  TJPARAM_JPEGHEIGHT,
+  /**
+   * JPEG data precision (bits per sample) [decompression only, read-only]
+   *
+   * The JPEG image uses the specified number of bits per sample.
+   *
+   * **Value**
+   * - `8`, `12`, or `16`
+   *
+   * 12-bit data precision implies #TJPARAM_OPTIMIZE unless #TJPARAM_ARITHMETIC
+   * is set.
+   */
+  TJPARAM_PRECISION,
+  /**
+   * JPEG colorspace
+   *
+   * The JPEG image uses (decompression) or will use (lossy compression) the
+   * specified colorspace.
+   *
+   * **Value**
+   * - One of the @ref TJCS "JPEG colorspaces" *[default for lossy compression:
+   * automatically selected based on the subsampling level and pixel format]*
+   */
+  TJPARAM_COLORSPACE,
+  /**
+   * Chrominance upsampling algorithm [lossy decompression only]
+   *
+   * **Value**
+   * - `0` *[default]* Use smooth upsampling when decompressing a JPEG image
+   * that was compressed using chrominance subsampling.  This creates a smooth
+   * transition between neighboring chrominance components in order to reduce
+   * upsampling artifacts in the decompressed image.
+   * - `1` Use the fastest chrominance upsampling algorithm available, which
+   * may combine upsampling with color conversion.
+   */
+  TJPARAM_FASTUPSAMPLE,
+  /**
+   * DCT/IDCT algorithm [lossy compression and decompression]
+   *
+   * **Value**
+   * - `0` *[default]* Use the most accurate DCT/IDCT algorithm available.
+   * - `1` Use the fastest DCT/IDCT algorithm available.
+   *
+   * This parameter is provided mainly for backward compatibility with libjpeg,
+   * which historically implemented several different DCT/IDCT algorithms
+   * because of performance limitations with 1990s CPUs.  In the libjpeg-turbo
+   * implementation of the TurboJPEG API:
+   * - The "fast" and "accurate" DCT/IDCT algorithms perform similarly on
+   * modern x86/x86-64 CPUs that support AVX2 instructions.
+   * - The "fast" algorithm is generally only about 5-15% faster than the
+   * "accurate" algorithm on other types of CPUs.
+   * - The difference in accuracy between the "fast" and "accurate" algorithms
+   * is the most pronounced at JPEG quality levels above 90 and tends to be
+   * more pronounced with decompression than with compression.
+   * - The "fast" algorithm degrades and is not fully accelerated for JPEG
+   * quality levels above 97, so it will be slower than the "accurate"
+   * algorithm.
+   */
+  TJPARAM_FASTDCT,
+  /**
+   * Optimized baseline entropy coding [lossy compression only]
+   *
+   * **Value**
+   * - `0` *[default]* The JPEG image will use the default Huffman tables.
+   * - `1` Optimal Huffman tables will be computed for the JPEG image.  For
+   * lossless transformation, this can also be specified using
+   * #TJXOPT_OPTIMIZE.
+   *
+   * Optimized baseline entropy coding will improve compression slightly
+   * (generally 5% or less), but it will reduce compression performance
+   * considerably.
+   */
+  TJPARAM_OPTIMIZE,
+  /**
+   * Progressive entropy coding
+   *
+   * **Value**
+   * - `0` *[default for compression, lossless transformation]* The lossy JPEG
+   * image uses (decompression) or will use (compression, lossless
+   * transformation) baseline entropy coding.
+   * - `1` The lossy JPEG image uses (decompression) or will use (compression,
+   * lossless transformation) progressive entropy coding.  For lossless
+   * transformation, this can also be specified using #TJXOPT_PROGRESSIVE.
+   *
+   * Progressive entropy coding will generally improve compression relative to
+   * baseline entropy coding, but it will reduce compression and decompression
+   * performance considerably.  Can be combined with #TJPARAM_ARITHMETIC.
+   * Implies #TJPARAM_OPTIMIZE unless #TJPARAM_ARITHMETIC is also set.
+   */
+  TJPARAM_PROGRESSIVE,
+  /**
+   * Progressive JPEG scan limit for lossy JPEG images [decompression, lossless
+   * transformation]
+   *
+   * Setting this parameter will cause the decompression and transform
+   * functions to return an error if the number of scans in a progressive JPEG
+   * image exceeds the specified limit.  The primary purpose of this is to
+   * allow security-critical applications to guard against an exploit of the
+   * progressive JPEG format described in
+   * <a href="https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf" target="_blank">this report</a>.
+   *
+   * **Value**
+   * - maximum number of progressive JPEG scans that the decompression and
+   * transform functions will process *[default: `0` (no limit)]*
+   *
+   * @see #TJPARAM_PROGRESSIVE
+   */
+  TJPARAM_SCANLIMIT,
+  /**
+   * Arithmetic entropy coding
+   *
+   * **Value**
+   * - `0` *[default for compression, lossless transformation]* The lossy JPEG
+   * image uses (decompression) or will use (compression, lossless
+   * transformation) Huffman entropy coding.
+   * - `1` The lossy JPEG image uses (decompression) or will use (compression,
+   * lossless transformation) arithmetic entropy coding.  For lossless
+   * transformation, this can also be specified using #TJXOPT_ARITHMETIC.
+   *
+   * Arithmetic entropy coding will generally improve compression relative to
+   * Huffman entropy coding, but it will reduce compression and decompression
+   * performance considerably.  Can be combined with #TJPARAM_PROGRESSIVE.
+   */
+  TJPARAM_ARITHMETIC,
+  /**
+   * Lossless JPEG
+   *
+   * **Value**
+   * - `0` *[default for compression]* The JPEG image is (decompression) or
+   * will be (compression) lossy/DCT-based.
+   * - `1` The JPEG image is (decompression) or will be (compression)
+   * lossless/predictive.
+   *
+   * In most cases, compressing and decompressing lossless JPEG images is
+   * considerably slower than compressing and decompressing lossy JPEG images,
+   * and lossless JPEG images are much larger than lossy JPEG images.  Thus,
+   * lossless JPEG images are typically used only for applications that require
+   * mathematically lossless compression.  Also note that the following
+   * features are not available with lossless JPEG images:
+   * - Colorspace conversion (lossless JPEG images always use #TJCS_RGB,
+   * #TJCS_GRAY, or #TJCS_CMYK, depending on the pixel format of the source
+   * image)
+   * - Chrominance subsampling (lossless JPEG images always use #TJSAMP_444)
+   * - JPEG quality selection
+   * - DCT/IDCT algorithm selection
+   * - Progressive entropy coding
+   * - Arithmetic entropy coding
+   * - Compression from/decompression to planar YUV images
+   * - Decompression scaling
+   * - Lossless transformation
+   *
+   * @see #TJPARAM_LOSSLESSPSV, #TJPARAM_LOSSLESSPT
+   */
+  TJPARAM_LOSSLESS,
+  /**
+   * Lossless JPEG predictor selection value (PSV)
+   *
+   * **Value**
+   * - `1`-`7` *[default for compression: `1`]*
+   *
+   * Lossless JPEG compression shares no algorithms with lossy JPEG
+   * compression.  Instead, it uses differential pulse-code modulation (DPCM),
+   * an algorithm whereby each sample is encoded as the difference between the
+   * sample's value and a "predictor", which is based on the values of
+   * neighboring samples.  If Ra is the sample immediately to the left of the
+   * current sample, Rb is the sample immediately above the current sample, and
+   * Rc is the sample diagonally to the left and above the current sample, then
+   * the relationship between the predictor selection value and the predictor
+   * is as follows:
+   *
+   * PSV | Predictor
+   * ----|----------
+   * 1   | Ra
+   * 2   | Rb
+   * 3   | Rc
+   * 4   | Ra + Rb – Rc
+   * 5   | Ra + (Rb – Rc) / 2
+   * 6   | Rb + (Ra – Rc) / 2
+   * 7   | (Ra + Rb) / 2
+   *
+   * Predictors 1-3 are 1-dimensional predictors, whereas Predictors 4-7 are
+   * 2-dimensional predictors.  The best predictor for a particular image
+   * depends on the image.
+   *
+   * @see #TJPARAM_LOSSLESS
+   */
+  TJPARAM_LOSSLESSPSV,
+  /**
+   * Lossless JPEG point transform (Pt)
+   *
+   * **Value**
+   * - `0` through ***precision*** *- 1*, where ***precision*** is the JPEG
+   * data precision in bits *[default for compression: `0`]*
+   *
+   * A point transform value of `0` is necessary in order to generate a fully
+   * lossless JPEG image.  (A non-zero point transform value right-shifts the
+   * input samples by the specified number of bits, which is effectively a form
+   * of lossy color quantization.)
+   *
+   * @see #TJPARAM_LOSSLESS, #TJPARAM_PRECISION
+   */
+  TJPARAM_LOSSLESSPT,
+  /**
+   * JPEG restart marker interval in MCU blocks (lossy) or samples (lossless)
+   * [compression only]
+   *
+   * The nature of entropy coding is such that a corrupt JPEG image cannot
+   * be decompressed beyond the point of corruption unless it contains restart
+   * markers.  A restart marker stops and restarts the entropy coding algorithm
+   * so that, if a JPEG image is corrupted, decompression can resume at the
+   * next marker.  Thus, adding more restart markers improves the fault
+   * tolerance of the JPEG image, but adding too many restart markers can
+   * adversely affect the compression ratio and performance.
+   *
+   * **Value**
+   * - the number of MCU blocks or samples between each restart marker
+   * *[default: `0` (no restart markers)]*
+   *
+   * Setting this parameter to a non-zero value sets #TJPARAM_RESTARTROWS to 0.
+   */
+  TJPARAM_RESTARTBLOCKS,
+  /**
+   * JPEG restart marker interval in MCU rows (lossy) or sample rows (lossless)
+   * [compression only]
+   *
+   * See #TJPARAM_RESTARTBLOCKS for a description of restart markers.
+   *
+   * **Value**
+   * - the number of MCU rows or sample rows between each restart marker
+   * *[default: `0` (no restart markers)]*
+   *
+   * Setting this parameter to a non-zero value sets #TJPARAM_RESTARTBLOCKS to
+   * 0.
+   */
+  TJPARAM_RESTARTROWS,
+  /**
+   * JPEG horizontal pixel density
+   *
+   * **Value**
+   * - The JPEG image has (decompression) or will have (compression) the
+   * specified horizontal pixel density *[default for compression: `1`]*.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value of #TJPARAM_DENSITYUNITS
+   * is `2`.
+   *
+   * @see TJPARAM_DENSITYUNITS
+   */
+  TJPARAM_XDENSITY,
+  /**
+   * JPEG vertical pixel density
+   *
+   * **Value**
+   * - The JPEG image has (decompression) or will have (compression) the
+   * specified vertical pixel density *[default for compression: `1`]*.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value of #TJPARAM_DENSITYUNITS
+   * is `2`.
+   *
+   * @see TJPARAM_DENSITYUNITS
+   */
+  TJPARAM_YDENSITY,
+  /**
+   * JPEG pixel density units
+   *
+   * **Value**
+   * - `0` *[default for compression]* The pixel density of the JPEG image is
+   * expressed (decompression) or will be expressed (compression) in unknown
+   * units.
+   * - `1` The pixel density of the JPEG image is expressed (decompression) or
+   * will be expressed (compression) in units of pixels/inch.
+   * - `2` The pixel density of the JPEG image is expressed (decompression) or
+   * will be expressed (compression) in units of pixels/cm.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value is `2`.
+   *
+   * @see TJPARAM_XDENSITY, TJPARAM_YDENSITY
+   */
+  TJPARAM_DENSITYUNITS,
+  /**
+   * Memory limit for intermediate buffers
+   *
+   * **Value**
+   * - the maximum amount of memory (in megabytes) that will be allocated for
+   * intermediate buffers, which are used with progressive JPEG compression and
+   * decompression, optimized baseline entropy coding, lossless JPEG
+   * compression, and lossless transformation *[default: `0` (no limit)]*
+   */
+  TJPARAM_MAXMEMORY,
+  /**
+   * Image size limit [decompression, lossless transformation, packed-pixel
+   * image loading]
+   *
+   * Setting this parameter will cause the decompression, transform, and image
+   * loading functions to return an error if the number of pixels in the source
+   * image exceeds the specified limit.  This allows security-critical
+   * applications to guard against excessive memory consumption.
+   *
+   * **Value**
+   * - maximum number of pixels that the decompression, transform, and image
+   * loading functions will process *[default: `0` (no limit)]*
+   */
+  TJPARAM_MAXPIXELS
+};
+
+
+/**
+ * The number of error codes
+ */
+#define TJ_NUMERR  2
+
+/**
+ * Error codes
+ */
+enum TJERR {
+  /**
+   * The error was non-fatal and recoverable, but the destination image may
+   * still be corrupt.
+   */
+  TJERR_WARNING,
+  /**
+   * The error was fatal and non-recoverable.
+   */
+  TJERR_FATAL
+};
+
+
+/**
+ * The number of transform operations
+ */
+#define TJ_NUMXOP  8
+
+/**
+ * Transform operations for #tj3Transform()
+ */
+enum TJXOP {
+  /**
+   * Do not transform the position of the image pixels
+   */
+  TJXOP_NONE,
+  /**
+   * Flip (mirror) image horizontally.  This transform is imperfect if there
+   * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_HFLIP,
+  /**
+   * Flip (mirror) image vertically.  This transform is imperfect if there are
+   * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_VFLIP,
+  /**
+   * Transpose image (flip/mirror along upper left to lower right axis.)  This
+   * transform is always perfect.
+   */
+  TJXOP_TRANSPOSE,
+  /**
+   * Transverse transpose image (flip/mirror along upper right to lower left
+   * axis.)  This transform is imperfect if there are any partial MCU blocks in
+   * the image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_TRANSVERSE,
+  /**
+   * Rotate image clockwise by 90 degrees.  This transform is imperfect if
+   * there are any partial MCU blocks on the bottom edge (see
+   * #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT90,
+  /**
+   * Rotate image 180 degrees.  This transform is imperfect if there are any
+   * partial MCU blocks in the image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT180,
+  /**
+   * Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
+   * if there are any partial MCU blocks on the right edge (see
+   * #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT270
+};
+
+
+/**
+ * This option will cause #tj3Transform() to return an error if the transform
+ * is not perfect.  Lossless transforms operate on MCU blocks, whose size
+ * depends on the level of chrominance subsampling used (see #tjMCUWidth and
+ * #tjMCUHeight.)  If the image's width or height is not evenly divisible by
+ * the MCU block size, then there will be partial MCU blocks on the right
+ * and/or bottom edges.  It is not possible to move these partial MCU blocks to
+ * the top or left of the image, so any transform that would require that is
+ * "imperfect."  If this option is not specified, then any partial MCU blocks
+ * that cannot be transformed will be left in place, which will create
+ * odd-looking strips on the right or bottom edge of the image.
+ */
+#define TJXOPT_PERFECT  (1 << 0)
+/**
+ * This option will cause #tj3Transform() to discard any partial MCU blocks
+ * that cannot be transformed.
+ */
+#define TJXOPT_TRIM  (1 << 1)
+/**
+ * This option will enable lossless cropping.  See #tj3Transform() for more
+ * information.
+ */
+#define TJXOPT_CROP  (1 << 2)
+/**
+ * This option will discard the color data in the source image and produce a
+ * grayscale destination image.
+ */
+#define TJXOPT_GRAY  (1 << 3)
+/**
+ * This option will prevent #tj3Transform() from outputting a JPEG image for
+ * this particular transform.  (This can be used in conjunction with a custom
+ * filter to capture the transformed DCT coefficients without transcoding
+ * them.)
+ */
+#define TJXOPT_NOOUTPUT  (1 << 4)
+/**
+ * This option will enable progressive entropy coding in the JPEG image
+ * generated by this particular transform.  Progressive entropy coding will
+ * generally improve compression relative to baseline entropy coding (the
+ * default), but it will reduce decompression performance considerably.
+ * Can be combined with #TJXOPT_ARITHMETIC.  Implies #TJXOPT_OPTIMIZE unless
+ * #TJXOPT_ARITHMETIC is also specified.
+ */
+#define TJXOPT_PROGRESSIVE  (1 << 5)
+/**
+ * This option will prevent #tj3Transform() from copying any extra markers
+ * (including EXIF and ICC profile data) from the source image to the
+ * destination image.
+ */
+#define TJXOPT_COPYNONE  (1 << 6)
+/**
+ * This option will enable arithmetic entropy coding in the JPEG image
+ * generated by this particular transform.  Arithmetic entropy coding will
+ * generally improve compression relative to Huffman entropy coding (the
+ * default), but it will reduce decompression performance considerably.  Can be
+ * combined with #TJXOPT_PROGRESSIVE.
+ */
+#define TJXOPT_ARITHMETIC  (1 << 7)
+/**
+ * This option will enable optimized baseline entropy coding in the JPEG image
+ * generated by this particular transform.  Optimized baseline entropy coding
+ * will improve compression slightly (generally 5% or less.)
+ */
+#define TJXOPT_OPTIMIZE  (1 << 8)
+
+
+/**
+ * Scaling factor
+ */
+typedef struct {
+  /**
+   * Numerator
+   */
+  int num;
+  /**
+   * Denominator
+   */
+  int denom;
+} tjscalingfactor;
+
+/**
+ * Cropping region
+ */
+typedef struct {
+  /**
+   * The left boundary of the cropping region.  This must be evenly divisible
+   * by the MCU block width (see #tjMCUWidth.)
+   */
+  int x;
+  /**
+   * The upper boundary of the cropping region.  For lossless transformation,
+   * this must be evenly divisible by the MCU block height (see #tjMCUHeight.)
+   */
+  int y;
+  /**
+   * The width of the cropping region.  Setting this to 0 is the equivalent of
+   * setting it to the width of the source JPEG image - x.
+   */
+  int w;
+  /**
+   * The height of the cropping region.  Setting this to 0 is the equivalent of
+   * setting it to the height of the source JPEG image - y.
+   */
+  int h;
+} tjregion;
+
+/**
+ * A #tjregion structure that specifies no cropping
+ */
+static const tjregion TJUNCROPPED = { 0, 0, 0, 0 };
+
+/**
+ * Lossless transform
+ */
+typedef struct tjtransform {
+  /**
+   * Cropping region
+   */
+  tjregion r;
+  /**
+   * One of the @ref TJXOP "transform operations"
+   */
+  int op;
+  /**
+   * The bitwise OR of one of more of the @ref TJXOPT_ARITHMETIC
+   * "transform options"
+   */
+  int options;
+  /**
+   * Arbitrary data that can be accessed within the body of the callback
+   * function
+   */
+  void *data;
+  /**
+   * A callback function that can be used to modify the DCT coefficients after
+   * they are losslessly transformed but before they are transcoded to a new
+   * JPEG image.  This allows for custom filters or other transformations to be
+   * applied in the frequency domain.
+   *
+   * @param coeffs pointer to an array of transformed DCT coefficients.  (NOTE:
+   * this pointer is not guaranteed to be valid once the callback returns, so
+   * applications wishing to hand off the DCT coefficients to another function
+   * or library should make a copy of them within the body of the callback.)
+   *
+   * @param arrayRegion #tjregion structure containing the width and height of
+   * the array pointed to by `coeffs` as well as its offset relative to the
+   * component plane.  TurboJPEG implementations may choose to split each
+   * component plane into multiple DCT coefficient arrays and call the callback
+   * function once for each array.
+   *
+   * @param planeRegion #tjregion structure containing the width and height of
+   * the component plane to which `coeffs` belongs
+   *
+   * @param componentID ID number of the component plane to which `coeffs`
+   * belongs.  (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in
+   * typical JPEG images.)
+   *
+   * @param transformID ID number of the transformed image to which `coeffs`
+   * belongs.  This is the same as the index of the transform in the
+   * `transforms` array that was passed to #tj3Transform().
+   *
+   * @param transform a pointer to a #tjtransform structure that specifies the
+   * parameters and/or cropping region for this transform
+   *
+   * @return 0 if the callback was successful, or -1 if an error occurred.
+   */
+  int (*customFilter) (short *coeffs, tjregion arrayRegion,
+                       tjregion planeRegion, int componentID, int transformID,
+                       struct tjtransform *transform);
+} tjtransform;
+
+/**
+ * TurboJPEG instance handle
+ */
+typedef void *tjhandle;
+
+
+/**
+ * Compute the scaled value of `dimension` using the given scaling factor.
+ * This macro performs the integer equivalent of `ceil(dimension *
+ * scalingFactor)`.
+ */
+#define TJSCALED(dimension, scalingFactor) \
+  (((dimension) * scalingFactor.num + scalingFactor.denom - 1) / \
+   scalingFactor.denom)
+
+/**
+ * A #tjscalingfactor structure that specifies a scaling factor of 1/1 (no
+ * scaling)
+ */
+static const tjscalingfactor TJUNSCALED = { 1, 1 };
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Create a new TurboJPEG instance.
+ *
+ * @param initType one of the @ref TJINIT "initialization options"
+ *
+ * @return a handle to the newly-created instance, or NULL if an error occurred
+ * (see #tj3GetErrorStr().)
+ */
+DLLEXPORT tjhandle tj3Init(int initType);
+
+
+/**
+ * Set the value of a parameter.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param param one of the @ref TJPARAM "parameters"
+ *
+ * @param value value of the parameter (refer to @ref TJPARAM
+ * "parameter documentation")
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3Set(tjhandle handle, int param, int value);
+
+
+/**
+ * Get the value of a parameter.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param param one of the @ref TJPARAM "parameters"
+ *
+ * @return the value of the specified parameter, or -1 if the value is unknown.
+ */
+DLLEXPORT int tj3Get(tjhandle handle, int param);
+
+
+/**
+ * Compress an 8-bit-per-sample packed-pixel RGB, grayscale, or CMYK image into
+ * an 8-bit-per-sample JPEG image.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK source image to be compressed.  This buffer should normally be
+ * `pitch * height` samples in size.  However, you can also use this parameter
+ * to compress from a specific region of a larger buffer.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch samples per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to compress from a specific region of a larger buffer.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize().  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
+ * pre-allocated buffer.  In any case, unless you have set #TJPARAM_NOREALLOC,
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Compress8(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char **jpegBuf, size_t *jpegSize);
+
+/**
+ * Compress a 12-bit-per-sample packed-pixel RGB, grayscale, or CMYK image into
+ * a 12-bit-per-sample JPEG image.
+ *
+ * \details \copydetails tj3Compress8()
+ */
+DLLEXPORT int tj3Compress12(tjhandle handle, const short *srcBuf, int width,
+                            int pitch, int height, int pixelFormat,
+                            unsigned char **jpegBuf, size_t *jpegSize);
+
+/**
+ * Compress a 16-bit-per-sample packed-pixel RGB, grayscale, or CMYK image into
+ * a 16-bit-per-sample lossless JPEG image.
+ *
+ * \details \copydetails tj3Compress8()
+ */
+DLLEXPORT int tj3Compress16(tjhandle handle, const unsigned short *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char **jpegBuf, size_t *jpegSize);
+
+
+/**
+ * Compress an 8-bit-per-sample unified planar YUV image into an
+ * 8-bit-per-sample JPEG image.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be compressed.  The size of this buffer should match the value
+ * returned by #tj3YUVBufSize() for the given image width, height, row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes should be stored sequentially in the
+ * buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param width width (in pixels) of the source image.  If the width is not an
+ * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param align row alignment (in bytes) of the source image (must be a power
+ * of 2.)  Setting this parameter to n indicates that each row in each plane of
+ * the source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
+ *
+ * @param height height (in pixels) of the source image.  If the height is not
+ * an even multiple of the MCU block height (see #tjMCUHeight), then an
+ * intermediate buffer copy will be performed.
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize().  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
+ * pre-allocated buffer.  In any case, unless you have set #TJPARAM_NOREALLOC,
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3CompressFromYUV8(tjhandle handle,
+                                  const unsigned char *srcBuf, int width,
+                                  int align, int height,
+                                  unsigned char **jpegBuf, size_t *jpegSize);
+
+
+/**
+ * Compress a set of 8-bit-per-sample Y, U (Cb), and V (Cr) image planes into
+ * an 8-bit-per-sample JPEG image.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if compressing a grayscale image) that contain a YUV
+ * source image to be compressed.  These planes can be contiguous or
+ * non-contiguous in memory.  The size of each plane should match the value
+ * returned by #tj3YUVPlaneSize() for the given image width, height, strides,
+ * and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for more details.
+ *
+ * @param width width (in pixels) of the source image.  If the width is not an
+ * even multiple of the MCU block width (see #tjMCUWidth), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV source image.  Setting the stride
+ * for any plane to 0 is the same as setting it to the plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
+ * padding in each plane or to create a JPEG image from a subregion of a larger
+ * planar YUV image.
+ *
+ * @param height height (in pixels) of the source image.  If the height is not
+ * an even multiple of the MCU block height (see #tjMCUHeight), then an
+ * intermediate buffer copy will be performed.
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize().  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1, then `*jpegSize` should be set to the size of your
+ * pre-allocated buffer.  In any case, unless you have set #TJPARAM_NOREALLOC,
+ * you should always check `*jpegBuf` upon return from this function, as it may
+ * have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3CompressFromYUVPlanes8(tjhandle handle,
+                                        const unsigned char * const *srcPlanes,
+                                        int width, const int *strides,
+                                        int height, unsigned char **jpegBuf,
+                                        size_t *jpegSize);
+
+
+/**
+ * The maximum size of the buffer (in bytes) required to hold a JPEG image with
+ * the given parameters.  The number of bytes returned by this function is
+ * larger than the size of the uncompressed source image.  The reason for this
+ * is that the JPEG format uses 16-bit coefficients, so it is possible for a
+ * very high-quality source image with very high-frequency content to expand
+ * rather than compress when converted to the JPEG format.  Such images
+ * represent very rare corner cases, but since there is no way to predict the
+ * size of a JPEG image prior to compression, the corner cases have to be
+ * handled.
+ *
+ * @param width width (in pixels) of the image
+ *
+ * @param height height (in pixels) of the image
+ *
+ * @param jpegSubsamp the level of chrominance subsampling to be used when
+ * generating the JPEG image (see @ref TJSAMP
+ * "Chrominance subsampling options".)  #TJSAMP_UNKNOWN is treated like
+ * #TJSAMP_444, since a buffer large enough to hold a JPEG image with no
+ * subsampling should also be large enough to hold a JPEG image with an
+ * arbitrary level of subsampling.  Note that lossless JPEG images always
+ * use #TJSAMP_444.
+ *
+ * @return the maximum size of the buffer (in bytes) required to hold the
+ * image, or 0 if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3JPEGBufSize(int width, int height, int jpegSubsamp);
+
+
+/**
+ * The size of the buffer (in bytes) required to hold a unified planar YUV
+ * image with the given parameters.
+ *
+ * @param width width (in pixels) of the image
+ *
+ * @param align row alignment (in bytes) of the image (must be a power of 2.)
+ * Setting this parameter to n specifies that each row in each plane of the
+ * image will be padded to the nearest multiple of n bytes (1 = unpadded.)
+ *
+ * @param height height (in pixels) of the image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the size of the buffer (in bytes) required to hold the image, or 0
+ * if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3YUVBufSize(int width, int align, int height, int subsamp);
+
+
+/**
+ * The size of the buffer (in bytes) required to hold a YUV image plane with
+ * the given parameters.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param width width (in pixels) of the YUV image.  NOTE: this is the width of
+ * the whole image, not the plane width.
+ *
+ * @param stride bytes per row in the image plane.  Setting this to 0 is the
+ * equivalent of setting it to the plane width.
+ *
+ * @param height height (in pixels) of the YUV image.  NOTE: this is the height
+ * of the whole image, not the plane height.
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the size of the buffer (in bytes) required to hold the YUV image
+ * plane, or 0 if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3YUVPlaneSize(int componentID, int width, int stride,
+                                 int height, int subsamp);
+
+
+/**
+ * The plane width of a YUV image plane with the given parameters.  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for a description of plane width.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param width width (in pixels) of the YUV image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the plane width of a YUV image plane with the given parameters, or 0
+ * if the arguments are out of bounds.
+ */
+DLLEXPORT int tj3YUVPlaneWidth(int componentID, int width, int subsamp);
+
+
+/**
+ * The plane height of a YUV image plane with the given parameters.  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for a description of plane height.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param height height (in pixels) of the YUV image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the plane height of a YUV image plane with the given parameters, or
+ * 0 if the arguments are out of bounds.
+ */
+DLLEXPORT int tj3YUVPlaneHeight(int componentID, int height, int subsamp);
+
+
+/**
+ * Encode an 8-bit-per-sample packed-pixel RGB or grayscale image into an
+ * 8-bit-per-sample unified planar YUV image.  This function performs color
+ * conversion (which is accelerated in the libjpeg-turbo implementation) but
+ * does not execute any of the other steps in the JPEG compression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded.  This buffer should normally be `pitch * height`
+ * bytes in size.  However, you can also use this parameter to encode from a
+ * specific region of a larger buffer.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to encode from a specific region of a larger packed-pixel image.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * image.  Use #tj3YUVBufSize() to determine the appropriate size for this
+ * buffer based on the image width, height, row alignment, and level of
+ * chrominance subsampling (see #TJPARAM_SUBSAMP.)  The Y, U (Cb), and V (Cr)
+ * image planes will be stored sequentially in the buffer.  (Refer to
+ * @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3EncodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char *dstBuf, int align);
+
+
+/**
+ * Encode an 8-bit-per-sample packed-pixel RGB or grayscale image into separate
+ * 8-bit-per-sample Y, U (Cb), and V (Cr) image planes.  This function performs
+ * color conversion (which is accelerated in the libjpeg-turbo implementation)
+ * but does not execute any of the other steps in the JPEG compression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded.  This buffer should normally be `pitch * height`
+ * bytes in size.  However, you can also use this parameter to encode from a
+ * specific region of a larger buffer.
+ *
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to encode from a specific region of a larger packed-pixel image.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if generating a grayscale image) that will receive the
+ * encoded image.  These planes can be contiguous or non-contiguous in memory.
+ * Use #tj3YUVPlaneSize() to determine the appropriate size for each plane
+ * based on the image width, height, strides, and level of chrominance
+ * subsampling (see #TJPARAM_SUBSAMP.)  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the plane width (see @ref YUVnotes
+ * "YUV Image Format Notes".)  If `strides` is NULL, then the strides for all
+ * planes will be set to their respective plane widths.  You can adjust the
+ * strides in order to add an arbitrary amount of row padding to each plane or
+ * to encode an RGB or grayscale image into a subregion of a larger planar YUV
+ * image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3EncodeYUVPlanes8(tjhandle handle, const unsigned char *srcBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat, unsigned char **dstPlanes,
+                                  int *strides);
+
+
+/**
+ * Retrieve information about a JPEG image without decompressing it, or prime
+ * the decompressor with quantization and Huffman tables.  If a JPEG image is
+ * passed to this function, then the @ref TJPARAM "parameters" that describe
+ * the JPEG image will be set when the function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing a JPEG image or an
+ * "abbreviated table specification" (AKA "tables-only") datastream.  Passing a
+ * tables-only datastream to this function primes the decompressor with
+ * quantization and Huffman tables that can be used when decompressing
+ * subsequent "abbreviated image" datastreams.  This is useful, for instance,
+ * when decompressing video streams in which all frames share the same
+ * quantization and Huffman tables.
+ *
+ * @param jpegSize size of the JPEG image or tables-only datastream (in bytes)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressHeader(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize);
+
+
+/**
+ * Returns a list of fractional scaling factors that the JPEG decompressor
+ * supports.
+ *
+ * @param numScalingFactors pointer to an integer variable that will receive
+ * the number of elements in the list
+ *
+ * @return a pointer to a list of fractional scaling factors, or NULL if an
+ * error is encountered (see #tj3GetErrorStr().)
+ */
+DLLEXPORT tjscalingfactor *tj3GetScalingFactors(int *numScalingFactors);
+
+
+/**
+ * Set the scaling factor for subsequent lossy decompression operations.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param scalingFactor #tjscalingfactor structure that specifies a fractional
+ * scaling factor that the decompressor supports (see #tj3GetScalingFactors()),
+ * or <tt>#TJUNSCALED</tt> for no scaling.  Decompression scaling is a function
+ * of the IDCT algorithm, so scaling factors are generally limited to multiples
+ * of 1/8.  If the entire JPEG image will be decompressed, then the width and
+ * height of the scaled destination image can be determined by calling
+ * #TJSCALED() with the JPEG width and height (see #TJPARAM_JPEGWIDTH and
+ * #TJPARAM_JPEGHEIGHT) and the specified scaling factor.  When decompressing
+ * into a planar YUV image, an intermediate buffer copy will be performed if
+ * the width or height of the scaled destination image is not an even multiple
+ * of the MCU block size (see #tjMCUWidth and #tjMCUHeight.)  Note that
+ * decompression scaling is not available (and the specified scaling factor is
+ * ignored) when decompressing lossless JPEG images (see #TJPARAM_LOSSLESS),
+ * since the IDCT algorithm is not used with those images.  Note also that
+ * #TJPARAM_FASTDCT is ignored when decompression scaling is enabled.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SetScalingFactor(tjhandle handle,
+                                  tjscalingfactor scalingFactor);
+
+
+/**
+ * Set the cropping region for partially decompressing a lossy JPEG image into
+ * a packed-pixel image
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param croppingRegion #tjregion structure that specifies a subregion of the
+ * JPEG image to decompress, or <tt>#TJUNCROPPED</tt> for no cropping.  The
+ * left boundary of the cropping region must be evenly divisible by the scaled
+ * MCU block width (<tt>#TJSCALED(#tjMCUWidth[subsamp], scalingFactor)</tt>,
+ * where `subsamp` is the level of chrominance subsampling in the JPEG image
+ * (see #TJPARAM_SUBSAMP) and `scalingFactor` is the decompression scaling
+ * factor (see #tj3SetScalingFactor().)  The cropping region should be
+ * specified relative to the scaled image dimensions.  Unless `croppingRegion`
+ * is <tt>#TJUNCROPPED</tt>, the JPEG header must be read (see
+ * #tj3DecompressHeader()) prior to calling this function.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SetCroppingRegion(tjhandle handle, tjregion croppingRegion);
+
+
+/**
+ * Decompress an 8-bit-per-sample JPEG image into an 8-bit-per-sample
+ * packed-pixel RGB, grayscale, or CMYK image.  The @ref TJPARAM "parameters"
+ * that describe the JPEG image will be set when this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel
+ * decompressed image.  This buffer should normally be
+ * `pitch * destinationHeight` samples in size.  However, you can also use this
+ * parameter to decompress into a specific region of a larger buffer.  NOTE:
+ * If the JPEG image is lossy, then `destinationHeight` is either the scaled
+ * JPEG height (see #TJSCALED(), #TJPARAM_JPEGHEIGHT, and
+ * #tj3SetScalingFactor()) or the height of the cropping region (see
+ * #tj3SetCroppingRegion().)  If the JPEG image is lossless, then
+ * `destinationHeight` is the JPEG height.
+ *
+ * @param pitch samples per row in the destination image.  Normally this should
+ * be set to <tt>destinationWidth * #tjPixelSize[pixelFormat]</tt>, if the
+ * destination image should be unpadded.  (Setting this parameter to 0 is the
+ * equivalent of setting it to
+ * <tt>destinationWidth * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decompress into a specific region of
+ * a larger buffer.  NOTE: If the JPEG image is lossy, then `destinationWidth`
+ * is either the scaled JPEG width (see #TJSCALED(), #TJPARAM_JPEGWIDTH, and
+ * #tj3SetScalingFactor()) or the width of the cropping region (see
+ * #tj3SetCroppingRegion().)  If the JPEG image is lossless, then
+ * `destinationWidth` is the JPEG width.
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref
+ * TJPF "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Decompress8(tjhandle handle, const unsigned char *jpegBuf,
+                             size_t jpegSize, unsigned char *dstBuf, int pitch,
+                             int pixelFormat);
+
+/**
+ * Decompress a 12-bit-per-sample JPEG image into a 12-bit-per-sample
+ * packed-pixel RGB, grayscale, or CMYK image.
+ *
+ * \details \copydetails tj3Decompress8()
+ */
+DLLEXPORT int tj3Decompress12(tjhandle handle, const unsigned char *jpegBuf,
+                              size_t jpegSize, short *dstBuf, int pitch,
+                              int pixelFormat);
+
+/**
+ * Decompress a 16-bit-per-sample lossless JPEG image into a 16-bit-per-sample
+ * packed-pixel RGB, grayscale, or CMYK image.
+ *
+ * \details \copydetails tj3Decompress8()
+ */
+DLLEXPORT int tj3Decompress16(tjhandle handle, const unsigned char *jpegBuf,
+                              size_t jpegSize, unsigned short *dstBuf,
+                              int pitch, int pixelFormat);
+
+
+/**
+ * Decompress an 8-bit-per-sample JPEG image into an 8-bit-per-sample unified
+ * planar YUV image.  This function performs JPEG decompression but leaves out
+ * the color conversion step, so a planar YUV image is generated instead of a
+ * packed-pixel image.  The @ref TJPARAM "parameters" that describe the JPEG
+ * image will be set when this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * decompressed image.  Use #tj3YUVBufSize() to determine the appropriate size
+ * for this buffer based on the scaled JPEG width and height (see #TJSCALED(),
+ * #TJPARAM_JPEGWIDTH, #TJPARAM_JPEGHEIGHT, and #tj3SetScalingFactor()), row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes will be stored sequentially in the
+ * buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressToYUV8(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize,
+                                  unsigned char *dstBuf, int align);
+
+
+/**
+ * Decompress an 8-bit-per-sample JPEG image into separate 8-bit-per-sample Y,
+ * U (Cb), and V (Cr) image planes.  This function performs JPEG decompression
+ * but leaves out the color conversion step, so a planar YUV image is generated
+ * instead of a packed-pixel image.  The @ref TJPARAM "parameters" that
+ * describe the JPEG image will be set when this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if decompressing a grayscale image) that will receive
+ * the decompressed image.  These planes can be contiguous or non-contiguous in
+ * memory.  Use #tj3YUVPlaneSize() to determine the appropriate size for each
+ * plane based on the scaled JPEG width and height (see #TJSCALED(),
+ * #TJPARAM_JPEGWIDTH, #TJPARAM_JPEGHEIGHT, and #tj3SetScalingFactor()),
+ * strides, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  Refer
+ * to @ref YUVnotes "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the scaled plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective scaled plane widths.
+ * You can adjust the strides in order to add an arbitrary amount of row
+ * padding to each plane or to decompress the JPEG image into a subregion of a
+ * larger planar YUV image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressToYUVPlanes8(tjhandle handle,
+                                        const unsigned char *jpegBuf,
+                                        size_t jpegSize,
+                                        unsigned char **dstPlanes,
+                                        int *strides);
+
+
+/**
+ * Decode an 8-bit-per-sample unified planar YUV image into an 8-bit-per-sample
+ * packed-pixel RGB or grayscale image.  This function performs color
+ * conversion (which is accelerated in the libjpeg-turbo implementation) but
+ * does not execute any of the other steps in the JPEG decompression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be decoded.  The size of this buffer should match the value
+ * returned by #tj3YUVBufSize() for the given image width, height, row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes should be stored sequentially in the
+ * source buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV source image (must be a
+ * power of 2.)  Setting this parameter to n indicates that each row in each
+ * plane of the YUV source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size.
+ * However, you can also use this parameter to decode into a specific region of
+ * a larger buffer.
+ *
+ * @param width width (in pixels) of the source and destination images
+ *
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded.  (Setting this parameter to 0 is the equivalent of
+ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decode into a specific region of a
+ * larger buffer.
+ *
+ * @param height height (in pixels) of the source and destination images
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int align, unsigned char *dstBuf, int width,
+                            int pitch, int height, int pixelFormat);
+
+
+/**
+ * Decode a set of 8-bit-per-sample Y, U (Cb), and V (Cr) image planes into an
+ * 8-bit-per-sample packed-pixel RGB or grayscale image.  This function
+ * performs color conversion (which is accelerated in the libjpeg-turbo
+ * implementation) but does not execute any of the other steps in the JPEG
+ * decompression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if decoding a grayscale image) that contain a YUV image
+ * to be decoded.  These planes can be contiguous or non-contiguous in memory.
+ * The size of each plane should match the value returned by #tj3YUVPlaneSize()
+ * for the given image width, height, strides, and level of chrominance
+ * subsampling (see #TJPARAM_SUBSAMP.)  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV source image.  Setting the stride
+ * for any plane to 0 is the same as setting it to the plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
+ * padding in each plane or to decode a subregion of a larger planar YUV image.
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size.
+ * However, you can also use this parameter to decode into a specific region of
+ * a larger buffer.
+ *
+ * @param width width (in pixels) of the source and destination images
+ *
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded.  (Setting this parameter to 0 is the equivalent of
+ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decode into a specific region of a
+ * larger buffer.
+ *
+ * @param height height (in pixels) of the source and destination images
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecodeYUVPlanes8(tjhandle handle,
+                                  const unsigned char * const *srcPlanes,
+                                  const int *strides, unsigned char *dstBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat);
+
+
+/**
+ * Losslessly transform a JPEG image into another JPEG image.  Lossless
+ * transforms work by moving the raw DCT coefficients from one JPEG image
+ * structure to another without altering the values of the coefficients.  While
+ * this is typically faster than decompressing the image, transforming it, and
+ * re-compressing it, lossless transforms are not free.  Each lossless
+ * transform requires reading and performing entropy decoding on all of the
+ * coefficients in the source image, regardless of the size of the destination
+ * image.  Thus, this function provides a means of generating multiple
+ * transformed images from the same source or applying multiple transformations
+ * simultaneously, in order to eliminate the need to read the source
+ * coefficients multiple times.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * lossless transformation
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG source image to
+ * transform
+ *
+ * @param jpegSize size of the JPEG source image (in bytes)
+ *
+ * @param n the number of transformed JPEG images to generate
+ *
+ * @param dstBufs pointer to an array of n byte buffers.  `dstBufs[i]` will
+ * receive a JPEG image that has been transformed using the parameters in
+ * `transforms[i]`.  TurboJPEG has the ability to reallocate the JPEG
+ * destination buffer to accommodate the size of the transformed JPEG image.
+ * Thus, you can choose to:
+ * -# pre-allocate the JPEG destination buffer with an arbitrary size using
+ * #tj3Alloc() and let TurboJPEG grow the buffer as needed,
+ * -# set `dstBufs[i]` to NULL to tell TurboJPEG to allocate the buffer for
+ * you, or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() with the transformed or cropped width and height and the
+ * level of subsampling used in the source image.  Under normal circumstances,
+ * this should ensure that the buffer never has to be re-allocated.  (Setting
+ * #TJPARAM_NOREALLOC guarantees that it won't be.)  Note, however, that there
+ * are some rare cases (such as transforming images with a large amount of
+ * embedded EXIF or ICC profile data) in which the transformed JPEG image will
+ * be larger than the worst-case size, and #TJPARAM_NOREALLOC cannot be used in
+ * those cases.
+ * .
+ * If you choose option 1, then `dstSizes[i]` should be set to the size of your
+ * pre-allocated buffer.  In any case, unless you have set #TJPARAM_NOREALLOC,
+ * you should always check `dstBufs[i]` upon return from this function, as it
+ * may have changed.
+ *
+ * @param dstSizes pointer to an array of n size_t variables that will receive
+ * the actual sizes (in bytes) of each transformed JPEG image.  If `dstBufs[i]`
+ * points to a pre-allocated buffer, then `dstSizes[i]` should be set to the
+ * size of the buffer.  Upon return, `dstSizes[i]` will contain the size of the
+ * transformed JPEG image (in bytes.)
+ *
+ * @param transforms pointer to an array of n #tjtransform structures, each of
+ * which specifies the transform parameters and/or cropping region for the
+ * corresponding transformed JPEG image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Transform(tjhandle handle, const unsigned char *jpegBuf,
+                           size_t jpegSize, int n, unsigned char **dstBufs,
+                           size_t *dstSizes, const tjtransform *transforms);
+
+
+/**
+ * Destroy a TurboJPEG instance.
+ *
+ * @param handle handle to a TurboJPEG instance.  If the handle is NULL, then
+ * this function has no effect.
+ */
+DLLEXPORT void tj3Destroy(tjhandle handle);
+
+
+/**
+ * Allocate a byte buffer for use with TurboJPEG.  You should always use this
+ * function to allocate the JPEG destination buffer(s) for the compression and
+ * transform functions unless you are disabling automatic buffer (re)allocation
+ * (by setting #TJPARAM_NOREALLOC.)
+ *
+ * @param bytes the number of bytes to allocate
+ *
+ * @return a pointer to a newly-allocated buffer with the specified number of
+ * bytes.
+ *
+ * @see tj3Free()
+ */
+DLLEXPORT void *tj3Alloc(size_t bytes);
+
+
+/**
+ * Load an 8-bit-per-sample packed-pixel image from disk into memory.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file containing a packed-pixel image in Windows
+ * BMP or PBMPLUS (PPM/PGM) format.  Windows BMP files require 8-bit-per-sample
+ * data precision.  If the data precision of the PBMPLUS file does not match
+ * the target data precision, then upconverting or downconverting will be
+ * performed.
+ *
+ * @param width pointer to an integer variable that will receive the width (in
+ * pixels) of the packed-pixel image
+ *
+ * @param align row alignment (in samples) of the packed-pixel buffer to be
+ * returned (must be a power of 2.)  Setting this parameter to n will cause all
+ * rows in the buffer to be padded to the nearest multiple of n samples
+ * (1 = unpadded.)
+ *
+ * @param height pointer to an integer variable that will receive the height
+ * (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pointer to an integer variable that specifies or will
+ * receive the pixel format of the packed-pixel buffer.  The behavior of this
+ * function will vary depending on the value of `*pixelFormat` passed to the
+ * function:
+ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will
+ * use the most optimal pixel format for the file type, and `*pixelFormat` will
+ * contain the ID of that pixel format upon successful return from this
+ * function.
+ * - @ref TJPF_GRAY : Only PGM files and 8-bit-per-pixel BMP files with a
+ * grayscale colormap can be loaded.
+ * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be
+ * converted using a quick & dirty algorithm that is suitable only for testing
+ * purposes.  (Proper conversion between CMYK and other formats requires a
+ * color management system.)
+ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the
+ * specified pixel format, and pixel format conversion will be performed if
+ * necessary.
+ *
+ * @return a pointer to a newly-allocated buffer containing the packed-pixel
+ * image, converted to the chosen pixel format and with the chosen row
+ * alignment, or NULL if an error occurred (see #tj3GetErrorStr().)  This
+ * buffer should be freed using #tj3Free().
+ */
+DLLEXPORT unsigned char *tj3LoadImage8(tjhandle handle, const char *filename,
+                                       int *width, int align, int *height,
+                                       int *pixelFormat);
+
+/**
+ * Load a 12-bit-per-sample packed-pixel image from disk into memory.
+ *
+ * \details \copydetails tj3LoadImage8()
+ */
+DLLEXPORT short *tj3LoadImage12(tjhandle handle, const char *filename,
+                                int *width, int align, int *height,
+                                int *pixelFormat);
+
+/**
+ * Load a 16-bit-per-sample packed-pixel image from disk into memory.
+ *
+ * \details \copydetails tj3LoadImage8()
+ */
+DLLEXPORT unsigned short *tj3LoadImage16(tjhandle handle, const char *filename,
+                                         int *width, int align, int *height,
+                                         int *pixelFormat);
+
+
+/**
+ * Save an 8-bit-per-sample packed-pixel image from memory to disk.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file to which to save the packed-pixel image.  The
+ * image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending
+ * on the file extension.  Windows BMP files require 8-bit-per-sample data
+ * precision.
+ *
+ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK image to be saved
+ *
+ * @param width width (in pixels) of the packed-pixel image
+ *
+ * @param pitch samples per row in the packed-pixel image.  Setting this
+ * parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ *
+ * @param height height (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF
+ * "Pixel formats".)  If this parameter is set to @ref TJPF_GRAY, then the
+ * image will be stored in PGM or 8-bit-per-pixel (indexed color) BMP format.
+ * Otherwise, the image will be stored in PPM or 24-bit-per-pixel BMP format.
+ * If this parameter is set to @ref TJPF_CMYK, then the CMYK pixels will be
+ * converted to RGB using a quick & dirty algorithm that is suitable only for
+ * testing purposes.  (Proper conversion between CMYK and other formats
+ * requires a color management system.)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SaveImage8(tjhandle handle, const char *filename,
+                            const unsigned char *buffer, int width, int pitch,
+                            int height, int pixelFormat);
+
+/**
+ * Save a 12-bit-per-sample packed-pixel image from memory to disk.
+ *
+ * \details \copydetails tj3SaveImage8()
+ */
+DLLEXPORT int tj3SaveImage12(tjhandle handle, const char *filename,
+                             const short *buffer, int width, int pitch,
+                             int height, int pixelFormat);
+
+/**
+ * Save a 16-bit-per-sample packed-pixel image from memory to disk.
+ *
+ * \details \copydetails tj3SaveImage8()
+ */
+DLLEXPORT int tj3SaveImage16(tjhandle handle, const char *filename,
+                             const unsigned short *buffer, int width,
+                             int pitch, int height, int pixelFormat);
+
+
+/**
+ * Free a byte buffer previously allocated by TurboJPEG.  You should always use
+ * this function to free JPEG destination buffer(s) that were automatically
+ * (re)allocated by the compression and transform functions or that were
+ * manually allocated using #tj3Alloc().
+ *
+ * @param buffer address of the buffer to free.  If the address is NULL, then
+ * this function has no effect.
+ *
+ * @see tj3Alloc()
+ */
+DLLEXPORT void tj3Free(void *buffer);
+
+
+/**
+ * Returns a descriptive error message explaining why the last command failed.
+ *
+ * @param handle handle to a TurboJPEG instance, or NULL if the error was
+ * generated by a global function (but note that retrieving the error message
+ * for a global function is thread-safe only on platforms that support
+ * thread-local storage.)
+ *
+ * @return a descriptive error message explaining why the last command failed.
+ */
+DLLEXPORT char *tj3GetErrorStr(tjhandle handle);
+
+
+/**
+ * Returns a code indicating the severity of the last error.  See
+ * @ref TJERR "Error codes".
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @return a code indicating the severity of the last error.  See
+ * @ref TJERR "Error codes".
+ */
+DLLEXPORT int tj3GetErrorCode(tjhandle handle);
+
+
+/* Backward compatibility functions and macros (nothing to see here) */
+
+/* TurboJPEG 1.0+ */
+
+#define NUMSUBOPT  TJ_NUMSAMP
+#define TJ_444  TJSAMP_444
+#define TJ_422  TJSAMP_422
+#define TJ_420  TJSAMP_420
+#define TJ_411  TJSAMP_420
+#define TJ_GRAYSCALE  TJSAMP_GRAY
+
+#define TJ_BGR  1
+#define TJ_BOTTOMUP  TJFLAG_BOTTOMUP
+#define TJ_FORCEMMX  TJFLAG_FORCEMMX
+#define TJ_FORCESSE  TJFLAG_FORCESSE
+#define TJ_FORCESSE2  TJFLAG_FORCESSE2
+#define TJ_ALPHAFIRST  64
+#define TJ_FORCESSE3  TJFLAG_FORCESSE3
+#define TJ_FASTUPSAMPLE  TJFLAG_FASTUPSAMPLE
+
+#define TJPAD(width)  (((width) + 3) & (~3))
+
+DLLEXPORT unsigned long TJBUFSIZE(int width, int height);
+
+DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
+                         int pitch, int height, int pixelSize,
+                         unsigned char *dstBuf, unsigned long *compressedSize,
+                         int jpegSubsamp, int jpegQual, int flags);
+
+DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
+                           unsigned long jpegSize, unsigned char *dstBuf,
+                           int width, int pitch, int height, int pixelSize,
+                           int flags);
+
+DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
+                                 unsigned long jpegSize, int *width,
+                                 int *height);
+
+DLLEXPORT int tjDestroy(tjhandle handle);
+
+DLLEXPORT char *tjGetErrorStr(void);
+
+DLLEXPORT tjhandle tjInitCompress(void);
+
+DLLEXPORT tjhandle tjInitDecompress(void);
+
+/* TurboJPEG 1.1+ */
+
+#define TJ_YUV  512
+
+DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp);
+
+DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp);
+
+DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
+                                unsigned long jpegSize, unsigned char *dstBuf,
+                                int flags);
+
+DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
+                          int pitch, int height, int pixelSize,
+                          unsigned char *dstBuf, int subsamp, int flags);
+
+/* TurboJPEG 1.2+ */
+
+#define TJFLAG_BOTTOMUP  2
+#define TJFLAG_FORCEMMX  8
+#define TJFLAG_FORCESSE  16
+#define TJFLAG_FORCESSE2  32
+#define TJFLAG_FORCESSE3  128
+#define TJFLAG_FASTUPSAMPLE  256
+#define TJFLAG_NOREALLOC  1024
+
+DLLEXPORT unsigned char *tjAlloc(int bytes);
+
+DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp);
+
+DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp);
+
+DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          unsigned char **jpegBuf, unsigned long *jpegSize,
+                          int jpegSubsamp, int jpegQual, int flags);
+
+DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, unsigned char *dstBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            int flags);
+
+DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
+                           int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int subsamp, int flags);
+
+DLLEXPORT void tjFree(unsigned char *buffer);
+
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors);
+
+DLLEXPORT tjhandle tjInitTransform(void);
+
+DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, int n,
+                            unsigned char **dstBufs, unsigned long *dstSizes,
+                            tjtransform *transforms, int flags);
+
+/* TurboJPEG 1.2.1+ */
+
+#define TJFLAG_FASTDCT  2048
+#define TJFLAG_ACCURATEDCT  4096
+
+/* TurboJPEG 1.4+ */
+
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
+                                      int subsamp);
+
+DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int align, int height, int subsamp,
+                                unsigned char **jpegBuf,
+                                unsigned long *jpegSize, int jpegQual,
+                                int flags);
+
+DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
+                                      const unsigned char **srcPlanes,
+                                      int width, const int *strides,
+                                      int height, int subsamp,
+                                      unsigned char **jpegBuf,
+                                      unsigned long *jpegSize, int jpegQual,
+                                      int flags);
+
+DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags);
+
+DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
+                                const unsigned char **srcPlanes,
+                                const int *strides, int subsamp,
+                                unsigned char *dstBuf, int width, int pitch,
+                                int height, int pixelFormat, int flags);
+
+DLLEXPORT int tjDecompressHeader3(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp,
+                                  int *jpegColorspace);
+
+DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
+                                 unsigned long jpegSize, unsigned char *dstBuf,
+                                 int width, int align, int height, int flags);
+
+DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
+                                      const unsigned char *jpegBuf,
+                                      unsigned long jpegSize,
+                                      unsigned char **dstPlanes, int width,
+                                      int *strides, int height, int flags);
+
+DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int align, int subsamp,
+                           int flags);
+
+DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int pitch, int height,
+                                int pixelFormat, unsigned char **dstPlanes,
+                                int *strides, int subsamp, int flags);
+
+DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp);
+
+DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride,
+                                       int height, int subsamp);
+
+DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp);
+
+/* TurboJPEG 2.0+ */
+
+#define TJFLAG_STOPONWARNING  8192
+#define TJFLAG_PROGRESSIVE  16384
+
+DLLEXPORT int tjGetErrorCode(tjhandle handle);
+
+DLLEXPORT char *tjGetErrorStr2(tjhandle handle);
+
+DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
+                                     int align, int *height, int *pixelFormat,
+                                     int flags);
+
+DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags);
+
+/* TurboJPEG 2.1+ */
+
+#define TJFLAG_LIMITSCANS  32768
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif