Extract some full-size raw previews without LibRaw

Not all image/x-nikon-nef will work like this,
so don't claim their MIME type.
This commit is contained in:
Přemysl Eric Janouch 2023-05-23 02:09:15 +02:00
parent 0f1c61ae33
commit bb4b895cb5
Signed by: p
GPG Key ID: A0420B94F92B9493
4 changed files with 719 additions and 404 deletions

400
fiv-io.c
View File

@ -41,6 +41,10 @@
#include <lcms2.h> #include <lcms2.h>
#endif // HAVE_LCMS2 #endif // HAVE_LCMS2
#define TIFF_TABLES_CONSTANTS_ONLY
#include "tiff-tables.h"
#include "tiffer.h"
#ifdef HAVE_LIBRAW #ifdef HAVE_LIBRAW
#include <libraw.h> #include <libraw.h>
#if LIBRAW_VERSION >= LIBRAW_MAKE_VERSION(0, 21, 0) #if LIBRAW_VERSION >= LIBRAW_MAKE_VERSION(0, 21, 0)
@ -1141,32 +1145,28 @@ fail:
// --- JPEG -------------------------------------------------------------------- // --- JPEG --------------------------------------------------------------------
static GBytes * struct jpeg_metadata {
parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len) GByteArray *exif; ///< Exif buffer or NULL
GByteArray *icc; ///< ICC profile buffer or NULL
int width; ///< Image width
int height; ///< Image height
};
static void
parse_jpeg_metadata(const char *data, size_t len, struct jpeg_metadata *meta)
{ {
// Because the JPEG file format is simple, just do it manually. // Because the JPEG file format is simple, just do it manually.
// See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf // See: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
enum { enum {
APP0 = 0xE0,
APP1,
APP2,
RST0 = 0xD0,
RST1,
RST2,
RST3,
RST4,
RST5,
RST6,
RST7,
SOI = 0xD8,
EOI = 0xD9,
SOS = 0xDA,
TEM = 0x01, TEM = 0x01,
SOF0 = 0xC0, SOF1, SOF2, SOF3, DHT, SOF5, SOF6, SOF7,
JPG, SOF9, SOF10, SOF11, DAC, SOF13, SOF14, SOF15,
RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7,
SOI, EOI, SOS, DQT, DNL, DRI, DHP, EXP,
APP0, APP1, APP2, APP3, APP4, APP5, APP6, APP7,
}; };
GByteArray *exif = g_byte_array_new(), *icc = g_byte_array_new();
int icc_sequence = 0, icc_done = FALSE; int icc_sequence = 0, icc_done = FALSE;
const guint8 *p = (const guint8 *) data, *end = p + len; const guint8 *p = (const guint8 *) data, *end = p + len;
while (p + 3 < end && *p++ == 0xFF && *p != SOS && *p != EOI) { while (p + 3 < end && *p++ == 0xFF && *p != SOS && *p != EOI) {
// The previous byte is a fill byte, restart. // The previous byte is a fill byte, restart.
@ -1195,49 +1195,76 @@ parse_jpeg_metadata(cairo_surface_t *surface, const char *data, gsize len)
if (G_UNLIKELY((p += length) > end)) if (G_UNLIKELY((p += length) > end))
break; break;
switch (marker) {
case SOF0:
case SOF1:
case SOF2:
case SOF3:
case SOF5:
case SOF6:
case SOF7:
case SOF9:
case SOF10:
case SOF11:
case SOF13:
case SOF14:
case SOF15:
if (length >= 5) {
meta->width = (payload[3] << 8) + payload[4];
meta->height = (payload[1] << 8) + payload[2];
}
}
// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2 // https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf 4.7.2
// Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3 // Adobe XMP Specification Part 3: Storage in Files, 2020/1, 1.1.3
// Not checking the padding byte is intentional. // Not checking the padding byte is intentional.
if (marker == APP1 && p - payload >= 6 && // XXX: Thumbnails may in practice overflow into follow-up segments.
!memcmp(payload, "Exif\0", 5) && !exif->len) { if (meta->exif && marker == APP1 && p - payload >= 6 &&
!memcmp(payload, "Exif\0", 5) && !meta->exif->len) {
payload += 6; payload += 6;
g_byte_array_append(exif, payload, p - payload); g_byte_array_append(meta->exif, payload, p - payload);
} }
// https://www.color.org/specification/ICC1v43_2010-12.pdf B.4 // https://www.color.org/specification/ICC1v43_2010-12.pdf B.4
if (marker == APP2 && p - payload >= 14 && if (meta->icc && marker == APP2 && p - payload >= 14 &&
!memcmp(payload, "ICC_PROFILE\0", 12) && !icc_done && !memcmp(payload, "ICC_PROFILE\0", 12) && !icc_done &&
payload[12] == ++icc_sequence && payload[13] >= payload[12]) { payload[12] == ++icc_sequence && payload[13] >= payload[12]) {
payload += 14; payload += 14;
g_byte_array_append(icc, payload, p - payload); g_byte_array_append(meta->icc, payload, p - payload);
icc_done = payload[-1] == icc_sequence; icc_done = payload[-1] == icc_sequence;
} }
// TODO(p): Extract the main XMP segment. // TODO(p): Extract the main XMP segment.
} }
if (exif->len) if (meta->icc && !icc_done)
cairo_surface_set_user_data(surface, &fiv_io_key_exif, g_byte_array_set_size(meta->icc, 0);
g_byte_array_free_to_bytes(exif),
(cairo_destroy_func_t) g_bytes_unref);
else
g_byte_array_free(exif, TRUE);
GBytes *icc_profile = NULL;
if (icc_done)
cairo_surface_set_user_data(surface, &fiv_io_key_icc,
(icc_profile = g_byte_array_free_to_bytes(icc)),
(cairo_destroy_func_t) g_bytes_unref);
else
g_byte_array_free(icc, TRUE);
return icc_profile;
} }
static void static void
load_jpeg_finalize(cairo_surface_t *surface, bool cmyk, load_jpeg_finalize(cairo_surface_t *surface, bool cmyk,
FivIoProfile destination, const char *data, size_t len) FivIoProfile destination, const char *data, size_t len)
{ {
GBytes *icc_profile = parse_jpeg_metadata(surface, data, len); struct jpeg_metadata meta = {
.exif = g_byte_array_new(), .icc = g_byte_array_new()};
parse_jpeg_metadata(data, len, &meta);
if (meta.exif->len)
cairo_surface_set_user_data(surface, &fiv_io_key_exif,
g_byte_array_free_to_bytes(meta.exif),
(cairo_destroy_func_t) g_bytes_unref);
else
g_byte_array_free(meta.exif, TRUE);
GBytes *icc_profile = NULL;
if (meta.icc->len)
cairo_surface_set_user_data(surface, &fiv_io_key_icc,
(icc_profile = g_byte_array_free_to_bytes(meta.icc)),
(cairo_destroy_func_t) g_bytes_unref);
else
g_byte_array_free(meta.icc, TRUE);
FivIoProfile source = NULL; FivIoProfile source = NULL;
if (icc_profile) if (icc_profile)
source = fiv_io_profile_new( source = fiv_io_profile_new(
@ -1700,6 +1727,269 @@ fail:
return result; return result;
} }
// --- TIFF/EP + DNG -----------------------------------------------------------
// In Nikon NEF files, which claim to be TIFF/EP-compatible, IFD0 is a tiny
// uncompressed thumbnail with SubIFDs that, aside from raw sensor data,
// typically contain a nearly full-size JPEG preview.
//
// LibRaw takes too long a time to render something that will never be as good
// as the large preview, and libtiff can only read the horrible IFD0 thumbnail.
// (TIFFSetSubDirectory() requires an ImageLength tag that's missing from JPEG
// SubIFDs, and TIFFReadCustomDirectory() takes a privately defined struct that
// may not be omitted.)
//
// While LibRaw since 0.21.0 provides an API that would allow us to extract
// the JPEG, a little bit of custom processing won't hurt either.
static bool
tiffer_find(const struct tiffer *self, uint16_t tag, struct tiffer_entry *entry)
{
// Note that we could employ binary search, because tags must be ordered:
// - TIFF 6.0: Sort Order
// - ISO/DIS 12234-2: 4.1.2, 5.1
// - CIPA DC-007-2009 (Multi-Picture Format): 5.2.3., 5.2.4.
// - CIPA DC-008-2019 (Exif 2.32): 4.6.2.
// However, it doesn't seem to warrant the ugly code.
struct tiffer T = *self;
while (tiffer_next_entry(&T, entry)) {
if (entry->tag == tag)
return true;
}
*entry = (struct tiffer_entry) {};
return false;
}
static bool
tiffer_find_integer(const struct tiffer *self, uint16_t tag, int64_t *i)
{
struct tiffer_entry entry = {};
return tiffer_find(self, tag, &entry) && tiffer_integer(self, &entry, i);
}
// In case of failure, an entry with a zero "remaining_count" is returned.
static struct tiffer_entry
tiff_ep_subifds_init(const struct tiffer *T)
{
struct tiffer_entry entry = {};
(void) tiffer_find(T, TIFF_SubIFDs, &entry);
return entry;
}
static bool
tiff_ep_subifds_next(
const struct tiffer *T, struct tiffer_entry *subifds, struct tiffer *subT)
{
// XXX: Except for a zero "remaining_count", all conditions are errors,
// and should perhaps be reported.
int64_t offset = 0;
if (!tiffer_integer(T, subifds, &offset) ||
offset < 0 || offset > UINT32_MAX || !tiffer_subifd(T, offset, subT))
return false;
(void) tiffer_next_value(subifds);
return true;
}
static bool
tiff_ep_find_main(const struct tiffer *T, struct tiffer *outputT)
{
// This is a mandatory field.
int64_t type = 0;
if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type))
return false;
// This is the main image.
// (See DNG rather than ISO/DIS 12234-2 for values.)
if (type == 0) {
*outputT = *T;
return true;
}
struct tiffer_entry subifds = tiff_ep_subifds_init(T);
struct tiffer subT = {};
while (tiff_ep_subifds_next(T, &subifds, &subT))
if (tiff_ep_find_main(&subT, outputT))
return true;
return false;
}
struct tiff_ep_jpeg {
const uint8_t *jpeg; ///< JPEG data stream
size_t jpeg_length; ///< JPEG data stream length
int64_t pixels; ///< Number of pixels in the JPEG
};
static void
tiff_ep_find_jpeg_evaluate(const struct tiffer *T, struct tiff_ep_jpeg *out)
{
// This is a mandatory field.
int64_t compression = 0;
if (!tiffer_find_integer(T, TIFF_Compression, &compression))
return;
uint16_t tag_pointer = 0, tag_length = 0;
switch (compression) {
// This is how Exif specifies it, which doesn't follow TIFF 6.0.
case TIFF_Compression_JPEG:
tag_pointer = TIFF_JPEGInterchangeFormat;
tag_length = TIFF_JPEGInterchangeFormatLength;
break;
// Theoretically, there may be more strips, but this is not expected.
case TIFF_Compression_JPEGDatastream:
tag_pointer = TIFF_StripOffsets;
tag_length = TIFF_StripByteCounts;
break;
default:
return;
}
int64_t ipointer = 0, ilength = 0;
if (!tiffer_find_integer(T, tag_pointer, &ipointer) ||
!tiffer_find_integer(T, tag_length, &ilength) ||
ipointer <= 0 || ilength <= 0 ||
(uint64_t) ilength > SIZE_MAX ||
ipointer + ilength > (T->end - T->begin))
return;
// Note that to get the largest JPEG,
// we don't need to descend into Exif thumbnails.
// TODO(p): Consider DNG 1.2.0.0 PreviewColorSpace.
// But first, try to find some real-world files with it.
const uint8_t *jpeg = T->begin + ipointer;
size_t jpeg_length = ilength;
struct jpeg_metadata meta = {};
parse_jpeg_metadata((const char *) jpeg, jpeg_length, &meta);
int64_t pixels = meta.width * meta.height;
if (pixels > out->pixels) {
out->jpeg = jpeg;
out->jpeg_length = jpeg_length;
out->pixels = pixels;
}
}
static bool
tiff_ep_find_jpeg(const struct tiffer *T, struct tiff_ep_jpeg *out)
{
// This is a mandatory field.
int64_t type = 0;
if (!tiffer_find_integer(T, TIFF_NewSubfileType, &type))
return false;
// This is a thumbnail of the main image.
// (See DNG rather than ISO/DIS 12234-2 for values.)
if (type == 1)
tiff_ep_find_jpeg_evaluate(T, out);
struct tiffer_entry subifds = tiff_ep_subifds_init(T);
struct tiffer subT = {};
while (tiff_ep_subifds_next(T, &subifds, &subT))
if (!tiff_ep_find_jpeg(&subT, out))
return false;
return true;
}
static cairo_surface_t *
load_tiff_ep(
const struct tiffer *T, const FivIoOpenContext *ctx, GError **error)
{
// ISO/DIS 12234-2 is a fuck-up that says this should be in "IFD0",
// but it might have intended to say "all top-level IFDs".
// The DNG specification shares the same problem.
//
// In any case, chained TIFFs are relatively rare.
struct tiffer_entry entry = {};
bool is_tiffep = tiffer_find(T, TIFF_TIFF_EPStandardID, &entry) &&
entry.type == BYTE && entry.remaining_count == 4 &&
entry.p[0] == 1 && !entry.p[1] && !entry.p[2] && !entry.p[3];
// Apple ProRAW, e.g., does not claim TIFF/EP compatibility,
// but we should still be able to make sense of it.
bool is_supported_dng = tiffer_find(T, TIFF_DNGBackwardVersion, &entry) &&
entry.type == BYTE && entry.remaining_count == 4 &&
entry.p[0] == 1 && entry.p[1] <= 6 && !entry.p[2] && !entry.p[3];
if (!is_tiffep && !is_supported_dng) {
set_error(error, "not a supported TIFF/EP or DNG image");
return NULL;
}
struct tiffer fullT = {};
if (!tiff_ep_find_main(T, &fullT)) {
set_error(error, "could not find a main image");
return NULL;
}
int64_t width = 0, height = 0;
if (!tiffer_find_integer(&fullT, TIFF_ImageWidth, &width) ||
!tiffer_find_integer(&fullT, TIFF_ImageLength, &height) ||
width <= 0 || height <= 0) {
set_error(error, "missing or invalid main image dimensions");
return NULL;
}
struct tiff_ep_jpeg out = {};
if (!tiff_ep_find_jpeg(T, &out)) {
set_error(error, "error looking for a full-size JPEG preview");
return NULL;
}
// Nikon NEFs seem to generally have a preview above 99 percent,
// (though some of them may not even reach 50 percent).
// Be a bit more generous than that with our crop tolerance.
// TODO(p): Also take into account DNG DefaultCropSize, if present.
if (out.pixels / ((double) width * height) < 0.95) {
set_error(error, "could not find a large enough JPEG preview");
return NULL;
}
cairo_surface_t *surface = open_libjpeg_turbo(
(const char *) out.jpeg, out.jpeg_length, ctx, error);
if (!surface)
return NULL;
// Note that Exif may override this later in fiv_io_open_from_data().
// TODO(p): Try to use the Orientation field nearest to the target IFD.
// IFD0 just happens to be fine for Nikon NEF.
int64_t orientation = 0;
if (tiffer_find_integer(T, TIFF_Orientation, &orientation) &&
orientation >= 1 && orientation <= 8) {
cairo_surface_set_user_data(surface, &fiv_io_key_orientation,
(void *) (uintptr_t) orientation, NULL);
}
return surface;
}
static cairo_surface_t *
open_tiff_ep(
const char *data, gsize len, const FivIoOpenContext *ctx, GError **error)
{
// -Wunused-function, we might want to give this its own compile unit.
(void) tiffer_real;
struct tiffer T = {};
if (!tiffer_init(&T, (const uint8_t *) data, len)) {
set_error(error, "not a TIFF file");
return NULL;
}
cairo_surface_t *result = NULL, *result_tail = NULL;
while (tiffer_next_ifd(&T)) {
if (!try_append_page(
load_tiff_ep(&T, ctx, error), &result, &result_tail)) {
g_clear_pointer(&result, cairo_surface_destroy);
return NULL;
}
if (ctx->first_frame_only)
break;
// TODO(p): Try to adjust tiffer so that this isn't necessary.
struct tiffer_entry dummy = {};
while (tiffer_next_entry(&T, &dummy))
;
}
return result;
}
// --- Optional dependencies --------------------------------------------------- // --- Optional dependencies ---------------------------------------------------
#ifdef HAVE_LIBRAW // --------------------------------------------------------- #ifdef HAVE_LIBRAW // ---------------------------------------------------------
@ -2590,30 +2880,6 @@ open_libtiff(
if (!tiff) if (!tiff)
goto fail; goto fail;
// In Nikon NEF files, IFD0 is a tiny uncompressed thumbnail with SubIFDs--
// two of them JPEGs, the remaining one is raw. libtiff cannot read either
// of those better versions.
//
// TODO(p): If NewSubfileType is ReducedImage, and it has SubIFDs compressed
// as old JPEG (6), decode JPEGInterchangeFormat/JPEGInterchangeFormatLength
// with libjpeg-turbo and insert them as the starting pages.
//
// This is not possible with libtiff directly, because TIFFSetSubDirectory()
// requires an ImageLength tag that's missing, and TIFFReadCustomDirectory()
// takes a privately defined struct that cannot be omitted.
//
// TODO(p): Samsung Android DNGs also claim to be TIFF/EP, but use a smaller
// uncompressed YCbCr image. Apple ProRAW uses the new JPEG Compression (7),
// with a weird Orientation. It also uses that value for its raw data.
uint32_t subtype = 0;
uint16_t subifd_count = 0;
const uint64_t *subifd_offsets = NULL;
if (TIFFGetField(tiff, TIFFTAG_SUBFILETYPE, &subtype) &&
(subtype & FILETYPE_REDUCEDIMAGE) &&
TIFFGetField(tiff, TIFFTAG_SUBIFD, &subifd_count, &subifd_offsets) &&
subifd_count > 0 && subifd_offsets) {
}
do { do {
// We inform about unsupported directories, but do not fail on them. // We inform about unsupported directories, but do not fail on them.
GError *err = NULL; GError *err = NULL;
@ -2824,6 +3090,14 @@ fiv_io_open_from_data(
surface = open_libwebp(data, len, ctx, error); surface = open_libwebp(data, len, ctx, error);
break; break;
default: default:
// Try to extract full-size previews from TIFF/EP-compatible raws.
if ((surface = open_tiff_ep(data, len, ctx, error)))
break;
if (error) {
g_debug("%s", (*error)->message);
g_clear_error(error);
}
#ifdef HAVE_LIBRAW // --------------------------------------------------------- #ifdef HAVE_LIBRAW // ---------------------------------------------------------
if ((surface = open_libraw(data, len, ctx, error))) if ((surface = open_libraw(data, len, ctx, error)))
break; break;

View File

@ -2,6 +2,22 @@
BEGIN { BEGIN {
FS = ", *" FS = ", *"
print "// Generated by tiff-tables.awk. DO NOT MODIFY." print "// Generated by tiff-tables.awk. DO NOT MODIFY."
print ""
print "#ifndef TIFF_TABLES_CONSTANTS_ONLY"
print "#include <stddef.h>"
print "#include <stdint.h>"
print ""
print "struct tiff_value {"
print "\tconst char *name;"
print "\tuint16_t value;"
print "};"
print ""
print "struct tiff_entry {"
print "\tconst char *name;"
print "\tuint16_t tag;"
print "\tstruct tiff_value *values;"
print "};"
print "#endif"
} }
{ {
@ -55,8 +71,10 @@ function flushvalues() {
function flushsection() { function flushsection() {
if (section) { if (section) {
flushvalues() flushvalues()
print "};\n\n" allvalues "static struct tiff_entry " \ print "};\n\n" allvalues "#ifndef TIFF_TABLES_CONSTANTS_ONLY"
print "static struct tiff_entry " \
sectionsnakecase "_entries[] = {" fields "\n\t{}\n};" sectionsnakecase "_entries[] = {" fields "\n\t{}\n};"
print "#endif"
} }
} }

340
tiffer.h Normal file
View File

@ -0,0 +1,340 @@
//
// tiffer.h: TIFF reading utilities
//
// Copyright (c) 2021 - 2023, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
// --- Utilities ---------------------------------------------------------------
static uint64_t
tiffer_u64be(const uint8_t *p)
{
return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 |
(uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 |
(uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
}
static uint32_t
tiffer_u32be(const uint8_t *p)
{
return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
}
static uint16_t
tiffer_u16be(const uint8_t *p)
{
return (uint16_t) p[0] << 8 | p[1];
}
static uint64_t
tiffer_u64le(const uint8_t *p)
{
return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 |
(uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 |
(uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
}
static uint32_t
tiffer_u32le(const uint8_t *p)
{
return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
}
static uint16_t
tiffer_u16le(const uint8_t *p)
{
return (uint16_t) p[1] << 8 | p[0];
}
// --- TIFF --------------------------------------------------------------------
// libtiff is a mess, and the format is not particularly complicated.
// Exiv2 is senselessly copylefted, and cannot do much.
// libexif is only marginally better.
// ExifTool is too user-oriented.
struct un {
uint64_t (*u64) (const uint8_t *);
uint32_t (*u32) (const uint8_t *);
uint16_t (*u16) (const uint8_t *);
};
static struct un tiffer_unbe = {tiffer_u64be, tiffer_u32be, tiffer_u16be};
static struct un tiffer_unle = {tiffer_u64le, tiffer_u32le, tiffer_u16le};
struct tiffer {
struct un *un;
const uint8_t *begin, *p, *end;
uint16_t remaining_fields;
};
static bool
tiffer_u32(struct tiffer *self, uint32_t *u)
{
if (self->p < self->begin || self->p + 4 > self->end)
return false;
*u = self->un->u32(self->p);
self->p += 4;
return true;
}
static bool
tiffer_u16(struct tiffer *self, uint16_t *u)
{
if (self->p < self->begin || self->p + 2 > self->end)
return false;
*u = self->un->u16(self->p);
self->p += 2;
return true;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static bool
tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len)
{
self->un = NULL;
self->begin = self->p = tiff;
self->end = tiff + len;
self->remaining_fields = 0;
const uint8_t
le[4] = {'I', 'I', 42, 0},
be[4] = {'M', 'M', 0, 42};
if (tiff + 8 > self->end)
return false;
else if (!memcmp(tiff, le, sizeof le))
self->un = &tiffer_unle;
else if (!memcmp(tiff, be, sizeof be))
self->un = &tiffer_unbe;
else
return false;
self->p = tiff + 4;
// The first IFD needs to be read by caller explicitly,
// even though it's required to be present by TIFF 6.0.
return true;
}
/// Read the next IFD in a sequence.
static bool
tiffer_next_ifd(struct tiffer *self)
{
// All fields from any previous IFD need to be read first.
if (self->remaining_fields)
return false;
uint32_t ifd_offset = 0;
if (!tiffer_u32(self, &ifd_offset))
return false;
// There is nothing more to read, this chain has terminated.
if (!ifd_offset)
return false;
// Note that TIFF 6.0 requires there to be at least one entry,
// but there is no need for us to check it.
self->p = self->begin + ifd_offset;
return tiffer_u16(self, &self->remaining_fields);
}
/// Initialize a derived TIFF reader for a subIFD at the given location.
static bool
tiffer_subifd(
const struct tiffer *self, uint32_t offset, struct tiffer *subreader)
{
*subreader = *self;
subreader->p = subreader->begin + offset;
return tiffer_u16(subreader, &subreader->remaining_fields);
}
enum tiffer_type {
BYTE = 1, ASCII, SHORT, LONG, RATIONAL,
SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE,
IFD // This last type from TIFF Technical Note 1 isn't really used much.
};
static size_t
tiffer_value_size(enum tiffer_type type)
{
switch (type) {
case BYTE:
case SBYTE:
case ASCII:
case UNDEFINED:
return 1;
case SHORT:
case SSHORT:
return 2;
case LONG:
case SLONG:
case FLOAT:
case IFD:
return 4;
case RATIONAL:
case SRATIONAL:
case DOUBLE:
return 8;
default:
return 0;
}
}
/// A lean iterator for values within entries.
struct tiffer_entry {
uint16_t tag;
enum tiffer_type type;
// For {S,}BYTE, ASCII, UNDEFINED, use these fields directly.
const uint8_t *p;
uint32_t remaining_count;
};
static bool
tiffer_next_value(struct tiffer_entry *entry)
{
if (!entry->remaining_count)
return false;
entry->p += tiffer_value_size(entry->type);
entry->remaining_count--;
return true;
}
static bool
tiffer_integer(
const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
// TIFF 6.0 only directly suggests that a reader is should accept
// any of BYTE/SHORT/LONG for unsigned integers.
switch (entry->type) {
case BYTE:
case ASCII:
case UNDEFINED:
*out = *entry->p;
return true;
case SBYTE:
*out = (int8_t) *entry->p;
return true;
case SHORT:
*out = self->un->u16(entry->p);
return true;
case SSHORT:
*out = (int16_t) self->un->u16(entry->p);
return true;
case LONG:
case IFD:
*out = self->un->u32(entry->p);
return true;
case SLONG:
*out = (int32_t) self->un->u32(entry->p);
return true;
default:
return false;
}
}
static bool
tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry,
int64_t *numerator, int64_t *denominator)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
switch (entry->type) {
case RATIONAL:
*numerator = self->un->u32(entry->p);
*denominator = self->un->u32(entry->p + 4);
return true;
case SRATIONAL:
*numerator = (int32_t) self->un->u32(entry->p);
*denominator = (int32_t) self->un->u32(entry->p + 4);
return true;
default:
if (tiffer_integer(self, entry, numerator)) {
*denominator = 1;
return true;
}
return false;
}
}
static bool
tiffer_real(
const struct tiffer *self, const struct tiffer_entry *entry, double *out)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
// Assuming the host architecture uses IEEE 754.
switch (entry->type) {
int64_t numerator, denominator;
case FLOAT:
*out = *(float *) entry->p;
return true;
case DOUBLE:
*out = *(double *) entry->p;
return true;
default:
if (tiffer_rational(self, entry, &numerator, &denominator)) {
*out = (double) numerator / denominator;
return true;
}
return false;
}
}
static bool
tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
{
if (!self->remaining_fields)
return false;
uint16_t type = entry->type = 0xFFFF;
if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) ||
!tiffer_u32(self, &entry->remaining_count))
return false;
// Short values may and will be inlined, rather than pointed to.
size_t values_size = tiffer_value_size(type) * entry->remaining_count;
uint32_t offset = 0;
if (values_size <= sizeof offset) {
entry->p = self->p;
self->p += sizeof offset;
} else if (tiffer_u32(self, &offset)) {
entry->p = self->begin + offset;
} else {
return false;
}
// All entries are pre-checked not to overflow.
if (entry->p + values_size > self->end)
return false;
// Setting it at the end may provide an indication while debugging.
entry->type = type;
self->remaining_fields--;
return true;
}

View File

@ -21,348 +21,10 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
// --- Utilities --------------------------------------------------------------- // --- TIFF/Exif ---------------------------------------------------------------
static char *
binhex(const uint8_t *data, size_t len)
{
static const char *alphabet = "0123456789abcdef";
char *buf = calloc(1, len * 2 + 1), *p = buf;
for (size_t i = 0; i < len; i++) {
*p++ = alphabet[data[i] >> 4];
*p++ = alphabet[data[i] & 0xF];
}
return buf;
}
static uint64_t
u64be(const uint8_t *p)
{
return (uint64_t) p[0] << 56 | (uint64_t) p[1] << 48 |
(uint64_t) p[2] << 40 | (uint64_t) p[3] << 32 |
(uint64_t) p[4] << 24 | p[5] << 16 | p[6] << 8 | p[7];
}
static uint32_t
u32be(const uint8_t *p)
{
return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
}
static uint16_t
u16be(const uint8_t *p)
{
return (uint16_t) p[0] << 8 | p[1];
}
static uint64_t
u64le(const uint8_t *p)
{
return (uint64_t) p[7] << 56 | (uint64_t) p[6] << 48 |
(uint64_t) p[5] << 40 | (uint64_t) p[4] << 32 |
(uint64_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
}
static uint32_t
u32le(const uint8_t *p)
{
return (uint32_t) p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
}
static uint16_t
u16le(const uint8_t *p)
{
return (uint16_t) p[1] << 8 | p[0];
}
// --- TIFF --------------------------------------------------------------------
// libtiff is a mess, and the format is not particularly complicated.
// Exiv2 is senselessly copylefted, and cannot do much.
// libexif is only marginally better.
// ExifTool is too user-oriented.
static struct un {
uint64_t (*u64) (const uint8_t *);
uint32_t (*u32) (const uint8_t *);
uint16_t (*u16) (const uint8_t *);
} unbe = {u64be, u32be, u16be}, unle = {u64le, u32le, u16le};
struct tiffer {
struct un *un;
const uint8_t *begin, *p, *end;
uint16_t remaining_fields;
};
static bool
tiffer_u32(struct tiffer *self, uint32_t *u)
{
if (self->p < self->begin || self->p + 4 > self->end)
return false;
*u = self->un->u32(self->p);
self->p += 4;
return true;
}
static bool
tiffer_u16(struct tiffer *self, uint16_t *u)
{
if (self->p < self->begin || self->p + 2 > self->end)
return false;
*u = self->un->u16(self->p);
self->p += 2;
return true;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static bool
tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len)
{
self->un = NULL;
self->begin = self->p = tiff;
self->end = tiff + len;
self->remaining_fields = 0;
const uint8_t
le[4] = {'I', 'I', 42, 0},
be[4] = {'M', 'M', 0, 42};
if (tiff + 8 > self->end)
return false;
else if (!memcmp(tiff, le, sizeof le))
self->un = &unle;
else if (!memcmp(tiff, be, sizeof be))
self->un = &unbe;
else
return false;
self->p = tiff + 4;
// The first IFD needs to be read by caller explicitly,
// even though it's required to be present by TIFF 6.0.
return true;
}
/// Read the next IFD in a sequence.
static bool
tiffer_next_ifd(struct tiffer *self)
{
// All fields from any previous IFD need to be read first.
if (self->remaining_fields)
return false;
uint32_t ifd_offset = 0;
if (!tiffer_u32(self, &ifd_offset))
return false;
// There is nothing more to read, this chain has terminated.
if (!ifd_offset)
return false;
// Note that TIFF 6.0 requires there to be at least one entry,
// but there is no need for us to check it.
self->p = self->begin + ifd_offset;
return tiffer_u16(self, &self->remaining_fields);
}
/// Initialize a derived TIFF reader for a subIFD at the given location.
static bool
tiffer_subifd(struct tiffer *self, uint32_t offset, struct tiffer *subreader)
{
*subreader = *self;
subreader->p = subreader->begin + offset;
return tiffer_u16(subreader, &subreader->remaining_fields);
}
enum tiffer_type {
BYTE = 1, ASCII, SHORT, LONG, RATIONAL,
SBYTE, UNDEFINED, SSHORT, SLONG, SRATIONAL, FLOAT, DOUBLE,
IFD // This last type from TIFF Technical Note 1 isn't really used much.
};
static size_t
tiffer_value_size(enum tiffer_type type)
{
switch (type) {
case BYTE:
case SBYTE:
case ASCII:
case UNDEFINED:
return 1;
case SHORT:
case SSHORT:
return 2;
case LONG:
case SLONG:
case FLOAT:
case IFD:
return 4;
case RATIONAL:
case SRATIONAL:
case DOUBLE:
return 8;
default:
return 0;
}
}
/// A lean iterator for values within entries.
struct tiffer_entry {
uint16_t tag;
enum tiffer_type type;
// For {S,}BYTE, ASCII, UNDEFINED, use these fields directly.
const uint8_t *p;
uint32_t remaining_count;
};
static bool
tiffer_next_value(struct tiffer_entry *entry)
{
if (!entry->remaining_count)
return false;
entry->p += tiffer_value_size(entry->type);
entry->remaining_count--;
return true;
}
static bool
tiffer_integer(
const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
// TIFF 6.0 only directly suggests that a reader is should accept
// any of BYTE/SHORT/LONG for unsigned integers.
switch (entry->type) {
case BYTE:
case ASCII:
case UNDEFINED:
*out = *entry->p;
return true;
case SBYTE:
*out = (int8_t) *entry->p;
return true;
case SHORT:
*out = self->un->u16(entry->p);
return true;
case SSHORT:
*out = (int16_t) self->un->u16(entry->p);
return true;
case LONG:
case IFD:
*out = self->un->u32(entry->p);
return true;
case SLONG:
*out = (int32_t) self->un->u32(entry->p);
return true;
default:
return false;
}
}
static bool
tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry,
int64_t *numerator, int64_t *denominator)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
switch (entry->type) {
case RATIONAL:
*numerator = self->un->u32(entry->p);
*denominator = self->un->u32(entry->p + 4);
return true;
case SRATIONAL:
*numerator = (int32_t) self->un->u32(entry->p);
*denominator = (int32_t) self->un->u32(entry->p + 4);
return true;
default:
if (tiffer_integer(self, entry, numerator)) {
*denominator = 1;
return true;
}
return false;
}
}
static bool
tiffer_real(
const struct tiffer *self, const struct tiffer_entry *entry, double *out)
{
if (!entry->remaining_count)
return false;
// Somewhat excessively lenient, intended for display.
// Assuming the host architecture uses IEEE 754.
switch (entry->type) {
int64_t numerator, denominator;
case FLOAT:
*out = *(float *) entry->p;
return true;
case DOUBLE:
*out = *(double *) entry->p;
return true;
default:
if (tiffer_rational(self, entry, &numerator, &denominator)) {
*out = (double) numerator / denominator;
return true;
}
return false;
}
}
static bool
tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
{
if (!self->remaining_fields)
return false;
uint16_t type = entry->type = 0xFFFF;
if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) ||
!tiffer_u32(self, &entry->remaining_count))
return false;
// Short values may and will be inlined, rather than pointed to.
size_t values_size = tiffer_value_size(type) * entry->remaining_count;
uint32_t offset = 0;
if (values_size <= sizeof offset) {
entry->p = self->p;
self->p += sizeof offset;
} else if (tiffer_u32(self, &offset)) {
entry->p = self->begin + offset;
} else {
return false;
}
// All entries are pre-checked not to overflow.
if (entry->p + values_size > self->end)
return false;
// Setting it at the end may provide an indication while debugging.
entry->type = type;
self->remaining_fields--;
return true;
}
// --- TIFF/Exif tags ----------------------------------------------------------
struct tiff_value {
const char *name;
uint16_t value;
};
struct tiff_entry {
const char *name;
uint16_t tag;
struct tiff_value *values;
};
#include "tiff-tables.h" #include "tiff-tables.h"
#include "tiffer.h"
// TODO(p): Consider if these can't be inlined into `tiff_entries`. // TODO(p): Consider if these can't be inlined into `tiff_entries`.
static struct { static struct {
@ -376,6 +38,27 @@ static struct {
{} {}
}; };
// --- Utilities ---------------------------------------------------------------
#define u64be tiffer_u64be
#define u32be tiffer_u32be
#define u16be tiffer_u16be
#define u64le tiffer_u64le
#define u32le tiffer_u32le
#define u16le tiffer_u16le
static char *
binhex(const uint8_t *data, size_t len)
{
static const char *alphabet = "0123456789abcdef";
char *buf = calloc(1, len * 2 + 1), *p = buf;
for (size_t i = 0; i < len; i++) {
*p++ = alphabet[data[i] >> 4];
*p++ = alphabet[data[i] & 0xF];
}
return buf;
}
// --- Analysis ---------------------------------------------------------------- // --- Analysis ----------------------------------------------------------------
static jv static jv