From 38427ff88effeb13e02e5124243d78421a9d1527 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?=
Date: Thu, 2 Dec 2021 08:26:59 +0100
Subject: [PATCH] jpeginfo: add a basic TIFF/Exif parser
---
tools/jpeginfo.c | 319 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 312 insertions(+), 7 deletions(-)
diff --git a/tools/jpeginfo.c b/tools/jpeginfo.c
index 3060290..7d6ea66 100644
--- a/tools/jpeginfo.c
+++ b/tools/jpeginfo.c
@@ -23,6 +23,307 @@
#include
#include
+// --- TIFF --------------------------------------------------------------------
+// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
+// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFFPM6.pdf
+// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf
+//
+// libtiff is a mess, and the format is not particularly complicated.
+// Also, we'd still want to duplicate its tag tables.
+// Exif libraries are senselessly copylefted.
+
+/// Decode an unsigned 32-bit big-endian integer from the four bytes at p.
+static uint32_t
+u32be(const uint8_t *p)
+{
+	uint32_t value = 0;
+	// Most significant byte comes first.
+	for (int i = 0; i < 4; i++)
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode an unsigned 16-bit big-endian integer from the two bytes at p.
+static uint16_t
+u16be(const uint8_t *p)
+{
+	const unsigned high = p[0], low = p[1];
+	return (uint16_t) (high << 8 | low);
+}
+
+/// Decode an unsigned 32-bit little-endian integer from the four bytes at p.
+static uint32_t
+u32le(const uint8_t *p)
+{
+	uint32_t value = 0;
+	// Most significant byte comes last, so walk the bytes backwards.
+	for (int i = 4; i-- > 0; )
+		value = value << 8 | p[i];
+	return value;
+}
+
+/// Decode an unsigned 16-bit little-endian integer from the two bytes at p.
+static uint16_t
+u16le(const uint8_t *p)
+{
+	const unsigned low = p[0], high = p[1];
+	return (uint16_t) (high << 8 | low);
+}
+
+/// A pair of integer decoders sharing one byte order.
+struct un {
+	uint32_t (*u32) (const uint8_t *);
+	uint16_t (*u16) (const uint8_t *);
+};
+
+/// Big-endian decoders.
+static struct un unbe = {.u32 = u32be, .u16 = u16be};
+/// Little-endian decoders.
+static struct un unle = {.u32 = u32le, .u16 = u16le};
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// State of a sequential reader over an in-memory TIFF blob.
+struct tiffer {
+	// Decoders matching the byte order announced by the header.
+	struct un *un;
+	// First byte of the blob; IFD offsets are added to this.
+	const uint8_t *begin;
+	// Current read position.
+	const uint8_t *p;
+	// One past the last byte of the blob.
+	const uint8_t *end;
+	// Entries of the current IFD that have not been read yet.
+	uint16_t remaining_fields;
+};
+
+/// Read a 32-bit unsigned integer at the cursor, then advance past it.
+/// Returns false, leaving both the cursor and *u untouched,
+/// when fewer than four bytes remain.
+static bool
+tiffer_u32(struct tiffer *self, uint32_t *u)
+{
+	// Compare the remaining distance rather than "p + 4 > end":
+	// a pointer beyond one-past-the-end may not be formed, and can wrap.
+	// The signed difference also rejects a cursor that is already past end.
+	if (self->end - self->p < 4)
+		return false;
+
+	*u = self->un->u32(self->p);
+	self->p += 4;
+	return true;
+}
+
+/// Read a 16-bit unsigned integer at the cursor, then advance past it.
+/// Returns false, leaving both the cursor and *u untouched,
+/// when fewer than two bytes remain.
+static bool
+tiffer_u16(struct tiffer *self, uint16_t *u)
+{
+	// Compare the remaining distance rather than "p + 2 > end":
+	// a pointer beyond one-past-the-end may not be formed, and can wrap.
+	// The signed difference also rejects a cursor that is already past end.
+	if (self->end - self->p < 2)
+		return false;
+
+	*u = self->un->u16(self->p);
+	self->p += 2;
+	return true;
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/// Set up a reader over the len bytes at tiff.
+/// Checks the byte-order mark together with the magic number 42, and picks
+/// the matching integer decoders. Returns false for input shorter than
+/// the eight-byte header, or with an unrecognized signature.
+/// On success, the cursor is left at the offset of the first IFD.
+static bool
+tiffer_init(struct tiffer *self, const uint8_t *tiff, size_t len)
+{
+	self->un = NULL;
+	self->begin = self->p = tiff;
+	self->end = tiff + len;
+	self->remaining_fields = 0;
+
+	const uint8_t
+		le[4] = {'I', 'I', 42, 0},
+		be[4] = {'M', 'M', 0, 42};
+
+	// Check the length itself: "tiff + 8" may not be formed for short input.
+	if (len < 8)
+		return false;
+	else if (!memcmp(tiff, le, sizeof le))
+		self->un = &unle;
+	else if (!memcmp(tiff, be, sizeof be))
+		self->un = &unbe;
+	else
+		return false;
+
+	self->p = tiff + 4;
+	// The first IFD needs to be read by caller explicitly.
+	return true;
+}
+
+/// Read the next IFD in a sequence.
+/// Expects the cursor to be at a 32-bit IFD offset, follows it,
+/// and loads the number of entries into remaining_fields.
+/// Returns false when entries of the previous IFD remain unread,
+/// when the chain has ended (zero offset), or when the offset
+/// or the entry count cannot be read from within the blob.
+static bool
+tiffer_next_ifd(struct tiffer *self)
+{
+	// All fields from any previous IFD need to be read first.
+	if (self->remaining_fields)
+		return false;
+
+	uint32_t ifd_offset = 0;
+	if (!tiffer_u32(self, &ifd_offset))
+		return false;
+
+	// There is nothing more to read, this chain has terminated.
+	if (!ifd_offset)
+		return false;
+
+	// The offset comes from the file, so validate it before forming
+	// a pointer from it: out-of-range pointer arithmetic is undefined,
+	// and could wrap around to pass later bounds checks.
+	if (ifd_offset > (size_t) (self->end - self->begin)) {
+		// Park the cursor, so that any further reads keep failing.
+		self->p = self->end;
+		return false;
+	}
+
+	self->p = self->begin + ifd_offset;
+	return tiffer_u16(self, &self->remaining_fields);
+}
+
+/// Initialize a derived TIFF reader for a subIFD at the given location.
+/// The subreader copies the parent's byte order and bounds, the parent
+/// itself is not modified. Returns false when the offset or the entry count
+/// lie outside the blob, in which case the subreader yields no entries.
+static bool
+tiffer_subifd(struct tiffer *self, uint32_t offset, struct tiffer *subreader)
+{
+	*subreader = *self;
+	// Do not inherit the parent's unread entry count on failure.
+	subreader->remaining_fields = 0;
+
+	// The offset comes from the file, so validate it before forming
+	// a pointer from it: out-of-range pointer arithmetic is undefined.
+	if (offset > (size_t) (subreader->end - subreader->begin)) {
+		subreader->p = subreader->end;
+		return false;
+	}
+
+	subreader->p = subreader->begin + offset;
+	return tiffer_u16(subreader, &subreader->remaining_fields);
+}
+
+/// Field types of IFD entries, with their on-disk numeric codes.
+enum tiffer_type {
+	BYTE      = 1,
+	ASCII     = 2,
+	SHORT     = 3,
+	LONG      = 4,
+	RATIONAL  = 5,
+	SBYTE     = 6,
+	UNDEFINED = 7,
+	SSHORT    = 8,
+	SLONG     = 9,
+	SRATIONAL = 10,
+	FLOAT     = 11,
+	DOUBLE    = 12,
+	// This last type isn't really used much.
+	IFD       = 13
+};
+
+/// Return the size in bytes of a single value of the given type,
+/// or zero for type codes that are not known.
+static size_t
+tiffer_value_size(enum tiffer_type type)
+{
+	// Indexed by type code; slots not listed here stay zero.
+	static const uint8_t sizes[] = {
+		[BYTE] = 1, [SBYTE] = 1, [ASCII] = 1, [UNDEFINED] = 1,
+		[SHORT] = 2, [SSHORT] = 2,
+		[LONG] = 4, [SLONG] = 4, [FLOAT] = 4, [IFD] = 4,
+		[RATIONAL] = 8, [SRATIONAL] = 8, [DOUBLE] = 8,
+	};
+
+	// Going through unsigned turns any negative code into a large index.
+	const unsigned code = (unsigned) type;
+	if (code >= sizeof sizes / sizeof sizes[0])
+		return 0;
+	return sizes[code];
+}
+
+/// A lean iterator for values within entries.
+/// Filled in by tiffer_next_entry(), advanced by tiffer_next_value().
+struct tiffer_entry {
+ // Tag number, as read from the directory entry.
+ uint16_t tag;
+ // Value type; tiffer_next_entry() stores 0xFFFF here first,
+ // and only assigns the real type once the entry has been validated.
+ enum tiffer_type type;
+ // For {S,}BYTE, ASCII, UNDEFINED, use these fields directly.
+ // Points at the current value.
+ const uint8_t *p;
+ // Number of values left, including the one at p.
+ uint32_t remaining_count;
+};
+
+/// Step over the current value of an entry.
+/// Returns false, changing nothing, once no values remain.
+static bool
+tiffer_next_value(struct tiffer_entry *entry)
+{
+	if (entry->remaining_count > 0) {
+		entry->remaining_count--;
+		entry->p += tiffer_value_size(entry->type);
+		return true;
+	}
+	return false;
+}
+
+/// Decode the current value of an entry as an integer.
+/// Returns false, leaving *out untouched, when no values remain,
+/// or when the entry's type is not an integral one.
+static bool
+tiffer_integer(
+	const struct tiffer *self, const struct tiffer_entry *entry, int64_t *out)
+{
+	if (entry->remaining_count == 0)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	const uint8_t *data = entry->p;
+	int64_t value = 0;
+	switch (entry->type) {
+	case SBYTE:
+		value = (int8_t) *data;
+		break;
+	case BYTE:
+	case ASCII:
+	case UNDEFINED:
+		value = *data;
+		break;
+	case SSHORT:
+		value = (int16_t) self->un->u16(data);
+		break;
+	case SHORT:
+		value = self->un->u16(data);
+		break;
+	case SLONG:
+		value = (int32_t) self->un->u32(data);
+		break;
+	case LONG:
+	case IFD:
+		value = self->un->u32(data);
+		break;
+	default:
+		return false;
+	}
+
+	*out = value;
+	return true;
+}
+
+/// Decode the current value of an entry as a fraction.
+/// Integral types are accepted as well, with a denominator of one.
+/// Returns false when no values remain, or the type is not convertible.
+static bool
+tiffer_rational(const struct tiffer *self, const struct tiffer_entry *entry,
+	int64_t *numerator, int64_t *denominator)
+{
+	if (entry->remaining_count == 0)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	const bool is_signed = entry->type == SRATIONAL;
+	if (!is_signed && entry->type != RATIONAL) {
+		if (!tiffer_integer(self, entry, numerator))
+			return false;
+
+		*denominator = 1;
+		return true;
+	}
+
+	const uint32_t top = self->un->u32(entry->p);
+	const uint32_t bottom = self->un->u32(entry->p + 4);
+	if (is_signed) {
+		// Reinterpret both halves as signed before widening.
+		*numerator = (int32_t) top;
+		*denominator = (int32_t) bottom;
+	} else {
+		*numerator = top;
+		*denominator = bottom;
+	}
+	return true;
+}
+
+/// Decode the current value of an entry as a floating point number.
+/// Rationals are divided out (a zero denominator is not rejected),
+/// integral types are converted. Returns false when no values remain,
+/// or when the type is not numeric.
+static bool
+tiffer_real(
+	const struct tiffer *self, const struct tiffer_entry *entry, double *out)
+{
+	if (!entry->remaining_count)
+		return false;
+
+	// Somewhat excessively lenient, intended for display.
+	int64_t integer = 0;
+	switch (entry->type) {
+	case RATIONAL:
+		*out = self->un->u32(entry->p) / (double) self->un->u32(entry->p + 4);
+		return true;
+	case SRATIONAL:
+		*out = (int32_t) self->un->u32(entry->p) /
+			(double) (int32_t) self->un->u32(entry->p + 4);
+		return true;
+	case FLOAT: {
+		// Go through the byte-order-aware decoder and memcpy(), rather than
+		// casting the byte pointer: the data may be misaligned, be in
+		// the other byte order, and such a cast breaks aliasing rules.
+		// This assumes the host's float is a 32-bit IEEE 754 one.
+		uint32_t bits = self->un->u32(entry->p);
+		float value = 0;
+		memcpy(&value, &bits, sizeof value);
+		*out = value;
+		return true;
+	}
+	case DOUBLE: {
+		// There is no 64-bit decoder, so join two 32-bit halves
+		// in the order that the file's byte order dictates.
+		// This assumes the host's double is a 64-bit IEEE 754 one.
+		uint32_t first = self->un->u32(entry->p);
+		uint32_t second = self->un->u32(entry->p + 4);
+		uint64_t bits = self->un == &unle
+			? (uint64_t) second << 32 | first
+			: (uint64_t) first << 32 | second;
+		double value = 0;
+		memcpy(&value, &bits, sizeof value);
+		*out = value;
+		return true;
+	}
+	default:
+		if (!tiffer_integer(self, entry, &integer))
+			return false;
+
+		*out = integer;
+		return true;
+	}
+}
+
+/// Read the next entry of the current IFD.
+/// On success, fills in the entry's tag, type and value count, points
+/// entry->p at its first value, and leaves the cursor at the following entry.
+/// Returns false when the IFD has no more entries, when the entry
+/// is truncated, or when its values do not fit within the blob.
+/// Unknown types are let through, with a value size of zero.
+static bool
+tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
+{
+	if (!self->remaining_fields)
+		return false;
+
+	uint16_t type = entry->type = 0xFFFF;
+	if (!tiffer_u16(self, &entry->tag) || !tiffer_u16(self, &type) ||
+		!tiffer_u32(self, &entry->remaining_count))
+		return false;
+
+	// Compute in 64 bits: eight bytes times a 32-bit count
+	// would otherwise wrap around where size_t is 32 bits wide.
+	uint64_t values_size =
+		(uint64_t) tiffer_value_size(type) * entry->remaining_count;
+
+	// The four-byte value/offset field must be present in either case.
+	// The reads above have succeeded, so the cursor is within bounds.
+	uint32_t offset = 0;
+	if ((size_t) (self->end - self->p) < sizeof offset)
+		return false;
+
+	// Short values may and will be inlined, rather than pointed to.
+	if (values_size <= sizeof offset) {
+		entry->p = self->p;
+		self->p += sizeof offset;
+	} else {
+		if (!tiffer_u32(self, &offset))
+			return false;
+
+		// Offsets count from the beginning of the file, not from the cursor.
+		// Validate using integers, before any pointer is formed.
+		uint64_t length = (uint64_t) (self->end - self->begin);
+		if (offset > length || values_size > length - offset)
+			return false;
+
+		entry->p = self->begin + offset;
+	}
+
+	// Setting it at the end may provide an indication while debugging.
+	entry->type = type;
+	self->remaining_fields--;
+	return true;
+}
+
// --- Analysis ----------------------------------------------------------------
static jv
@@ -53,7 +354,17 @@ add_error(jv o, const char *message)
static jv
parse_exif(jv o, const uint8_t *p, size_t len)
{
- // TODO(p): Decode.
+	struct tiffer T;
+	if (!tiffer_init(&T, p, len))
+		return add_warning(o, "invalid Exif");
+
+	// A chain of distinct IFDs cannot be longer than there are byte offsets
+	// in the data, so anything beyond that means the chain loops back
+	// on itself, and following it would never finish.
+	size_t ifds_left = len;
+
+	// TODO(p): Decode more and better.
+	struct tiffer_entry entry;
+	while (tiffer_next_ifd(&T)) {
+		if (!ifds_left--)
+			return add_warning(o, "cyclic IFD chain in Exif");
+
+		// Collect the tag numbers of all readable entries.
+		while (tiffer_next_entry(&T, &entry)) {
+			o = add_to_subarray(o, "TIFF", jv_number(entry.tag));
+		}
+	}
return o;
}
@@ -61,12 +372,6 @@ parse_exif(jv o, const uint8_t *p, size_t len)
// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
-static uint32_t
-u32be(const uint8_t *p)
-{
- return (uint32_t) p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
-}
-
static jv
parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
{