Compare commits

...

2 Commits

Author SHA1 Message Date
06779c6bdd
jpeginfo: decode basic TIFF tag names 2021-12-03 14:57:55 +01:00
46c46ac093
jpeginfo: clean up 2021-12-03 14:19:48 +01:00

View File

@ -23,14 +23,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdarg.h> #include <stdarg.h>
// --- TIFF -------------------------------------------------------------------- // --- Utilities ---------------------------------------------------------------
// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFFPM6.pdf
// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf
//
// libtiff is a mess, and the format is not particularly complicated.
// Also, we'd still want to duplicate its tag tables.
// Exif libraries are senselessly copylefted.
static uint32_t static uint32_t
u32be(const uint8_t *p) u32be(const uint8_t *p)
@ -56,13 +49,20 @@ u16le(const uint8_t *p)
return (uint16_t) p[1] << 8 | p[0]; return (uint16_t) p[1] << 8 | p[0];
} }
// --- TIFF --------------------------------------------------------------------
// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
// https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFFPM6.pdf
// https://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf
//
// libtiff is a mess, and the format is not particularly complicated.
// Also, we'd still want to duplicate its tag tables.
// Exif libraries are senselessly copylefted.
static struct un { static struct un {
uint32_t (*u32) (const uint8_t *); uint32_t (*u32) (const uint8_t *);
uint16_t (*u16) (const uint8_t *); uint16_t (*u16) (const uint8_t *);
} unbe = {u32be, u16be}, unle = {u32le, u16le}; } unbe = {u32be, u16be}, unle = {u32le, u16le};
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
struct tiffer { struct tiffer {
struct un *un; struct un *un;
const uint8_t *begin, *p, *end; const uint8_t *begin, *p, *end;
@ -324,6 +324,120 @@ tiffer_next_entry(struct tiffer *self, struct tiffer_entry *entry)
return true; return true;
} }
// --- TIFF/Exif/MPF/* tags ----------------------------------------------------
// Pairs a human-readable name with a 16-bit numeric identifier.
// It is used both for tag names and for names of enumerated tag values.
// Tables of these are terminated by an entry whose name is a null pointer.
struct tiff_entry {
// Display name; a null pointer marks the end of a table.
const char *name;
// Numeric tag, or numeric value of an enumerated field.
uint16_t tag;
};
// Names of TIFF tags, ordered by ascending tag number.
// The table ends with a sentinel entry whose name is a null pointer,
// which is what lookups are expected to stop on.
//
// NOTE(review): "Threshholding" and "JPEGInterchangeFormatLngth" appear to
// follow the spelling used by the TIFF 6.0 specification itself;
// verify against the specification before "correcting" them.
static struct tiff_entry tiff_entries[] = {
	{"NewSubfileType", 254},
	{"SubfileType", 255},
	{"ImageWidth", 256},
	{"ImageLength", 257},
	{"BitsPerSample", 258},
	{"Compression", 259},
	{"PhotometricInterpretation", 262},
	{"Threshholding", 263},
	{"CellWidth", 264},
	{"CellLength", 265},
	{"FillOrder", 266},
	{"DocumentName", 269},
	{"ImageDescription", 270},
	{"Make", 271},
	{"Model", 272},
	{"StripOffsets", 273},
	{"Orientation", 274},
	{"SamplesPerPixel", 277},
	{"RowsPerStrip", 278},
	{"StripByteCounts", 279},
	{"MinSampleValue", 280},
	{"MaxSampleValue", 281},
	{"XResolution", 282},
	{"YResolution", 283},
	{"PlanarConfiguration", 284},
	{"PageName", 285},
	{"XPosition", 286},
	{"YPosition", 287},
	{"FreeOffsets", 288},
	{"FreeByteCounts", 289},
	{"GrayResponseUnit", 290},
	{"GrayResponseCurve", 291},
	{"T4Options", 292},
	{"T6Options", 293},
	{"ResolutionUnit", 296},
	{"PageNumber", 297},
	{"TransferFunction", 301},
	{"Software", 305},
	{"DateTime", 306},
	{"Artist", 315},
	{"HostComputer", 316},
	{"Predictor", 317},
	{"WhitePoint", 318},
	{"PrimaryChromaticities", 319},
	{"ColorMap", 320},
	{"HalftoneHints", 321},
	{"TileWidth", 322},
	{"TileLength", 323},
	{"TileOffsets", 324},
	{"TileByteCounts", 325},
	{"InkSet", 332},
	{"InkNames", 333},
	{"NumberOfInks", 334},
	{"DotRange", 336},
	{"TargetPrinter", 337},
	{"ExtraSamples", 338},
	{"SampleFormat", 339},
	{"SMinSampleValue", 340},
	{"SMaxSampleValue", 341},
	{"TransferRange", 342},
	{"JPEGProc", 512},
	{"JPEGInterchangeFormat", 513},
	{"JPEGInterchangeFormatLngth", 514},
	{"JPEGRestartInterval", 515},
	{"JPEGLosslessPredictors", 517},
	{"JPEGPointTransforms", 518},
	{"JPEGQTables", 519},
	{"JPEGDCTables", 520},
	{"JPEGACTables", 521},
	{"YCbCrCoefficients", 529},
	{"YCbCrSubSampling", 530},
	{"YCbCrPositioning", 531},
	{"ReferenceBlackWhite", 532},
	{"Copyright", 33432},
	// Sentinel: "{0}" rather than "{}", since empty initializer braces
	// are only valid ISO C from C23 onwards.
	{0}
};
// Compression
//
// Names of the values that the Compression tag (259) may take.
// The table ends with a sentinel entry whose name is a null pointer.
static struct tiff_entry tiff_compression_values[] = {
	{"Uncompressed", 1},
	{"CCITT 1D", 2},
	{"Group 3 Fax", 3},
	{"Group 4 Fax", 4},
	{"LZW", 5},
	{"JPEG", 6},
	{"PackBits", 32773},
	// Sentinel: "{0}" rather than "{}", since empty initializer braces
	// are only valid ISO C from C23 onwards.
	{0}
};
// PhotometricInterpretation
//
// Names of the values that the PhotometricInterpretation tag (262) may take.
// The table ends with a sentinel entry whose name is a null pointer.
static struct tiff_entry tiff_photometric_interpretation_values[] = {
	{"WhiteIsZero", 0},
	{"BlackIsZero", 1},
	{"RGB", 2},
	{"RGB Palette", 3},
	{"Transparency mask", 4},
	{"CMYK", 5},
	{"YCbCr", 6},
	{"CIELab", 8},
	// Sentinel: "{0}" rather than "{}", since empty initializer braces
	// are only valid ISO C from C23 onwards.
	{0}
};
// TODO(p): Insert tags and values from other documentation,
// so far only Appendix A from TIFF 6.0 is present.
// There are still quite a few missing constant names from there.
// --- Analysis ---------------------------------------------------------------- // --- Analysis ----------------------------------------------------------------
static jv static jv
@ -351,6 +465,17 @@ add_error(jv o, const char *message)
// --- Exif -------------------------------------------------------------------- // --- Exif --------------------------------------------------------------------
// TODO(p): Decode more and better.
// Append one IFD entry to the "TIFF" subarray of o:
// the tag's name when tiff_entries knows it, its raw number otherwise.
static jv
process_exif_entry(jv o, const struct tiffer_entry *entry)
{
	// Stop either on the first matching tag, or on the table's sentinel.
	const struct tiff_entry *known = tiff_entries;
	while (known->name && known->tag != entry->tag)
		known++;

	jv value = known->name
		? jv_string(known->name)
		: jv_number(entry->tag);
	return add_to_subarray(o, "TIFF", value);
}
static jv static jv
parse_exif(jv o, const uint8_t *p, size_t len) parse_exif(jv o, const uint8_t *p, size_t len)
{ {
@ -358,11 +483,11 @@ parse_exif(jv o, const uint8_t *p, size_t len)
if (!tiffer_init(&T, p, len)) if (!tiffer_init(&T, p, len))
return add_warning(o, "invalid Exif"); return add_warning(o, "invalid Exif");
// TODO(p): Decode more and better. // TODO(p): Turn this into an array of objects indexed by tag name.
struct tiffer_entry entry; struct tiffer_entry entry;
while (tiffer_next_ifd(&T)) { while (tiffer_next_ifd(&T)) {
while (tiffer_next_entry(&T, &entry)) { while (tiffer_next_entry(&T, &entry)) {
o = add_to_subarray(o, "TIFF", jv_number(entry.tag)); o = process_exif_entry(o, &entry);
} }
} }
return o; return o;
@ -467,7 +592,7 @@ parse_icc(jv o, const uint8_t *profile, size_t profile_len)
// https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/ // https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/
static jv static jv
parse_psir_block(jv o, uint16_t resource_id, const char *name, process_psir(jv o, uint16_t resource_id, const char *name,
const uint8_t *data, size_t len) const uint8_t *data, size_t len)
{ {
// TODO(p): There is more to extract here. The name is most often empty. // TODO(p): There is more to extract here. The name is most often empty.
@ -477,45 +602,51 @@ parse_psir_block(jv o, uint16_t resource_id, const char *name,
return add_to_subarray(o, "PSIR", jv_number(resource_id)); return add_to_subarray(o, "PSIR", jv_number(resource_id));
} }
// Parse a single Photoshop image resource block at the start of p[0..len).
//
// On success, the resource is handed over to process_psir(), and *advance
// is set to the number of bytes the block occupies, including padding.
// On failure, a warning is added to o and *advance is left at zero,
// which tells the caller to stop iterating.
static jv
parse_psir_block(jv o, const uint8_t *p, size_t len, size_t *advance)
{
	*advance = 0;
	if (len < 8 || memcmp(p, "8BIM", 4))
		return add_warning(o, "bad PSIR block header");

	uint16_t resource_id = u16be(p + 4);
	uint8_t name_len = p[6];
	const uint8_t *name = &p[7];

	// Add one byte for the Pascal-ish string length prefix,
	// then another one for padding to make the length even.
	size_t name_len_full = (name_len + 2) & ~1U;

	size_t resource_len_offset = 6 + name_len_full,
		header_len = resource_len_offset + 4;
	if (len < header_len)
		return add_warning(o, "bad PSIR block header");

	uint32_t resource_len = u32be(p + resource_len_offset);
	size_t resource_len_padded = (resource_len + 1) & ~1U;

	// The first condition catches the padding wrapping around to zero.
	// The second one compares against the remaining length, rather than
	// summing, so that it cannot wrap around where size_t is 32 bits wide;
	// len >= header_len has been established above.
	if (resource_len_padded < resource_len ||
		len - header_len < resource_len_padded)
		return add_warning(o, "runaway PSIR block");

	// The name length is a single byte, so a small fixed buffer suffices,
	// and there is no allocation that could fail. The buffer starts zeroed
	// and is one byte larger than the longest name, so the copy always
	// stays NUL-terminated.
	char cname[UINT8_MAX + 1] = "";
	memcpy(cname, name, name_len);
	o = process_psir(o, resource_id, cname, p + header_len, resource_len);

	*advance = header_len + resource_len_padded;
	return o;
}
static jv static jv
parse_psir(jv o, const uint8_t *p, size_t len) parse_psir(jv o, const uint8_t *p, size_t len)
{ {
if (len == 0) if (len == 0)
return add_warning(o, "empty PSIR data"); return add_warning(o, "empty PSIR data");
while (len) { size_t advance = 0;
if (len < 8 || memcmp(p, "8BIM", 4)) while (len && (o = parse_psir_block(o, p, len, &advance), advance)) {
return add_warning(o, "bad PSIR block header"); p += advance;
len -= advance;
uint16_t resource_id = u16be(p + 4);
uint8_t name_len = p[6];
const uint8_t *name = &p[7];
// Add one byte for the Pascal-ish string length prefix,
// then another one for padding to make the length even.
size_t name_len_full = (name_len + 2) & ~1U;
size_t resource_len_offset = 6 + name_len_full,
header_len = resource_len_offset + 4;
if (len < header_len)
return add_warning(o, "bad PSIR block header");
uint32_t resource_len = u32be(p + resource_len_offset);
size_t resource_len_padded = (resource_len + 1) & ~1U;
if (resource_len_padded < resource_len ||
len < header_len + resource_len_padded)
return add_warning(o, "runaway PSIR block");
p += header_len;
len -= header_len;
char *cname = calloc(1, name_len_full);
strncpy(cname, (const char *) name, name_len);
o = parse_psir_block(o, resource_id, cname, p, resource_len);
free(cname);
p += resource_len_padded;
len -= resource_len_padded;
} }
return o; return o;
} }