Add a tool to extract information from WebP

This commit is contained in:
Přemysl Eric Janouch 2021-12-13 18:56:33 +01:00
parent 9eb9cc44aa
commit 1478a9f83f
Signed by: p
GPG Key ID: A0420B94F92B9493
6 changed files with 227 additions and 95 deletions

1
tools/.gitignore vendored
View File

@ -1,3 +1,4 @@
/pnginfo
/jpeginfo
/tiffinfo
/webpinfo

View File

@ -5,7 +5,7 @@ CFLAGS = -g -O2 -Wall -Wextra `pkg-config --cflags $(deps)`
LDLIBS = -ljq `pkg-config --libs $(deps)`
deps = libpng
targets = pnginfo jpeginfo tiffinfo
targets = pnginfo jpeginfo tiffinfo webpinfo
all: $(targets)
$(targets): info.h

View File

@ -1275,3 +1275,97 @@ parse_psir(jv o, const uint8_t *p, size_t len)
}
return o;
}
// --- ICC profiles ------------------------------------------------------------
// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
// Read the name out of an ICC 'mluc' tag, and store it in the object
// under "ICC", as {"name": ..., "version": 4}.
//
// tag points at the tag's signature, tag_length is the number of bytes
// available from there. Only the first name record is considered.
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
{
	// v4 10.13
	if (tag_length < 16)
		return add_warning(o, "invalid ICC 'mluc' structure length");

	uint32_t count = u32be(tag + 8);
	if (count == 0)
		return add_warning(o, "unnamed ICC profile");

	// The first record spans bytes 16..27 of the tag, it must fit as well.
	if (tag_length < 16 + 12)
		return add_warning(o, "invalid ICC 'mluc' structure record");

	// There is no particularly good reason for us to iterate, take the first.
	const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
	uint32_t len = u32be(&record[4]);
	uint32_t off = u32be(&record[8]);

	// Subtract rather than add, so that hostile values cannot wrap around.
	if (off > tag_length || len > tag_length - off)
		return add_warning(o, "invalid ICC 'mluc' structure record");

	// Blindly assume simple ASCII: keep the second byte of each two-byte unit.
	// One more byte is reserved for the terminator, which also keeps
	// the array from ever being zero-sized when the record is empty.
	// NOTE(review): the array is still sized by file content (at most
	// half the tag length); consider heap allocation for huge profiles.
	uint32_t units = len / 2;
	char name[units + 1];
	for (uint32_t i = 0; i < units; i++)
		name[i] = tag[off + i * 2 + 1];
	name[units] = 0;

	return jv_set(o, jv_string("ICC"),
		JV_OBJECT(jv_string("name"), jv_string(name),
			jv_string("version"), jv_number(4)));
}
// Decode the profile description tag located at tag_offset within
// the profile, and store it in the object under "ICC".
//
// A 'desc' structure yields {"name": ..., "version": 2},
// an 'mluc' structure is passed on to parse_icc_mluc().
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
	uint32_t tag_offset, uint32_t tag_length)
{
	// Subtract rather than add, so that hostile values cannot wrap around,
	// and only form the tag pointer once it is known to be in bounds.
	if (tag_offset > profile_len || tag_length > profile_len - tag_offset)
		return add_warning(o, "unexpected end of ICC profile");
	if (tag_length < 4)
		return add_warning(o, "invalid ICC tag structure length");

	// v2 6.5.17
	const uint8_t *tag = profile + tag_offset;
	uint32_t sig = u32be(tag);
	if (sig == 0x6D6C7563 /* mluc */)
		return parse_icc_mluc(o, tag, tag_length);
	if (sig != 0x64657363 /* desc */)
		return add_warning(o, "invalid ICC 'desc' structure signature");
	if (tag_length < 12)
		return add_warning(o, "invalid ICC 'desc' structure length");

	// A count near UINT32_MAX must not be able to slip through by wrapping.
	uint32_t count = u32be(tag + 8);
	if (count > tag_length - 12)
		return add_warning(o, "invalid ICC 'desc' structure length");

	// The name ends at the first NUL byte, or after count bytes if there
	// is none. Passing an explicit length avoids copying the text into
	// a stack buffer whose size is dictated by the file.
	const char *text = (const char *) tag + 12;
	const char *nul = memchr(text, 0, count);
	size_t text_len = nul ? (size_t) (nul - text) : count;

	return jv_set(o, jv_string("ICC"),
		JV_OBJECT(jv_string("name"), jv_string_sized(text, (int) text_len),
			jv_string("version"), jv_number(2)));
}
// Look for the profile description in an ICC profile's tag table,
// and store what parse_icc_desc() makes of it in the object.
//
// When the table has no 'desc' entry, "ICC" is merely set to true.
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc(jv o, const uint8_t *profile, size_t profile_len)
{
	// v2 6, v4 7
	if (profile_len < 132)
		return add_warning(o, "ICC profile too short");
	if (u32be(profile) != profile_len)
		return add_warning(o, "ICC profile size mismatch");

	// TODO(p): May decode more of the header fields, and validate them.
	// Need to check both v2 and v4, this is all fairly annoying.

	// Divide the available space rather than multiply the untrusted count,
	// so that the check cannot be defeated by 32-bit wrap-around.
	uint32_t count = u32be(profile + 128);
	if (count > (profile_len - 132) / 12)
		return add_warning(o, "unexpected end of ICC profile");

	for (uint32_t i = 0; i < count; i++) {
		const uint8_t *entry = profile + 132 + (size_t) i * 12;
		uint32_t sig = u32be(&entry[0]);
		uint32_t off = u32be(&entry[4]);
		uint32_t len = u32be(&entry[8]);

		// v2 6.4.32, v4 9.2.41
		if (sig == 0x64657363 /* desc */)
			return parse_icc_desc(o, profile, profile_len, off, len);
	}

	// The description is required, so this should be unreachable.
	return jv_set(o, jv_string("ICC"), jv_bool(true));
}

View File

@ -25,100 +25,6 @@
#include <stdlib.h>
#include <string.h>
// --- ICC profiles ------------------------------------------------------------
// v2 https://www.color.org/ICC_Minor_Revision_for_Web.pdf
// v4 https://www.color.org/specification/ICC1v43_2010-12.pdf
// Read the name out of an ICC 'mluc' tag, and store it in the object
// under "ICC", as {"name": ..., "version": 4}.
//
// tag points at the tag's signature, tag_length is the number of bytes
// available from there. Only the first name record is considered.
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc_mluc(jv o, const uint8_t *tag, uint32_t tag_length)
{
	// v4 10.13
	if (tag_length < 16)
		return add_warning(o, "invalid ICC 'mluc' structure length");

	uint32_t count = u32be(tag + 8);
	if (count == 0)
		return add_warning(o, "unnamed ICC profile");

	// The first record spans bytes 16..27 of the tag, it must fit as well.
	if (tag_length < 16 + 12)
		return add_warning(o, "invalid ICC 'mluc' structure record");

	// There is no particularly good reason for us to iterate, take the first.
	const uint8_t *record = tag + 16 /* + i * u32be(tag + 12) */;
	uint32_t len = u32be(&record[4]);
	uint32_t off = u32be(&record[8]);

	// Subtract rather than add, so that hostile values cannot wrap around.
	if (off > tag_length || len > tag_length - off)
		return add_warning(o, "invalid ICC 'mluc' structure record");

	// Blindly assume simple ASCII: keep the second byte of each two-byte unit.
	// One more byte is reserved for the terminator, which also keeps
	// the array from ever being zero-sized when the record is empty.
	// NOTE(review): the array is still sized by file content (at most
	// half the tag length); consider heap allocation for huge profiles.
	uint32_t units = len / 2;
	char name[units + 1];
	for (uint32_t i = 0; i < units; i++)
		name[i] = tag[off + i * 2 + 1];
	name[units] = 0;

	return jv_set(o, jv_string("ICC"),
		JV_OBJECT(jv_string("name"), jv_string(name),
			jv_string("version"), jv_number(4)));
}
// Decode the profile description tag located at tag_offset within
// the profile, and store it in the object under "ICC".
//
// A 'desc' structure yields {"name": ..., "version": 2},
// an 'mluc' structure is passed on to parse_icc_mluc().
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc_desc(jv o, const uint8_t *profile, size_t profile_len,
	uint32_t tag_offset, uint32_t tag_length)
{
	// Subtract rather than add, so that hostile values cannot wrap around,
	// and only form the tag pointer once it is known to be in bounds.
	if (tag_offset > profile_len || tag_length > profile_len - tag_offset)
		return add_warning(o, "unexpected end of ICC profile");
	if (tag_length < 4)
		return add_warning(o, "invalid ICC tag structure length");

	// v2 6.5.17
	const uint8_t *tag = profile + tag_offset;
	uint32_t sig = u32be(tag);
	if (sig == 0x6D6C7563 /* mluc */)
		return parse_icc_mluc(o, tag, tag_length);
	if (sig != 0x64657363 /* desc */)
		return add_warning(o, "invalid ICC 'desc' structure signature");
	if (tag_length < 12)
		return add_warning(o, "invalid ICC 'desc' structure length");

	// A count near UINT32_MAX must not be able to slip through by wrapping.
	uint32_t count = u32be(tag + 8);
	if (count > tag_length - 12)
		return add_warning(o, "invalid ICC 'desc' structure length");

	// The name ends at the first NUL byte, or after count bytes if there
	// is none. Passing an explicit length avoids copying the text into
	// a stack buffer whose size is dictated by the file.
	const char *text = (const char *) tag + 12;
	const char *nul = memchr(text, 0, count);
	size_t text_len = nul ? (size_t) (nul - text) : count;

	return jv_set(o, jv_string("ICC"),
		JV_OBJECT(jv_string("name"), jv_string_sized(text, (int) text_len),
			jv_string("version"), jv_number(2)));
}
// Look for the profile description in an ICC profile's tag table,
// and store what parse_icc_desc() makes of it in the object.
//
// When the table has no 'desc' entry, "ICC" is merely set to true.
// Malformed input results in a warning being added through add_warning().
static jv
parse_icc(jv o, const uint8_t *profile, size_t profile_len)
{
	// v2 6, v4 7
	if (profile_len < 132)
		return add_warning(o, "ICC profile too short");
	if (u32be(profile) != profile_len)
		return add_warning(o, "ICC profile size mismatch");

	// TODO(p): May decode more of the header fields, and validate them.
	// Need to check both v2 and v4, this is all fairly annoying.

	// Divide the available space rather than multiply the untrusted count,
	// so that the check cannot be defeated by 32-bit wrap-around.
	uint32_t count = u32be(profile + 128);
	if (count > (profile_len - 132) / 12)
		return add_warning(o, "unexpected end of ICC profile");

	for (uint32_t i = 0; i < count; i++) {
		const uint8_t *entry = profile + 132 + (size_t) i * 12;
		uint32_t sig = u32be(&entry[0]);
		uint32_t off = u32be(&entry[4]);
		uint32_t len = u32be(&entry[8]);

		// v2 6.4.32, v4 9.2.41
		if (sig == 0x64657363 /* desc */)
			return parse_icc_desc(o, profile, profile_len, off, len);
	}

	// The description is required, so this should be unreachable.
	return jv_set(o, jv_string("ICC"), jv_bool(true));
}
// --- Multi-Picture Format ----------------------------------------------------
enum {

View File

@ -348,6 +348,8 @@ error:
int
main(int argc, char *argv[])
{
(void) parse_icc;
// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
// Usage: find . -iname *.png -print0 | xargs -0 ./pnginfo
for (int i = 1; i < argc; i++) {

129
tools/webpinfo.c Normal file
View File

@ -0,0 +1,129 @@
//
// webpinfo.c: acquire information about WebP files in JSON format
//
// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
#include "info.h"
#include <jv.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// --- WebP --------------------------------------------------------------------
// https://github.com/webmproject/libwebp/blob/master/doc/webp-container-spec.txt
// https://github.com/webmproject/libwebp/blob/master/doc/webp-lossless-bitstream-spec.txt
// https://datatracker.ietf.org/doc/html/rfc6386
// Walk the chunks of a RIFF/WebP file held in memory, and describe it
// in the object: the FourCC of every chunk is collected under "chunks",
// "EXIF" payloads are passed to parse_exif() and "ICCP" ones to parse_icc().
//
// p and len delimit the whole file. Structural problems end up
// in the object by way of add_error() or add_warning().
static jv
parse_webp(jv o, const uint8_t *p, size_t len)
{
	// libwebp won't let us simply iterate over all chunks, so handroll it.
	if (len < 12 || memcmp(p, "RIFF", 4) || memcmp(p + 8, "WEBP", 4))
		return add_error(o, "not a WEBP file");

	// TODO(p): This can still be parseable.
	// TODO(p): Warn on trailing data.
	// The RIFF size excludes the 8-byte header. The file is truncated
	// when it declares more than we have; len >= 12, so this cannot wrap.
	uint32_t size = unle.u32(p + 4);
	if (size > len - 8)
		return add_error(o, "truncated file");

	const uint8_t *end = p + 8 + size;
	p += 12;

	jv chunks = jv_array();
	while (p < end) {
		size_t left = end - p;
		if (left < 8) {
			o = add_warning(o, "framing mismatch");
			break;
		}

		// Payloads are padded to an even size. Compute the padded size
		// in 64 bits, or a size of 0xFFFFFFFF would wrap around to zero,
		// and compare sizes, so that no out-of-bounds pointer is formed.
		uint32_t chunk_size = unle.u32(p + 4);
		uint64_t chunk_advance = ((uint64_t) chunk_size + 1) & ~(uint64_t) 1;
		if (chunk_advance > left - 8) {
			o = add_warning(o, "runaway chunk payload");
			break;
		}

		char fourcc[5] = "";
		memcpy(fourcc, p, 4);
		chunks = jv_array_append(chunks, jv_string(fourcc));
		p += 8;

		// TODO(p): Decode VP8 and VP8L chunk metadata.
		if (!strcmp(fourcc, "EXIF"))
			o = parse_exif(o, p, chunk_size);
		if (!strcmp(fourcc, "ICCP"))
			o = parse_icc(o, p, chunk_size);

		p += (size_t) chunk_advance;
	}
	return jv_set(o, jv_string("chunks"), chunks);
}
// --- I/O ---------------------------------------------------------------------
// Read the named file into memory in its entirety, and run parse_webp()
// over the contents.
//
// Returns the object as updated by the parser; if the file cannot be
// opened or read, or memory runs out, the reason is added with add_error().
static jv
do_file(const char *filename, jv o)
{
	const char *err = NULL;
	FILE *fp = fopen(filename, "rb");
	if (!fp) {
		err = strerror(errno);
		goto error;
	}

	uint8_t *data = NULL, buf[256 << 10];
	size_t n, len = 0;
	while ((n = fread(buf, sizeof *buf, sizeof buf / sizeof *buf, fp))) {
		// Keep the old pointer until the reallocation is known to have
		// succeeded, so that it can still be freed on failure.
		uint8_t *grown = realloc(data, len + n);
		if (!grown) {
			err = strerror(ENOMEM);
			goto error_read;
		}

		data = grown;
		memcpy(data + len, buf, n);
		len += n;
	}
	if (ferror(fp)) {
		err = strerror(errno);
		goto error_read;
	}

	o = parse_webp(o, data, len);

error_read:
	fclose(fp);
	free(data);
error:
	if (err)
		o = add_error(o, err);
	return o;
}
// Print one line of JSON per file named on the command line,
// each holding the file's name and whatever do_file() has found out.
int
main(int argc, char *argv[])
{
	// Mention parse_psir, which this tool has no use for
	// (presumably to silence an unused function warning).
	(void) parse_psir;

	// XXX: Can't use `xargs -P0`, there's a risk of non-atomic writes.
	// Usage: find . -iname '*.webp' -print0 | xargs -0 ./webpinfo
	int i = 0;
	while (++i < argc) {
		jv name = jv_string(argv[i]);
		jv o = jv_object_set(jv_object(), jv_string("filename"), name);
		o = do_file(argv[i], o);

		// Might consider JV_PRINT_SORTED.
		jv_dumpf(o, stdout, 0);
		fputc('\n', stdout);
	}
	return 0;
}