tdv/src/dictzip-input-stream.c

/*
 * dictzip-input-stream.c: dictzip GIO stream reader
 *
 * Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>
 * All rights reserved.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <glib.h>
#include <gio/gio.h>

#include <zlib.h>

#include "utils.h"
#include "dictzip-input-stream.h"


// --- Errors ------------------------------------------------------------------

GQuark
dictzip_error_quark (void)
{
	return g_quark_from_static_string ("dictzip-error-quark");
}

// --- dictzip utilities -------------------------------------------------------

/** Release the heap-allocated members of @a gzh (name, extra, comment)
 *  and reset them to NULL, so that a repeated call is harmless. */
static void
free_gzip_header (gz_header *gzh)
{
	g_free (gzh->name);
	g_free (gzh->extra);
	g_free (gzh->comment);

	gzh->name    = NULL;
	gzh->extra   = NULL;
	gzh->comment = NULL;
}

/**
 * Parse a gzip member header from @a is into @a gzh.
 *
 * The header is decoded by hand rather than through zlib.  The magic bytes
 * and the compression method are validated, the fixed fields are stored,
 * and the optional "extra", "name" and "comment" fields are read into
 * newly allocated buffers.  When the header CRC flag is set, the header is
 * re-read from offset 0 of @a is and its CRC-32 (truncated to 16 bits)
 * is compared with the stored value.
 *
 * @param[in]  is                  Stream to read the header from; it is
 *                                 seeked when a header CRC is present.
 * @param[out] gzh                 Zeroed and then filled in.  On success the
 *                                 caller releases it with free_gzip_header();
 *                                 on failure it has already been released.
 * @param[out] first_block_offset  Offset of the first byte after the header.
 * @param[out] error               Location for a GError, may be NULL.
 * @return TRUE on success, FALSE with @a error set on failure.
 */
static gboolean
read_gzip_header (GInputStream *is, gz_header *gzh,
	goffset *first_block_offset, GError **error)
{
	assert (is != NULL);
	assert (gzh != NULL);

	GDataInputStream *dis = g_data_input_stream_new (is);
	g_data_input_stream_set_byte_order (dis,
		G_DATA_STREAM_BYTE_ORDER_LITTLE_ENDIAN);
	g_filter_input_stream_set_close_base_stream
		(G_FILTER_INPUT_STREAM (dis), FALSE);

	GError *err = NULL;
	// Declared up here so that every error path can release it.
	gpointer header_buf = NULL;
	memset (gzh, 0, sizeof *gzh);

	// File header identification
	if (g_data_input_stream_read_byte (dis, NULL, &err) != 31
	 || g_data_input_stream_read_byte (dis, NULL, &err) != 139)
	{
		if (err)
			g_propagate_error (error, err);
		else
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"wrong header magic");
		goto error_own;
	}

	// Compression method, only "deflate" is supported here
	if (g_data_input_stream_read_byte (dis, NULL, &err) != Z_DEFLATED)
	{
		if (err)
			g_propagate_error (error, err);
		else
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"unsupported compression method");
		goto error_own;
	}

	guint flags = g_data_input_stream_read_byte (dis, NULL, &err);
	if (err) goto error;

	gzh->text = ((flags & 1) != 0);
	gzh->hcrc = ((flags & 2) != 0);

	gzh->time = g_data_input_stream_read_uint32 (dis, NULL, &err);
	if (err) goto error;

	gzh->xflags = g_data_input_stream_read_byte (dis, NULL, &err);
	if (err) goto error;

	gzh->os = g_data_input_stream_read_byte (dis, NULL, &err);
	if (err) goto error;

	// Optional "extra" field: 16-bit length followed by that many bytes
	if (flags & 4)
	{
		gzh->extra_len = g_data_input_stream_read_uint16 (dis, NULL, &err);
		if (err) goto error;
		gzh->extra_max = gzh->extra_len;

		if (gzh->extra_len > 0)
		{
			gzh->extra = g_malloc (gzh->extra_len);

			// A single read may legitimately return less than requested,
			// so keep reading until the field is complete or EOF is hit.
			gsize extra_read = 0;
			g_input_stream_read_all (G_INPUT_STREAM (dis),
				gzh->extra, gzh->extra_len, &extra_read, NULL, &err);
			if (err) goto error;

			if (extra_read != gzh->extra_len)
			{
				g_set_error (error,
					DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
					"unexpected end of file");
				goto error_own;
			}
		}
	}

	// Optional original file name
	if (flags & 8)
	{
		gzh->name = (Bytef *) stream_read_string (dis, &err);
		if (err) goto error;
		gzh->name_max = strlen ((char *) gzh->name) + 1;
	}

	// Optional comment
	if (flags & 16)
	{
		gzh->comment = (Bytef *) stream_read_string (dis, &err);
		if (err) goto error;
		gzh->comm_max = strlen ((char *) gzh->comment) + 1;
	}

	goffset header_size_sans_crc = g_seekable_tell (G_SEEKABLE (dis));

	if (!gzh->hcrc)
		*first_block_offset = header_size_sans_crc;
	else
	{
		*first_block_offset = header_size_sans_crc + 2;
		uLong header_crc = g_data_input_stream_read_uint16 (dis, NULL, &err);
		if (err) goto error;

		// Re-read the raw header bytes to compute the checksum over them.
		g_seekable_seek (G_SEEKABLE (is), 0, G_SEEK_SET, NULL, &err);
		if (err) goto error;

		gsize header_read = 0;
		header_buf = g_malloc (header_size_sans_crc);
		g_input_stream_read_all (is, header_buf, header_size_sans_crc,
			&header_read, NULL, &err);
		if (err) goto error;

		if (header_read != (gsize) header_size_sans_crc)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"unexpected end of file");
			goto error_own;
		}

		uLong crc = crc32 (0, NULL, 0);
		crc = crc32 (crc, header_buf, header_size_sans_crc);
		g_free (header_buf);
		header_buf = NULL;

		// Only the low 16 bits of the CRC-32 are stored in the header.
		if (header_crc != (guint16) crc)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"header checksum mismatch");
			goto error_own;
		}
	}

	gzh->done = 1;
	g_object_unref (dis);
	return TRUE;

error:
	g_propagate_error (error, err);
error_own:
	g_free (header_buf);
	free_gzip_header (gzh);
	g_object_unref (dis);
	return FALSE;
}

/**
 * Locate and decode the dictzip "RA" (random access) subfield within
 * the gzip "extra" field of @a gzh.
 *
 * The extra field is walked as a sequence of subfields, each made of
 * a two-byte identifier, a little-endian 16-bit payload length and the
 * payload.  The RA payload holds a version (must be 1), the uncompressed
 * chunk length, the number of chunks, and one 16-bit compressed size
 * per chunk.
 *
 * @param[in]  gzh           Parsed gzip header.
 * @param[out] chunk_length  Uncompressed chunk length; may have been written
 *                           to even when the function fails.
 * @param[out] n_chunks      Number of chunks; may have been written to even
 *                           when the function fails.
 * @param[out] error         Location for a GError, may be NULL.
 * @return Newly allocated array of compressed chunk sizes, to be released
 *         with g_free().  NULL is returned with @a error set on failure,
 *         and without @a error when no RA subfield is present or it lists
 *         no chunks.
 */
static guint16 *
read_random_access_field (const gz_header *gzh,
	gsize *chunk_length, gsize *n_chunks, GError **error)
{
	if (!gzh->extra)
	{
		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
			"no 'extra' field within the header");
		return NULL;
	}

	// Track the remaining byte count instead of comparing pointers, so that
	// no pointer outside of the buffer ever needs to be formed.
	const guchar *cursor = gzh->extra;
	gsize remaining = gzh->extra_len;

	guint16 *chunks = NULL;

	while (remaining >= 4)
	{
		const guchar *f = cursor;

		gsize length = f[2] | (f[3] << 8);
		if (length > remaining - 4)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"overflowing header subfield");
			goto fail;
		}

		cursor += length + 4;
		remaining -= length + 4;

		if (f[0] != 'R' || f[1] != 'A')
			continue;

		if (chunks != NULL)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"multiple RA subfields present in the header");
			goto fail;
		}

		// The version, chunk length and chunk count take up six bytes;
		// make sure they are there before touching them.
		if (length < 6)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"RA subfield overflow");
			goto fail;
		}

		guint16 version = f[4] | (f[5] << 8);
		if (version != 1)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"unsupported RA subfield version");
			goto fail;
		}

		// A zero chunk length would make offset computations divide by zero.
		*chunk_length = f[6] | (f[7] << 8);
		if (*chunk_length == 0)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"invalid RA chunk length");
			goto fail;
		}

		*n_chunks = f[8] | (f[9] << 8);
		if (length < 6 + *n_chunks * 2)
		{
			g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
				"RA subfield overflow");
			goto fail;
		}

		chunks = g_malloc_n (*n_chunks, sizeof *chunks);

		gsize i;
		for (i = 0; i < *n_chunks; i++)
			chunks[i] = f[10 + i * 2] + (f[10 + i * 2 + 1] << 8);
	}

	// One to three stray bytes cannot form a subfield header.
	if (remaining != 0)
	{
		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
			"invalid 'extra' field, subfield too short");
		goto fail;
	}

	return chunks;

fail:
	g_free (chunks);
	return NULL;
}

// --- DictzipInputStream ------------------------------------------------------

/* Forward declarations of the functions that get installed into the GObject,
 * GSeekable and GInputStream method tables; they are defined further below. */

// GObject
static void dictzip_input_stream_finalize (GObject *gobject);

// GSeekable
static void dictzip_input_stream_seekable_init
	(GSeekableIface *iface, gpointer iface_data);
static goffset dictzip_input_stream_tell (GSeekable *seekable);
static gboolean dictzip_input_stream_seek (GSeekable *seekable, goffset offset,
	GSeekType type, GCancellable *cancellable, GError **error);

// GInputStream
static gssize dictzip_input_stream_read (GInputStream *stream, void *buffer,
	gsize count, GCancellable *cancellable, GError **error);
static gssize dictzip_input_stream_skip (GInputStream *stream, gsize count,
	GCancellable *cancellable, GError **error);

/** Private instance data of DictzipInputStream.
 *
 *  The chunk table (chunk_length, n_chunks, chunks) is filled in from the
 *  gzip header when the stream is created.  Chunks are inflated on demand
 *  and kept in @a decompressed until the stream is finalized.
 */
struct dictzip_input_stream_private
{
	GFileInfo  * file_info;            //!< File information from gzip header

	goffset      first_block_offset;   //!< Offset to the first block/chunk
	gsize        chunk_length;         //!< Uncompressed chunk length
	gsize        n_chunks;             //!< Number of chunks in file
	guint16    * chunks;               //!< Chunk sizes after compression

	z_stream     zs;                   //!< zlib decompression context
	gpointer     input_buffer;         //!< Input buffer for compressed data

	goffset      offset;               //!< Current offset in uncompressed data
	gpointer   * decompressed;         //!< Array of decompressed chunks,
	                                   //!<  entries are NULL until inflated
	gsize        last_chunk_length;    //!< Size of the last chunk, only known
	                                   //!<  once that chunk has been inflated
};

/* Register DictzipInputStream as a GFilterInputStream subclass that also
 * implements the GSeekable interface. */
G_DEFINE_TYPE_EXTENDED (DictzipInputStream, dictzip_input_stream,
	G_TYPE_FILTER_INPUT_STREAM, 0,
	G_IMPLEMENT_INTERFACE (G_TYPE_SEEKABLE, dictzip_input_stream_seekable_init))

/** GSeekable predicate that always answers TRUE. */
static gboolean
seekable_true (G_GNUC_UNUSED GSeekable *x)
{
	return TRUE;
}

/** GSeekable predicate that always answers FALSE. */
static gboolean
seekable_false (G_GNUC_UNUSED GSeekable *x)
{
	return FALSE;
}

/** Fill in the GSeekable method table: the stream can be seeked
 *  and its position queried, but it cannot be truncated. */
static void
dictzip_input_stream_seekable_init
	(GSeekableIface *iface, G_GNUC_UNUSED gpointer iface_data)
{
	// Positioning
	iface->can_seek     = seekable_true;
	iface->seek         = dictzip_input_stream_seek;
	iface->tell         = dictzip_input_stream_tell;

	// Truncation is never possible
	iface->can_truncate = seekable_false;
}

/** Class initializer: installs the finalizer and the read/skip
 *  implementations, and reserves room for the private instance data. */
static void
dictzip_input_stream_class_init (DictzipInputStreamClass *klass)
{
	GObjectClass *gobject_vtable = G_OBJECT_CLASS (klass);
	GInputStreamClass *input_vtable = G_INPUT_STREAM_CLASS (klass);

	gobject_vtable->finalize = dictzip_input_stream_finalize;

	input_vtable->skip       = dictzip_input_stream_skip;
	input_vtable->read_fn    = dictzip_input_stream_read;

	g_type_class_add_private (klass, sizeof (DictzipInputStreamPrivate));
}

/** Instance initializer: caches the pointer to the private data. */
static void
dictzip_input_stream_init (DictzipInputStream *self)
{
	DictzipInputStreamPrivate *private_data = G_TYPE_INSTANCE_GET_PRIVATE
		(self, DICTZIP_TYPE_INPUT_STREAM, DictzipInputStreamPrivate);
	self->priv = private_data;
}

/** GObject finalizer: releases the file information, the chunk table,
 *  the zlib context, all buffers, and chains up to the parent class.
 *
 *  The object may be only partially constructed at this point, since
 *  dictzip_input_stream_new() unreferences it on any failure.
 */
static void
dictzip_input_stream_finalize (GObject *gobject)
{
	DictzipInputStreamPrivate *priv = DICTZIP_INPUT_STREAM (gobject)->priv;

	if (priv->file_info)
		g_object_unref (priv->file_info);
	g_free (priv->chunks);
	g_free (priv->input_buffer);
	inflateEnd (&priv->zs);

	// n_chunks gets set while parsing the header, before the array
	// of decompressed chunks is allocated, so the array may be missing.
	if (priv->decompressed)
	{
		gsize i;
		for (i = 0; i < priv->n_chunks; i++)
			g_free (priv->decompressed[i]);
		g_free (priv->decompressed);
	}

	G_OBJECT_CLASS (dictzip_input_stream_parent_class)->finalize (gobject);
}

/** GSeekable::tell implementation: report the current offset
 *  stored in the private data. */
static goffset
dictzip_input_stream_tell (GSeekable *seekable)
{
	DictzipInputStream *self = DICTZIP_INPUT_STREAM (seekable);
	return self->priv->offset;
}

/**
 * Read one compressed chunk from the base stream and inflate it.
 *
 * The position of the chunk is computed by adding the compressed sizes
 * of all preceding chunks to the offset of the first one.
 *
 * @param[in]  self             The stream.
 * @param[in]  chunk_id         Index of the chunk, below the chunk count.
 * @param[out] inflated_length  Number of bytes that were inflated;
 *                              only written to on success.
 * @param[out] error            Location for a GError, may be NULL.
 * @return Newly allocated buffer of the uncompressed chunk length, to be
 *         released with g_free(), or NULL with @a error set on failure.
 */
static gpointer
inflate_chunk (DictzipInputStream *self,
	guint chunk_id, gsize *inflated_length, GError **error)
{
	DictzipInputStreamPrivate *priv = self->priv;
	g_return_val_if_fail (chunk_id < priv->n_chunks, NULL);

	GInputStream *base_stream = G_FILTER_INPUT_STREAM (self)->base_stream;

	guint i;
	goffset offset = priv->first_block_offset;
	for (i = 0; i < chunk_id; i++)
		offset += priv->chunks[i];

	if (!g_seekable_seek (G_SEEKABLE (base_stream),
		offset, G_SEEK_SET, NULL, error))
		return NULL;

	// A single read may return less than requested without that being
	// the end of file, so keep reading until the chunk is complete.
	gsize compressed_length = priv->chunks[chunk_id];
	gsize read = 0;
	if (!g_input_stream_read_all (base_stream, priv->input_buffer,
		compressed_length, &read, NULL, error))
		return NULL;

	if (read != compressed_length)
	{
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
			"premature end of file");
		return NULL;
	}

	int z_err;
	gpointer chunk_data = g_malloc (priv->chunk_length);

	priv->zs.next_in   = (Bytef *) priv->input_buffer;
	priv->zs.avail_in  = read;
	priv->zs.total_in  = 0;

	priv->zs.next_out  = (Bytef *) chunk_data;
	priv->zs.avail_out = priv->chunk_length;
	priv->zs.total_out = 0;

	z_err = inflateReset (&priv->zs);
	if (z_err != Z_OK)
		goto error_zlib;

	// Z_BLOCK would make a raw inflate stop after the very first deflate
	// block, cutting short any chunk that consists of several of them.
	// Z_STREAM_END merely means the chunk also held the final block.
	z_err = inflate (&priv->zs, Z_SYNC_FLUSH);
	if (z_err != Z_OK && z_err != Z_STREAM_END)
		goto error_zlib;

	*inflated_length = priv->zs.total_out;
	return chunk_data;

error_zlib:
	g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
		"failed to inflate the requested block: %s", zError (z_err));
	g_free (chunk_data);
	return NULL;
}

/**
 * Return the uncompressed data of chunk @a chunk_id, inflating it
 * and remembering the result if that has not been done yet.
 *
 * Inflating the last chunk also records its size in the private data.
 * Any other chunk has to inflate to at least the full chunk length.
 *
 * @return The chunk data, owned by the stream, or NULL with @a error set.
 */
static gpointer
get_chunk (DictzipInputStream *self, guint chunk_id, GError **error)
{
	DictzipInputStreamPrivate *priv = self->priv;

	gpointer cached = priv->decompressed[chunk_id];
	if (cached)
		return cached;

	/* Not seen yet, the file is inflated piece by piece as needed. */
	gsize inflated_size;
	gpointer data = inflate_chunk (self, chunk_id, &inflated_size, error);
	if (!data)
		return NULL;

	gboolean is_last = (chunk_id + 1 == priv->n_chunks);
	if (is_last)
		priv->last_chunk_length = inflated_size;
	else if (inflated_size < priv->chunk_length)
	{
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
			"inflated dictzip chunk is too short");
		g_free (data);
		return NULL;
	}

	priv->decompressed[chunk_id] = data;
	return data;
}

/**
 * GSeekable::seek implementation.
 *
 * Only updates the stored offset, no data is read.  Seeking relative
 * to the end of the stream is not supported, as the total length
 * of the uncompressed data is not known.
 *
 * @param[in]  seekable     The stream.
 * @param[in]  offset       Offset relative to @a type.
 * @param[in]  type         G_SEEK_SET or G_SEEK_CUR.
 * @param[in]  cancellable  Optional cancellable, checked once upon entry.
 * @param[out] error        Location for a GError, may be NULL.
 * @return TRUE when the offset has been updated, FALSE with @a error set
 *         otherwise, in which case the offset stays unchanged.
 */
static gboolean
dictzip_input_stream_seek (GSeekable *seekable, goffset offset,
	GSeekType type, GCancellable *cancellable, GError **error)
{
	if (g_cancellable_set_error_if_cancelled (cancellable, error))
		return FALSE;

	DictzipInputStream *self = DICTZIP_INPUT_STREAM (seekable);
	goffset new_offset;

	switch (type)
	{
	case G_SEEK_SET:
		new_offset = offset;
		break;
	case G_SEEK_CUR:
		// The current offset is never negative, so only a positive
		// displacement may overflow the addition.
		if (offset > 0 && self->priv->offset > G_MAXOFFSET - offset)
		{
			g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
				"seek offset out of range");
			return FALSE;
		}
		new_offset = self->priv->offset + offset;
		break;
	case G_SEEK_END:
		/* This could be implemented by retrieving the last chunk
		 * and deducing the filesize, should the functionality be needed. */
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
			"I don't know where the stream ends, cannot seek there");
		return FALSE;
	default:
		// Report it rather than assert, so that the offset can never be
		// used uninitialised when assertions are compiled out.
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_INVALID_ARGUMENT,
			"invalid seek type");
		return FALSE;
	}

	if (new_offset < 0)
	{
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
			"cannot seek before the start of data");
		return FALSE;
	}

	self->priv->offset = new_offset;
	return TRUE;
}

/**
 * GInputStream::read_fn implementation.
 *
 * Copies up to @a count bytes of uncompressed data, starting at the current
 * offset, into @a buffer, inflating chunks as they are needed, and advances
 * the offset by the number of bytes copied.
 *
 * @param[in]  stream       The stream.
 * @param[out] buffer       Destination buffer of at least @a count bytes.
 * @param[in]  count        Maximum number of bytes to read.
 * @param[in]  cancellable  Optional cancellable, checked once upon entry.
 * @param[out] error        Location for a GError, may be NULL.
 * @return Number of bytes read, which is less than @a count (possibly zero)
 *         when the end of data is reached, or -1 with @a error set.
 *         NOTE: when a chunk fails to inflate after some data has already
 *         been copied, -1 is returned even though the offset has advanced.
 */
static gssize
dictzip_input_stream_read (GInputStream *stream, void *buffer,
	gsize count, GCancellable *cancellable, GError **error)
{
	if (g_cancellable_set_error_if_cancelled (cancellable, error))
		return -1;

	DictzipInputStream *self = DICTZIP_INPUT_STREAM (stream);
	DictzipInputStreamPrivate *priv = self->priv;
	gssize read = 0;

	// Standard C has no arithmetic on void pointers.
	guchar *out = buffer;

	if (priv->chunk_length == 0)
	{
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
			"invalid dictzip chunk length");
		return -1;
	}

	// Do the division in 64 bits and range-check the result before
	// narrowing it, or huge offsets would wrap around to valid chunks.
	guint64 chunk_index = (guint64) priv->offset / priv->chunk_length;
	if (chunk_index >= priv->n_chunks)
		return 0;

	guint chunk_id     = chunk_index;
	gsize chunk_offset = (guint64) priv->offset % priv->chunk_length;

	do
	{
		if (chunk_id >= priv->n_chunks)
			return read;

		const guchar *chunk = get_chunk (self, chunk_id, error);
		if (!chunk)
			return -1;

		gsize chunk_size;
		if (chunk_id + 1 == priv->n_chunks)
			// Set by the call to get_chunk().
			chunk_size = priv->last_chunk_length;
		else
			chunk_size = priv->chunk_length;

		// The offset may point past the data of a short last chunk.
		if (chunk_offset < chunk_size)
		{
			gsize to_copy = MIN (chunk_size - chunk_offset, count);

			memcpy (out, chunk + chunk_offset, to_copy);
			out += to_copy;
			priv->offset += to_copy;
			count -= to_copy;
			read += to_copy;
		}

		chunk_id++;
		chunk_offset = 0;
	}
	while (count);

	return read;
}

/**
 * GInputStream::skip implementation, expressed as a relative seek.
 *
 * @return @a count when the seek succeeds, -1 with @a error set otherwise.
 */
static gssize
dictzip_input_stream_skip (GInputStream *stream, gsize count,
	GCancellable *cancellable, GError **error)
{
	gboolean moved = dictzip_input_stream_seek (G_SEEKABLE (stream),
		count, G_SEEK_CUR, cancellable, error);
	return moved ? (gssize) count : -1;
}

/**
 * Create an input stream for the underlying dictzip file.
 *
 * Reads the gzip header from @a base_stream, decodes the dictzip chunk
 * table from it, and prepares a zlib context for inflating raw deflate
 * data.  The base stream is not closed together with the new stream.
 *
 * @param[in]  base_stream  A seekable stream over the dictzip file.
 * @param[out] error        Location for a GError, may be NULL.
 * @return A new stream, or NULL with @a error set when the base stream
 *         is not seekable, the header is invalid, the file is not a dictzip
 *         file, or zlib fails to initialise.
 */
DictzipInputStream *
dictzip_input_stream_new (GInputStream *base_stream, GError **error)
{
	g_return_val_if_fail (G_IS_INPUT_STREAM (base_stream), NULL);

	if (!G_IS_SEEKABLE (base_stream)
	 || !g_seekable_can_seek (G_SEEKABLE (base_stream)))
	{
		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_NOT_SEEKABLE,
			"the underlying stream isn't seekable");
		return NULL;
	}

	GError *err = NULL;
	DictzipInputStream *self = g_object_new (DICTZIP_TYPE_INPUT_STREAM,
		"base-stream", base_stream, "close-base-stream", FALSE, NULL);
	DictzipInputStreamPrivate *priv = self->priv;

	/* Decode the header. */
	gz_header gzh = { 0 };
	if (!read_gzip_header (G_INPUT_STREAM (base_stream),
		&gzh, &priv->first_block_offset, &err))
	{
		g_propagate_error (error, err);
		goto error;
	}

	priv->chunks = read_random_access_field (&gzh,
		&priv->chunk_length, &priv->n_chunks, &err);
	if (err)
	{
		g_propagate_error (error, err);
		goto error;
	}

	if (!priv->chunks)
	{
		g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,
			"not a dictzip file");
		goto error;
	}

	/* Store file information. */
	priv->file_info = g_file_info_new ();

	if (gzh.time != 0)
	{
		GTimeVal m_time = { gzh.time, 0 };
		g_file_info_set_modification_time (priv->file_info, &m_time);
	}

	if (gzh.name && *gzh.name)
		g_file_info_set_name (priv->file_info, (gchar *) gzh.name);

	/* Initialise zlib; negative window bits select raw deflate data. */
	int z_err;
	z_err = inflateInit2 (&priv->zs, -15);
	if (z_err != Z_OK)
	{
		g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
			"zlib initialisation failed: %s", zError (z_err));
		goto error;
	}

	priv->input_buffer = g_malloc (65536);
	priv->decompressed = g_new0 (gpointer, priv->n_chunks);
	priv->last_chunk_length = -1; // We don't know yet.

	free_gzip_header (&gzh);
	return self;

error:
	// The finalizer walks the array of decompressed chunks using n_chunks,
	// which may already be set although the array was never allocated.
	if (!priv->decompressed)
		priv->n_chunks = 0;

	free_gzip_header (&gzh);
	g_object_unref (self);
	return NULL;
}

/** Return the file information stored in the stream's private data.
 *  No reference is added for the caller. */
GFileInfo *
dictzip_input_stream_get_file_info (DictzipInputStream *self)
{
	g_return_val_if_fail (DICTZIP_IS_INPUT_STREAM (self), NULL);
	return self->priv->file_info;
}
Add a class to handle dictzip files Provides pseudo-random access to dictionary files compressed using dictzip. It doesn't implement a cache, it just loads missing chunks until it has the whole file. I'm not sure if discarding not recently used chunks is really a useful feature. If there _was_ a way to get noticed when system memory is low, I think the best way to handle that event would be to simply release it all. All in all, this is pretty useless. But it was interesting to write. This has yet to be integrated into the application proper. 2013-07-14 20:40:58 +02:00			`/*`
			`* dictzip-input-stream.c: dictzip GIO stream reader`
			`*`
			`* Copyright (c) 2013, Přemysl Janouch <p.janouch@gmail.com>`
			`* All rights reserved.`
			`*`
			`* Permission to use, copy, modify, and/or distribute this software for any`
			`* purpose with or without fee is hereby granted, provided that the above`
			`* copyright notice and this permission notice appear in all copies.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES`
			`* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF`
			`* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY`
			`* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES`
			`* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION`
			`* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN`
			`* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.`
			`*`
			`*/`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <assert.h>`

			`#include <glib.h>`
			`#include <gio/gio.h>`

			`#include <zlib.h>`

			`#include "utils.h"`
			`#include "dictzip-input-stream.h"`


			`// --- Errors ------------------------------------------------------------------`

			`GQuark`
			`dictzip_error_quark (void)`
			`{`
			`return g_quark_from_static_string ("dictzip-error-quark");`
			`}`

			`// --- dictzip utilities -------------------------------------------------------`

			`static void`
			`free_gzip_header (gz_header *gzh)`
			`{`
			`g_free (gzh->comment); gzh->comment = NULL;`
			`g_free (gzh->extra); gzh->extra = NULL;`
			`g_free (gzh->name); gzh->name = NULL;`
			`}`

			`/* Reading the header in manually due to stupidity of the ZLIB API. */`
			`static gboolean`
			`read_gzip_header (GInputStream is, gz_header gzh,`
			`goffset first_block_offset, GError *error)`
			`{`
			`assert (is != NULL);`
			`assert (gzh != NULL);`

			`GDataInputStream *dis = g_data_input_stream_new (is);`
			`g_data_input_stream_set_byte_order (dis,`
			`G_DATA_STREAM_BYTE_ORDER_LITTLE_ENDIAN);`
			`g_filter_input_stream_set_close_base_stream`
			`(G_FILTER_INPUT_STREAM (dis), FALSE);`

			`GError *err = NULL;`
			`memset (gzh, 0, sizeof *gzh);`

			`// File header identification`
			`if (g_data_input_stream_read_byte (dis, NULL, &err) != 31`
			`\|\| g_data_input_stream_read_byte (dis, NULL, &err) != 139)`
			`{`
			`if (err)`
			`g_propagate_error (error, err);`
			`else`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"wrong header magic");`
			`goto error_own;`
			`}`

			`// Compression method, only "deflate" is supported here`
			`if (g_data_input_stream_read_byte (dis, NULL, &err) != Z_DEFLATED)`
			`{`
			`if (err)`
			`g_propagate_error (error, err);`
			`else`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"unsupported compression method");`
			`goto error_own;`
			`}`

			`guint flags = g_data_input_stream_read_byte (dis, NULL, &err);`
			`if (err) goto error;`

			`gzh->text = ((flags & 1) != 0);`
			`gzh->hcrc = ((flags & 2) != 0);`

			`gzh->time = g_data_input_stream_read_uint32 (dis, NULL, &err);`
			`if (err) goto error;`

			`gzh->xflags = g_data_input_stream_read_byte (dis, NULL, &err);`
			`if (err) goto error;`

			`gzh->os = g_data_input_stream_read_byte (dis, NULL, &err);`
			`if (err) goto error;`

			`if (flags & 4)`
			`{`
			`gzh->extra_len = g_data_input_stream_read_uint16 (dis, NULL, &err);`
			`if (err) goto error;`
			`gzh->extra_max = gzh->extra_len;`

			`gzh->extra = g_malloc (gzh->extra_len);`
			`gssize read = g_input_stream_read (G_INPUT_STREAM (dis),`
			`gzh->extra, gzh->extra_len, NULL, &err);`
			`if (err) goto error;`

			`if (read != gzh->extra_len)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"unexpected end of file");`
			`goto error_own;`
			`}`
			`}`

			`if (flags & 8)`
			`{`
			`gzh->name = (Bytef *) stream_read_string (dis, &err);`
			`if (err) goto error;`
			`gzh->name_max = strlen ((char *) gzh->name) + 1;`
			`}`

			`if (flags & 16)`
			`{`
			`gzh->comment = (Bytef *) stream_read_string (dis, &err);`
			`if (err) goto error;`
			`gzh->comm_max = strlen ((char *) gzh->comment) + 1;`
			`}`

			`goffset header_size_sans_crc = g_seekable_tell (G_SEEKABLE (dis));`

			`if (!gzh->hcrc)`
			`*first_block_offset = header_size_sans_crc;`
			`else`
			`{`
			`*first_block_offset = header_size_sans_crc + 2;`
			`uLong header_crc = g_data_input_stream_read_uint16 (dis, NULL, &err);`
			`if (err) goto error;`

			`g_seekable_seek (G_SEEKABLE (is), 0, G_SEEK_SET, NULL, &err);`
			`if (err) goto error;`

			`gpointer buf = g_malloc (header_size_sans_crc);`
			`g_input_stream_read (is, buf, header_size_sans_crc, NULL, &err);`
			`if (err) goto error;`

			`uLong crc = crc32 (0, NULL, 0);`
			`crc = crc32 (crc, buf, header_size_sans_crc);`
			`g_free (buf);`

			`if (header_crc != (guint16) crc)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"header checksum mismatch");`
			`goto error_own;`
			`}`
			`}`

			`gzh->done = 1;`
			`g_object_unref (dis);`
			`return TRUE;`

			`error:`
			`g_propagate_error (error, err);`
			`error_own:`
			`free_gzip_header (gzh);`
			`g_object_unref (dis);`
			`return FALSE;`
			`}`

			`static guint16 *`
			`read_random_access_field (const gz_header *gzh,`
			`gsize chunk_length, gsize n_chunks, GError **error)`
			`{`
			`if (!gzh->extra)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"no 'extra' field within the header");`
			`return NULL;`
			`}`

			`guchar *extra_iterator = gzh->extra;`
			`guchar *extra_end = gzh->extra + gzh->extra_len;`

			`guint16 *chunks = NULL;`

			`while (extra_iterator <= extra_end - 4)`
			`{`
			`guchar *f = extra_iterator;`

			`guint16 length = f[2] \| (f[3] << 8);`
			`extra_iterator += length + 4;`
			`if (extra_iterator > extra_end)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"overflowing header subfield");`
			`g_free (chunks);`
			`return NULL;`
			`}`

			`if (f[0] != 'R' \|\| f[1] != 'A')`
			`continue;`

			`if (chunks != NULL)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"multiple RA subfields present in the header");`
			`g_free (chunks);`
			`return NULL;`
			`}`

			`guint16 version = f[4] \| (f[5] << 8);`
			`if (version != 1)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"unsupported RA subfield version");`
			`return NULL;`
			`}`

			`*chunk_length = f[6] \| (f[7] << 8);`
			`if (chunk_length == 0)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"invalid RA chunk length");`
			`return NULL;`
			`}`

			`*n_chunks = f[8] \| (f[9] << 8);`
			`if ((gulong) (extra_iterator - f) < 10 + n_chunks 2)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"RA subfield overflow");`
			`return NULL;`
			`}`

			`chunks = g_malloc_n (n_chunks, sizeof chunks);`

			`guint i;`
			`for (i = 0; i < *n_chunks; i++)`
			`chunks[i] = f[10 + i * 2] + (f[10 + i * 2 + 1] << 8);`
			`}`

			`if (extra_iterator < extra_end - 4)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"invalid 'extra' field, subfield too short");`
			`g_free (chunks);`
			`return NULL;`
			`}`

			`return chunks;`
			`}`

			`// --- DictzipInputStream ------------------------------------------------------`

			`static void dictzip_input_stream_finalize (GObject *gobject);`

			`static void dictzip_input_stream_seekable_init`
			`(GSeekableIface *iface, gpointer iface_data);`
			`static goffset dictzip_input_stream_tell (GSeekable *seekable);`
			`static gboolean dictzip_input_stream_seek (GSeekable *seekable, goffset offset,`
			`GSeekType type, GCancellable cancellable, GError *error);`

			`static gssize dictzip_input_stream_read (GInputStream stream, void buffer,`
			`gsize count, GCancellable cancellable, GError *error);`
			`static gssize dictzip_input_stream_skip (GInputStream *stream, gsize count,`
			`GCancellable cancellable, GError *error);`

			`struct dictzip_input_stream_private`
			`{`
			`GFileInfo * file_info; //!< File information from gzip header`

			`goffset first_block_offset; //!< Offset to the first block/chunk`
			`gsize chunk_length; //!< Uncompressed chunk length`
			`gsize n_chunks; //!< Number of chunks in file`
			`guint16 * chunks; //!< Chunk sizes after compression`

			`z_stream zs; //!< zlib decompression context`
			`gpointer input_buffer; //!< Input buffer`

			`goffset offset; //!< Current offset`
			`gpointer * decompressed; //!< Array of decompressed chunks`
			`gsize last_chunk_length; //!< Size of the last chunk`
			`};`

			`G_DEFINE_TYPE_EXTENDED (DictzipInputStream, dictzip_input_stream,`
			`G_TYPE_FILTER_INPUT_STREAM, 0,`
			`G_IMPLEMENT_INTERFACE (G_TYPE_SEEKABLE, dictzip_input_stream_seekable_init))`

			`static gboolean seekable_true (G_GNUC_UNUSED GSeekable *x) { return TRUE; }`
			`static gboolean seekable_false (G_GNUC_UNUSED GSeekable *x) { return FALSE; }`

			`static void`
			`dictzip_input_stream_seekable_init`
			`(GSeekableIface *iface, G_GNUC_UNUSED gpointer iface_data)`
			`{`
			`iface->tell = dictzip_input_stream_tell;`
			`iface->can_seek = seekable_true;`
			`iface->seek = dictzip_input_stream_seek;`
			`iface->can_truncate = seekable_false;`
			`}`

			`static void`
			`dictzip_input_stream_class_init (DictzipInputStreamClass *klass)`
			`{`
			`g_type_class_add_private (klass, sizeof (DictzipInputStreamPrivate));`

			`GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass);`
			`stream_class->read_fn = dictzip_input_stream_read;`
			`stream_class->skip = dictzip_input_stream_skip;`

			`GObjectClass *object_class = G_OBJECT_CLASS (klass);`
			`object_class->finalize = dictzip_input_stream_finalize;`
			`}`

			`static void`
			`dictzip_input_stream_init (DictzipInputStream *self)`
			`{`
			`self->priv = G_TYPE_INSTANCE_GET_PRIVATE (self,`
			`DICTZIP_TYPE_INPUT_STREAM, DictzipInputStreamPrivate);`
			`}`

			`static void`
			`dictzip_input_stream_finalize (GObject *gobject)`
			`{`
			`DictzipInputStreamPrivate *priv = DICTZIP_INPUT_STREAM (gobject)->priv;`
Fix another error path 2013-07-18 19:47:07 +02:00
			`if (priv->file_info)`
			`g_object_unref (priv->file_info);`
Add a class to handle dictzip files Provides pseudo-random access to dictionary files compressed using dictzip. It doesn't implement a cache, it just loads missing chunks until it has the whole file. I'm not sure if discarding not recently used chunks is really a useful feature. If there _was_ a way to get noticed when system memory is low, I think the best way to handle that event would be to simply release it all. All in all, this is pretty useless. But it was interesting to write. This has yet to be integrated into the application proper. 2013-07-14 20:40:58 +02:00			`g_free (priv->chunks);`
			`g_free (priv->input_buffer);`
			`inflateEnd (&priv->zs);`

			`guint i;`
			`for (i = 0; i < priv->n_chunks; i++)`
			`g_free (priv->decompressed[i]);`
			`g_free (priv->decompressed);`

			`G_OBJECT_CLASS (dictzip_input_stream_parent_class)->finalize (gobject);`
			`}`

			`static goffset`
			`dictzip_input_stream_tell (GSeekable *seekable)`
			`{`
			`return DICTZIP_INPUT_STREAM (seekable)->priv->offset;`
			`}`

			`static gpointer`
			`inflate_chunk (DictzipInputStream *self,`
			`guint chunk_id, gsize inflated_length, GError *error)`
			`{`
			`DictzipInputStreamPrivate *priv = self->priv;`
			`g_return_val_if_fail (chunk_id < priv->n_chunks, NULL);`

			`GInputStream *base_stream = G_FILTER_INPUT_STREAM (self)->base_stream;`

			`guint i;`
			`goffset offset = priv->first_block_offset;`
			`for (i = 0; i < chunk_id; i++)`
			`offset += priv->chunks[i];`

			`if (!g_seekable_seek (G_SEEKABLE (base_stream),`
			`offset, G_SEEK_SET, NULL, error))`
			`return NULL;`

			`gssize read = g_input_stream_read (base_stream, priv->input_buffer,`
			`priv->chunks[chunk_id], NULL, error);`
			`if (read == -1)`
			`return NULL;`

			`if (read != priv->chunks[chunk_id])`
			`{`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,`
			`"premature end of file");`
			`return NULL;`
			`}`

			`int z_err;`
			`gpointer chunk_data = g_malloc (priv->chunk_length);`

			`priv->zs.next_in = (Bytef *) priv->input_buffer;`
			`priv->zs.avail_in = read;`
			`priv->zs.total_in = 0;`

			`priv->zs.next_out = (Bytef *) chunk_data;`
			`priv->zs.avail_out = priv->chunk_length;`
			`priv->zs.total_out = 0;`

			`z_err = inflateReset (&priv->zs);`
			`if (z_err != Z_OK)`
			`goto error_zlib;`

			`z_err = inflate (&priv->zs, Z_BLOCK);`
			`if (z_err != Z_OK)`
			`goto error_zlib;`

			`*inflated_length = priv->zs.total_out;`
			`return chunk_data;`

			`error_zlib:`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,`
			`"failed to inflate the requested block: %s", zError (z_err));`
			`g_free (chunk_data);`
			`return NULL;`
			`}`

			`static gpointer`
			`get_chunk (DictzipInputStream self, guint chunk_id, GError *error)`
			`{`
			`DictzipInputStreamPrivate *priv = self->priv;`
			`gpointer chunk = priv->decompressed[chunk_id];`
			`if (!chunk)`
			`{`
			`/* Just inflating the file piece by piece as needed. */`
			`gsize chunk_size;`
			`chunk = inflate_chunk (self, chunk_id, &chunk_size, error);`
			`if (!chunk)`
			`return NULL;`

			`if (chunk_id + 1 == priv->n_chunks)`
			`priv->last_chunk_length = chunk_size;`
			`else if (chunk_size < priv->chunk_length)`
			`{`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,`
			`"inflated dictzip chunk is too short");`
			`g_free (chunk);`
			`return NULL;`
			`}`

			`priv->decompressed[chunk_id] = chunk;`
			`}`
			`return chunk;`
			`}`

			`static gboolean`
			`dictzip_input_stream_seek (GSeekable *seekable, goffset offset,`
			`GSeekType type, GCancellable cancellable, GError *error)`
			`{`
			`if (g_cancellable_set_error_if_cancelled (cancellable, error))`
			`return FALSE;`

			`if (type == G_SEEK_END)`
			`{`
			`/* This could be implemented by retrieving the last chunk`
			`* and deducing the filesize, should the functionality be needed. */`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,`
			`"I don't know where the stream ends, cannot seek there");`
			`return FALSE;`
			`}`

			`DictzipInputStream *self = DICTZIP_INPUT_STREAM (seekable);`
			`goffset new_offset;`

			`if (type == G_SEEK_SET)`
			`new_offset = offset;`
			`else if (type == G_SEEK_CUR)`
			`new_offset = self->priv->offset + offset;`
			`else`
			`g_assert_not_reached ();`

			`if (new_offset < 0)`
			`{`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,`
			`"cannot seek before the start of data");`
			`return FALSE;`
			`}`

			`self->priv->offset = new_offset;`
			`return TRUE;`
			`}`

			`static gssize`
			`dictzip_input_stream_read (GInputStream stream, void buffer,`
			`gsize count, GCancellable cancellable, GError *error)`
			`{`
			`if (g_cancellable_set_error_if_cancelled (cancellable, error))`
			`return -1;`

			`DictzipInputStream *self = DICTZIP_INPUT_STREAM (stream);`
			`DictzipInputStreamPrivate *priv = self->priv;`
			`gssize read = 0;`

			`guint chunk_id = priv->offset / priv->chunk_length;`
			`guint chunk_offset = priv->offset % priv->chunk_length;`

			`do`
			`{`
			`if (chunk_id >= priv->n_chunks)`
			`return read;`

			`gpointer chunk = get_chunk (self, chunk_id, error);`
			`if (!chunk)`
			`return -1;`

			`glong to_copy;`
			`if (chunk_id + 1 == priv->n_chunks)`
			`// Set by the call to get_chunk().`
			`to_copy = priv->last_chunk_length - chunk_offset;`
			`else`
			`to_copy = priv->chunk_length - chunk_offset;`

			`if (to_copy > (glong) count)`
			`to_copy = count;`

			`if (to_copy > 0)`
			`{`
			`memcpy (buffer, chunk + chunk_offset, to_copy);`
			`buffer += to_copy;`
			`priv->offset += to_copy;`
			`count -= to_copy;`
			`read += to_copy;`
			`}`

			`chunk_id++;`
			`chunk_offset = 0;`
			`}`
			`while (count);`

			`return read;`
			`}`

			`static gssize`
			`dictzip_input_stream_skip (GInputStream *stream, gsize count,`
			`GCancellable cancellable, GError *error)`
			`{`
			`if (!dictzip_input_stream_seek (G_SEEKABLE (stream), count,`
			`G_SEEK_CUR, cancellable, error))`
			`return -1;`

			`return count;`
			`}`

			`/** Create an input stream for the underlying dictzip file. */`
			`DictzipInputStream *`
			`dictzip_input_stream_new (GInputStream base_stream, GError *error)`
			`{`
			`g_return_val_if_fail (G_IS_INPUT_STREAM (base_stream), NULL);`

			`if (!G_IS_SEEKABLE (base_stream)`
			`\|\| !g_seekable_can_seek (G_SEEKABLE (base_stream)))`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_NOT_SEEKABLE,`
			`"the underlying stream isn't seekable");`
			`return NULL;`
			`}`

			`GError *err = NULL;`
			`DictzipInputStream *self = g_object_new (DICTZIP_TYPE_INPUT_STREAM,`
			`"base-stream", base_stream, "close-base-stream", FALSE, NULL);`
			`DictzipInputStreamPrivate *priv = self->priv;`

			`/* Decode the header. */`
			`gz_header gzh;`
			`if (!read_gzip_header (G_INPUT_STREAM (base_stream),`
			`&gzh, &priv->first_block_offset, &err))`
			`{`
			`g_propagate_error (error, err);`
			`goto error;`
			`}`

			`priv->chunks = read_random_access_field (&gzh,`
			`&priv->chunk_length, &priv->n_chunks, &err);`
			`if (err)`
			`{`
			`g_propagate_error (error, err);`
			`goto error;`
			`}`

			`if (!priv->chunks)`
			`{`
			`g_set_error (error, DICTZIP_ERROR, DICTZIP_ERROR_INVALID_HEADER,`
			`"not a dictzip file");`
			`goto error;`
			`}`

			`/* Store file information. */`
			`priv->file_info = g_file_info_new ();`

			`if (gzh.time != 0)`
			`{`
			`GTimeVal m_time = { gzh.time, 0 };`
			`g_file_info_set_modification_time (priv->file_info, &m_time);`
			`}`

			`if (gzh.name && *gzh.name)`
			`g_file_info_set_name (priv->file_info, (gchar *) gzh.name);`

			`/* Initialise zlib. */`
			`int z_err;`
			`z_err = inflateInit2 (&priv->zs, -15);`
			`if (z_err != Z_OK)`
			`{`
			`g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,`
			`"zlib initialisation failed: %s", zError (z_err));`
			`goto error;`
			`}`

			`priv->input_buffer = g_malloc (65536);`
			`priv->decompressed = g_new0 (gpointer, priv->n_chunks);`
			`priv->last_chunk_length = -1; // We don't know yet.`

			`free_gzip_header (&gzh);`
			`return self;`

			`error:`
			`free_gzip_header (&gzh);`
			`g_object_unref (self);`
			`return NULL;`
			`}`

			/**
			 * Get the GFileInfo object kept by the stream.
			 *
			 * The pointer stored in the private data is handed out as is, without
			 * acquiring a new reference.  Returns NULL when @self is not
			 * a DictzipInputStream.
			 */
			GFileInfo *
			dictzip_input_stream_get_file_info (DictzipInputStream *self)
			{
				g_return_val_if_fail (DICTZIP_IS_INPUT_STREAM (self), NULL);
				return self->priv->file_info;
			}