From 475c83618a043c88e7414b839228c28452658166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Sun, 13 Jul 2014 04:30:23 +0200 Subject: [PATCH] Only compile regex's once --- src/common.c | 106 +++++++++++++++++++++++++++++++------------------- src/kike.c | 45 ++++++++++++++------- src/zyklonb.c | 45 +++++++++++++-------- 3 files changed, 126 insertions(+), 70 deletions(-) diff --git a/src/common.c b/src/common.c index 283535f..fcb9742 100644 --- a/src/common.c +++ b/src/common.c @@ -1417,45 +1417,6 @@ set_boolean_if_valid (bool *out, const char *s) return true; } -static void -regerror_to_str (int code, const regex_t *preg, struct str *out) -{ - size_t required = regerror (code, preg, NULL, 0); - str_ensure_space (out, required); - out->len += regerror (code, preg, - out->str + out->len, out->alloc - out->len) - 1; -} - -static size_t regex_error_domain_tag; -#define REGEX_ERROR (error_resolve_domain (&regex_error_domain_tag)) - -enum -{ - REGEX_ERROR_COMPILATION_FAILED -}; - -static bool -regex_match (const char *regex, const char *s, struct error **e) -{ - regex_t re; - int err = regcomp (&re, regex, REG_EXTENDED | REG_NOSUB); - if (err) - { - struct str desc; - - str_init (&desc); - regerror_to_str (err, &re, &desc); - error_set (e, REGEX_ERROR, REGEX_ERROR_COMPILATION_FAILED, - "failed to compile regular expression: %s", desc.str); - str_free (&desc); - return false; - } - - bool result = regexec (&re, s, 0, NULL, 0) != REG_NOMATCH; - regfree (&re); - return result; -} - static bool read_line (FILE *fp, struct str *s) { @@ -1512,6 +1473,73 @@ xssl_get_error (SSL *ssl, int result, const char **error_info) } } +// --- Regular expressions ----------------------------------------------------- + +static size_t regex_error_domain_tag; +#define REGEX_ERROR (error_resolve_domain (&regex_error_domain_tag)) + +enum +{ + REGEX_ERROR_COMPILATION_FAILED +}; + +static regex_t * +regex_compile (const char *regex, int flags, struct error **e) +{ + regex_t *re 
= xmalloc (sizeof *re); + int err = regcomp (re, regex, flags); + if (!err) + return re; + + struct str desc; + str_init (&desc); + + size_t required = regerror (err, re, NULL, 0); + str_ensure_space (&desc, required); + desc.len += regerror (err, re, + desc.str + desc.len, desc.alloc - desc.len) - 1; + + free (re); + error_set (e, REGEX_ERROR, REGEX_ERROR_COMPILATION_FAILED, + "%s: %s", "failed to compile regular expression", desc.str); + str_free (&desc); + return NULL; +} + +static void +regex_free (void *regex) +{ + regfree (regex); + free (regex); +} + +// The cost of hashing a string is likely to be significantly smaller than that +// of compiling the whole regular expression anew, so here is a simple cache. +// Adding basic support for subgroups is easy: check `re_nsub' and output into +// a `struct str_vector' (if all we want is the substrings). + +static void +regex_cache_init (struct str_map *cache) +{ + str_map_init (cache); + cache->free = regex_free; +} + +static bool +regex_cache_match (struct str_map *cache, const char *regex, int flags, + const char *s, struct error **e) +{ + regex_t *re = str_map_find (cache, regex); + if (!re) + { + re = regex_compile (regex, flags, e); + if (!re) + return false; + str_map_set (cache, regex, re); + } + return regexec (re, s, 0, NULL, 0) != REG_NOMATCH; +} + // --- IRC utilities ----------------------------------------------------------- struct irc_message diff --git a/src/kike.c b/src/kike.c index d5a777c..0ce7092 100644 --- a/src/kike.c +++ b/src/kike.c @@ -103,13 +103,24 @@ enum validation_result #define IRC_NICKNAME_MAX 9 #define IRC_HOSTNAME_MAX 63 -// Anything to keep it as short as possible -#define SN "[0-9A-Za-z][-0-9A-Za-z]*[0-9A-Za-z]*" -#define N4 "[0-9]{1,3}" -#define N6 "[0-9ABCDEFabcdef]{1,}" +static bool +irc_regex_match (const char *regex, const char *s) +{ + static struct str_map cache; + static bool initialized; -#define LE "A-Za-z" -#define SP "\\[\\]\\\\`_^{|}" + if (!initialized) + { + 
regex_cache_init (&cache); + initialized = true; + } + + struct error *e = NULL; + bool result = regex_cache_match (&cache, regex, + REG_EXTENDED | REG_NOSUB, s, &e); + hard_assert (!e); + return result; +} static const char * irc_validate_to_str (enum validation_result result) @@ -124,14 +135,20 @@ irc_validate_to_str (enum validation_result result) } } -// TODO: at least cache the resulting `regex_t' in a `struct str_map' +// Anything to keep it as short as possible +#define SN "[0-9A-Za-z][-0-9A-Za-z]*[0-9A-Za-z]*" +#define N4 "[0-9]{1,3}" +#define N6 "[0-9ABCDEFabcdef]{1,}" + +#define LE "A-Za-z" +#define SP "\\[\\]\\\\`_^{|}" static enum validation_result irc_validate_hostname (const char *hostname) { if (!*hostname) return VALIDATION_ERROR_EMPTY; - if (!regex_match ("^" SN "(\\." SN ")*$", hostname, NULL)) + if (!irc_regex_match ("^" SN "(\\." SN ")*$", hostname)) return VALIDATION_ERROR_INVALID; if (strlen (hostname) > IRC_HOSTNAME_MAX) return VALIDATION_ERROR_TOO_LONG; @@ -141,11 +158,11 @@ irc_validate_hostname (const char *hostname) static bool irc_is_valid_hostaddr (const char *hostaddr) { - if (regex_match ("^" N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr, NULL) - || regex_match ("^" N6 ":" N6 ":" N6 ":" N6 ":" - N6 ":" N6 ":" N6 ":" N6 "$", hostaddr, NULL) - || regex_match ("^0:0:0:0:0:(0|[Ff]{4}):" - N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr, NULL)) + if (irc_regex_match ("^" N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr) + || irc_regex_match ("^" N6 ":" N6 ":" N6 ":" N6 ":" + N6 ":" N6 ":" N6 ":" N6 "$", hostaddr) + || irc_regex_match ("^0:0:0:0:0:(0|[Ff]{4}):" + N4 "\\." N4 "\\." N4 "\\." 
N4 "$", hostaddr)) return true; return false; } @@ -162,7 +179,7 @@ irc_validate_nickname (const char *nickname) { if (!*nickname) return VALIDATION_ERROR_EMPTY; - if (!regex_match ("^[" LE SP "][-0-9" LE SP "]*$", nickname, NULL)) + if (!irc_regex_match ("^[" LE SP "][-0-9" LE SP "]*$", nickname)) return VALIDATION_ERROR_INVALID; if (strlen (nickname) > IRC_NICKNAME_MAX) return VALIDATION_ERROR_TOO_LONG; diff --git a/src/zyklonb.c b/src/zyklonb.c index 69c41c1..13e7f3b 100644 --- a/src/zyklonb.c +++ b/src/zyklonb.c @@ -118,6 +118,7 @@ enum struct bot_context { struct str_map config; ///< User configuration + regex_t *admin_re; ///< Regex to match our administrator int irc_fd; ///< Socket FD of the server struct str read_buffer; ///< Input yet to be processed @@ -140,6 +141,7 @@ bot_context_init (struct bot_context *self) str_map_init (&self->config); self->config.free = free; load_config_defaults (&self->config, g_config_table); + self->admin_re = NULL; self->irc_fd = -1; str_init (&self->read_buffer); @@ -160,6 +162,8 @@ static void bot_context_free (struct bot_context *self) { str_map_free (&self->config); + if (self->admin_re) + regex_free (self->admin_re); str_free (&self->read_buffer); // TODO: terminate the plugins properly before this is called @@ -1110,25 +1114,10 @@ is_private_message (const struct irc_message *msg) static bool is_sent_by_admin (struct bot_context *ctx, const struct irc_message *msg) { - const char *admin = str_map_find (&ctx->config, "admin"); - // No administrator set -> everyone is an administrator - if (!admin) + if (!ctx->admin_re) return true; - - // TODO: precompile the regex - struct error *e = NULL; - if (regex_match (admin, msg->prefix, NULL)) - return true; - - if (e) - { - print_error ("%s: %s", "invalid admin mask", e->message); - error_free (e); - return true; - } - - return false; + return regexec (ctx->admin_re, msg->prefix, 0, NULL, 0) != REG_NOMATCH; } static void respond_to_user (struct bot_context *ctx, const struct @@ 
-1591,6 +1580,26 @@ irc_connect (struct bot_context *ctx, struct error **e) return true; } +static bool +load_admin_regex (struct bot_context *ctx) +{ + hard_assert (!ctx->admin_re); + const char *admin = str_map_find (&ctx->config, "admin"); + + if (!admin) + return true; + + struct error *e = NULL; + ctx->admin_re = regex_compile (admin, REG_EXTENDED | REG_NOSUB, &e); + if (!e) + return true; + + print_error ("invalid configuration value for `%s': %s", + "admin", e->message); + error_free (e); + return false; +} + static void on_signal_pipe_readable (const struct pollfd *fd, struct bot_context *ctx) { @@ -1758,6 +1767,8 @@ main (int argc, char *argv[]) (poller_dispatcher_func) on_signal_pipe_readable, &ctx); plugin_load_all_from_config (&ctx); + if (!load_admin_regex (&ctx)) + exit (EXIT_FAILURE); if (!irc_connect (&ctx, &e)) { print_error ("%s", e->message);