--
-- utm-filter.lua: filter out Google Analytics bullshit from URLs
--
-- Copyright (c) 2015, Přemysl Janouch
--
-- Permission to use, copy, modify, and/or distribute this software for any
-- purpose with or without fee is hereby granted.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-- SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-- OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--

-- A list of useless URL parameters that don't affect page function.
-- Used as a set: parameter names are looked up verbatim (case-sensitively).
local banned = {
	gclid = 1,
	utm_source = 1,
	utm_medium = 1,
	utm_term = 1,
	utm_content = 1,
	utm_campaign = 1,
}

-- Go through a parameter list and throw out any banned elements.
--   args: "&"-separated parameters, without the leading "?" or "#"
-- Returns the surviving parameters joined by "&"; this is an empty string
-- when nothing survives.  Empty elements (as in "a=1&&b=2") are dropped too,
-- since the iteration below only yields non-empty runs between "&".
local do_args = function (args)
	local filtered = {}
	for part in args:gmatch ("[^&]+") do
		-- The name is whatever precedes the first "=", or the whole
		-- element when there is no "=" at all
		local name = part:match ("^[^=]*")
		if not banned[name] then
			filtered[#filtered + 1] = part
		end
	end
	return table.concat (filtered, "&")
end

-- Make a gsub callback which receives the part of the URL before a separator
-- and the parameter list after it, and glues them back together once the
-- list has been cleaned.  The separator itself is dropped when the cleaned
-- list comes out empty, so that no dangling "?" or "#" is left behind.
local make_rejoiner = function (separator)
	return function (start, args)
		local clean = do_args (args)
		if #clean > 0 then
			return start .. separator .. clean
		end
		return start
	end
end

local rejoin_query = make_rejoiner ("?")
local rejoin_fragment = make_rejoiner ("#")

-- Filter parameters in both the query and the fragment part of an URL.
--   url: a single URL
-- Returns exactly one value, the filtered URL.
local do_single_url = function (url)
	-- Assigning to a single variable discards the substitution count that
	-- gsub returns as its second result
	local result = url:gsub ('^([^?#]*)%?([^#]*)', rejoin_query, 1)
	result = result:gsub ('^([^#]*)#(.*)', rejoin_fragment, 1)
	return result
end

-- Filter every http:// or https:// URL found within free-form text.
-- An URL has to begin at the start of a run of printable non-space
-- characters and extends to the end of that run.
--   text: arbitrary text
-- Returns exactly one value, the text with its URLs filtered.
local do_text = function (text)
	-- Parenthesised so that gsub's substitution count isn't returned as well
	return (text:gsub ('%f[%g]https?://%g+', do_single_url))
end

-- Incoming IRC lines: only touch what follows the first " :" in the line.
-- The split must be lazy; a greedy match would split at the last " :" and
-- an URL followed by e.g. a " :)" smiley would escape filtering.
-- NOTE(review): presumably the returned string replaces the line that the
-- client goes on to process -- confirm against the degesch plugin API.
degesch.hook_irc (function (hook, server, line)
	local start, message = line:match ("^(.- :)(.*)$")
	if not message then
		-- No " :" anywhere in the line, pass it through untouched
		return line
	end
	return start .. do_text (message)
end)

-- User input: filter URLs anywhere in what has been typed.
-- NOTE(review): presumably the returned string replaces the input -- confirm
-- against the degesch plugin API.
degesch.hook_input (function (hook, buffer, input)
	return do_text (input)
end)