From 20c8385f2e3fc00305396a3aaa74cb1d30cecc8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?= Date: Thu, 8 Jul 2021 19:07:42 +0200 Subject: [PATCH] Spectrum analyser: optimise the x:16:2 case nncmpp CPU usage went from 2 to 1.7 percent, a 15% improvement. Sort of worth it, given that it's a constant load. The assembly certainly looks nicer. --- nncmpp.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/nncmpp.c b/nncmpp.c index 485b269..b422298 100644 --- a/nncmpp.c +++ b/nncmpp.c @@ -638,13 +638,13 @@ spectrum_decode_8 (struct spectrum *s, int sample) { size_t n = s->useful_bins; float *data = s->data + n; - int8_t *p = (int8_t *) s->buffer + sample * n * s->channels; - while (n--) + for (int8_t *p = (int8_t *) s->buffer + sample * n * s->channels; + n--; p += s->channels) { int32_t acc = 0; for (int ch = 0; ch < s->channels; ch++) - acc += *p++; - *data++ = (float) acc / -INT8_MIN / s->channels; + acc += p[ch]; + *data++ = (float) acc / s->channels / -INT8_MIN; } } @@ -653,16 +653,25 @@ spectrum_decode_16 (struct spectrum *s, int sample) { size_t n = s->useful_bins; float *data = s->data + n; - int16_t *p = (int16_t *) s->buffer + sample * n * s->channels; - while (n--) + for (int16_t *p = (int16_t *) s->buffer + sample * n * s->channels; + n--; p += s->channels) { int32_t acc = 0; for (int ch = 0; ch < s->channels; ch++) - acc += *p++; - *data++ = (float) acc / -INT16_MIN / s->channels; + acc += p[ch]; + *data++ = (float) acc / s->channels / -INT16_MIN; } } +static void +spectrum_decode_16_2 (struct spectrum *s, int sample) +{ + size_t n = s->useful_bins; + float *data = s->data + n; + for (int16_t *p = (int16_t *) s->buffer + sample * n * 2; n--; p += 2) + *data++ = ((int32_t) p[0] + p[1]) / 2. 
/ -INT16_MIN; +} + // - - Spectrum analysis - - - - - - - - - - - - - - - - - - - - - - - - - - - - static const char *spectrum_bars[] = @@ -803,6 +812,10 @@ spectrum_init (struct spectrum *s, char *format, int bars, struct error **e) if (s->bits == 8) s->decode = spectrum_decode_8; if (s->bits == 16) s->decode = spectrum_decode_16; + // Micro-optimize to achieve some peace of mind; it's weak but measurable + if (s->bits == 16 && s->channels == 2) + s->decode = spectrum_decode_16_2; + s->buffer_size = s->samples * s->useful_bins * s->bits / 8 * s->channels; s->buffer = xcalloc (1, s->buffer_size);