Spectrum analyser: optimise the x:16:2 case

nncmpp CPU usage went from 2 to 1.7 percent, a 15% improvement.

Sort of worth it, given that it's a constant load.

The assembly certainly looks nicer.
This commit is contained in:
Přemysl Eric Janouch 2021-07-08 19:07:42 +02:00
parent fa4443a3ce
commit 20c8385f2e
Signed by: p
GPG Key ID: A0420B94F92B9493
1 changed files with 21 additions and 8 deletions

View File

@ -638,13 +638,13 @@ spectrum_decode_8 (struct spectrum *s, int sample)
{ {
size_t n = s->useful_bins; size_t n = s->useful_bins;
float *data = s->data + n; float *data = s->data + n;
int8_t *p = (int8_t *) s->buffer + sample * n * s->channels; for (int8_t *p = (int8_t *) s->buffer + sample * n * s->channels;
while (n--) n--; p += s->channels)
{ {
int32_t acc = 0; int32_t acc = 0;
for (int ch = 0; ch < s->channels; ch++) for (int ch = 0; ch < s->channels; ch++)
acc += *p++; acc += p[ch];
*data++ = (float) acc / -INT8_MIN / s->channels; *data++ = (float) acc / s->channels / -INT8_MIN;
} }
} }
@ -653,16 +653,25 @@ spectrum_decode_16 (struct spectrum *s, int sample)
{ {
size_t n = s->useful_bins; size_t n = s->useful_bins;
float *data = s->data + n; float *data = s->data + n;
int16_t *p = (int16_t *) s->buffer + sample * n * s->channels; for (int16_t *p = (int16_t *) s->buffer + sample * n * s->channels;
while (n--) n--; p += s->channels)
{ {
int32_t acc = 0; int32_t acc = 0;
for (int ch = 0; ch < s->channels; ch++) for (int ch = 0; ch < s->channels; ch++)
acc += *p++; acc += p[ch];
*data++ = (float) acc / -INT16_MIN / s->channels; *data++ = (float) acc / s->channels / -INT16_MIN;
} }
} }
// Specialised decoder for signed 16-bit samples with exactly two channels:
// each output bin is the mean of two adjacent int16_t values, scaled by
// 1/32768.  Reads useful_bins pairs from s->buffer starting at the given
// sample window, and writes useful_bins floats starting at
// s->data + useful_bins.
static void
spectrum_decode_16_2 (struct spectrum *s, int sample)
{
	size_t bins = s->useful_bins;
	float *out = s->data + bins;
	int16_t *frame = (int16_t *) s->buffer + sample * bins * 2;
	for (size_t i = 0; i < bins; i++)
	{
		// Widen before adding so that the sum of two int16_t cannot overflow
		int32_t sum = (int32_t) frame[2 * i] + frame[2 * i + 1];
		out[i] = sum / 2. / -INT16_MIN;
	}
}
// - - Spectrum analysis - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - Spectrum analysis - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static const char *spectrum_bars[] = static const char *spectrum_bars[] =
@ -803,6 +812,10 @@ spectrum_init (struct spectrum *s, char *format, int bars, struct error **e)
if (s->bits == 8) s->decode = spectrum_decode_8; if (s->bits == 8) s->decode = spectrum_decode_8;
if (s->bits == 16) s->decode = spectrum_decode_16; if (s->bits == 16) s->decode = spectrum_decode_16;
// Micro-optimize to achieve some peace of mind; it's weak but measurable
if (s->bits == 16 && s->channels == 2)
s->decode = spectrum_decode_16_2;
s->buffer_size = s->samples * s->useful_bins * s->bits / 8 * s->channels; s->buffer_size = s->samples * s->useful_bins * s->bits / 8 * s->channels;
s->buffer = xcalloc (1, s->buffer_size); s->buffer = xcalloc (1, s->buffer_size);