/* 
A benchmark of 8-bit vs 16-bit blending for ColorFromPalette when
filling 2,000 LEDs on AVR.
  * left-hand figure (in the results below and in each line of serial output): 8-bit indexing
  * right-hand figure: 16-bit indexing
  * uncomment one of the `TYPE_TO_TEST` macros to choose amongst the contenders

Results:
  * 16-colour RGB palettes
    * 12.5ms vs 16.7ms = 35% slower
      * worst case. The awkward bit-shift amounts really penalise AVR here.
  * 32-colour RGB palettes
    * 11.1ms vs 13.6ms = 23% slower
      * the bit-shifts are slightly more amenable to the AVR instruction
        set, so both versions are a little faster than the 16-colour versions.
  * 256-colour RGB palettes
    * 4.7ms vs 11.3ms = 136% slower
      * The 8-bit variant is twice the speed of any other variant. The
        code doesn't do any blending, it simply picks a colour directly
        from the 256-colour palette.
        The 16-bit variant is the fastest of the 16-bit contenders as it needs
        no bit-shifting.

See:
  * https://github.com/FastLED/FastLED/pull/202
  * https://github.com/FastLED/FastLED/pull/1687

*/

// Selects which global palette loop() passes to ColorFromPalette.
// Keep exactly one of these three definitions active.
#define TYPE_TO_TEST pal16
// #define TYPE_TO_TEST pal32
// #define TYPE_TO_TEST pal256

#include <FastLED.h>
#include "ColorFromPalette16bit.h"


// Matrix dimensions; in this sketch they are only used to derive the LED
// count (40 * 50 = 2,000).
#define kMatrixWidth 40
#define kMatrixHeight 50
#define NUM_LEDS ((kMatrixWidth) * (kMatrixHeight))

// Pixel buffer handed to both LED controllers registered in setup().
CRGB leds[NUM_LEDS];
// Source palette; setup() fills pal32 and pal256 by sampling it.
CRGBPalette16 pal16 = RainbowStripeColors_p;
CRGBPalette32 pal32;
CRGBPalette256 pal256;

/**
 * One-time initialisation.
 *
 * Registers two WS2812B controllers (pins 3 and 4) on the shared `leds`
 * buffer, opens the serial port at 2,000,000 baud, and fills `pal32` and
 * `pal256` by sampling `pal16` with linear blending at evenly spaced
 * 16-bit positions.
 */
void setup() {
  // loop() shows these individually via FastLED[0] and FastLED[1].
  FastLED.addLeds<WS2812B, 3, GRB>(leds, NUM_LEDS);
  FastLED.addLeds<WS2812B, 4, GRB>(leds, NUM_LEDS);
  Serial.begin(2000000);

  // stretch the 16-colour palette to fill the 32-colour palette
  for (int entry = 0; entry < 32; entry++) {
    // Shift in an unsigned 16-bit type: where int is 16 bits (AVR),
    // `entry << 11` as a signed int exceeds INT_MAX for entry >= 16.
    const uint16_t position =
        static_cast<uint16_t>(static_cast<uint16_t>(entry) << 11);
    pal32[entry] = ColorFromPalette(pal16, position, 255, LINEARBLEND);
  }

  // stretch the 16-colour palette to fill the 256-colour palette
  for (int entry = 0; entry < 256; entry++) {
    // Same reasoning: `entry << 8` as a signed 16-bit int exceeds INT_MAX
    // for entry >= 128.
    const uint16_t position =
        static_cast<uint16_t>(static_cast<uint16_t>(entry) << 8);
    pal256[entry] = ColorFromPalette(pal16, position, 255, LINEARBLEND);
  }
}

void loop() {
  const int iterations = 1;
  static uint16_t indexstart = 0;
  indexstart += 127;

  uint32_t orig_us1 = micros();
  for (uint32_t i = 0; i < iterations; i++) {
    uint16_t index = indexstart;
    for (uint16_t ledno = 0; ledno < NUM_LEDS; ledno++) {
      leds[ledno] = ColorFromPalette(TYPE_TO_TEST, (uint8_t) (index >> 8), 255, LINEARBLEND);
      index += 9;
    }
  }
  uint32_t orig_us2 = micros();
  FastLED[0].showLeds();

  uint32_t fix_us1 = micros();
  for (uint32_t i = 0; i < iterations; i++) {
    uint16_t index = indexstart;
    for (uint16_t ledno = 0; ledno < NUM_LEDS; ledno++) {
      leds[ledno] = ColorFromPalette(TYPE_TO_TEST, index, 255, LINEARBLEND);
      index += 9;
    }
  }
  uint32_t fix_us2 = micros();
  FastLED[1].showLeds();

  Serial.print(float(orig_us2 - orig_us1) / iterations);
  Serial.print("μs vs ");
  Serial.print(float(fix_us2 - fix_us1) / iterations);
  Serial.print("μs  % change: ");
  Serial.println(float(fix_us2 - fix_us1) / float(orig_us2 - orig_us1) * 100.f - 100.f);
}
mega:SCL
mega:SDA
mega:AREF
mega:GND.1
mega:13
mega:12
mega:11
mega:10
mega:9
mega:8
mega:7
mega:6
mega:5
mega:4
mega:3
mega:2
mega:1
mega:0
mega:14
mega:15
mega:16
mega:17
mega:18
mega:19
mega:20
mega:21
mega:5V.1
mega:5V.2
mega:22
mega:23
mega:24
mega:25
mega:26
mega:27
mega:28
mega:29
mega:30
mega:31
mega:32
mega:33
mega:34
mega:35
mega:36
mega:37
mega:38
mega:39
mega:40
mega:41
mega:42
mega:43
mega:44
mega:45
mega:46
mega:47
mega:48
mega:49
mega:50
mega:51
mega:52
mega:53
mega:GND.4
mega:GND.5
mega:IOREF
mega:RESET
mega:3.3V
mega:5V
mega:GND.2
mega:GND.3
mega:VIN
mega:A0
mega:A1
mega:A2
mega:A3
mega:A4
mega:A5
mega:A6
mega:A7
mega:A8
mega:A9
mega:A10
mega:A11
mega:A12
mega:A13
mega:A14
mega:A15
neopixels1:DOUT
neopixels1:VDD
neopixels1:VSS
neopixels1:DIN
neopixels2:DOUT
neopixels2:VDD
neopixels2:VSS
neopixels2:DIN