#include <FastLED.h>


// Matrix dimensions in LEDs. XY() uses these as the exclusive upper bounds
// for the x and y coordinates it accepts.
#define kMatrixWidth 16
#define kMatrixHeight 16
// Total number of LEDs in the matrix (width * height).
#define NUM_LEDS ((kMatrixWidth) * (kMatrixHeight))

// Frame buffer: one CRGB per LED. Registered with FastLED in setup() and
// written to by wu_pixel().
CRGB leds[NUM_LEDS];

// Maps a matrix coordinate to an index into leds[].
//
// Rows are stored one after another, kMatrixWidth entries each. Odd rows are
// indexed right-to-left (x is mirrored), which presumably matches a
// serpentine / zig-zag wired panel -- confirm against the actual hardware.
//
// Returns the LED index, or 0xFFFF when (x, y) lies outside the matrix.
// Callers must treat any result >= NUM_LEDS as "no such pixel".
uint16_t XY(uint8_t x, uint8_t y) {
  // x and y are unsigned, so only the upper bounds can be violated.
  if (x >= kMatrixWidth || y >= kMatrixHeight)
    return 0xFFFF;  // explicit form of the value the old `return -1` produced
  if (y & 1)
    x = kMatrixWidth - 1 - x;
  // Do the row multiplication in 16-bit unsigned arithmetic so it cannot
  // overflow a 16-bit signed int on AVR for larger matrices.
  return static_cast<uint16_t>(y) * kMatrixWidth + x;
}

// Arduino start-up hook: registers the leds[] frame buffer with FastLED as a
// WS2812B strip of NUM_LEDS pixels with GRB colour ordering.
void setup() {
  // Data pin the LED strip is attached to.
  const uint8_t kDataPin = 3;
  FastLED.addLeds<WS2812B, kDataPin, GRB>(leds, NUM_LEDS);
}

// Arduino main loop: renders and displays one frame of the animation.
//
// Each frame plots 128 anti-aliased points along a curve whose x coordinate
// follows cos16() and whose y coordinate follows sin16(). Every point gets a
// rainbow-palette colour two hue steps after the previous one. Between frames
// the starting hue advances by 3 and the x phase by 512, so the colours and
// the shape of the curve both drift over time. The y phase is never changed.
void loop() {
  static uint16_t startHue = 0;
  static uint16_t xPhase = 0;
  static uint16_t yPhase = 64 * 256;
  uint8_t pixelHue = startHue;  // deliberately keeps only the low 8 bits
  for (uint16_t i = 0; i < 128; i++) {
    const uint16_t theta = i * 2 * 256;
    // Shift the signed trig result into 0..65534. The addition is done in
    // unsigned arithmetic: with a 16-bit int (AVR) the signed sum
    // 32767 + positive value would overflow, which is undefined behaviour.
    uint16_t x = 32767u + cos16(xPhase + theta);
    uint16_t y = 32767u + sin16(yPhase + theta);
    // Scale to 8.8 fixed-point matrix coordinates spanning 0..(size - 1).
    // Multiplying before shifting avoids the truncated divisor
    // 256 / (size - 1), which overshoots the last row/column and would
    // divide by zero for a size of 1.
    x = (static_cast<uint32_t>(x) * (kMatrixWidth - 1)) >> 8;
    y = (static_cast<uint32_t>(y) * (kMatrixHeight - 1)) >> 8;
    CRGB col = ColorFromPalette(RainbowColors_p, pixelHue, 255, LINEARBLEND);
    wu_pixel(x, y, &col);
    pixelHue += 2;
  }
  FastLED.show();
  // Start the next frame from black; the alternative below leaves trails.
  FastLED.clear();
  // fadeToBlackBy(leds, NUM_LEDS, 32);
  startHue += 3;
  xPhase += 512;
}

void wu_pixel(uint16_t x, uint16_t y, CRGB* col) {
  // extract the fractional parts and derive their inverses
  uint8_t xx = x & 0xff, yy = y & 0xff, ix = 255 - xx, iy = 255 - yy;
  // calculate the intensities for each affected pixel
  #define WU_WEIGHT(a, b) ((uint8_t)(((a) * (b) + (a) + (b)) >> 8))
  uint8_t wu[4] = {WU_WEIGHT(ix, iy), WU_WEIGHT(xx, iy),
                   WU_WEIGHT(ix, yy), WU_WEIGHT(xx, yy)
                  };
  #undef WU_WEIGHT
  // multiply the intensities by the colour, and saturating-add them to the pixels
  for (uint8_t i = 0; i < 4; i++) {
    uint8_t local_x = (x >> 8) + (i & 1);
    uint8_t local_y = (y >> 8) + ((i >> 1) & 1);
    uint16_t xy = XY(local_x, local_y);
    if (xy > NUM_LEDS) continue;
    leds[xy].r = qadd8(leds[xy].r, col->r * wu[i] >> 8);
    leds[xy].g = qadd8(leds[xy].g, col->g * wu[i] >> 8);
    leds[xy].b = qadd8(leds[xy].b, col->b * wu[i] >> 8);
  }
}