#include <FastLED.h>

// Matrix dimensions in pixels. XY() rejects coordinates outside these bounds.
#define kMatrixWidth 16
#define kMatrixHeight 16
// Total number of LEDs on the matrix (width * height).
#define NUM_LEDS ((kMatrixWidth) * (kMatrixHeight))

// Frame buffer: one CRGB per LED, indexed by the value XY() returns.
// Registered with FastLED in setup() and pushed to the strip in loop().
CRGB leds[NUM_LEDS];

// Translate matrix coordinates into an index into leds[].
//
// Rows are laid out one after another, and odd-numbered rows are mirrored
// horizontally (serpentine layout). Coordinates outside the matrix yield
// 65535 (i.e. (uint16_t)-1), which callers detect with `index >= NUM_LEDS`.
uint16_t XY(int8_t x, int8_t y) {
  const bool columnInRange = (x >= 0) && (x < kMatrixWidth);
  const bool rowInRange = (y >= 0) && (y < kMatrixHeight);
  if (!(columnInRange && rowInRange)) {
    return static_cast<uint16_t>(-1);  // out-of-bounds sentinel
  }

  // Odd rows run in the opposite direction, so flip the column there.
  const int8_t column = (y & 1) ? (kMatrixWidth - 1 - x) : x;
  return column + (y * kMatrixWidth);
}

// One-time initialisation: register the leds[] frame buffer with FastLED as a
// WS2812B strip on data pin 3 using GRB colour order.
void setup() {
  const uint8_t dataPin = 3;  // compile-time constant, required as a template argument
  FastLED.addLeds<WS2812B, dataPin, GRB>(leds, NUM_LEDS);
}

void loop() {
  static uint16_t startHue = 0;
  static uint16_t xPhase = 0;
  static int16_t xPhaseMul = 2 * 256;
  static int16_t yPhaseMul = 256;
  static int8_t yPhaseMulStep = 3;
  static int8_t xPhaseMulStep = 2;

  startHue += 512;
  xPhase += 512;

  yPhaseMul += yPhaseMulStep;
  if (yPhaseMul <= 96)
    yPhaseMulStep = random8(4) + 1;
  if (yPhaseMul >= 8 * 128)
    yPhaseMulStep = -random8(4) - 1;

  xPhaseMul += xPhaseMulStep;
  if (xPhaseMul <= 96)
    xPhaseMulStep = random8(4) + 1;
  if (xPhaseMul >= 8 * 128)
    xPhaseMulStep = -random8(4) - 1;

  uint16_t pixelHue = startHue;
  for (uint16_t i = 0; i < 384; i++) {
    uint16_t x = 32767 + cos16(xPhase + i * xPhaseMul);
    uint16_t y = 32767 + sin16(i * yPhaseMul);
    x /= 256 / (kMatrixWidth - 1);
    y /= 256 / (kMatrixHeight - 1);
    CRGB col = ColorFromPalette(RainbowStripeColors_p, pixelHue >> 8, 255, LINEARBLEND);
    wu_pixel(x, y, col);
    pixelHue += 128;
  }
  FastLED.show();
  FastLED.clear();
}

void wu_pixel(uint16_t x, uint16_t y, CRGB &col) {
  // extract the fractional parts and derive their inverses
  uint8_t xx = x & 0xff, yy = y & 0xff, ix = 255 - xx, iy = 255 - yy;
  // calculate the intensities for each affected pixel
  #define WU_WEIGHT(a, b) ((uint8_t)(((a) * (b) + (a) + (b)) >> 8))
  uint8_t wu[4] = {WU_WEIGHT(ix, iy), WU_WEIGHT(xx, iy),
                   WU_WEIGHT(ix, yy), WU_WEIGHT(xx, yy)
                  };
  #undef WU_WEIGHT
  // multiply the intensities by the colour, and saturating-add them to the pixels
  for (uint8_t i = 0; i < 4; i++) {
    uint8_t local_x = (x >> 8) + (i & 1);
    uint8_t local_y = (y >> 8) + ((i >> 1) & 1);
    uint16_t xy = XY(local_x, local_y);
    if (xy >= NUM_LEDS) continue;
    leds[xy] += col % wu[i];
  }
}