#import<FastLED.h>

// Matrix geometry. NUM_LEDS is derived from the dimensions so the three
// values cannot drift apart when the panel size changes.
#define NUM_COLS 16
#define NUM_ROWS 16
#define NUM_LEDS (NUM_COLS * NUM_ROWS)
// XY() computes y * WIDTH + x, i.e. WIDTH is the number of pixels in one row
// (the column count) and HEIGHT is the number of rows.
#define WIDTH NUM_COLS
#define HEIGHT NUM_ROWS

// One element more than the visible pixel count: XY() returns NUM_LEDS for
// out-of-range coordinates, so a write through that index lands in this
// spare slot instead of past the end of the array.
CRGB leds[NUM_LEDS + 1];

void setup() {
  Serial.begin(115200);
  FastLED.setCorrection(UncorrectedColor);
  FastLED.setTemperature(UncorrectedTemperature);
  LEDS.addLeds<WS2812,3,GRB>(leds,256);
}

// Main loop: renders one frame of the palette gradient into `leds` and
// pushes it to the strip.
void loop() {
  // Multiplier handed to sup() as its lineWidth argument.
  const uint8_t hueScale = 16;

  sup(leds, hueScale, HeatColors_p, LINEARBLEND);
  FastLED.show();
}


// Renders one frame of a moving palette gradient into `buffer`.
//
// Every pixel gets a 32-bit "hue" that changes by a fixed step per column
// (xHueDelta) and per row (yHueDelta); the steps are the cosine/sine of a
// direction angle. The hue is then looked up in `palette`.
//
//   buffer     destination pixels, indexed through XY(); it must be valid
//              for every index XY() can return, including NUM_LEDS.
//   lineWidth  multiplier applied to the sin16()/cos16() direction terms;
//              larger values make the hue change faster from pixel to pixel.
//   palette    16-entry palette the hues are mapped through.
//   blendType  passed on to ColorFromPaletteExtended().
void sup(CRGB *buffer, uint8_t lineWidth, CRGBPalette16 palette, TBlendType blendType) {
  // Direction angle used while rotation is disabled. sin16()/cos16() take a
  // 16-bit angle, so 16383 (= 32767 / 2) is just under a quarter turn.
  const uint16_t fixedAngle = 16383;
  const float rotationSpeedFloat = 0.0; // -3.0 to 3.0 is ok; 0 for only translation

  // Oscillating translation speed in [-2048, 2048), i.e. +/- 2^11
  // (higher is too fast).
  const int16_t mappedTranslationSpeed = map(beatsin88(256 * 5), 0, 65536, -2048, 2048);

  const uint32_t ms = millis();

  uint16_t angle = fixedAngle;
  if (rotationSpeedFloat != 0) {
    // Go through int32_t and then keep the low 16 bits so the angle wraps
    // around; casting the rounded value straight to int16_t is out of range
    // as soon as it exceeds 32767.
    angle = (uint16_t)(int32_t)round(ms * rotationSpeedFloat);
  }

  // The deltas may be negative; they are stored as unsigned so all of the
  // hue arithmetic below wraps modulo 2^32.
  const uint32_t yHueDelta = (int32_t)sin16(angle) * lineWidth;
  const uint32_t xHueDelta = (int32_t)cos16(angle) * lineWidth;

  // NOTE(review): this multiplies absolute time by a time-varying speed, so
  // the offset does not advance smoothly as millis() grows - confirm that
  // this is the intended motion.
  const uint32_t startHue = ms * mappedTranslationSpeed;

  // Back the start hue up by half the matrix (plus one pixel) in each
  // direction before stepping across it.
  uint32_t lineStartHue = startHue - (HEIGHT + 2) / 2 * yHueDelta;
  for (uint8_t y = 0; y < HEIGHT; y++) {
    uint32_t pixelHue = lineStartHue - (WIDTH + 2) / 2 * xHueDelta;
    lineStartHue += yHueDelta;
    for (uint8_t x = 0; x < WIDTH; x++) {
      // The palette index is 16 bits wide: bits 7..22 of the hue are used.
      buffer[XY(x, y)] = ColorFromPaletteExtended(palette, pixelHue >> 7, 255, blendType);
      pixelHue += xHueDelta;
    }
  }
}

// from: https://github.com/FastLED/FastLED/pull/202
//
// Palette lookup with a 16-bit index.
//
//   pal         16-entry palette to sample.
//   index       bits 15..12 select the palette entry, bits 11..4 are the
//               8-bit blend position towards the following entry (entry 15
//               blends towards entry 0); bits 3..0 are ignored.
//   brightness  255 leaves the colour untouched, anything else scales it
//               with nscale8x3().
//   blendType   NOBLEND returns the selected entry without interpolation.
//
// Returns the resulting colour.
CRGB ColorFromPaletteExtended(const CRGBPalette16& pal, uint16_t index, uint8_t brightness, TBlendType blendType) {
  const uint8_t slot = (index >> 12);           // which of the 16 entries
  const uint8_t fraction = (uint8_t)(index >> 4); // position between entries

  const CRGB* lower = &(pal[0]) + slot;
  uint8_t r = lower->red;
  uint8_t g = lower->green;
  uint8_t b = lower->blue;

  if (fraction && (blendType != NOBLEND)) {
    // Neighbouring entry, wrapping from the last slot back to the first.
    const CRGB* upper = (slot == 15) ? &(pal[0]) : (lower + 1);

    // Weight the lower entry by the remaining distance.
    const uint8_t lowerWeight = 255 - fraction;
    r = scale8_LEAVING_R1_DIRTY(r, lowerWeight);
    g = scale8_LEAVING_R1_DIRTY(g, lowerWeight);
    b = scale8_LEAVING_R1_DIRTY(b, lowerWeight);

    // Weight the neighbouring entry by the blend position.
    uint8_t rUpper = upper->red;
    uint8_t gUpper = upper->green;
    uint8_t bUpper = upper->blue;
    rUpper = scale8_LEAVING_R1_DIRTY(rUpper, fraction);
    gUpper = scale8_LEAVING_R1_DIRTY(gUpper, fraction);
    bUpper = scale8_LEAVING_R1_DIRTY(bUpper, fraction);
    cleanup_R1();

    // The two weights add up to 255, so these sums can't overflow and no
    // qadd8 is needed.
    r += rUpper;
    g += gUpper;
    b += bUpper;
  }

  if (brightness != 255) {
    // nscale8x3_video(r, g, b, brightness);
    nscale8x3(r, g, b, brightness);
  }
  return CRGB(r, g, b);
}


// Converts matrix coordinates into an index for a row-major pixel array
// (row y starts at y * WIDTH). Coordinates outside the matrix yield
// NUM_LEDS, one past the last visible pixel.
uint16_t XY(uint8_t x, uint8_t y) {
  const bool insideMatrix = (x < WIDTH) && (y < HEIGHT);
  return insideMatrix ? (y * WIDTH + x) : NUM_LEDS;
}