    .program tcd1304_pio

  ; Generates the SH / ICG gating sequence for one TCD1304 frame, then idles
  ; with ICG high until the program wraps and the sequence repeats.
  ;
  ; `set pins` drives two pins, counted from the SET base pin:
  ;   bit 0 = SH
  ;   bit 1 = ICG
  ; MCLK is not driven here; the program only synchronises to it by waiting
  ; for GPIO 17 to read high.
  ;
  ; X and Y are the outer / inner counters of the idle delay.
  ; All durations below are in PIO cycles (one instruction = 1 cycle + [delay]).

    set pins, 0b10 [19]     ; ICG high, SH low for 20 cycles
    wait 1 gpio 17          ; align the ICG falling edge with GPIO 17 high
    set pins, 0b00          ; ICG low, SH low

    set pins, 0b01 [7]      ; SH high for 8 cycles while ICG is low

    set pins, 0b00 [11]     ; SH low again, ICG still low, for 12 cycles

    set pins, 0b10          ; ICG high: start of the idle period
    set x, 31
    ; Outer loop: 32 iterations of (1 + 1024 + 16) cycles = 33312 cycles total.
outer_loop:
        set y, 31
        ; Inner loop: 32 iterations of 32 cycles = 1024 cycles.
inner_loop:
            jmp y--, inner_loop [31]
        jmp x--, outer_loop [15]

  % c-sdk {
  // Configure state machine `sm` of `pio` to run the tcd1304_pio program loaded
  // at `offset`, and start it.
  //
  //   pin - first of PIN_COUNT consecutive GPIOs handed to the PIO as SET/OUT
  //         pins (pin + 0 = SH, pin + 1 = ICG, matching the program's bit order).
  static inline void tcd1304_pio_init(PIO pio, uint sm, uint offset, uint pin) {
    const int PIN_COUNT = 2;
    // One mask bit per driven pin, derived from PIN_COUNT so the two cannot drift.
    const uint PIN_MASK = (1u << PIN_COUNT) - 1u;
    // the clock divider (number of CPU cycles that each PIO cycle takes)
    float div = 66.5f;

    // Make the pins outputs and drive them high before the program takes over.
    pio_sm_set_set_pins(pio, sm, pin, PIN_COUNT);
    pio_sm_set_pindirs_with_mask(pio, sm, 0xFFFFFFFF, PIN_MASK << pin);
    pio_sm_set_pins_with_mask(pio, sm, 0xFFFFFFFF, PIN_MASK << pin);
    // Route each pin to this PIO block.
    for (int i = 0; i < PIN_COUNT; i++) {
        pio_gpio_init(pio, pin + i);
    }

    pio_sm_config c = tcd1304_pio_program_get_default_config(offset);
    sm_config_set_out_shift(&c, true, false, 2); // shift to right, no autopull
    sm_config_set_out_pins(&c, pin, PIN_COUNT);  // OUT group: PIN_COUNT pins starting at `pin`
    sm_config_set_set_pins(&c, pin, PIN_COUNT);  // SET group: PIN_COUNT pins starting at `pin`
    sm_config_set_clkdiv(&c, div);
    pio_sm_init(pio, sm, offset, &c); // Load config
    pio_sm_set_enabled(pio, sm, true); // Start state machine
  }
  %}


  .program tcd1304_pio_mclk

  ; Free-running square wave on the single SET pin (the MCLK output).
  ; Each instruction takes one PIO cycle, so the output period is two PIO
  ; cycles: one cycle low, one cycle high. No scratch registers are used.

  .wrap_target
    set pins, 0b0           ; output low
    set pins, 0b1           ; output high
  .wrap

  % c-sdk {
  // Configure state machine `sm` of `pio` to run the tcd1304_pio_mclk program
  // loaded at `offset`, and start it.
  //
  //   pin - the single GPIO that outputs the clock.
  static inline void tcd1304_pio_mclk_init(PIO pio, uint sm, uint offset, uint pin) {
    const int PIN_COUNT = 1;
    // One mask bit per driven pin. The previous hard-coded 0b11 also claimed
    // `pin + 1` as a high output even though only one pin belongs to this program.
    const uint PIN_MASK = (1u << PIN_COUNT) - 1u;
    // the clock divider (number of CPU cycles that each PIO cycle takes)
    float div = 33.25f;

    // Make the pin an output and drive it high before the program takes over.
    pio_sm_set_set_pins(pio, sm, pin, PIN_COUNT);
    pio_sm_set_pindirs_with_mask(pio, sm, 0xFFFFFFFF, PIN_MASK << pin);
    pio_sm_set_pins_with_mask(pio, sm, 0xFFFFFFFF, PIN_MASK << pin);
    // Route the pin to this PIO block.
    for (int i = 0; i < PIN_COUNT; i++) {
        pio_gpio_init(pio, pin + i);
    }

    pio_sm_config c = tcd1304_pio_mclk_program_get_default_config(offset);
    sm_config_set_out_shift(&c, true, false, 1); // shift to right, no autopull
    sm_config_set_out_pins(&c, pin, PIN_COUNT);  // OUT group: PIN_COUNT pin(s) starting at `pin`
    sm_config_set_set_pins(&c, pin, PIN_COUNT);  // SET group: PIN_COUNT pin(s) starting at `pin`
    sm_config_set_clkdiv(&c, div);
    pio_sm_init(pio, sm, offset, &c); // Load config
    pio_sm_set_enabled(pio, sm, true); // Start state machine
  }
  %}
#include <stdio.h>
#include "pico/stdlib.h"

#include "hardware/clocks.h"

// For ADC input:
#include "hardware/adc.h"
#include "hardware/dma.h"

#include "segment.pio.h"
#include "mclk.pio.h"

#define PWM_CLK 20000000.0f
#define SYS_CLK 133000000
#define BaseOutPin 0

// define ml io num
// Parenthesised so the product survives being used next to /, % or a cast.
#define NUM_INPUT (200 * 175)
#define NUM_OUTPUT 27
#define TENSOR_ARENA_SIZE 100352

#define MCLK_PIN 17
#define SH_PIN 15

/////////////////////////// ADC configuration ////////////////////////////////
// ADC Channel and pin
#define ADC_CHAN 0
#define ADC_PIN 26
// Number of samples per read
#define NUM_SAMPLES 3694

// DMA channels for sampling ADC (VGA driver uses 0 and 1)
int sample_chan = 2;
int control_chan = 3;

// Here's where we'll have the DMA channel put ADC samples
uint8_t sample_array[NUM_SAMPLES];

// Holds the address of sample_array so the control channel can copy it back
// into the sample channel's write-address register.
uint8_t * sample_address_pointer = &sample_array[0] ;

// 200 rows of 175 values each (indexed as round_cnt * 175 + column).
float input_array[NUM_INPUT];

uint8_t round_cnt = 0;  // maximum 200

  // MCLK is pin 2
  // ICG is pin 1
  // SH is pin 0

// PIO block used for both TCD1304 programs.
PIO pio = pio0;
// NOTE(review): smOut is not referenced anywhere in the visible code — confirm it is still needed.
uint smOut = 0;

// Fill every element of input_array with -1.0f.
//
// memset() cannot do this: it converts its value argument to a single byte,
// so passing -1.0f stores 0xFF in every byte and each float reads back as NaN.
void init_input_array() {
  const size_t count = sizeof(input_array) / sizeof(input_array[0]);
  for (size_t i = 0; i < count; i++) {
    input_array[i] = -1.0f;
  }
}

void setup() {
  // Serial.begin(115200);

  gpio_set_dir(18, GPIO_IN);
  gpio_set_dir(19, GPIO_IN);

  Serial1.println("somthing is ok");
  // ============================== ADC CONFIGURATION ==========================
  // Init GPIO for analogue use: hi-Z, no pulls, disable digital input buffer.

  // Initialize the ADC harware
  // (resets it, enables the clock, spins until the hardware is ready)

  // Select analog mux input (0...3 are GPIO 26, 27, 28, 29; 4 is temp sensor)

  // Setup the FIFO
    true,   // Write each completed conversion to the sample FIFO
    true,   // Enable DMA data request (DREQ)
    1,      // DREQ (and IRQ) asserted when at least 1 sample present
    false,  // We won't see the ERR bit because of 8 bit reads; disable.
    true    // Shift each sample to 8 bits when pushing to FIFO

  // Divisor of 0 -> full speed. Free-running capture with the divider is
  // equivalent to pressing the ADC_CS_START_ONCE button once per `div + 1`
  // cycles (div not necessarily an integer). Each conversion takes 96
  // cycles, so in general you want a divider of 0 (hold down the button
  // continuously) or > 95 (take samples less frequently than 96 cycle
  // intervals). This is all timed by the 48 MHz ADC clock. This is setup
  // to grab a sample at 10kHz (48Mhz/10kHz - 1)
  // ============================== ADC DMA CONFIGURATION =========================

  // Channel configurations
  dma_channel_config c2 = dma_channel_get_default_config(sample_chan);
  dma_channel_config c3 = dma_channel_get_default_config(control_chan);

  // Reading from constant address, writing to incrementing byte addresses
  channel_config_set_transfer_data_size(&c2, DMA_SIZE_8);
  channel_config_set_read_increment(&c2, false);
  channel_config_set_write_increment(&c2, true);
  // Pace transfers based on availability of ADC samples
  channel_config_set_dreq(&c2, DREQ_ADC);
  // Configure the channel
                        &c2,            // channel config
                        sample_array,   // dst
                        &adc_hw->fifo,  // src
                        NUM_SAMPLES,    // transfer count
                        false           // don't start immediately

  channel_config_set_transfer_data_size(&c3, DMA_SIZE_32);  // 32-bit txfers
  channel_config_set_read_increment(&c3, false);            // no read incrementing
  channel_config_set_write_increment(&c3, false);           // no write incrementing
  channel_config_set_chain_to(&c3, sample_chan);            // chain to sample chan

    control_chan,                         // Channel to be configured
    &c3,                                  // The configuration we just created
    &dma_hw->ch[sample_chan].write_addr,  // Write address (channel 0 read address)
    &sample_address_pointer,              // Read address (POINTER TO AN ADDRESS)
    1,                                    // Number of transfers, in this case each is 4 byte
    false                                 // Don't start immediately.

  // model loading
  // ml.begin(model_data);

  set_sys_clock_khz(133000, true);
  uint offset_sh_icg = pio_add_program(pio, &tcd1304_pio_program);
  uint offset_mclk = pio_add_program(pio, &tcd1304_pio_mclk_program);
  tcd1304_pio_init(pio, 0, offset_sh_icg, SH_PIN);     // pio, sm_id, program, base_pin
  tcd1304_pio_mclk_init(pio, 1, offset_mclk, MCLK_PIN);

void loop() {
  // block until ICG low
  while (gpio_get(18)) {
    __asm__ volatile("nop");
  // block until ICG high
  while (!gpio_get(18)) {
    __asm__ volatile("nop");
  // block until MCLK low
  while (gpio_get(19)) {
    __asm__ volatile("nop");
  // block until MCLK high
  while (!gpio_get(19)) {
    __asm__ volatile("nop");

  // start sampling
  // stop sampling and restart the sample channel
  // using only 3500 of samples, from 28 to 3527
  for (int i = 0; i < 3500; i++) {
    input_array[round_cnt * 175 + i / 20] += sample_array[i + 28];
  for (int i = 0; i < 175; i++) {
    input_array[round_cnt * 175 + i] /= 20 * 255;
  for (int i = 0; i < 3694; i++) {

  round_cnt += 1;
  if (round_cnt >= 199) {
      for (int i = 0; i < 27; i++){
      Serial.print(" ");
    round_cnt = 0;