#include <Arduino.h>
#include <avr/io.h>
#include <stdlib.h>

// Define constants and function prototypes
#define fe25519_add avrnacl_fe25519_add
#define fe25519_sub avrnacl_fe25519_sub
#define fe25519_red avrnacl_fe25519_red

/* Field element held as 32 raw bytes; the code in this file treats v[0] as the
 * least-significant byte (see the 121666 constant, stored as 0x42 0xDB 0x01). */
typedef struct
{
  unsigned char v[32];
} fe25519;

// Routines implemented outside this file and linked with C linkage. The three
// fe25519_* names below are remapped to avrnacl_* symbols by the macros above.
// NOTE(review): the behaviour notes here are inferred from names and call
// sites only - confirm against the library that provides these symbols.
extern "C"
{
  // Field-element arithmetic; r receives the result computed from x and y.
  void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y);
  void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y);

  // Folds the buffer C into the field element r. Every caller in this file
  // that builds C from a product passes a 64-byte array.
  void fe25519_red(fe25519 *r, unsigned char *C);

  // Big-integer helpers. Callers pass a 64-byte r to the multiply/square
  // routines and a 32-byte r to bigint_subp.
  char bigint_subp(unsigned char* r, const unsigned char* a);
  char bigint_square256(unsigned char* r, const unsigned char* a);
  char bigint_mul256(unsigned char* r, const unsigned char* a, const unsigned char* b);
  void bigint_mul121666(unsigned char *r, const unsigned char *x);
}

void fe25519_freeze(fe25519 *r);
void fe25519_unpack(fe25519 *r, const unsigned char x[32]);
void fe25519_pack(unsigned char r[32], const fe25519 *x);
void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b);
void fe25519_setone(fe25519 *r);
void fe25519_setzero(fe25519 *r);
void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y);
void fe25519_square(fe25519 *r, const fe25519 *x);
void fe25519_invert(fe25519 *r, const fe25519 *x);
void work_cswap(fe25519 *work, char b);
void mladder(fe25519 *xr, fe25519 *zr, const unsigned char s[32]);
void fe25519_mul121666(fe25519 *r, const fe25519 *x);

int crypto_scalarmult_curve25519(unsigned char *r, const unsigned char *s, const unsigned char *p);

// The constant 121666 (0x01DB42) as a field element, least-significant byte
// first; the bytes not listed are zero-initialised.
static const fe25519 _121666 = {{0x42, 0xDB, 0x01}};

// Runs once at start-up: performs a single scalar multiplication with fixed
// inputs and writes the 32 result bytes to the serial port in hex.
void setup()
{
  Serial.begin(9600);

  unsigned char n[32] = {0x05}; // scalar: first byte 5, all other bytes zero
  unsigned char p[32] = {0x09}; // input x-coordinate: first byte 9, all other bytes zero
  unsigned char q[32];          // receives the result

  crypto_scalarmult_curve25519(q, n, p);

  Serial.print("Result: ");
  const unsigned char *cursor = q;
  const unsigned char *const stop = q + 32;
  while (cursor != stop)
  {
    Serial.print(*cursor, HEX);
    Serial.print(" ");
    ++cursor;
  }
  Serial.println();
}

// Nothing to do repeatedly: all work happens once in setup().
void loop()
{
}

//---------------------------------------Define your functions here---------------------------------------
/*
 * Curve25519 scalar multiplication with serial tracing.
 *
 *   r - output, 32 bytes: packed x-coordinate of the result
 *   s - input, 32 bytes: scalar (not modified; a clamped copy is used)
 *   p - input, 32 bytes: x-coordinate of the point to multiply
 *
 * Always returns 0.
 */
int crypto_scalarmult_curve25519(unsigned char *r, const unsigned char *s, const unsigned char *p)
{
  unsigned char e[32];
  unsigned char i;
  for (i = 0; i < 32; i++)
  {
    e[i] = s[i];
  }

  // Clamp the working copy of the scalar as Curve25519 requires: clear the
  // three low bits, clear bit 255 and set bit 254. mladder() starts at bit 254
  // and relies on this shape; without the clamp the "after modification" dump
  // below was identical to the input and the ladder ran on the raw scalar.
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;

  Serial.println();

  // Dump the scalar exactly as supplied by the caller.
  Serial.print("The Scalar multiplication value Before the modification (s): ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print(s[k], HEX);
    Serial.print(" ");
  }
  Serial.println();

  // Dump the clamped scalar that is actually fed to the ladder.
  Serial.print("The Scalar multiplication value after modification (e): ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print(e[k], HEX);
    Serial.print(" ");
  }
  Serial.println();

  fe25519 t;
  fe25519 z;
  fe25519_unpack(&t, p);   // load the input x-coordinate
  mladder(&t, &z, e);      // ladder leaves the result as the pair (t, z)
  fe25519_invert(&z, &z);  // z <- 1/z
  fe25519_mul(&t, &t, &z); // t <- t/z
  fe25519_pack(r, &t);     // freeze and store the 32 output bytes
  return 0;
}

// Loads 32 bytes from x into r and clears the top bit of the last byte.
void fe25519_unpack(fe25519 *r, const unsigned char x[32])
{
  unsigned char *dst = r->v;
  for (unsigned char idx = 0; idx != 32; ++idx)
  {
    dst[idx] = x[idx];
  }
  dst[31] &= 0x7F; // keep only the low 7 bits of the final byte
}

void mladder(fe25519 *xr, fe25519 *zr, const unsigned char s[32])
{
  Serial.println();
  fe25519 work[5];
  unsigned char bit, prevbit = 0;
  unsigned char swap;
  signed char j = 6;
  signed char i;

  work[0] = *xr;

  //----------------- Print--------------------------------
  Serial.print("Intailization = work[0] = Xg = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(work[0].v[i], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //----------------- Print--------------------------------
  fe25519_setone(work + 1);
  Serial.print("Intailization = work+1 = X0 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 1)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------
  fe25519_setzero(work + 2);
  Serial.print("Intailization = work+2 = Z0 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 2)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------
  work[3] = *xr;
  //----------------- Print--------------------------------
  Serial.print("Intailization = work[3] = X1 = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(work[3].v[i], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //----------------- Print--------------------------------
  fe25519_setone(work + 4);
  Serial.print("Intailization = work+4 = Z1 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 4)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------

  for (i = 31; i >= 0; i--)
  {
    while (j >= 0)
    {
      Serial.print(" ============ Inside for loop =================> Iteration: ");
      Serial.print(" i = ");
      Serial.print(i);
      Serial.print(", j = ");
      Serial.println(j);
      //---------------------
      bit = 1 & (s[i] >> j);
      swap = bit ^ prevbit;
      prevbit = bit;
      //--------------------
      Serial.print("bit = 1 & (s[i] >> j) = ");
      Serial.println(bit, HEX);
      Serial.print("swap = bit ^ prevbit = ");
      Serial.println(swap, HEX);
      Serial.print("prevbit = bit = ");
      Serial.println(prevbit, HEX);

      Serial.println(" //------------------------------//------------------------------// ");
      // Print values of work before work_cswap
      Serial.println("Before work_cswap - work: ");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }

      // Print value of swap before work_cswap
      Serial.print("Before work_cswap - swap: ");
      Serial.println(swap, HEX);
      //****************************************************************************************************************
      work_cswap(work, swap);
      //****************************************************************************************************************
      Serial.println("After work_cswap - work: ");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }

      Serial.println(" //------------------------------//------------------------------// ");

      // Print values of work before ladderstep
      Serial.println("Before ladderstep - work:");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }
      //****************************************************************************************************************
      ladderstep(work);
      //****************************************************************************************************************
      Serial.println("After ladderstep - work:");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }
      //------------------------------//------------------------------//------------------------------//------------------------------
      Serial.print(" ============ ============ ============ ============ ============ ============ Inside for loop =================> End of Iteration: ");
      Serial.print(" i = ");
      Serial.print(i);
      Serial.print(", j = ");
      Serial.println(j);
      Serial.println();
      j -= 1;
    }
    j = 7;
  }
  *xr = work[1];
  *zr = work[2];
}

// Sets r to the value one: first byte 1, remaining 31 bytes 0.
void fe25519_setone(fe25519 *r)
{
  unsigned char *bytes = r->v;
  bytes[0] = 1;
  for (unsigned char idx = 1; idx != 32; ++idx)
  {
    bytes[idx] = 0;
  }
}

// Clears all 32 bytes of r.
void fe25519_setzero(fe25519 *r)
{
  unsigned char *bytes = r->v;
  unsigned char remaining = 32;
  while (remaining != 0)
  {
    --remaining;
    bytes[remaining] = 0;
  }
}

// Conditionally exchanges the two ladder pairs: when b is 1, work[1] <-> work[3]
// and work[2] <-> work[4]; when b is 0 the array is left unchanged. The exchange
// is built from fe25519_cmov calls through a scratch element, so the same
// sequence of operations runs for either value of b.
void work_cswap(fe25519 *work, char b)
{
  fe25519 *const first_x = &work[1];
  fe25519 *const first_z = &work[2];
  fe25519 *const second_x = &work[3];
  fe25519 *const second_z = &work[4];

  fe25519 scratch;
  fe25519_setzero(&scratch);

  // X coordinates
  fe25519_cmov(&scratch, first_x, b);
  fe25519_cmov(first_x, second_x, b);
  fe25519_cmov(second_x, &scratch, b);

  // Z coordinates
  fe25519_cmov(&scratch, first_z, b);
  fe25519_cmov(first_z, second_z, b);
  fe25519_cmov(second_z, &scratch, b);
}

// Conditional move without branching on b: with b == 1 every byte of x is
// copied into r, with b == 0 r is left untouched.
void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b)
{
  // 0x00 when b is 0, 0xFF when b is 1 (two's-complement negation, low byte).
  const unsigned char mask = (unsigned char)(0u - b);
  for (unsigned char idx = 0; idx < 32; idx++)
  {
    const unsigned char diff = (unsigned char)(x->v[idx] ^ r->v[idx]);
    r->v[idx] ^= (unsigned char)(mask & diff);
  }
}

void fe25519_pack(unsigned char r[32], const fe25519 *x)
{
  unsigned char i;
  fe25519 y = *x;
  fe25519_freeze(&y);
  for (i = 0; i < 32; i++)
  {
    r[i] = y.v[i];
  }
}

/* Reduction modulo 2^255-19.
 * Two identical passes: call bigint_subp into a temporary, then keep the
 * temporary only when bigint_subp returned 0 (cmov flag is 1 - return value).
 * NOTE(review): bigint_subp presumably computes r - p and returns the borrow -
 * confirm against its implementation. */
void fe25519_freeze(fe25519 *r)
{
  fe25519 candidate;
  for (unsigned char pass = 0; pass < 2; pass++)
  {
    const unsigned char flag = bigint_subp(candidate.v, r->v);
    fe25519_cmov(r, &candidate, 1 - flag);
  }
}

/*
 * r = x * y in the field, with serial tracing of the inputs, the 64-byte
 * intermediate product and the final frozen result. r may alias x or y: the
 * product is built in a local buffer before r is written.
 */
void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y)
{
  print_fe25519("Value of x", x);
  print_fe25519("Value of y", y);

  // Plain (non-volatile) buffer: it is handed to bigint_mul256, fe25519_red and
  // print_fe25519_64, all of which take non-volatile pointers. Declaring it
  // volatile made every one of those calls discard the qualifier, which is
  // ill-formed C++ and undefined behaviour when the object is then accessed.
  unsigned char t[64] = {0};

  Serial.print("Memory contents of before multiplication: ");
  for (int i = 0; i < 64; i++)
  {
    Serial.print(t[i], HEX);
    Serial.print(" ");
  }
  Serial.println();

  bigint_mul256(t, x->v, y->v); // 64-byte product of the two 32-byte operands
  print_fe25519_64("Intermidiate multiplication 64 result", t);

  fe25519_red(r, t); // fold the 64-byte product into the 32-byte element r
  print_fe25519_64("Intermidiate multiplication 64 result, after reduction: ", t);

  fe25519_freeze(r); // bring r to its fully reduced form so traces are comparable
  print_fe25519("Intermidiate multiplication 32 result, After freeze: ", r);
}

// r = x * x in the field, tracing the 64-byte intermediate buffer before and
// after fe25519_red and the final frozen result over serial.
void fe25519_square(fe25519 *r, const fe25519 *x)
{
  unsigned char wide[64] = {0};

  bigint_square256(wide, x->v);
  print_fe25519_64("square intermidate Result: ", wide);

  fe25519_red(r, wide);
  print_fe25519_64("square intermidate Result, after reduction: ", wide);

  // Freeze so the printed value is in fully reduced form.
  fe25519_freeze(r);
  print_fe25519("After freeze: ", r);
}

/*
 * One Montgomery-ladder step with serial tracing of every intermediate value.
 *
 * work points to five field elements:
 *   work[0] = x0        (input x-coordinate, read only)
 *   work[1], work[2] = xp, zp   (first running pair, updated in place)
 *   work[3], work[4] = xq, zq   (second running pair, updated in place)
 *
 * The 18 numbered operations below are executed in a fixed order; the extra
 * fe25519_freeze calls only normalise values so the printed traces are stable.
 */
void ladderstep(fe25519 *work)
{
  fe25519 t[2];
  fe25519 *t1 = &t[0];
  fe25519 *t2 = &t[1];

  // Initialize t1 and t2 to zero so the "before" dumps show defined values
  fe25519_setzero(t1);
  fe25519_setzero(t2);

  fe25519 *x0 = work;
  fe25519 *xp = work + 1;
  fe25519 *zp = work + 2;
  fe25519 *xq = work + 3;
  fe25519 *zq = work + 4;

  // Print initial values
  Serial.println(" ");
  Serial.println(" =======> Inside the (ladderstep) function, Initial values:");
  print_fe25519("t1", t1);
  print_fe25519("t2", t2);
  print_fe25519("x0", x0);
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  // 1. t1 = xq + zq
  Serial.println(" 1. ------------------------------------------------> Before fe25519_add(t1, xq, zq):");
  print_fe25519("t1", t1);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  fe25519_add(t1, xq, zq);
  Serial.println("After fe25519_add(t1, xq, zq) and before fe25519_freeze : ");
  print_fe25519("t1", t1);

  fe25519_freeze(t1);
  Serial.println("After freeze:");
  print_fe25519("t1", t1);

  // 2. xq = xq - zq
  Serial.println(" 2. ------------------------------------------------> Before fe25519_sub(xq, xq, zq):");
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  fe25519_sub(xq, xq, zq);

  Serial.println("After fe25519_sub(xq, xq, zq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  // 3. zq = xp + zp
  Serial.println(" 3. ------------------------------------------------> Before fe25519_add(zq, xp, zp):");
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);
  print_fe25519("The result zq", zq);

  fe25519_add(zq, xp, zp);

  Serial.println("After fe25519_add(zq, xp, zp):");
  print_fe25519("zq", zq);

  // 4. xp = xp - zp
  Serial.println(" 4. ------------------------------------------------> Before fe25519_sub(xp, xp, zp):");
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);

  fe25519_sub(xp, xp, zp);

  Serial.println("After fe25519_sub(xp, xp, zp):");
  print_fe25519("xp", xp);

  // 5. t1 = t1 * xp
  Serial.println(" 5. ------------------------------------------------> Before fe25519_mul(t1, t1, xp):");
  print_fe25519("xp", xp);
  print_fe25519("t1", t1);
  fe25519_mul(t1, t1, xp);
  Serial.println("After fe25519_mul(t1, t1, xp):");
  print_fe25519("t1", t1);

  // 6. xq = xq * zq
  Serial.println(" 6. ------------------------------------------------> Before fe25519_mul(xq, xq, zq):");
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);
  fe25519_mul(xq, xq, zq);
  Serial.println("After fe25519_mul(xq, xq, zq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  // 7. zq = zq^2
  Serial.println(" 7. ------------------------------------------------> Before fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_square(zq, zq);

  Serial.println("After fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  // 8. xp = xp^2
  Serial.println(" 8. ------------------------------------------------> Before fe25519_square(xp, xp):");
  print_fe25519("xp", xp);

  fe25519_square(xp, xp);

  Serial.println("After fe25519_square(xp, xp):");
  print_fe25519("xp", xp);

  // 9. t2 = zq - xp
  Serial.println(" 9. ------------------------------------------------> Before fe25519_sub(t2, zq, xp):");
  print_fe25519("t2", t2);
  print_fe25519("zq", zq);
  print_fe25519("xp", xp);

  fe25519_sub(t2, zq, xp);

  Serial.println("After fe25519_sub(t2, zq, xp):");
  print_fe25519("t2", t2);

  // 10. zp = 121666 * t2
  Serial.println(" 10. ------------------------------------------------> Before fe25519_mul121666(zp, t2):");
  print_fe25519("t2", t2);
  print_fe25519("zp", zp);

  fe25519_mul121666(zp, t2);

  Serial.println("After fe25519_mul121666(zp, t2):");
  print_fe25519("zp", zp);

  // 11. zp = zp + xp
  Serial.println(" 11. ------------------------------------------------> Before fe25519_add(zp, zp, xp):");
  print_fe25519("zp", zp);
  print_fe25519("xp", xp);

  fe25519_add(zp, zp, xp);

  Serial.println("After fe25519_add(zp, zp, xp):");
  print_fe25519("zp", zp);

  // 12. zp = t2 * zp (operand order is irrelevant for a product)
  Serial.println(" 12. ------------------------------------------------> Before fe25519_mul(zp, t2, zp):");
  print_fe25519("t2", t2);
  print_fe25519("zp", zp);
  fe25519_mul(zp, zp, t2);
  Serial.println("After fe25519_mul(zp, t2, zp):");
  print_fe25519("zp", zp);

  // 13. xp = zq * xp
  Serial.println(" 13. ------------------------------------------------> Before fe25519_mul(xp, zq, xp):");
  print_fe25519("zq", zq);
  print_fe25519("xp", xp);
  fe25519_mul(xp, zq, xp);
  Serial.println("After fe25519_mul(xp, zq, xp):");
  print_fe25519("xp", xp);

  // 14. zq = t1 - xq
  Serial.println(" 14. ------------------------------------------------> Before fe25519_sub(zq, t1, xq):");
  print_fe25519("t1", t1);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  // Operand order now matches the trace labels (t1 - xq). The previous call
  // computed xq - t1; the value is squared in step 15 so the final result is
  // the same, but the intermediate dump did not match what the log claimed.
  fe25519_sub(zq, t1, xq);

  Serial.println("After fe25519_sub(zq, t1, xq):");
  print_fe25519("zq", zq);

  // fe25519_red takes a 64-byte buffer at every other call site (the product
  // buffers in fe25519_mul / fe25519_square / fe25519_mul121666). Passing
  // zq->v directly handed it a 32-byte array, and since zq is the last element
  // of the caller's work[5] the routine read past the end of that array. Feed
  // it a zero-extended copy instead so the upper half is well defined.
  unsigned char wide[64] = {0};
  for (unsigned char k = 0; k < 32; k++)
  {
    wide[k] = zq->v[k];
  }
  fe25519_red(zq, wide);

  Serial.println("After reduction zq by calling fe25519_sub(zq, t1, xq):");
  print_fe25519("zq", zq);

  fe25519_freeze(zq);
  Serial.println("After freeze:");
  print_fe25519("zq", zq);

  // 15. zq = zq^2
  Serial.println(" 15. ------------------------------------------------> Before fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_square(zq, zq);

  Serial.println("After fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_freeze(zq);
  Serial.println("After freeze:");
  print_fe25519("zq", zq);

  // 16. zq = zq * x0
  Serial.println(" 16. ------------------------------------------------> Before fe25519_mul(zq, zq, x0):");
  print_fe25519("x0", x0);
  print_fe25519("zq", zq);
  delay(100);
  fe25519_mul(zq, zq, x0);

  Serial.println("After fe25519_mul(zq, zq, x0):");
  print_fe25519("zq", zq);

  // 17. xq = t1 + xq
  Serial.println(" 17. ------------------------------------------------> Before fe25519_add(xq, t1, xq):");
  print_fe25519("xq", xq);
  print_fe25519("t1", t1);

  fe25519_add(xq, t1, xq);

  Serial.println("After fe25519_add(xq, t1, xq):");
  print_fe25519("xq", xq);

  // 18. xq = xq^2
  Serial.println(" 18. ------------------------------------------------> Before fe25519_square(xq, xq):");
  print_fe25519("xq", xq);

  fe25519_square(xq, xq);
  Serial.println("After fe25519_square(xq, xq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  Serial.println(" =======> End of the (ladderstep) function.");
}

// Serial debug dump of one field element: "<name>: " followed by its 32 bytes
// in hex (no zero padding), each followed by a space, then a newline.
void print_fe25519(const char* name, const fe25519 *f)
{
  Serial.print(name);
  Serial.print(": ");

  const unsigned char *bytes = f->v;
  int remaining = 32;
  while (remaining-- > 0)
  {
    Serial.print(*bytes, HEX);
    Serial.print(" ");
    ++bytes;
  }
  Serial.println();
}

// Serial debug dump of a 64-byte buffer: "<name>: " followed by the bytes in
// hex (no zero padding), each followed by a space, then a newline.
void print_fe25519_64(const char* name, const unsigned char *f)
{
  Serial.print(name);
  Serial.print(": ");

  const unsigned char *const stop = f + 64;
  for (const unsigned char *cursor = f; cursor != stop; ++cursor)
  {
    Serial.print(*cursor, HEX);
    Serial.print(" ");
  }
  Serial.println();
}

// r = 121666 * x: multiplies x by the file-level 121666 constant with the
// generic 256-bit multiply, then folds the 64-byte product into r.
void fe25519_mul121666(fe25519 *r, const fe25519 *x)
{
  unsigned char product[64];

  bigint_mul256(product, x->v, _121666.v);
  fe25519_red(r, product);
}

/*
 * Computes r from x through a fixed chain of fe25519_square / fe25519_mul
 * calls. The comment at the start of each line gives the exponent of x held by
 * that line's destination; the last line reaches 2^255 - 21, which equals
 * (2^255 - 19) - 2, i.e. r = x^(p-2) for the modulus p = 2^255 - 19.
 *
 * All intermediates live in locals, and r is written only by the final
 * multiplication, so r may be the same object as x (as the caller in this
 * file does).
 *
 * Naming: z11 holds x^11; zA_B_0 holds x^(2^B - 2^0) once the line labelled
 * with that exponent has run (z2_10_0 is first used as scratch for x^9 and
 * x^31); t0 and t1 alternate as source and destination of repeated squarings.
 */
void fe25519_invert(fe25519 *r, const fe25519 *x)
{
  fe25519 z2;
  fe25519 z11;
  fe25519 z2_10_0;
  fe25519 z2_50_0;
  fe25519 z2_100_0;
  fe25519 t0;
  fe25519 t1;
  unsigned char i;

  /* 2 */ fe25519_square(&z2, x);
  /* 4 */ fe25519_square(&t1, &z2);
  /* 8 */ fe25519_square(&t0, &t1);
  /* 9 */ fe25519_mul(&z2_10_0, &t0, x);
  /* 11 */ fe25519_mul(&z11, &z2_10_0, &z2);
  /* 22 */ fe25519_square(&t0, &z11);
  /* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_10_0, &t0, &z2_10_0);

  /* 2^6 - 2^1 */ fe25519_square(&t0, &z2_10_0);
  /* 2^7 - 2^2 */ fe25519_square(&t1, &t0);
  /* 2^8 - 2^3 */ fe25519_square(&t0, &t1);
  /* 2^9 - 2^4 */ fe25519_square(&t1, &t0);
  /* 2^10 - 2^5 */ fe25519_square(&t0, &t1);
  /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0, &t0, &z2_10_0);

  /* 2^11 - 2^1 */ fe25519_square(&t0, &z2_10_0);
  /* 2^12 - 2^2 */ fe25519_square(&t1, &t0);
  // Each loop pass squares twice, hence the step of 2; 4 passes = 8 squarings.
  /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) {
    fe25519_square(&t0, &t1);
    fe25519_square(&t1, &t0);
  }
  /* 2^20 - 2^0 */ fe25519_mul(&z2_50_0, &t1, &z2_10_0);

  /* 2^21 - 2^1 */ fe25519_square(&t0, &z2_50_0);
  /* 2^22 - 2^2 */ fe25519_square(&t1, &t0);
  /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) {
    fe25519_square(&t0, &t1);
    fe25519_square(&t1, &t0);
  }
  /* 2^40 - 2^0 */ fe25519_mul(&t0, &t1, &z2_50_0);

  /* 2^41 - 2^1 */ fe25519_square(&t1, &t0);
  /* 2^42 - 2^2 */ fe25519_square(&t0, &t1);
  /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) {
    fe25519_square(&t1, &t0);
    fe25519_square(&t0, &t1);
  }
  // From here on z2_50_0 really holds x^(2^50 - 1); before this line it held x^(2^20 - 1).
  /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0, &t0, &z2_10_0);

  /* 2^51 - 2^1 */ fe25519_square(&t0, &z2_50_0);
  /* 2^52 - 2^2 */ fe25519_square(&t1, &t0);
  /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) {
    fe25519_square(&t0, &t1);
    fe25519_square(&t1, &t0);
  }
  /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0, &t1, &z2_50_0);

  /* 2^101 - 2^1 */ fe25519_square(&t1, &z2_100_0);
  /* 2^102 - 2^2 */ fe25519_square(&t0, &t1);
  /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) {
    fe25519_square(&t1, &t0);
    fe25519_square(&t0, &t1);
  }
  /* 2^200 - 2^0 */ fe25519_mul(&t1, &t0, &z2_100_0);

  /* 2^201 - 2^1 */ fe25519_square(&t0, &t1);
  /* 2^202 - 2^2 */ fe25519_square(&t1, &t0);
  /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) {
    fe25519_square(&t0, &t1);
    fe25519_square(&t1, &t0);
  }
  /* 2^250 - 2^0 */ fe25519_mul(&t0, &t1, &z2_50_0);

  /* 2^251 - 2^1 */ fe25519_square(&t1, &t0);
  /* 2^252 - 2^2 */ fe25519_square(&t0, &t1);
  /* 2^253 - 2^3 */ fe25519_square(&t1, &t0);
  /* 2^254 - 2^4 */ fe25519_square(&t0, &t1);
  /* 2^255 - 2^5 */ fe25519_square(&t1, &t0);
  // (2^255 - 32) + 11 = 2^255 - 21: multiply in x^11 to finish the exponent.
  /* 2^255 - 21 */ fe25519_mul(r, &t1, &z11);
}