#include <Arduino.h>
#include <avr/io.h>
#include <stdlib.h>

// Symbol aliases: the short fe25519_* names used throughout this sketch are
// mapped onto the avrnacl_-prefixed symbols, so the extern "C" prototypes
// declared through these macros resolve to the prefixed names at link time.
#define fe25519_add avrnacl_fe25519_add
#define fe25519_sub avrnacl_fe25519_sub
#define fe25519_red avrnacl_fe25519_red

// Container for one 32-byte value; bytes are accessed as v[0] .. v[31].
typedef struct
{
  unsigned char v[32];
} fe25519;

// Routines with C linkage that are not defined in this file
// (presumably the AVRNaCl implementations - confirm against the build).
// fe25519_add / fe25519_sub / fe25519_red are macro-renamed to avrnacl_fe25519_*.
extern "C"
{
  // r receives the result of combining x and y; ladderstep() uses these as
  // field subtraction / addition (presumably modulo 2^255-19 - TODO confirm
  // what range the result is guaranteed to be in).
  void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y);
  void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y);
  // fe25519_mul, fe25519_square and fe25519_mul121666 pass a 64-byte buffer as C
  // and receive a 32-byte element in r.
  // NOTE(review): C is non-const, so the routine may modify the buffer - confirm.
  void fe25519_red(fe25519 *r, unsigned char *C);
  // Used by fe25519_freeze(): the result is written to r and fe25519_freeze keeps
  // it only when the return value is 0 (it passes 1 - return as the cmov flag).
  char bigint_subp(unsigned char* r, const unsigned char* a);
  // Called with a 64-byte output buffer r and 32-byte input(s); the return value
  // is ignored by every caller in this file.
  char bigint_square256(unsigned char* r, const unsigned char* a);
  char bigint_mul256(unsigned char* r, const unsigned char* a, const unsigned char* b);
  // Declared but not called anywhere in the visible code.
  void bigint_mul121666(unsigned char *r, const unsigned char *x);
}

void fe25519_freeze(fe25519 *r);
void fe25519_unpack(fe25519 *r, const unsigned char x[32]);
void fe25519_pack(unsigned char r[32], const fe25519 *x);
void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b);
void fe25519_setone(fe25519 *r);
void fe25519_setzero(fe25519 *r);
void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y);
void fe25519_square(fe25519 *r, const fe25519 *x);
void fe25519_invert(fe25519 *r, const fe25519 *x);
void work_cswap(fe25519 *work, char b);
void mladder(fe25519 *xr, fe25519 *zr, const unsigned char s[32]);
void fe25519_mul121666(fe25519 *r, const fe25519 *x);

int crypto_scalarmult_curve25519(unsigned char *r, const unsigned char *s, const unsigned char *p);

// The constant 121666 (0x01DB42) stored least-significant byte first; the
// 29 bytes not listed are zero-initialised by the aggregate initialiser.
static const fe25519 _121666 = {{0x42, 0xDB, 0x01}};

// Arduino entry point: runs one scalar multiplication and prints the 32 result
// bytes in hex over the serial port at 9600 baud.
void setup()
{
  // Only the first byte of each input is given explicitly; the remaining
  // 31 bytes are zero-initialised by the aggregate initialiser.
  unsigned char q[32];           // receives the result
  unsigned char p[32] = {0x09};  // x-coordinate of the base point (generator)
  unsigned char n[32] = {0x05};  // scalar

  Serial.begin(9600);

  crypto_scalarmult_curve25519(q, n, p);

  Serial.print("Result: ");
  for (const unsigned char *byte = q; byte != q + 32; ++byte)
  {
    Serial.print(*byte, HEX);
    Serial.print(" ");
  }
  Serial.println();
}

// Arduino main loop. Intentionally does nothing: the whole computation runs
// once from setup().
void loop()
{
  // Empty loop
}

//---------------------------------------Define your functions here---------------------------------------
/*
 * Curve25519 scalar multiplication.
 *
 *   r  - 32-byte output buffer, receives the packed result
 *   s  - 32-byte scalar (not modified; a clamped copy is used internally)
 *   p  - 32-byte x-coordinate of the input point
 *
 * Always returns 0.
 *
 * The scalar copy is clamped as required by X25519 (RFC 7748, section 5):
 * the three low bits are cleared, bit 255 is cleared and bit 254 is set.
 * mladder() starts at bit 254, so it relies on this clamping having been done.
 *
 * Both the original scalar and the clamped copy are printed to Serial for
 * debugging.
 */
int crypto_scalarmult_curve25519(unsigned char *r, const unsigned char *s, const unsigned char *p)
{
  unsigned char e[32];
  unsigned char i;
  for (i = 0; i < 32; i++)
  {
    e[i] = s[i];
  }

  // Clamp the working copy of the scalar (this is the "modification" the
  // log lines below refer to).
  e[0] &= 248;
  e[31] &= 127;
  e[31] |= 64;

  Serial.println();

  // Scalar exactly as supplied by the caller.
  Serial.print("The Scalar multiplication value Before the modification (s): ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print(s[k], HEX);
    Serial.print(" ");
  }
  Serial.println();

  // Clamped scalar that is actually fed to the ladder.
  Serial.print("The Scalar multiplication value after modification (e): ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print(e[k], HEX);
    Serial.print(" ");
  }
  Serial.println();

  fe25519 t;
  fe25519 z;
  fe25519_unpack(&t, p);
  mladder(&t, &z, e);       // t, z <- projective X and Z produced by the ladder
  fe25519_invert(&z, &z);   // z <- 1/z
  fe25519_mul(&t, &t, &z);  // t <- X/Z
  fe25519_pack(r, &t);
  return 0;
}

// Copies the 32 bytes of x into r and clears the top bit of the last byte.
void fe25519_unpack(fe25519 *r, const unsigned char x[32])
{
  unsigned char *dst = r->v;
  const unsigned char *src = x;
  const unsigned char *const stop = x + 32;

  while (src != stop)
  {
    *dst++ = *src++;
  }

  r->v[31] &= 0x7F;
}

void mladder(fe25519 *xr, fe25519 *zr, const unsigned char s[32])
{
  Serial.println();
  fe25519 work[5];
  unsigned char bit, prevbit = 0;
  unsigned char swap;
  signed char j = 6;
  signed char i;

  work[0] = *xr;

  //----------------- Print--------------------------------
  Serial.print("Intailization = work[0] = Xg = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(work[0].v[i], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //----------------- Print--------------------------------
  fe25519_setone(work+1);
  Serial.print("Intailization = work+1 = X0 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 1)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------
  fe25519_setzero(work+2);
  Serial.print("Intailization = work+2 = Z0 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 2)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------
  work[3] = *xr;
  //----------------- Print--------------------------------
  Serial.print("Intailization = work[3] = X1 = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(work[3].v[i], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //----------------- Print--------------------------------
  fe25519_setone(work+4);
  Serial.print("Intailization = work+4 = Z1 = ");
  for (int k = 0; k < 32; k++)
  {
    Serial.print((work + 4)->v[k], HEX);
    Serial.print(" ");
  }
  Serial.println();
  //--------------------------

  for (i = 31; i >= 0; i--)
  {
    while (j >= 0)
    {
      Serial.print(" ============ Inside for loop =================> Iteration: ");
      Serial.print(" i = ");
      Serial.print(i);
      Serial.print(", j = ");
      Serial.println(j);
      //---------------------
      bit = 1 & (s[i] >> j);
      swap = bit ^ prevbit;
      prevbit = bit;
      //--------------------
      Serial.print("bit = 1 & (s[i] >> j) = ");
      Serial.println(bit, HEX);
      Serial.print("swap = bit ^ prevbit = ");
      Serial.println(swap, HEX);
      Serial.print("prevbit = bit = ");
      Serial.println(prevbit, HEX);
      
      Serial.println(" //------------------------------//------------------------------// ");
      // Print values of work before work_cswap
      Serial.println("Before work_cswap - work: ");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }

      // Print value of swap before work_cswap
      Serial.print("Before work_cswap - swap: ");
      Serial.println(swap, HEX);
      //****************************************************************************************************************
      work_cswap(work, swap);
      //****************************************************************************************************************
      Serial.println("After work_cswap - work: ");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }

      Serial.println(" //------------------------------//------------------------------// ");

      // Print values of work before ladderstep
      Serial.println("Before ladderstep - work:");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
        }
      //****************************************************************************************************************
      ladderstep(work);
      //****************************************************************************************************************
      Serial.println("After ladderstep - work:");
      for (int k = 0; k < 5; k++)
      {
        Serial.print("work[");
        Serial.print(k);
        Serial.print("]: ");
        for (int l = 0; l < 32; l++)
        {
          Serial.print(work[k].v[l], HEX);
          Serial.print(" ");
        }
        Serial.println();
      }
      //------------------------------//------------------------------//------------------------------//------------------------------
      Serial.print(" ============ ============ ============ ============ ============ ============ Inside for loop =================> End of Iteration: ");
      Serial.print(" i = ");
      Serial.print(i);
      Serial.print(", j = ");
      Serial.println(j);
      Serial.println();
      j -= 1;
    }
    j = 7;
  }
  *xr = work[1];
  *zr = work[2];
}

// Sets r to the value 1: v[0] = 1, v[1..31] = 0.
void fe25519_setone(fe25519 *r)
{
  // Clear the upper 31 bytes from the top down, then write the low byte.
  for (unsigned char idx = 31; idx > 0; idx--)
  {
    r->v[idx] = 0;
  }
  r->v[0] = 1;
}

// Sets all 32 bytes of r to 0.
void fe25519_setzero(fe25519 *r)
{
  unsigned char *cursor = r->v;
  unsigned char remaining = 32;

  while (remaining--)
  {
    *cursor++ = 0;
  }
}

// Implement work_cswap and ladderstep functions here
// Conditionally exchanges work[1] <-> work[3] and work[2] <-> work[4].
// With b == 1 both pairs are swapped; with b == 0 nothing changes. The same
// sequence of fe25519_cmov calls is executed in either case.
void work_cswap(fe25519 *work, char b)
{
  fe25519 tmp;
  fe25519_setzero(&tmp);

  for (unsigned char k = 1; k <= 2; k++)
  {
    fe25519 *first = work + k;
    fe25519 *second = work + k + 2;

    fe25519_cmov(&tmp, first, b);
    fe25519_cmov(first, second, b);
    fe25519_cmov(second, &tmp, b);
  }
}

// Conditional move without branching on b: with b == 1 every byte of x is
// copied into r, with b == 0 r is left unchanged.
void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b)
{
  // 0 when b == 0, all bits set when b == 1.
  const unsigned long mask = -(unsigned long)b;
  unsigned char *dst = r->v;
  const unsigned char *src = x->v;

  for (unsigned char left = 32; left != 0; left--, dst++, src++)
  {
    *dst ^= mask & (*src ^ *dst);
  }
}

void fe25519_pack(unsigned char r[32], const fe25519 *x)
{
  unsigned char i;
  fe25519 y = *x;
  fe25519_freeze(&y);
  for(i=0;i<32;i++)
  {
    r[i] = y.v[i];
  }    
}

/* reduction modulo 2^255-19 */
/* Runs the same step twice: bigint_subp() writes its result for r into a
 * scratch element, and that result replaces r only when bigint_subp()
 * returned 0 (the flag handed to fe25519_cmov is 1 - return value). */
void fe25519_freeze(fe25519 *r)
{
  fe25519 candidate;

  for (unsigned char pass = 0; pass < 2; pass++)
  {
    const unsigned char flag = bigint_subp(candidate.v, r->v);
    fe25519_cmov(r, &candidate, 1 - flag);
  }
}

void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y)
{
  unsigned char a[32] = {0};
  unsigned char b[32] = {0};
  unsigned char t[64] = {0};

  memcpy(a, x->v, 32);
  memcpy(b, y->v, 32);

  // Print inputs
  Serial.print("fe25519_mul: a = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(a[i], HEX);
    Serial.print(" ");
  }
  Serial.println();

  Serial.print("fe25519_mul: b = ");
  for (int i = 0; i < 32; i++)
  {
    Serial.print(b[i], HEX);
    Serial.print(" ");
  }
  Serial.println();

  Serial.print("fe25519_mul: t = ");
  for (int i = 0; i < 64; i++)
  {
    Serial.print(t[i], HEX);
    Serial.print(" ");
  }
  Serial.println();

  // Call the assembly function to multiply the big integers
  //delay(20);
  bigint_mul256(t, a, b);
  //delay(20);
  // Print t (before reduction)
  Serial.print("t (before reduction) = ");
  for (int i = 0; i < 64; i++)
  {
    Serial.print(t[i], HEX);
    Serial.print(" ");
  }
  Serial.println();

  // Step 4: Reduce the result to the appropriate field size
  fe25519_red(r, t);

  // Optionally apply freeze if needed
  fe25519_freeze(r);
  //fe25519_freeze(r);
}

// r = x * x: squares x into a 64-byte buffer with bigint_square256(), reduces
// it into r with fe25519_red() and freezes r, dumping each stage to Serial.
void fe25519_square(fe25519 *r, const fe25519 *x)
{
  unsigned char wide[64] = {0};

  bigint_square256(wide, x->v);
  print_fe25519_64("square intermidate Result: ", wide);

  fe25519_red(r, wide);
  // NOTE(review): this dumps the 64-byte buffer again, not the reduced r.
  print_fe25519_64("square intermidate Result, after reduction: ", wide);

  fe25519_freeze(r);
  print_fe25519("After freeze: ", r);
}

/*
 * One step of the Montgomery ladder on the five-element state `work`:
 *
 *   work[0] = x0        (read only, used once in step 16)
 *   work[1], work[2] = xp, zp
 *   work[3], work[4] = xq, zq
 *
 * The 18 numbered field operations below update (xp, zp) and (xq, zq) in
 * place; every operand and result is dumped to Serial before and after each
 * operation.
 *
 * Step 14 performs only the subtraction (followed by a freeze). fe25519_red()
 * must not be applied to zq->v: fe25519_mul, fe25519_square and
 * fe25519_mul121666 hand fe25519_red a 64-byte product buffer, whereas zq->v
 * is 32 bytes and zq is the last element of the caller's work[] array, so such
 * a call would access memory past the array and corrupt zq.
 */
void ladderstep(fe25519 *work)
{
  fe25519 t[2];
  fe25519 *t1 = &t[0];
  fe25519 *t2 = &t[1];

  // Zeroed only so that the "before" dumps print defined bytes.
  fe25519_setzero(t1);
  fe25519_setzero(t2);

  fe25519 *x0 = work;
  fe25519 *xp = work+1;
  fe25519 *zp = work+2;
  fe25519 *xq = work+3;
  fe25519 *zq = work+4;

  // Print initial values
  Serial.println(" ");
  Serial.println(" =======> Inside the (ladderstep) function, Initial values:");
  print_fe25519("t1", t1);
  print_fe25519("t2", t2);
  print_fe25519("x0", x0);
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  // 1. t1 = xq + zq
  Serial.println(" 1. ------------------------------------------------> Before fe25519_add(t1, xq, zq):");
  print_fe25519("t1", t1);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  fe25519_add(t1, xq, zq);
  Serial.println("After fe25519_add(t1, xq, zq) and before fe25519_freeze : ");
  print_fe25519("t1", t1);

  fe25519_freeze(t1);
  Serial.println("After freeze:");
  print_fe25519("t1", t1);

  // 2. xq = xq - zq
  Serial.println(" 2. ------------------------------------------------> Before fe25519_sub(xq, xq, zq):");
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  fe25519_sub(xq, xq, zq);

  Serial.println("After fe25519_sub(xq, xq, zq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  // 3. zq = xp + zp
  Serial.println(" 3. ------------------------------------------------> Before fe25519_add(zq, xp, zp):");
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);
  print_fe25519("The result zq", zq);

  fe25519_add(zq, xp, zp);

  Serial.println("After fe25519_add(zq, xp, zp):");
  print_fe25519("zq", zq);

  // 4. xp = xp - zp
  Serial.println(" 4. ------------------------------------------------> Before fe25519_sub(xp, xp, zp):");
  print_fe25519("xp", xp);
  print_fe25519("zp", zp);

  fe25519_sub(xp, xp, zp);

  Serial.println("After fe25519_sub(xp, xp, zp):");
  print_fe25519("xp", xp);

  // 5. t1 = t1 * xp
  Serial.println(" 5. ------------------------------------------------> Before fe25519_mul(t1, t1, xp):");
  print_fe25519("xp", xp);
  print_fe25519("t1", t1);
  fe25519_mul(t1, t1, xp);
  Serial.println("After fe25519_mul(t1, t1, xp):");
  print_fe25519("t1", t1);

  // 6. xq = xq * zq
  Serial.println(" 6. ------------------------------------------------> Before fe25519_mul(xq, xq, zq):");
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);
  fe25519_mul(xq, xq, zq);
  Serial.println("After fe25519_mul(xq, xq, zq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  // 7. zq = zq^2
  Serial.println(" 7. ------------------------------------------------> Before fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_square(zq, zq);

  Serial.println("After fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  // 8. xp = xp^2
  Serial.println(" 8. ------------------------------------------------> Before fe25519_square(xp, xp):");
  print_fe25519("xp", xp);

  fe25519_square(xp, xp);

  Serial.println("After fe25519_square(xp, xp):");
  print_fe25519("xp", xp);

  // 9. t2 = zq - xp
  Serial.println(" 9. ------------------------------------------------> Before fe25519_sub(t2, zq, xp):");
  print_fe25519("t2", t2);
  print_fe25519("zq", zq);
  print_fe25519("xp", xp);

  fe25519_sub(t2, zq, xp);

  Serial.println("After fe25519_sub(t2, zq, xp):");
  print_fe25519("t2", t2);

  // 10. zp = 121666 * t2
  Serial.println(" 10. ------------------------------------------------> Before fe25519_mul121666(zp, t2):");
  print_fe25519("t2", t2);
  print_fe25519("zp", zp);

  fe25519_mul121666(zp, t2);

  Serial.println("After fe25519_mul121666(zp, t2):");
  print_fe25519("zp", zp);

  // 11. zp = zp + xp
  Serial.println(" 11. ------------------------------------------------> Before fe25519_add(zp, zp, xp):");
  print_fe25519("zp", zp);
  print_fe25519("xp", xp);

  fe25519_add(zp, zp, xp);

  Serial.println("After fe25519_add(zp, zp, xp):");
  print_fe25519("zp", zp);

  // 12. zp = t2 * zp (operand order matches the logged call)
  Serial.println(" 12. ------------------------------------------------> Before fe25519_mul(zp, t2, zp):");
  print_fe25519("t2", t2);
  print_fe25519("zp", zp);
  fe25519_mul(zp, t2, zp);
  Serial.println("After fe25519_mul(zp, t2, zp):");
  print_fe25519("zp", zp);

  // 13. xp = zq * xp
  // The delays around this call are kept from the original sketch
  // (presumably to pace the serial output - confirm whether still needed).
  Serial.println(" 13. ------------------------------------------------> Before fe25519_mul(xp, zq, xp):");
  print_fe25519("zq", zq);
  print_fe25519("xp", xp);
  delay(20);
  fe25519_mul(xp, zq, xp);
  delay(20);
  Serial.println("After fe25519_mul(xp, zq, xp):");
  print_fe25519("xp", xp);

  // 14. zq = t1 - xq (operand order matches the logged call; the value is
  // squared in step 15, so the sign of the difference does not affect the
  // final result)
  Serial.println(" 14. ------------------------------------------------> Before fe25519_sub(zq, t1, xq):");
  print_fe25519("t1", t1);
  print_fe25519("xq", xq);
  print_fe25519("zq", zq);

  fe25519_sub(zq, t1, xq);

  Serial.println("After fe25519_sub(zq, t1, xq):");
  print_fe25519("zq", zq);

  fe25519_freeze(zq);
  Serial.println("After freeze:");
  print_fe25519("zq", zq);

  // 15. zq = zq^2
  Serial.println(" 15. ------------------------------------------------> Before fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_square(zq, zq);

  Serial.println("After fe25519_square(zq, zq):");
  print_fe25519("zq", zq);

  fe25519_freeze(zq);
  Serial.println("After freeze:");
  print_fe25519("zq", zq);

  // 16. zq = zq * x0
  Serial.println(" 16. ------------------------------------------------> Before fe25519_mul(zq, zq, x0):");
  print_fe25519("x0", x0);
  print_fe25519("zq", zq);
  fe25519_mul(zq, zq, x0);
  Serial.println("After fe25519_mul(zq, zq, x0):");
  print_fe25519("zq", zq);

  // 17. xq = t1 + xq
  Serial.println(" 17. ------------------------------------------------> Before fe25519_add(xq, t1, xq):");
  print_fe25519("xq", xq);
  print_fe25519("t1", t1);

  fe25519_add(xq, t1, xq);

  Serial.println("After fe25519_add(xq, t1, xq):");
  print_fe25519("xq", xq);

  // 18. xq = xq^2
  Serial.println(" 18. ------------------------------------------------> Before fe25519_square(xq, xq):");
  print_fe25519("xq", xq);

  fe25519_square(xq, xq);
  Serial.println("After fe25519_square(xq, xq):");
  print_fe25519("xq", xq);

  fe25519_freeze(xq);
  Serial.println("After freeze:");
  print_fe25519("xq", xq);

  Serial.println(" =======> End of the (ladderstep) function.");
}

// Debug helper: prints "<name>: " followed by the 32 bytes of f in hex
// (space separated, no zero padding) and a newline.
void print_fe25519(const char* name, const fe25519 *f)
{
    const unsigned char *byte = f->v;
    const unsigned char *const stop = byte + 32;

    Serial.print(name);
    Serial.print(": ");
    while (byte != stop)
    {
        Serial.print(*byte++, HEX);
        Serial.print(" ");
    }
    Serial.println();
}

// Debug helper: prints "<name>: " followed by the 64 bytes at f in hex
// (space separated, no zero padding) and a newline.
void print_fe25519_64(const char* name, const unsigned char *f)
{
    const unsigned char *const stop = f + 64;

    Serial.print(name);
    Serial.print(": ");
    for (const unsigned char *byte = f; byte != stop; ++byte)
    {
        Serial.print(*byte, HEX);
        Serial.print(" ");
    }
    Serial.println();
}

// r = x * 121666: a full bigint_mul256() by the file-level constant _121666
// followed by fe25519_red(). Unlike fe25519_mul(), r is not frozen here.
void fe25519_mul121666(fe25519 *r, const fe25519 *x)
{
  unsigned char wide[64];

  bigint_mul256(wide, x->v, _121666.v);
  fe25519_red(r, wide);
}

/*
 * Computes r = x^(2^255 - 21) with a fixed chain of squarings and
 * multiplications. For the modulus 2^255 - 19 this exponent is p - 2, i.e. the
 * multiplicative inverse of x.
 *
 * The comment in front of each statement gives the exponent of x held by the
 * destination after that statement ("2^a - 2^b" means x^(2^a - 2^b)).
 *
 * r may alias x: x is read only by the first squaring and by the "9" step,
 * and r is written only by the final multiplication.
 *
 * NOTE: fe25519_square()/fe25519_mul() print to Serial in this file, so one
 * inversion emits a large amount of debug output.
 */
void fe25519_invert(fe25519 *r, const fe25519 *x)
{
	fe25519 z2;        /* x^2 */
	fe25519 z11;       /* x^11 */
	fe25519 z2_10_0;   /* holds x^9, then x^31, finally x^(2^10 - 1) */
	fe25519 z2_50_0;   /* holds x^(2^20 - 1), later x^(2^50 - 1) */
	fe25519 z2_100_0;  /* x^(2^100 - 1) */
	fe25519 t0;        /* scratch, alternates with t1 in the squaring runs */
	fe25519 t1;
	unsigned char i;

	/* 2 */ fe25519_square(&z2,x);
	/* 4 */ fe25519_square(&t1,&z2);
	/* 8 */ fe25519_square(&t0,&t1);
	/* 9 */ fe25519_mul(&z2_10_0,&t0,x);
	/* 11 */ fe25519_mul(&z11,&z2_10_0,&z2);
	/* 22 */ fe25519_square(&t0,&z11);
	/* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_10_0,&t0,&z2_10_0);

	/* 2^6 - 2^1 */ fe25519_square(&t0,&z2_10_0);
	/* 2^7 - 2^2 */ fe25519_square(&t1,&t0);
	/* 2^8 - 2^3 */ fe25519_square(&t0,&t1);
	/* 2^9 - 2^4 */ fe25519_square(&t1,&t0);
	/* 2^10 - 2^5 */ fe25519_square(&t0,&t1);
	/* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t0,&z2_10_0);

	/* Each loop below performs two squarings per iteration, ping-ponging
	 * between t0 and t1. */
	/* 2^11 - 2^1 */ fe25519_square(&t0,&z2_10_0);
	/* 2^12 - 2^2 */ fe25519_square(&t1,&t0);
	/* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2){ fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
	/* 2^20 - 2^0 */ fe25519_mul(&z2_50_0,&t1,&z2_10_0);

	/* 2^21 - 2^1 */ fe25519_square(&t0,&z2_50_0);
	/* 2^22 - 2^2 */ fe25519_square(&t1,&t0);
	/* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
	/* 2^40 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0);

	/* 2^41 - 2^1 */ fe25519_square(&t1,&t0);
	/* 2^42 - 2^2 */ fe25519_square(&t0,&t1);
	/* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
	/* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t0,&z2_10_0);

	/* 2^51 - 2^1 */ fe25519_square(&t0,&z2_50_0);
	/* 2^52 - 2^2 */ fe25519_square(&t1,&t0);
	/* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
	/* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t1,&z2_50_0);

	/* 2^101 - 2^1 */ fe25519_square(&t1,&z2_100_0);
	/* 2^102 - 2^2 */ fe25519_square(&t0,&t1);
	/* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
	/* 2^200 - 2^0 */ fe25519_mul(&t1,&t0,&z2_100_0);

	/* 2^201 - 2^1 */ fe25519_square(&t0,&t1);
	/* 2^202 - 2^2 */ fe25519_square(&t1,&t0);
	/* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
	/* 2^250 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0);

	/* 2^251 - 2^1 */ fe25519_square(&t1,&t0);
	/* 2^252 - 2^2 */ fe25519_square(&t0,&t1);
	/* 2^253 - 2^3 */ fe25519_square(&t1,&t0);
	/* 2^254 - 2^4 */ fe25519_square(&t0,&t1);
	/* 2^255 - 2^5 */ fe25519_square(&t1,&t0);
	/* 2^255 - 21 */ fe25519_mul(r,&t1,&z11);
}