Stephen Lake
Published © Apache-2.0

HLS Code to Coin: Blockchain Hashing on an FPGA

Step by step guide to transforming Filecoin Poseidon Hashing into High Level Synthesis code that runs on a Xilinx FPGA accelerator card

IntermediateFull instructions provided8 hours563
HLS Code to Coin: Blockchain Hashing on an FPGA

Things used in this project

Hardware components

Varium C1100 Blockchain Accelerator Card
AMD Varium C1100 Blockchain Accelerator Card
This project was tested on the Varium C1100. Other cards from Xilinx Alveo line or Cloud based Xilinx hardware will work as well with needed HLS adjustments to resource constraints
×1
AMD Ryzen 3700x
×1
64 GB RAM
Xilinx recommends 80 GB or more. I specified 100 GB of SWAP on a fast NVMe SSD. Most things that required that much memory didn't succeed. May be able to proceed with 32 GB of RAM and 50 GB of SWAP
×1

Software apps and online services

Ubuntu 20.04
The Ubuntu version must match the Vitis supported list of OS. With the version of Vitis 2021.2.0 this was Ubuntu 20.04
Vitis Unified Software Platform
AMD Vitis Unified Software Platform
Version v2021.2.0 (64-bit)
AMD-Xilinx Run Time (XRT) for Ubuntu 20.04

Story

Read more

Code

Baseline_OpenCL_from_Neptune.cl

C/C++
The generated OpenCL code used by the Neptune project
https://github.com/filecoin-project/neptune
License MIT or Apache 2.0
https://github.com/filecoin-project/neptune/blob/master/LICENSE-APACHE
https://github.com/filecoin-project/neptune/blob/master/LICENSE-MIT
// Defines to make the code work with both, CUDA and OpenCL
#ifdef __NVCC__
  #define DEVICE __device__
  #define GLOBAL
  #define KERNEL extern "C" __global__
  #define LOCAL __shared__
  #define CONSTANT __constant__

  #define GET_GLOBAL_ID() blockIdx.x * blockDim.x + threadIdx.x
  #define GET_GROUP_ID() blockIdx.x
  #define GET_LOCAL_ID() threadIdx.x
  #define GET_LOCAL_SIZE() blockDim.x
  #define BARRIER_LOCAL() __syncthreads()

  typedef unsigned char uchar;

  #define CUDA
#else // OpenCL
  #define DEVICE
  #define GLOBAL __global
  #define KERNEL __kernel
  #define LOCAL __local
  #define CONSTANT __constant

  #define GET_GLOBAL_ID() get_global_id(0)
  #define GET_GROUP_ID() get_group_id(0)
  #define GET_LOCAL_ID() get_local_id(0)
  #define GET_LOCAL_SIZE() get_local_size(0)
  #define BARRIER_LOCAL() barrier(CLK_LOCAL_MEM_FENCE)
#endif

#ifdef __NV_CL_C_VERSION
#define OPENCL_NVIDIA
#endif

#if defined(__WinterPark__) || defined(__BeaverCreek__) || defined(__Turks__) || \\
    defined(__Caicos__) || defined(__Tahiti__) || defined(__Pitcairn__) || \\
    defined(__Capeverde__) || defined(__Cayman__) || defined(__Barts__) || \\
    defined(__Cypress__) || defined(__Juniper__) || defined(__Redwood__) || \\
    defined(__Cedar__) || defined(__ATI_RV770__) || defined(__ATI_RV730__) || \\
    defined(__ATI_RV710__) || defined(__Loveland__) || defined(__GPU__) || \\
    defined(__Hawaii__)
#define AMD
#endif

// Returns a * b + c + d, puts the carry in d
DEVICE ulong mac_with_carry_64(ulong a, ulong b, ulong c, ulong *d) {
  #if defined(OPENCL_NVIDIA) || defined(CUDA)
    ulong lo, hi;
    asm("mad.lo.cc.u64 %0, %2, %3, %4;\\r\
"
        "madc.hi.u64 %1, %2, %3, 0;\\r\
"
        "add.cc.u64 %0, %0, %5;\\r\
"
        "addc.u64 %1, %1, 0;\\r\
"
        : "=l"(lo), "=l"(hi) : "l"(a), "l"(b), "l"(c), "l"(*d));
    *d = hi;
    return lo;
  #else
    ulong lo = a * b + c;
    ulong hi = mad_hi(a, b, (ulong)(lo < c));
    a = lo;
    lo += *d;
    hi += (lo < a);
    *d = hi;
    return lo;
  #endif
}

// Returns a + b, puts the carry in d
DEVICE ulong add_with_carry_64(ulong a, ulong *b) {
  #if defined(OPENCL_NVIDIA) || defined(CUDA)
    ulong lo, hi;
    asm("add.cc.u64 %0, %2, %3;\\r\
"
        "addc.u64 %1, 0, 0;\\r\
"
        : "=l"(lo), "=l"(hi) : "l"(a), "l"(*b));
    *b = hi;
    return lo;
  #else
    ulong lo = a + *b;
    *b = lo < a;
    return lo;
  #endif
}

// Returns a * b + c + d, puts the carry in d
DEVICE uint mac_with_carry_32(uint a, uint b, uint c, uint *d) {
  ulong res = (ulong)a * b + c + *d;
  *d = res >> 32;
  return res;
}

// Returns a + b, puts the carry in b
DEVICE uint add_with_carry_32(uint a, uint *b) {
  #if defined(OPENCL_NVIDIA) || defined(CUDA)
    uint lo, hi;
    asm("add.cc.u32 %0, %2, %3;\\r\
"
        "addc.u32 %1, 0, 0;\\r\
"
        : "=r"(lo), "=r"(hi) : "r"(a), "r"(*b));
    *b = hi;
    return lo;
  #else
    uint lo = a + *b;
    *b = lo < a;
    return lo;
  #endif
}

#ifdef CUDA
typedef uint uint32_t;
typedef int  int32_t;
typedef uint limb;

DEVICE inline uint32_t add_cc(uint32_t a, uint32_t b) {
  uint32_t r;

  asm volatile ("add.cc.u32 %0, %1, %2;" : "=r"(r) : "r"(a), "r"(b));
  return r;
}

DEVICE inline uint32_t addc_cc(uint32_t a, uint32_t b) {
  uint32_t r;

  asm volatile ("addc.cc.u32 %0, %1, %2;" : "=r"(r) : "r"(a), "r"(b));
  return r;
}

DEVICE inline uint32_t addc(uint32_t a, uint32_t b) {
  uint32_t r;

  asm volatile ("addc.u32 %0, %1, %2;" : "=r"(r) : "r"(a), "r"(b));
  return r;
}


DEVICE inline uint32_t madlo(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("mad.lo.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madlo_cc(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("mad.lo.cc.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madloc_cc(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("madc.lo.cc.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madloc(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("madc.lo.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madhi(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("mad.hi.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madhi_cc(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("mad.hi.cc.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madhic_cc(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("madc.hi.cc.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

DEVICE inline uint32_t madhic(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  asm volatile ("madc.hi.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
  return r;
}

typedef struct {
  int32_t _position;
} chain_t;

DEVICE inline
void chain_init(chain_t *c) {
  c->_position = 0;
}

DEVICE inline
uint32_t chain_add(chain_t *ch, uint32_t a, uint32_t b) {
  uint32_t r;

  ch->_position++;
  if(ch->_position==1)
    r=add_cc(a, b);
  else
    r=addc_cc(a, b);
  return r;
}

DEVICE inline
uint32_t chain_madlo(chain_t *ch, uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  ch->_position++;
  if(ch->_position==1)
    r=madlo_cc(a, b, c);
  else
    r=madloc_cc(a, b, c);
  return r;
}

DEVICE inline
uint32_t chain_madhi(chain_t *ch, uint32_t a, uint32_t b, uint32_t c) {
  uint32_t r;

  ch->_position++;
  if(ch->_position==1)
    r=madhi_cc(a, b, c);
  else
    r=madhic_cc(a, b, c);
  return r;
}
#endif


#define Fr_limb ulong
#define Fr_LIMBS 4
#define Fr_LIMB_BITS 64
#define Fr_INV 18446744069414584319
typedef struct { Fr_limb val[Fr_LIMBS]; } Fr;
CONSTANT Fr Fr_ONE = { { 8589934590, 6378425256633387010, 11064306276430008309, 1739710354780652911 } };
CONSTANT Fr Fr_P = { { 18446744069414584321, 6034159408538082302, 3691218898639771653, 8353516859464449352 } };
CONSTANT Fr Fr_R2 = { { 14526898881837571181, 3129137299524312099, 419701826671360399, 524908885293268753 } };
CONSTANT Fr Fr_ZERO = { { 0, 0, 0, 0 } };
#if defined(OPENCL_NVIDIA) || defined(CUDA)

DEVICE Fr Fr_sub_nvidia(Fr a, Fr b) {
asm("sub.cc.u64 %0, %0, %4;\\r\
"
"subc.cc.u64 %1, %1, %5;\\r\
"
"subc.cc.u64 %2, %2, %6;\\r\
"
"subc.u64 %3, %3, %7;\\r\
"
:"+l"(a.val[0]), "+l"(a.val[1]), "+l"(a.val[2]), "+l"(a.val[3])
:"l"(b.val[0]), "l"(b.val[1]), "l"(b.val[2]), "l"(b.val[3]));
return a;
}
DEVICE Fr Fr_add_nvidia(Fr a, Fr b) {
asm("add.cc.u64 %0, %0, %4;\\r\
"
"addc.cc.u64 %1, %1, %5;\\r\
"
"addc.cc.u64 %2, %2, %6;\\r\
"
"addc.u64 %3, %3, %7;\\r\
"
:"+l"(a.val[0]), "+l"(a.val[1]), "+l"(a.val[2]), "+l"(a.val[3])
:"l"(b.val[0]), "l"(b.val[1]), "l"(b.val[2]), "l"(b.val[3]));
return a;
}
#endif

// FinalityLabs - 2019
// Arbitrary size prime-field arithmetic library (add, sub, mul, pow)

#define Fr_BITS (Fr_LIMBS * Fr_LIMB_BITS)
#if Fr_LIMB_BITS == 32
  #define Fr_mac_with_carry mac_with_carry_32
  #define Fr_add_with_carry add_with_carry_32
#elif Fr_LIMB_BITS == 64
  #define Fr_mac_with_carry mac_with_carry_64
  #define Fr_add_with_carry add_with_carry_64
#endif

// Greater than or equal
DEVICE bool Fr_gte(Fr a, Fr b) {
  for(char i = Fr_LIMBS - 1; i >= 0; i--){
    if(a.val[i] > b.val[i])
      return true;
    if(a.val[i] < b.val[i])
      return false;
  }
  return true;
}

// Equals
DEVICE bool Fr_eq(Fr a, Fr b) {
  for(uchar i = 0; i < Fr_LIMBS; i++)
    if(a.val[i] != b.val[i])
      return false;
  return true;
}

// Normal addition
#if defined(OPENCL_NVIDIA) || defined(CUDA)
  #define Fr_add_ Fr_add_nvidia
  #define Fr_sub_ Fr_sub_nvidia
#else
  DEVICE Fr Fr_add_(Fr a, Fr b) {
    bool carry = 0;
    for(uchar i = 0; i < Fr_LIMBS; i++) {
      Fr_limb old = a.val[i];
      a.val[i] += b.val[i] + carry;
      carry = carry ? old >= a.val[i] : old > a.val[i];
    }
    return a;
  }
  Fr Fr_sub_(Fr a, Fr b) {
    bool borrow = 0;
    for(uchar i = 0; i < Fr_LIMBS; i++) {
      Fr_limb old = a.val[i];
      a.val[i] -= b.val[i] + borrow;
      borrow = borrow ? old <= a.val[i] : old < a.val[i];
    }
    return a;
  }
#endif

// Modular subtraction
DEVICE Fr Fr_sub(Fr a, Fr b) {
  Fr res = Fr_sub_(a, b);
  if(!Fr_gte(a, b)) res = Fr_add_(res, Fr_P);
  return res;
}

// Modular addition
DEVICE Fr Fr_add(Fr a, Fr b) {
  Fr res = Fr_add_(a, b);
  if(Fr_gte(res, Fr_P)) res = Fr_sub_(res, Fr_P);
  return res;
}


#ifdef CUDA
// Code based on the work from Supranational, with special thanks to Niall Emmart:
//
// We would like to acknowledge Niall Emmart at Nvidia for his significant
// contribution of concepts and code for generating efficient SASS on
// Nvidia GPUs. The following papers may be of interest:
//     Optimizing Modular Multiplication for NVIDIA\'s Maxwell GPUs
//     https://ieeexplore.ieee.org/document/7563271
//
//     Faster modular exponentiation using double precision floating point
//     arithmetic on the GPU
//     https://ieeexplore.ieee.org/document/8464792

DEVICE void Fr_reduce(uint32_t accLow[Fr_LIMBS], uint32_t np0, uint32_t fq[Fr_LIMBS]) {
  // accLow is an IN and OUT vector
  // count must be even
  const uint32_t count = Fr_LIMBS;
  uint32_t accHigh[Fr_LIMBS];
  uint32_t bucket=0, lowCarry=0, highCarry=0, q;
  int32_t  i, j;

  #pragma unroll
  for(i=0;i<count;i++)
    accHigh[i]=0;

  // bucket is used so we don\'t have to push a carry all the way down the line

  #pragma unroll
  for(j=0;j<count;j++) {       // main iteration
    if(j%2==0) {
      add_cc(bucket, 0xFFFFFFFF);
      accLow[0]=addc_cc(accLow[0], accHigh[1]);
      bucket=addc(0, 0);

      q=accLow[0]*np0;

      chain_t chain1;
      chain_init(&chain1);

      #pragma unroll
      for(i=0;i<count;i+=2) {
        accLow[i]=chain_madlo(&chain1, q, fq[i], accLow[i]);
        accLow[i+1]=chain_madhi(&chain1, q, fq[i], accLow[i+1]);
      }
      lowCarry=chain_add(&chain1, 0, 0);

      chain_t chain2;
      chain_init(&chain2);
      for(i=0;i<count-2;i+=2) {
        accHigh[i]=chain_madlo(&chain2, q, fq[i+1], accHigh[i+2]);    // note the shift down
        accHigh[i+1]=chain_madhi(&chain2, q, fq[i+1], accHigh[i+3]);
      }
      accHigh[i]=chain_madlo(&chain2, q, fq[i+1], highCarry);
      accHigh[i+1]=chain_madhi(&chain2, q, fq[i+1], 0);
    }
    else {
      add_cc(bucket, 0xFFFFFFFF);
      accHigh[0]=addc_cc(accHigh[0], accLow[1]);
      bucket=addc(0, 0);

      q=accHigh[0]*np0;

      chain_t chain3;
      chain_init(&chain3);
      #pragma unroll
      for(i=0;i<count;i+=2) {
        accHigh[i]=chain_madlo(&chain3, q, fq[i], accHigh[i]);
        accHigh[i+1]=chain_madhi(&chain3, q, fq[i], accHigh[i+1]);
      }
      highCarry=chain_add(&chain3, 0, 0);

      chain_t chain4;
      chain_init(&chain4);
      for(i=0;i<count-2;i+=2) {
        accLow[i]=chain_madlo(&chain4, q, fq[i+1], accLow[i+2]);    // note the shift down
        accLow[i+1]=chain_madhi(&chain4, q, fq[i+1], accLow[i+3]);
      }
      accLow[i]=chain_madlo(&chain4, q, fq[i+1], lowCarry);
      accLow[i+1]=chain_madhi(&chain4, q, fq[i+1], 0);
    }
  }

  // at this point, accHigh needs to be shifted back a word and added to accLow
  // we\'ll use one other trick.  Bucket is either 0 or 1 at this point, so we
  // can just push it into the carry chain.

  chain_t chain5;
  chain_init(&chain5);
  chain_add(&chain5, bucket, 0xFFFFFFFF);    // push the carry into the chain
  #pragma unroll
  for(i=0;i<count-1;i++)
    accLow[i]=chain_add(&chain5, accLow[i], accHigh[i+1]);
  accLow[i]=chain_add(&chain5, accLow[i], highCarry);
}

// Requirement: yLimbs >= xLimbs
DEVICE inline
void Fr_mult_v1(uint32_t *x, uint32_t *y, uint32_t *xy) {
  const uint32_t xLimbs  = Fr_LIMBS;
  const uint32_t yLimbs  = Fr_LIMBS;
  const uint32_t xyLimbs = Fr_LIMBS * 2;
  uint32_t temp[Fr_LIMBS * 2];
  uint32_t carry = 0;

  #pragma unroll
  for (int32_t i = 0; i < xyLimbs; i++) {
    temp[i] = 0;
  }

  #pragma unroll
  for (int32_t i = 0; i < xLimbs; i++) {
    chain_t chain1;
    chain_init(&chain1);
    #pragma unroll
    for (int32_t j = 0; j < yLimbs; j++) {
      if ((i + j) % 2 == 1) {
        temp[i + j - 1] = chain_madlo(&chain1, x[i], y[j], temp[i + j - 1]);
        temp[i + j]     = chain_madhi(&chain1, x[i], y[j], temp[i + j]);
      }
    }
    if (i % 2 == 1) {
      temp[i + yLimbs - 1] = chain_add(&chain1, 0, 0);
    }
  }

  #pragma unroll
  for (int32_t i = xyLimbs - 1; i > 0; i--) {
    temp[i] = temp[i - 1];
  }
  temp[0] = 0;

  #pragma unroll
  for (int32_t i = 0; i < xLimbs; i++) {
    chain_t chain2;
    chain_init(&chain2);

    #pragma unroll
    for (int32_t j = 0; j < yLimbs; j++) {
      if ((i + j) % 2 == 0) {
        temp[i + j]     = chain_madlo(&chain2, x[i], y[j], temp[i + j]);
        temp[i + j + 1] = chain_madhi(&chain2, x[i], y[j], temp[i + j + 1]);
      }
    }
    if ((i + yLimbs) % 2 == 0 && i != yLimbs - 1) {
      temp[i + yLimbs]     = chain_add(&chain2, temp[i + yLimbs], carry);
      temp[i + yLimbs + 1] = chain_add(&chain2, temp[i + yLimbs + 1], 0);
      carry = chain_add(&chain2, 0, 0);
    }
    if ((i + yLimbs) % 2 == 1 && i != yLimbs - 1) {
      carry = chain_add(&chain2, carry, 0);
    }
  }

  #pragma unroll
  for(int32_t i = 0; i < xyLimbs; i++) {
    xy[i] = temp[i];
  }
}

DEVICE Fr Fr_mul_nvidia(Fr a, Fr b) {
  // Perform full multiply
  limb ab[2 * Fr_LIMBS];
  Fr_mult_v1(a.val, b.val, ab);

  uint32_t io[Fr_LIMBS];
  #pragma unroll
  for(int i=0;i<Fr_LIMBS;i++) {
    io[i]=ab[i];
  }
  Fr_reduce(io, Fr_INV, Fr_P.val);

  // Add io to the upper words of ab
  ab[Fr_LIMBS] = add_cc(ab[Fr_LIMBS], io[0]);
  int j;
  #pragma unroll
  for (j = 1; j < Fr_LIMBS - 1; j++) {
    ab[j + Fr_LIMBS] = addc_cc(ab[j + Fr_LIMBS], io[j]);
  }
  ab[2 * Fr_LIMBS - 1] = addc(ab[2 * Fr_LIMBS - 1], io[Fr_LIMBS - 1]);

  Fr r;
  #pragma unroll
  for (int i = 0; i < Fr_LIMBS; i++) {
    r.val[i] = ab[i + Fr_LIMBS];
  }

  if (Fr_gte(r, Fr_P)) {
    r = Fr_sub_(r, Fr_P);
  }

  return r;
}

#endif

// Modular multiplication
DEVICE Fr Fr_mul_default(Fr a, Fr b) {
  /* CIOS Montgomery multiplication, inspired from Tolga Acar\'s thesis:
   * https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   * Learn more:
   * https://en.wikipedia.org/wiki/Montgomery_modular_multiplication
   * https://alicebob.cryptoland.net/understanding-the-montgomery-reduction-algorithm/
   */
  Fr_limb t[Fr_LIMBS + 2] = {0};
  for(uchar i = 0; i < Fr_LIMBS; i++) {
    Fr_limb carry = 0;
    for(uchar j = 0; j < Fr_LIMBS; j++)
      t[j] = Fr_mac_with_carry(a.val[j], b.val[i], t[j], &carry);
    t[Fr_LIMBS] = Fr_add_with_carry(t[Fr_LIMBS], &carry);
    t[Fr_LIMBS + 1] = carry;

    carry = 0;
    Fr_limb m = Fr_INV * t[0];
    Fr_mac_with_carry(m, Fr_P.val[0], t[0], &carry);
    for(uchar j = 1; j < Fr_LIMBS; j++)
      t[j - 1] = Fr_mac_with_carry(m, Fr_P.val[j], t[j], &carry);

    t[Fr_LIMBS - 1] = Fr_add_with_carry(t[Fr_LIMBS], &carry);
    t[Fr_LIMBS] = t[Fr_LIMBS + 1] + carry;
  }

  Fr result;
  for(uchar i = 0; i < Fr_LIMBS; i++) result.val[i] = t[i];

  if(Fr_gte(result, Fr_P)) result = Fr_sub_(result, Fr_P);

  return result;
}

#ifdef CUDA
DEVICE Fr Fr_mul(Fr a, Fr b) {
  return Fr_mul_nvidia(a, b);
}
#else
DEVICE Fr Fr_mul(Fr a, Fr b) {
  return Fr_mul_default(a, b);
}
#endif

// Squaring is a special case of multiplication which can be done ~1.5x faster.
// https://stackoverflow.com/a/16388571/1348497
DEVICE Fr Fr_sqr(Fr a) {
  return Fr_mul(a, a);
}

// Left-shift the limbs by one bit and subtract by modulus in case of overflow.
// Faster version of Fr_add(a, a)
DEVICE Fr Fr_double(Fr a) {
  for(uchar i = Fr_LIMBS - 1; i >= 1; i--)
    a.val[i] = (a.val[i] << 1) | (a.val[i - 1] >> (Fr_LIMB_BITS - 1));
  a.val[0] <<= 1;
  if(Fr_gte(a, Fr_P)) a = Fr_sub_(a, Fr_P);
  return a;
}

// Modular exponentiation (Exponentiation by Squaring)
// https://en.wikipedia.org/wiki/Exponentiation_by_squaring
DEVICE Fr Fr_pow(Fr base, uint exponent) {
  Fr res = Fr_ONE;
  while(exponent > 0) {
    if (exponent & 1)
      res = Fr_mul(res, base);
    exponent = exponent >> 1;
    base = Fr_sqr(base);
  }
  return res;
}


// Store squares of the base in a lookup table for faster evaluation.
DEVICE Fr Fr_pow_lookup(GLOBAL Fr *bases, uint exponent) {
  Fr res = Fr_ONE;
  uint i = 0;
  while(exponent > 0) {
    if (exponent & 1)
      res = Fr_mul(res, bases[i]);
    exponent = exponent >> 1;
    i++;
  }
  return res;
}

DEVICE Fr Fr_mont(Fr a) {
  return Fr_mul(a, Fr_R2);
}

DEVICE Fr Fr_unmont(Fr a) {
  Fr one = Fr_ZERO;
  one.val[0] = 1;
  return Fr_mul(a, one);
}

// Get `i`th bit (From most significant digit) of the field.
DEVICE bool Fr_get_bit(Fr l, uint i) {
  return (l.val[Fr_LIMBS - 1 - i / Fr_LIMB_BITS] >> (Fr_LIMB_BITS - 1 - (i % Fr_LIMB_BITS))) & 1;
}

// Get `window` consecutive bits, (Starting from `skip`th bit) from the field.
DEVICE uint Fr_get_bits(Fr l, uint skip, uint window) {
  uint ret = 0;
  for(uint i = 0; i < window; i++) {
    ret <<= 1;
    ret |= Fr_get_bit(l, skip + i);
  }
  return ret;
}

DEVICE Fr quintic_s_box(Fr l, Fr pre_add, Fr post_add) {
    Fr tmp = Fr_add(l, pre_add);
    tmp = Fr_sqr(l);
    tmp = Fr_sqr(tmp);
    tmp = Fr_mul(tmp, l);
    tmp = Fr_add(tmp, post_add);

    return tmp;
  }

DEVICE Fr scalar_product(CONSTANT Fr* a, Fr* b, int size) {
    Fr res = Fr_ZERO;

    for (int i = 0; i < size; ++i) {
        Fr tmp = Fr_mul(a[i], b[i]);
        res = Fr_add(res, tmp);
      }

    return res;
  }

typedef struct state_2_standard {
  Fr elements[3];
  int current_round;
  int rk_offset;
} state_2_standard;

DEVICE state_2_standard apply_matrix_2_standard (CONSTANT Fr matrix[3][3], state_2_standard s) {
    Fr tmp[3];
    for (int i = 0; i < 3; ++i) {
        tmp[i] = s.elements[i];
        s.elements[i] = Fr_ZERO;
      }

    for (int j = 0; j < 3; ++j) {
        for (int i = 0; i < 3; ++i) {
            s.elements[j] = Fr_add(s.elements[j], Fr_mul(matrix[i][j], tmp[i]));
          }
      }
    return s;
  }

DEVICE state_2_standard apply_sparse_matrix_2_standard (CONSTANT Fr sm[5], state_2_standard s) {
    Fr first_elt = s.elements[0];

    s.elements[0] = scalar_product(sm + 0, s.elements, 3);

    for (int i = 1; i < 3; ++i) {
        Fr val = Fr_mul((sm + 3)[i-1], first_elt);
        s.elements[i] = Fr_add(s.elements[i], val);
      }

    return s;
  }

DEVICE state_2_standard apply_round_matrix_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    if (s.current_round == 3) {
        s = apply_matrix_2_standard((CONSTANT Fr (*)[3])(constants + 89), s);
      } else if ((s.current_round > 3) && (s.current_round < 4 + 55)) {
        int index = s.current_round - 3 - 1;
        s = apply_sparse_matrix_2_standard(constants + 98 + (index * 5), s);
      } else {
        s = apply_matrix_2_standard((CONSTANT Fr (*)[3])(constants + 80), s);
      }
    return s;
  }

DEVICE state_2_standard add_full_round_keys_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = Fr_add(s.elements[i], (constants + 1)[s.rk_offset + i]);
      }
    s.rk_offset += 3;
    return s;
  }

DEVICE state_2_standard full_round_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = quintic_s_box(s.elements[i], Fr_ZERO, (constants + 1)[s.rk_offset + i]);
      }
    s.rk_offset += 3;
    s = apply_round_matrix_2_standard(constants, s);
    s.current_round += 1;
    return s;
}

DEVICE state_2_standard last_full_round_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = quintic_s_box(s.elements[i], Fr_ZERO, Fr_ZERO);
      }
    s = apply_round_matrix_2_standard(constants, s);
    return s;
}

DEVICE state_2_standard partial_round_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    s.elements[0] = quintic_s_box(s.elements[0], Fr_ZERO, (constants + 1)[s.rk_offset]);
    s.rk_offset += 1;
    s = apply_round_matrix_2_standard(constants, s);
    s.current_round += 1;
    return s;
}

DEVICE state_2_standard hash_2_standard (CONSTANT Fr constants[373], state_2_standard s) {
    s = add_full_round_keys_2_standard(constants, s);

    for (int i = 0; i < 4; ++i) {
        s = full_round_2_standard(constants, s);
      }
    for (int i = 0; i < 55; ++ i) {
        s = partial_round_2_standard(constants, s);
      }
    for (int i = 0; i < (4 - 1); ++ i) {
        s = full_round_2_standard(constants, s);
      }
    s = last_full_round_2_standard(constants, s);

    return s;
  }

KERNEL void hash_preimages_2_standard(CONSTANT Fr constants[373],
                             GLOBAL Fr *preimages,
                             GLOBAL Fr *digests,
                             int batch_size
                             ) {
    int global_id = GET_GLOBAL_ID();

    if (global_id < batch_size) {
        int offset = global_id * 2;


        state_2_standard s;
        s.elements[0] = constants[0];
        for (int i = 0; i < 2; ++i) {
            s.elements[i+1] = preimages[offset + i];
          }

        s.current_round = 0;
        s.rk_offset = 0;

        s = hash_2_standard(constants, s);

        digests[global_id] = s.elements[1];
      }
  }

krnl_poseidon_arity2_1.cpp

C/C++
Top level entry function
Data structs
Dataflow functions
Basic output to know it is being called
#include <stdint.h>
#include <hls_stream.h>
#include <cstdio>
#include "ap_int.h"
#include <iostream>
#include <math.h>
using namespace std;

typedef struct Fr {
	unsigned long long val[4];
} Fr;

typedef struct state_2_standard {
	Fr elements[3];
	int current_round;
	int rk_offset;
} state_2_standard;

static void load_input(Fr* preimages, hls::stream<state_2_standard> &preimage_stream, const Fr constantZero, int size) {
#pragma HLS inline off
	load_input: for (int i = 0; i < size; i++) {
		state_2_standard s;
		s.elements[0] = constantZero;
		for (int j=0; j < 2; j++) {
			Fr preimage = preimages[i * 2 + j];
			s.elements[j+1] = Fr { (preimage.val[3], preimage.val[2], preimage.val[1], preimage.val[0]) };
		}
		s.current_round = 0;
		s.rk_offset = 0;
		preimage_stream << s;
	}
}

static void hash_2_standard(const Fr constants[], hls::stream<state_2_standard> &in, hls::stream<Fr> &out, int size) {
#pragma HLS inline off
	compute: for (int i = 0; i < size; i++) {
		state_2_standard s = in.read();

		int testAdd = size * 2;

		cout << "hello from the kernel " << testAdd << endl;

		out.write(Fr { { 0, 0, 0, 0 } });
	}
}

static void store_result(hls::stream<Fr> &out_stream, Fr* out, int size) {
#pragma HLS inline off
	store_result: for (int i = 0; i < size; i++) {
		out[i] = out_stream.read();
	}
}

extern "C" {
	void poseidon_hash(
			const Fr constants[373],
			Fr *preimages,
			Fr *digests,
			int batch_size
	) {
#pragma HLS interface m_axi port = preimages bundle = gmem0
#pragma HLS interface m_axi port = digests bundle = gmem1

#pragma HLS dataflow

		const unsigned int correctSize = batch_size/2;
		Fr constantsZero;

		static hls::stream<state_2_standard> preimage_stream("preimage_stream");
		static hls::stream<Fr> digests_stream("digests_stream");
#pragma HLS stream variable=preimage_stream
#pragma HLS stream variable=digests_stream

		constantsZero = constants[0];
		load_input(preimages, preimage_stream, constantsZero, correctSize);
		hash_2_standard(constants, preimage_stream, digests_stream, correctSize);
		store_result(digests_stream, digests, correctSize);

	}
}

krnl_poseidon_arity2_final.cpp

C/C++
Final state
#include <stdint.h>
#include <hls_stream.h>
#include <cstdio>
#include "ap_int.h"
#include <iostream>
#include <math.h>
using namespace std;

typedef struct Fr_4limb {
	ap_uint<64> val[4];
} Fr_4limb;

typedef struct Fr {
	ap_uint<256> val;
} Fr;

typedef struct state_2_standard {
	Fr elements[3];
	ap_uint<7> current_round;
	ap_uint<9> rk_offset;
} state_2_standard;



const Fr Fr_ONE = { (ap_int<64>("1739710354780652911",10), ap_int<64>("11064306276430008309",10), ap_int<64>("6378425256633387010",10), ap_int<64>("8589934590",10)) };
const Fr Fr_P = { (ap_int<64>("8353516859464449352",10), ap_int<64>("3691218898639771653",10), ap_int<64>("6034159408538082302",10), ap_int<64>("18446744069414584321",10)) };
const Fr Fr_R2 = { (ap_int<64>("524908885293268753",10), ap_int<64>("419701826671360399",10), ap_int<64>("3129137299524312099",10), ap_int<64>("14526898881837571181",10)) };
const Fr Fr_ZERO = { 0 };
const ap_int<64> Fr_INV = ap_int<64>("18446744069414584319",10);


// Greater than or equal
static bool Fr_gte(Fr a, Fr b) {
	return a.val >= b.val;
}

// Normal addition
static Fr Fr_add_(Fr a, Fr b) {
	return Fr { a.val + b.val };
}
static Fr Fr_sub_(Fr a, Fr b) {
	return Fr { a.val - b.val };
}

// Modular subtraction
static Fr Fr_sub(Fr a, Fr b) {
  Fr res = Fr_sub_(a, b);
  if(!Fr_gte(a, b))
	  res = Fr_add_(res, Fr_P);
  return res;
}

// Modular addition
static Fr Fr_add(Fr a, Fr b) {
#pragma HLS INLINE
	Fr res = Fr_add_(a, b);
	if (Fr_gte(res, Fr_P))
		res = Fr_sub_(res, Fr_P);
	return res;
}

const ap_uint<64> Fr_P_first_limb = Fr_P.val(63, 0);
const ap_uint<192> Fr_P_remainder = Fr_P.val(255, 64);

static ap_uint<384> Fr_mul_part(Fr a, Fr b, ap_uint<384> t384, ap_uint<3> i) {
#pragma HLS inline

	//ap_uint<8> range = i * ap_uint<7>(64);

	ap_uint<64> bSubRange = b.val.range((i * 64) + 63, i * 64);

	t384 += a.val * bSubRange;

	ap_uint<64> t_first_limb = t384.range(63, 0);
	ap_uint<64> m = Fr_INV * t_first_limb;

	ap_uint<129> res = m * Fr_P_first_limb + t384(63, 0);
	ap_uint<64> carry = res.range(128, 64);

	return m * Fr_P_remainder + t384(383, 64) + carry;
}

static Fr Fr_mul(Fr a, Fr b) {
#pragma HLS inline
	ap_uint<384> t384 = ap_uint<384>(0);

	Fr_mul_loop: for (ap_uint<3> i = 0; i < 4; i++) {
		t384 = Fr_mul_part(a, b, t384, i);
	}

	Fr result = Fr { (t384(255, 192), t384(191, 128), t384(127, 64), t384(63, 0)) };

	if (Fr_gte(result, Fr_P)) {
		result = Fr_sub_(result, Fr_P);
	}

	return result;
}

static Fr Fr_mont(Fr a) {
  return Fr_mul(a, Fr_R2);
}

static Fr Fr_unmont(Fr a) {
  Fr one = Fr { 1 };
  return Fr_mul(a, one);
}

static ap_uint<256> Fr_unmont_to_decimal(Fr a) {
	return a.val;
}






static void transform_constants(const Fr_4limb constants[], Fr constantsFr[], ap_uint<9> size) {
#pragma HLS inline off
	transform_constants: for (ap_uint<9> i=0; i<size; i++) {
		constantsFr[i] = Fr { (constants[i].val[3], constants[i].val[2], constants[i].val[1], constants[i].val[0]) };
	}
}

static void load_input(Fr_4limb* preimages, hls::stream<state_2_standard> &preimage_stream, const Fr constantZero, ap_uint<9> size) {
#pragma HLS inline off
	load_input: for (ap_uint<32> i = 0; i < size; i++) {
		state_2_standard s;
		s.elements[0] = constantZero;
		for (ap_uint<2> j=0; j < 2; j++) {
			Fr_4limb preimage = preimages[i * 2 + j];
			s.elements[j+1] = Fr { (preimage.val[3], preimage.val[2], preimage.val[1], preimage.val[0]) };
		}
		s.current_round = 0;
		s.rk_offset = 0;
		preimage_stream << s;
	}
}



static Fr quintic_s_box(Fr l, Fr pre_add, Fr post_add) {
#pragma HLS allocation operation instances=mul limit=15
    Fr tmp = Fr_add(l, pre_add);
    tmp = Fr_mul(l, l);
    tmp = Fr_mul(tmp, tmp);
    tmp = Fr_mul(tmp, l);
    tmp = Fr_add(tmp, post_add);

    return tmp;
}

static Fr scalar_product(const Fr* a, Fr* b, ap_uint<2> size) {
#pragma HLS inline
    Fr res = Fr_ZERO;

    multiply_add_loop: for (ap_uint<2> i = 0; i < size; ++i) {
        Fr tmp = Fr_mul(a[i], b[i]);
        res = Fr_add(res, tmp);
    }

    return res;
}

static state_2_standard apply_matrix_2_standard(const Fr matrix[9], state_2_standard s) {
#pragma HLS allocation operation instances=mul limit=30
    Fr tmp[3];
    initialize: for (ap_uint<2> i = 0; i < 3; ++i) {
        tmp[i] = s.elements[i];
        s.elements[i] = Fr_ZERO;
    }

    multiply_add_loop: for (ap_uint<2> j = 0; j < 3; ++j) {
        for (ap_uint<2> i = 0; i < 3; ++i) {
            s.elements[j] = Fr_add(s.elements[j], Fr_mul(matrix[i * 3 + j], tmp[i]));
        }
    }
    return s;
}

static state_2_standard apply_sparse_matrix_2_standard(const Fr sm[5], state_2_standard s) {
#pragma HLS inline
    Fr first_elt = s.elements[0];

    s.elements[0] = scalar_product(sm + 0, s.elements, 3);

    multiply_add_loop: for (ap_uint<2> i = 1; i < 3; ++i) {
        Fr val = Fr_mul((sm + 3)[i-1], first_elt);
        s.elements[i] = Fr_add(s.elements[i], val);
    }

    return s;
}

static state_2_standard apply_round_matrix_2_standard(const Fr constants[373], state_2_standard s) {
#pragma HLS inline
	if ((s.current_round > 3) && (s.current_round < 4 + 55)) {
		ap_uint<7> index = s.current_round - 3 - 1;
		ap_uint<9> index2 = index * 5;
		s = apply_sparse_matrix_2_standard(constants + 98 + (index2), s);
	} else {
		Fr *matrix;
		if (s.current_round == 3) {
			matrix = (Fr (*))(constants + 89);
		} else {
			matrix = (Fr (*))(constants + 80);
		}

		s = apply_matrix_2_standard(matrix, s);
	}

	return s;
}

static state_2_standard add_full_round_keys_2_standard(const Fr constants[373], state_2_standard s) {
	add_loop: for (ap_uint<2> i = 0; i < 3; ++i) {
        s.elements[i] = Fr_add(s.elements[i], (constants + 1)[s.rk_offset + i]);
    }
    s.rk_offset += 3;
    return s;
}

static state_2_standard all_rounds_2_standard(const Fr constants[], state_2_standard s, ap_uint<1> full, ap_uint<1> partial, ap_uint<1> last) {
//#pragma HLS pipeline
#pragma HLS allocation operation instances=mul limit=30
	//95 is the fastest at 300MHz with all DSP

	state_2_standard s2;
	s2.current_round = s.current_round;

	loop_quintic: for (ap_uint<2> i = 0; i < 3; ++i) {
		Fr post_add;
		if (full || partial) {
			post_add = (constants + 1)[s.rk_offset + i];
		} else {
			post_add = Fr_ZERO;
		}

		if (i == 0 || full || last) {
			s2.elements[i] = quintic_s_box(s.elements[i], Fr_ZERO, post_add);
		} else {
			s2.elements[i] = s.elements[i];
		}
	}

	if (full) {
		s2.rk_offset = s.rk_offset + 3;
	} else if (partial) {
		s2.rk_offset = s.rk_offset + 1;
	} else {
		s2.rk_offset = s.rk_offset;
	}

	state_2_standard s3 = apply_round_matrix_2_standard(constants, s2);

	s3.current_round = s.current_round + 1;

	return s3;
}

static void hash_2_standard(const Fr constants[], hls::stream<state_2_standard> &in, hls::stream<Fr> &out, ap_uint<32> size) {

	state_2_standard s[512];
	ap_uint<7> round[512] = {0};
	ap_uint<9> index = 0;
	ap_uint<32> totalReads = 0;

	ap_uint<32> fitToProcessingSize = (size >> 9) << 9;

	if (fitToProcessingSize < size) {
		fitToProcessingSize += 512;
	}

	ap_uint<39> processSize = fitToProcessingSize * 65;

	main_loop: for (ap_uint<32> i=0; i < processSize; i++) {
#pragma HLS loop_tripcount min=65000 max=65000
#pragma HLS PIPELINE

		if (round[index] == 0) {
			if (totalReads < size) {
				s[index] = add_full_round_keys_2_standard(constants, in.read());
				round[index] = 1;
				totalReads = totalReads + 1;
			}
		} else if (round[index] >= 1 && round[index] <= 63) {
			ap_uint<7> currentP = round[index];

			ap_uint<1> full = ((currentP >= 1 && currentP <= 4) || (currentP >= 60 && currentP <= 62));
			ap_uint<1> partial = (currentP >= 5 && currentP <= 59);
			ap_uint<1> last = (currentP == 63);

			s[index] = all_rounds_2_standard(constants, s[index], full, partial, last);

			round[index] = round[index] + 1;
		} else {
			out.write(s[index].elements[1]);
			round[index] = 0;
		}


		index++;
		if (index >= 512) {
			index = 0;
		}
	}
}

static void store_result(hls::stream<Fr> &out_stream, Fr_4limb* out, ap_uint<9> size) {
#pragma HLS inline off
	store_result: for (ap_uint<9> i = 0; i < size; i++) {
		Fr data = out_stream.read();
		out[i] = Fr_4limb { { data.val(63, 0), data.val(127, 64), data.val(191, 128), data.val(255, 192) } };
	}
}

extern "C" {
	void poseidon_hash(
			const Fr_4limb constants[373],
			Fr_4limb *preimages,
			Fr_4limb *digests,
			int batch_size
	) {
#pragma HLS interface m_axi port = preimages bundle = gmem0
#pragma HLS interface m_axi port = digests bundle = gmem1

#pragma HLS dataflow

		Fr constantsFr[373];
#pragma HLS array_partition variable=constantsFr complete

		const unsigned int correctSize = batch_size/2;
		Fr constantsZero;

		static hls::stream<state_2_standard> preimage_stream("preimage_stream");
		static hls::stream<Fr> digests_stream("digests_stream");
#pragma HLS stream variable=preimage_stream
#pragma HLS stream variable=digests_stream

		transform_constants(constants, constantsFr, 373);
		constantsZero = constantsFr[0];
		load_input(preimages, preimage_stream, constantsZero, correctSize);
		hash_2_standard(constantsFr, preimage_stream, digests_stream, correctSize);
		store_result(digests_stream, digests, correctSize);

	}
}

Baseline_OpenCL_from_Neptune_simplified.cl

C/C++
Simplified version of the generated OpenCL code used by the Neptune project
https://github.com/filecoin-project/neptune
License MIT or Apache 2.0
https://github.com/filecoin-project/neptune/blob/master/LICENSE-APACHE
https://github.com/filecoin-project/neptune/blob/master/LICENSE-MIT
// Returns a * b + c + d, puts the carry in d
ulong mac_with_carry_64(ulong a, ulong b, ulong c, ulong *d) {
    ulong lo = a * b + c;
    ulong hi = mad_hi(a, b, (ulong)(lo < c));
    a = lo;
    lo += *d;
    hi += (lo < a);
    *d = hi;
    return lo;
}

// Returns a + b, puts the carry in d
ulong add_with_carry_64(ulong a, ulong *b) {
    ulong lo = a + *b;
    *b = lo < a;
    return lo;
}

// Returns a * b + c + d, puts the carry in d
uint mac_with_carry_32(uint a, uint b, uint c, uint *d) {
  ulong res = (ulong)a * b + c + *d;
  *d = res >> 32;
  return res;
}

// Returns a + b, puts the carry in b
uint add_with_carry_32(uint a, uint *b) {
    uint lo = a + *b;
    *b = lo < a;
    return lo;
}

#define Fr_limb ulong
#define Fr_LIMBS 4
#define Fr_LIMB_BITS 64
#define Fr_INV 18446744069414584319
typedef struct { Fr_limb val[Fr_LIMBS]; } Fr;
__constant Fr Fr_ONE = { { 8589934590, 6378425256633387010, 11064306276430008309, 1739710354780652911 } };
__constant Fr Fr_P = { { 18446744069414584321, 6034159408538082302, 3691218898639771653, 8353516859464449352 } };
__constant Fr Fr_R2 = { { 14526898881837571181, 3129137299524312099, 419701826671360399, 524908885293268753 } };
__constant Fr Fr_ZERO = { { 0, 0, 0, 0 } };


// Greater than or equal
bool Fr_gte(Fr a, Fr b) {
  for(char i = Fr_LIMBS - 1; i >= 0; i--){
    if(a.val[i] > b.val[i])
      return true;
    if(a.val[i] < b.val[i])
      return false;
  }
  return true;
}

// Equals
bool Fr_eq(Fr a, Fr b) {
  for(uchar i = 0; i < Fr_LIMBS; i++)
    if(a.val[i] != b.val[i])
      return false;
  return true;
}

// Normal addition
  Fr Fr_add_(Fr a, Fr b) {
    bool carry = 0;
    for(uchar i = 0; i < Fr_LIMBS; i++) {
      Fr_limb old = a.val[i];
      a.val[i] += b.val[i] + carry;
      carry = carry ? old >= a.val[i] : old > a.val[i];
    }
    return a;
  }
  Fr Fr_sub_(Fr a, Fr b) {
    bool borrow = 0;
    for(uchar i = 0; i < Fr_LIMBS; i++) {
      Fr_limb old = a.val[i];
      a.val[i] -= b.val[i] + borrow;
      borrow = borrow ? old <= a.val[i] : old < a.val[i];
    }
    return a;
  }

// Modular subtraction
Fr Fr_sub(Fr a, Fr b) {
  Fr res = Fr_sub_(a, b);
  if(!Fr_gte(a, b)) res = Fr_add_(res, Fr_P);
  return res;
}

// Modular addition
Fr Fr_add(Fr a, Fr b) {
  Fr res = Fr_add_(a, b);
  if(Fr_gte(res, Fr_P)) res = Fr_sub_(res, Fr_P);
  return res;
}

// Modular multiplication
Fr Fr_mul(Fr a, Fr b) {
  /* CIOS Montgomery multiplication, inspired from Tolga Acar\'s thesis:
   * https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
   * Learn more:
   * https://en.wikipedia.org/wiki/Montgomery_modular_multiplication
   * https://alicebob.cryptoland.net/understanding-the-montgomery-reduction-algorithm/
   */
  Fr_limb t[Fr_LIMBS + 2] = {0};
  for(uchar i = 0; i < Fr_LIMBS; i++) {
    Fr_limb carry = 0;
    for(uchar j = 0; j < Fr_LIMBS; j++)
      t[j] = mac_with_carry_64(a.val[j], b.val[i], t[j], &carry);
    t[Fr_LIMBS] = add_with_carry_64(t[Fr_LIMBS], &carry);
    t[Fr_LIMBS + 1] = carry;

    carry = 0;
    Fr_limb m = Fr_INV * t[0];
    mac_with_carry_64(m, Fr_P.val[0], t[0], &carry);
    for(uchar j = 1; j < Fr_LIMBS; j++)
      t[j - 1] = mac_with_carry_64(m, Fr_P.val[j], t[j], &carry);

    t[Fr_LIMBS - 1] = add_with_carry_64(t[Fr_LIMBS], &carry);
    t[Fr_LIMBS] = t[Fr_LIMBS + 1] + carry;
  }

  Fr result;
  for(uchar i = 0; i < Fr_LIMBS; i++) result.val[i] = t[i];

  if(Fr_gte(result, Fr_P)) result = Fr_sub_(result, Fr_P);

  return result;
}

// Squaring is a special case of multiplication which can be done ~1.5x faster.
// https://stackoverflow.com/a/16388571/1348497
Fr Fr_sqr(Fr a) {
  return Fr_mul(a, a);
}



Fr Fr_mont(Fr a) {
  return Fr_mul(a, Fr_R2);
}

Fr Fr_unmont(Fr a) {
  Fr one = Fr_ZERO;
  one.val[0] = 1;
  return Fr_mul(a, one);
}


Fr quintic_s_box(Fr l, Fr pre_add, Fr post_add) {
    Fr tmp = Fr_add(l, pre_add);
    tmp = Fr_sqr(l);
    tmp = Fr_sqr(tmp);
    tmp = Fr_mul(tmp, l);
    tmp = Fr_add(tmp, post_add);

    return tmp;
  }

Fr scalar_product(__constant Fr* a, Fr* b, int size) {
    Fr res = Fr_ZERO;

    for (int i = 0; i < size; ++i) {
        Fr tmp = Fr_mul(a[i], b[i]);
        res = Fr_add(res, tmp);
      }

    return res;
  }

typedef struct state_2_standard {
  Fr elements[3];
  int current_round;
  int rk_offset;
} state_2_standard;

state_2_standard apply_matrix_2_standard (__constant Fr matrix[3][3], state_2_standard s) {
    Fr tmp[3];
    for (int i = 0; i < 3; ++i) {
        tmp[i] = s.elements[i];
        s.elements[i] = Fr_ZERO;
      }

    for (int j = 0; j < 3; ++j) {
        for (int i = 0; i < 3; ++i) {
            s.elements[j] = Fr_add(s.elements[j], Fr_mul(matrix[i][j], tmp[i]));
          }
      }
    return s;
  }

state_2_standard apply_sparse_matrix_2_standard (__constant Fr sm[5], state_2_standard s) {
    Fr first_elt = s.elements[0];

    s.elements[0] = scalar_product(sm + 0, s.elements, 3);

    for (int i = 1; i < 3; ++i) {
        Fr val = Fr_mul((sm + 3)[i-1], first_elt);
        s.elements[i] = Fr_add(s.elements[i], val);
      }

    return s;
  }

state_2_standard apply_round_matrix_2_standard (__constant Fr constants[373], state_2_standard s) {
    if (s.current_round == 3) {
        s = apply_matrix_2_standard((__constant Fr (*)[3])(constants + 89), s);
      } else if ((s.current_round > 3) && (s.current_round < 4 + 55)) {
        int index = s.current_round - 3 - 1;
        s = apply_sparse_matrix_2_standard(constants + 98 + (index * 5), s);
      } else {
        s = apply_matrix_2_standard((__constant Fr (*)[3])(constants + 80), s);
      }
    return s;
  }

state_2_standard add_full_round_keys_2_standard (__constant Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = Fr_add(s.elements[i], (constants + 1)[s.rk_offset + i]);
      }
    s.rk_offset += 3;
    return s;
  }

state_2_standard full_round_2_standard (__constant Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = quintic_s_box(s.elements[i], Fr_ZERO, (constants + 1)[s.rk_offset + i]);
      }
    s.rk_offset += 3;
    s = apply_round_matrix_2_standard(constants, s);
    s.current_round += 1;
    return s;
}

state_2_standard last_full_round_2_standard (__constant Fr constants[373], state_2_standard s) {
    for (int i = 0; i < 3; ++i) {
        s.elements[i] = quintic_s_box(s.elements[i], Fr_ZERO, Fr_ZERO);
      }
    s = apply_round_matrix_2_standard(constants, s);
    return s;
}

state_2_standard partial_round_2_standard (__constant Fr constants[373], state_2_standard s) {
    s.elements[0] = quintic_s_box(s.elements[0], Fr_ZERO, (constants + 1)[s.rk_offset]);
    s.rk_offset += 1;
    s = apply_round_matrix_2_standard(constants, s);
    s.current_round += 1;
    return s;
}

state_2_standard hash_2_standard (__constant Fr constants[373], state_2_standard s) {
    s = add_full_round_keys_2_standard(constants, s);

    for (int i = 0; i < 4; ++i) {
        s = full_round_2_standard(constants, s);
      }
    for (int i = 0; i < 55; ++ i) {
        s = partial_round_2_standard(constants, s);
      }
    for (int i = 0; i < (4 - 1); ++ i) {
        s = full_round_2_standard(constants, s);
      }
    s = last_full_round_2_standard(constants, s);

    return s;
  }

__kernel void hash_preimages_2_standard(__constant Fr constants[373],
                             __global Fr *preimages,
                             __global Fr *digests,
                             int batch_size
                             ) {
    int global_id = get_global_id(0);

    if (global_id < batch_size) {
        int offset = global_id * 2;


        state_2_standard s;
        s.elements[0] = constants[0];
        for (int i = 0; i < 2; ++i) {
            s.elements[i+1] = preimages[offset + i];
          }

        s.current_round = 0;
        s.rk_offset = 0;

        s = hash_2_standard(constants, s);

        digests[global_id] = s.elements[1];
      }
  }

host_poseidon_1.cpp

C/C++
Simple host for running our initial kernel
vadd.cpp use and modification with permissions form AMD-Xilinx
/*******************************************************************************
Vendor: Xilinx
Associated Filename: vadd.cpp
Purpose: VITIS vector addition

*******************************************************************************
Copyright (C) 2019 XILINX, Inc.

This file contains confidential and proprietary information of Xilinx, Inc. and
is protected under U.S. and international copyright and other intellectual
property laws.

DISCLAIMER
This disclaimer is not a license and does not grant any rights to the materials
distributed herewith. Except as otherwise provided in a valid license issued to
you by Xilinx, and to the maximum extent permitted by applicable law:
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
in contract or tort, including negligence, or under any other theory of
liability) for any loss or damage of any kind or nature related to, arising under
or in connection with these materials, including for any direct, or any indirect,
special, incidental, or consequential loss or damage (including loss of data,
profits, goodwill, or any type of loss or damage suffered as a result of any
action brought by a third party) even if such damage or loss was reasonably
foreseeable or Xilinx had been advised of the possibility of the same.

CRITICAL APPLICATIONS
Xilinx products are not designed or intended to be fail-safe, or for use in any
application requiring fail-safe performance, such as life-support or safety
devices or systems, Class III medical devices, nuclear facilities, applications
related to the deployment of airbags, or any other applications that could lead
to death, personal injury, or severe property or environmental damage
(individually and collectively, "Critical Applications"). Customer assumes the
sole risk and liability of any use of Xilinx products in Critical Applications,
subject only to applicable laws and regulations governing limitations on product
liability.

THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
ALL TIMES.

*******************************************************************************/
#define OCL_CHECK(error, call)                                                                   \
    call;                                                                                        \
    if (error != CL_SUCCESS) {                                                                   \
        printf("%s:%d Error calling " #call ", error code is: %d\n", __FILE__, __LINE__, error); \
        exit(EXIT_FAILURE);                                                                      \
    }

#include <stdlib.h>
#include <fstream>
#include <iostream>
#include "vadd.h"
#include "ap_int.h"
using namespace std;

static const int DATA_SIZE = 1;

static const std::string error_message =
    "Error: Result mismatch:\n"
    "i = %d CPU result = %d Device result = %d\n";

typedef struct Fr {
	unsigned long long val[4];
} Fr;

int main(int argc, char* argv[]) {

    //TARGET_DEVICE macro needs to be passed from gcc command line
    if(argc != 2) {
		std::cout << "Usage: " << argv[0] <<" <xclbin>" << std::endl;
		return EXIT_FAILURE;
	}

    std::string xclbinFilename = argv[1];

    // Compute the size of array in bytes
    size_t size_in_bytes = DATA_SIZE * sizeof(int);
    size_t constants_size_in_bytes = 373 * sizeof(Fr);

    // Creates a vector of DATA_SIZE elements with an initial value of 10 and 32
    // using customized allocator for getting buffer alignment to 4k boundary

    std::vector<cl::Device> devices;
    cl::Device device;
    cl_int err;
    cl::Context context;
    cl::CommandQueue q;
    cl::Kernel krnl_poseidon_hash;
    cl::Program program;
    std::vector<cl::Platform> platforms;
    bool found_device = false;

    //traversing all Platforms To find Xilinx Platform and targeted
    //Device in Xilinx Platform
    cl::Platform::get(&platforms);
    for(size_t i = 0; (i < platforms.size() ) & (found_device == false) ;i++){
        cl::Platform platform = platforms[i];
        std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
        if ( platformName == "Xilinx"){
            devices.clear();
            platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
	    if (devices.size()){
		    device = devices[0];
		    found_device = true;
		    break;
	    }
        }
    }
    if (found_device == false){
       std::cout << "Error: Unable to find Target Device "
           << device.getInfo<CL_DEVICE_NAME>() << std::endl;
       return EXIT_FAILURE;
    }

    // Creating Context and Command Queue for selected device
    OCL_CHECK(err, context = cl::Context(device, NULL, NULL, NULL, &err));
    OCL_CHECK(err, q = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err));

    std::cout << "INFO: Reading " << xclbinFilename << std::endl;
    FILE* fp;
    if ((fp = fopen(xclbinFilename.c_str(), "r")) == nullptr) {
        printf("ERROR: %s xclbin not available please build\n", xclbinFilename.c_str());
        exit(EXIT_FAILURE);
    }
    // Load xclbin
    std::cout << "Loading: '" << xclbinFilename << "'\n";
    std::ifstream bin_file(xclbinFilename, std::ifstream::binary);
    bin_file.seekg (0, bin_file.end);
    unsigned nb = bin_file.tellg();
    bin_file.seekg (0, bin_file.beg);
    char *buf = new char [nb];
    bin_file.read(buf, nb);

    // Creating Program from Binary File
    cl::Program::Binaries bins;
    bins.push_back({buf,nb});
    devices.resize(1);
    OCL_CHECK(err, program = cl::Program(context, devices, bins, NULL, &err));

    // This call will get the kernel object from program. A kernel is an
    // OpenCL function that is executed on the FPGA.
    OCL_CHECK(err, krnl_poseidon_hash = cl::Kernel(program,"poseidon_hash", &err));

    // These commands will allocate memory on the Device. The cl::Buffer objects can
    // be used to reference the memory locations on the device.
    OCL_CHECK(err, cl::Buffer buffer_constants(context, CL_MEM_READ_ONLY, constants_size_in_bytes, NULL, &err));
    OCL_CHECK(err, cl::Buffer buffer_preimages(context, CL_MEM_READ_ONLY, size_in_bytes, NULL, &err));
    OCL_CHECK(err, cl::Buffer buffer_digests(context, CL_MEM_WRITE_ONLY, size_in_bytes, NULL, &err));

    //set the kernel Arguments
    int narg=0;
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_constants));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_preimages));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_digests));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,DATA_SIZE));

    //We then need to map our OpenCL buffers to get the pointers
    Fr *ptr_constants;
    Fr *ptr_preimages;
    Fr *ptr_digests;
    OCL_CHECK(err, ptr_constants = (Fr*)q.enqueueMapBuffer (buffer_constants , CL_TRUE , CL_MAP_WRITE , 0, constants_size_in_bytes, NULL, NULL, &err));
    OCL_CHECK(err, ptr_preimages = (Fr*)q.enqueueMapBuffer (buffer_preimages , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes, NULL, NULL, &err));
    OCL_CHECK(err, ptr_digests = (Fr*)q.enqueueMapBuffer (buffer_digests , CL_TRUE , CL_MAP_READ , 0, size_in_bytes, NULL, NULL, &err));

    cout << "setting up data" << endl;
    for (unsigned long long i = 0; i < 373; i++) {
    	ptr_constants[i] = Fr { { i, 0, 0, 0 } };
    }

    for (unsigned long long i = 0; i < DATA_SIZE; i++) {
		ptr_preimages[i] = Fr { { i, 0, 0, 0 } };
	}

    // Data will be migrated to kernel space
    OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_preimages}, 0/* 0 means from host*/));

    //Launch the Kernel
    OCL_CHECK(err, err = q.enqueueTask(krnl_poseidon_hash));

    // The result of the previous kernel execution will need to be retrieved in
    // order to view the results. This call will transfer the data from FPGA to
    // source_results vector
    OCL_CHECK(err, q.enqueueMigrateMemObjects({buffer_digests},CL_MIGRATE_MEM_OBJECT_HOST));

    cout << "waiting" << endl;
    OCL_CHECK(err, q.finish());
    cout << "finish" << endl;

    //Verify the result
    int match = 0;
    for (int i=0; i< (DATA_SIZE >> 1); i++) {
    	cout << " output " << ptr_digests[i].val[0] << " " << ptr_digests[i].val[1] << " " << ptr_digests[i].val[2] << " " << ptr_digests[i].val[3] << endl;
    }

    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_constants, ptr_constants));
    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_preimages, ptr_preimages));
    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_digests, ptr_digests));
    OCL_CHECK(err, err = q.finish());

    cout << "TEST " << (match ? "FAILED" : "PASSED") << std::endl;
    return (match ? EXIT_FAILURE :  EXIT_SUCCESS);

}

host_poseidon_2.cpp

C/C++
Final state
/*******************************************************************************
Vendor: Xilinx
Associated Filename: vadd.cpp
Purpose: VITIS vector addition

*******************************************************************************
Copyright (C) 2019 XILINX, Inc.

This file contains confidential and proprietary information of Xilinx, Inc. and
is protected under U.S. and international copyright and other intellectual
property laws.

DISCLAIMER
This disclaimer is not a license and does not grant any rights to the materials
distributed herewith. Except as otherwise provided in a valid license issued to
you by Xilinx, and to the maximum extent permitted by applicable law:
(1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX
HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY,
INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, OR
FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether
in contract or tort, including negligence, or under any other theory of
liability) for any loss or damage of any kind or nature related to, arising under
or in connection with these materials, including for any direct, or any indirect,
special, incidental, or consequential loss or damage (including loss of data,
profits, goodwill, or any type of loss or damage suffered as a result of any
action brought by a third party) even if such damage or loss was reasonably
foreseeable or Xilinx had been advised of the possibility of the same.

CRITICAL APPLICATIONS
Xilinx products are not designed or intended to be fail-safe, or for use in any
application requiring fail-safe performance, such as life-support or safety
devices or systems, Class III medical devices, nuclear facilities, applications
related to the deployment of airbags, or any other applications that could lead
to death, personal injury, or severe property or environmental damage
(individually and collectively, "Critical Applications"). Customer assumes the
sole risk and liability of any use of Xilinx products in Critical Applications,
subject only to applicable laws and regulations governing limitations on product
liability.

THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT
ALL TIMES.

*******************************************************************************/
#define OCL_CHECK(error, call)                                                                   \
    call;                                                                                        \
    if (error != CL_SUCCESS) {                                                                   \
        printf("%s:%d Error calling " #call ", error code is: %d\n", __FILE__, __LINE__, error); \
        exit(EXIT_FAILURE);                                                                      \
    }

#include <stdlib.h>
#include <fstream>
#include <iostream>
#include "vadd.h"
#include "ap_int.h"
using namespace std;

static const int DATA_SIZE = 2;

static const std::string error_message =
    "Error: Result mismatch:\n"
    "i = %d CPU result = %d Device result = %d\n";

typedef struct Fr {
	ap_uint<64> val[4];
} Fr;

int main(int argc, char* argv[]) {

    //TARGET_DEVICE macro needs to be passed from gcc command line
    if(argc != 2) {
		std::cout << "Usage: " << argv[0] <<" <xclbin>" << std::endl;
		return EXIT_FAILURE;
	}

    std::string xclbinFilename = argv[1];

    // Compute the size of array in bytes
    size_t size_in_bytes = DATA_SIZE * sizeof(Fr);
    size_t constants_size_in_bytes = 373 * sizeof(Fr);

    // Creates a vector of DATA_SIZE elements with an initial value of 10 and 32
    // using customized allocator for getting buffer alignment to 4k boundary

    std::vector<cl::Device> devices;
    cl::Device device;
    cl_int err;
    cl::Context context;
    cl::CommandQueue q;
    cl::Kernel krnl_poseidon_hash;
    cl::Program program;
    std::vector<cl::Platform> platforms;
    bool found_device = false;

    //traversing all Platforms To find Xilinx Platform and targeted
    //Device in Xilinx Platform
    cl::Platform::get(&platforms);
    for(size_t i = 0; (i < platforms.size() ) & (found_device == false) ;i++){
        cl::Platform platform = platforms[i];
        std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
        if ( platformName == "Xilinx"){
            devices.clear();
            platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
	    if (devices.size()){
		    device = devices[0];
		    found_device = true;
		    break;
	    }
        }
    }
    if (found_device == false){
       std::cout << "Error: Unable to find Target Device "
           << device.getInfo<CL_DEVICE_NAME>() << std::endl;
       return EXIT_FAILURE;
    }

    // Creating Context and Command Queue for selected device
    OCL_CHECK(err, context = cl::Context(device, NULL, NULL, NULL, &err));
    OCL_CHECK(err, q = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err));

    std::cout << "INFO: Reading " << xclbinFilename << std::endl;
    FILE* fp;
    if ((fp = fopen(xclbinFilename.c_str(), "r")) == nullptr) {
        printf("ERROR: %s xclbin not available please build\n", xclbinFilename.c_str());
        exit(EXIT_FAILURE);
    }
    // Load xclbin
    std::cout << "Loading: '" << xclbinFilename << "'\n";
    std::ifstream bin_file(xclbinFilename, std::ifstream::binary);
    bin_file.seekg (0, bin_file.end);
    unsigned nb = bin_file.tellg();
    bin_file.seekg (0, bin_file.beg);
    char *buf = new char [nb];
    bin_file.read(buf, nb);

    // Creating Program from Binary File
    cl::Program::Binaries bins;
    bins.push_back({buf,nb});
    devices.resize(1);
    OCL_CHECK(err, program = cl::Program(context, devices, bins, NULL, &err));

    // This call will get the kernel object from program. A kernel is an
    // OpenCL function that is executed on the FPGA.
    OCL_CHECK(err, krnl_poseidon_hash = cl::Kernel(program,"poseidon_hash", &err));

    // These commands will allocate memory on the Device. The cl::Buffer objects can
    // be used to reference the memory locations on the device.
    OCL_CHECK(err, cl::Buffer buffer_constants(context, CL_MEM_READ_ONLY, constants_size_in_bytes, NULL, &err));
    OCL_CHECK(err, cl::Buffer buffer_preimages(context, CL_MEM_READ_ONLY, size_in_bytes, NULL, &err));
    OCL_CHECK(err, cl::Buffer buffer_digests(context, CL_MEM_WRITE_ONLY, size_in_bytes, NULL, &err));

    //set the kernel Arguments
    int narg=0;
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_constants));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_preimages));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,buffer_digests));
    OCL_CHECK(err, err = krnl_poseidon_hash.setArg(narg++,DATA_SIZE));

    //We then need to map our OpenCL buffers to get the pointers
    Fr *ptr_constants;
    Fr *ptr_preimages;
    Fr *ptr_digests;
    OCL_CHECK(err, ptr_constants = (Fr*)q.enqueueMapBuffer (buffer_constants , CL_TRUE , CL_MAP_WRITE , 0, constants_size_in_bytes, NULL, NULL, &err));
    OCL_CHECK(err, ptr_preimages = (Fr*)q.enqueueMapBuffer (buffer_preimages , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes, NULL, NULL, &err));
    OCL_CHECK(err, ptr_digests = (Fr*)q.enqueueMapBuffer (buffer_digests , CL_TRUE , CL_MAP_READ , 0, size_in_bytes, NULL, NULL, &err));

    cout << "setting up data" << endl;
//    for (unsigned long long i = 0; i < 373; i++) {
//    	ptr_constants[i] = Fr { { i, 0, 0, 0 } };
//    }
//
//    for (unsigned long long i = 0; i < DATA_SIZE; i++) {
//		ptr_preimages[i] = Fr { { i, 0, 0, 0 } };
//	}

    ptr_constants[0] = Fr { {25769803770LLU, 688531696190609414LLU, 14746174755580473312LLU, 5219131064341958734LLU} };
    ptr_constants[1] = Fr { {2586023662232715565LLU, 126591111841514091LLU, 6369287046331605992LLU, 3032985663142005057LLU} };
    ptr_constants[2] = Fr { {15939922731401505517LLU, 14018416911069663106LLU, 15851334265683226510LLU, 6921981741405656779LLU} };
    ptr_constants[3] = Fr { {4585642813721984958LLU, 5323118516173059455LLU, 4461769743650524738LLU, 7575385862932132712LLU} };
    ptr_constants[4] = Fr { {17963382907232313279LLU, 5803302119610209305LLU, 11481458567430971976LLU, 4978652746447710604LLU} };
    ptr_constants[5] = Fr { {7155177870813441893LLU, 17799960673883283912LLU, 1413862116665356678LLU, 2964591783541484596LLU} };
    ptr_constants[6] = Fr { {208250222812694759LLU, 17671111334957518850LLU, 15107799744777742273LLU, 2411363865468797748LLU} };
    ptr_constants[7] = Fr { {3699605827063000195LLU, 15143434007785742954LLU, 1430444016349108299LLU, 6413264134765336873LLU} };
    ptr_constants[8] = Fr { {13413861131981017654LLU, 16023671670735879404LLU, 2506955124830805409LLU, 5297733040175138010LLU} };
    ptr_constants[9] = Fr { {3944133672794305685LLU, 16782975202730688858LLU, 4326043043940438513LLU, 5530754134002307092LLU} };
    ptr_constants[10] = Fr { {10008212841043037212LLU, 13226836244885882447LLU, 15081894151418494694LLU, 246810350752849859LLU} };
    ptr_constants[11] = Fr { {6592150231562492861LLU, 1184687213179370713LLU, 3904341924855161811LLU, 8260341784349969691LLU} };
    ptr_constants[12] = Fr { {3492317771337795360LLU, 13682852126930028150LLU, 2390707349531266259LLU, 6303926013987945341LLU} };
    ptr_constants[13] = Fr { {13242295585067045241LLU, 10694772051155260711LLU, 13907678153599378018LLU, 2067223775050241116LLU} };
    ptr_constants[14] = Fr { {4736614751973423784LLU, 3146864846419305561LLU, 12962872407393842370LLU, 2853354720971854420LLU} };
    ptr_constants[15] = Fr { {8221635392713389328LLU, 3235439860055846871LLU, 6181623046722268636LLU, 6092380918846021059LLU} };
    ptr_constants[16] = Fr { {3634597003290824492LLU, 6182718203042363867LLU, 13573268484705666369LLU, 672629558234364175LLU} };
    ptr_constants[17] = Fr { {12954042299314509153LLU, 10609991639192850706LLU, 201636740964291430LLU, 5257318070966016693LLU} };
    ptr_constants[18] = Fr { {3607307102771003134LLU, 8123493550591763659LLU, 2483355979240104936LLU, 26543454325358403LLU} };
    ptr_constants[19] = Fr { {17407243835371099227LLU, 7914524619643845120LLU, 2026421188253544829LLU, 4645573052014533104LLU} };
    ptr_constants[20] = Fr { {10905051186200947186LLU, 13034855056805570875LLU, 15025428639544032332LLU, 2379373986875198622LLU} };
    ptr_constants[21] = Fr { {16374078923510027482LLU, 16625637350075691429LLU, 8840865823795658299LLU, 7596954407141636507LLU} };
    ptr_constants[22] = Fr { {7560161004420835531LLU, 2320465419212359591LLU, 8850487117878972276LLU, 548972412242391481LLU} };
    ptr_constants[23] = Fr { {17169289913802623133LLU, 3104137213883512524LLU, 6615536567816292788LLU, 3091170916891025583LLU} };
    ptr_constants[24] = Fr { {7770055005966267362LLU, 16442013717932343913LLU, 14346157083284878874LLU, 7827817224612190811LLU} };
    ptr_constants[25] = Fr { {4950794043579950393LLU, 14751920884072505098LLU, 9185747154714522275LLU, 3608425615020387546LLU} };
    ptr_constants[26] = Fr { {682733802284196218LLU, 15839058016019093574LLU, 11896768969805969921LLU, 1937141463542617088LLU} };
    ptr_constants[27] = Fr { {2578118347129170081LLU, 14308906962666163196LLU, 3586377254669010187LLU, 3318439509696165393LLU} };
    ptr_constants[28] = Fr { {9369000636091679149LLU, 9213093884873253002LLU, 18239457677131419242LLU, 1737110669796959020LLU} };
    ptr_constants[29] = Fr { {10321613957974478569LLU, 15972222581114302589LLU, 1273913180148121691LLU, 6949497498945192044LLU} };
    ptr_constants[30] = Fr { {10631938225314845352LLU, 13862514146566717823LLU, 9961564955452188460LLU, 189846938442921629LLU} };
    ptr_constants[31] = Fr { {882173459226178274LLU, 18243327384941006853LLU, 7156091006005410108LLU, 812935473286697320LLU} };
    ptr_constants[32] = Fr { {3767226246313989274LLU, 14854561598663591154LLU, 2791455959834036249LLU, 5061478504363552745LLU} };
    ptr_constants[33] = Fr { {17905996394844047950LLU, 3804657535350520289LLU, 9996111128949525004LLU, 640448800051662566LLU} };
    ptr_constants[34] = Fr { {15801905283343756948LLU, 11458894984531813966LLU, 1185329425353846608LLU, 6849945669539751534LLU} };
    ptr_constants[35] = Fr { {10440970963559122306LLU, 6304950648133973057LLU, 14190690176240498685LLU, 7211646450206765716LLU} };
    ptr_constants[36] = Fr { {8153662412903910728LLU, 3849753749750574375LLU, 13619719243221061496LLU, 1727360785179089545LLU} };
    ptr_constants[37] = Fr { {11802283595798676662LLU, 15877182625722471532LLU, 5893663141783290713LLU, 1973739506096651482LLU} };
    ptr_constants[38] = Fr { {2896395492486222476LLU, 1579136774116668461LLU, 14988267849130453543LLU, 1207162455283784341LLU} };
    ptr_constants[39] = Fr { {5858567512617897914LLU, 16702298852869798673LLU, 18086491245483111914LLU, 1835967552976776564LLU} };
    ptr_constants[40] = Fr { {367546583965170230LLU, 13864960834340632506LLU, 6473609201744914391LLU, 4096924317875829376LLU} };
    ptr_constants[41] = Fr { {3022589841921182472LLU, 11747497961539745766LLU, 15102583619336371534LLU, 3767381644631202616LLU} };
    ptr_constants[42] = Fr { {2020612466201788643LLU, 2207916099732117586LLU, 6933927741446181921LLU, 2639452435676446991LLU} };
    ptr_constants[43] = Fr { {12184074441013543751LLU, 14642815442589331378LLU, 15027490055412230488LLU, 5562069268898017984LLU} };
    ptr_constants[44] = Fr { {6853268535991998503LLU, 16624719592920798372LLU, 5096482106577541825LLU, 995111367083720533LLU} };
    ptr_constants[45] = Fr { {11614519714028230373LLU, 9628791076683484098LLU, 9369683891373173341LLU, 6195433621154226341LLU} };
    ptr_constants[46] = Fr { {17034116612519175649LLU, 4395054172173580294LLU, 14976388527937961366LLU, 8297599935692179342LLU} };
    ptr_constants[47] = Fr { {2970114231186917341LLU, 10945500202704704502LLU, 17270676407810754402LLU, 6800714971717725352LLU} };
    ptr_constants[48] = Fr { {1454941712080899747LLU, 6765959499519142518LLU, 4597607800312462629LLU, 6656153464814859124LLU} };
    ptr_constants[49] = Fr { {1791780021583302101LLU, 13471178248603658805LLU, 6869233773105347483LLU, 2405941910180559341LLU} };
    ptr_constants[50] = Fr { {1049886615243269300LLU, 13662294714970837156LLU, 3225023249011239856LLU, 7501030446940545039LLU} };
    ptr_constants[51] = Fr { {9359540500611954968LLU, 16627402304234130732LLU, 6584271112364869162LLU, 5753181833378558063LLU} };
    ptr_constants[52] = Fr { {6794122384950235448LLU, 16616232668407505919LLU, 2760864994263303809LLU, 880108536078215629LLU} };
    ptr_constants[53] = Fr { {15482707768110645933LLU, 7512493592498433444LLU, 11535909220505797387LLU, 5222596014395836087LLU} };
    ptr_constants[54] = Fr { {10587188199608544917LLU, 12195974647585135647LLU, 14419246817519386761LLU, 3961773367104456511LLU} };
    ptr_constants[55] = Fr { {16548566198621264367LLU, 16894607989430012366LLU, 16012593624576391827LLU, 4005453916330705405LLU} };
    ptr_constants[56] = Fr { {11552290444725337290LLU, 261797823107738176LLU, 15429364035226389660LLU, 585544549019304514LLU} };
    ptr_constants[57] = Fr { {14331672499974237759LLU, 3661660608184749438LLU, 2196837238210877009LLU, 1264094110818142873LLU} };
    ptr_constants[58] = Fr { {5037052949702467998LLU, 9371321783565426544LLU, 17720049796321834492LLU, 6703818633041967871LLU} };
    ptr_constants[59] = Fr { {4341263504933240683LLU, 3221629473216893431LLU, 16879177146712392073LLU, 2396505226577201645LLU} };
    ptr_constants[60] = Fr { {13568864730592774568LLU, 17724296277731105619LLU, 4652891693521415910LLU, 5416049515195195789LLU} };
    ptr_constants[61] = Fr { {1536015705216405868LLU, 12499360979232780098LLU, 4068924441486065620LLU, 5580305707592735451LLU} };
    ptr_constants[62] = Fr { {10831921108950549469LLU, 12828363099511338220LLU, 3143757012132858336LLU, 5628379662527456388LLU} };
    ptr_constants[63] = Fr { {7567012952952788412LLU, 1947256301820352571LLU, 3640834791786937366LLU, 3961060401143741593LLU} };
    ptr_constants[64] = Fr { {8892986686233728172LLU, 7865856845416634015LLU, 14824780127058582090LLU, 5788833212461065026LLU} };
    ptr_constants[65] = Fr { {6603931043796695255LLU, 5841541644334431074LLU, 4499520952586616461LLU, 8338280636871056119LLU} };
    ptr_constants[66] = Fr { {5154529345041682466LLU, 11081065483194487687LLU, 3293607820063172964LLU, 2307368717828348996LLU} };
    ptr_constants[67] = Fr { {6149412688651073221LLU, 2650201178210956767LLU, 1576450932159453887LLU, 2778378512071005033LLU} };
    ptr_constants[68] = Fr { {16965812709237780898LLU, 17640372597704443681LLU, 5888248453526969918LLU, 5632833661428707592LLU} };
    ptr_constants[69] = Fr { {16269206676778892247LLU, 11098038493928308442LLU, 576531171083701025LLU, 4716925109100650206LLU} };
    ptr_constants[70] = Fr { {15054542068374606946LLU, 2650140229465504137LLU, 17644282553328508127LLU, 2585392825141583938LLU} };
    ptr_constants[71] = Fr { {1304463137595614485LLU, 4002070013314588719LLU, 16343210170261468958LLU, 6252414693715233546LLU} };
    ptr_constants[72] = Fr { {17218344587045271006LLU, 8464917039951509833LLU, 764738769263865322LLU, 2061325019254342712LLU} };
    ptr_constants[73] = Fr { {11835405516907955170LLU, 4760729741177567574LLU, 12080340871747391462LLU, 4244137769735140953LLU} };
    ptr_constants[74] = Fr { {17736598162767056929LLU, 8557486869981282466LLU, 12556636597531381382LLU, 5638360705109771822LLU} };
    ptr_constants[75] = Fr { {16162087626635637654LLU, 8729552434491626049LLU, 8181460754158096976LLU, 3603582612649172474LLU} };
    ptr_constants[76] = Fr { {8468555304702485984LLU, 1814147893891890394LLU, 5158143895997157749LLU, 2614479816430627012LLU} };
    ptr_constants[77] = Fr { {13138267171137705122LLU, 4897355266398925588LLU, 5566416655140726367LLU, 2781154572149227617LLU} };
    ptr_constants[78] = Fr { {18354814944506672398LLU, 16048376450139999343LLU, 14827503981030984648LLU, 7601009096118037144LLU} };
    ptr_constants[79] = Fr { {12581623813096842725LLU, 18210493991648847051LLU, 12917470515121929890LLU, 6526444561623760646LLU} };
    ptr_constants[80] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[81] = Fr { {0LLU, 0LLU, 0LLU, 4611686018427387904LLU} };
    ptr_constants[82] = Fr { {1717986918LLU, 1275685051326677402LLU, 5902210070027911985LLU, 347942070956130582LLU} };
    ptr_constants[83] = Fr { {0LLU, 0LLU, 0LLU, 4611686018427387904LLU} };
    ptr_constants[84] = Fr { {1717986918LLU, 1275685051326677402LLU, 5902210070027911985LLU, 347942070956130582LLU} };
    ptr_constants[85] = Fr { {18446744072277895851LLU, 2011386469512694100LLU, 1230406299546590551LLU, 5858962965439741720LLU} };
    ptr_constants[86] = Fr { {1717986918LLU, 1275685051326677402LLU, 5902210070027911985LLU, 347942070956130582LLU} };
    ptr_constants[87] = Fr { {18446744072277895851LLU, 2011386469512694100LLU, 1230406299546590551LLU, 5858962965439741720LLU} };
    ptr_constants[88] = Fr { {15811494917868205788LLU, 16624336857528583898LLU, 17920762862402342329LLU, 7408687358795335574LLU} };
    ptr_constants[89] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[90] = Fr { {16694109684948860110LLU, 17444007090331702679LLU, 7930196697102882324LLU, 5867341378212175538LLU} };
    ptr_constants[91] = Fr { {4663336344427289798LLU, 11380393292576378109LLU, 12978865824822796211LLU, 3124811967992520802LLU} };
    ptr_constants[92] = Fr { {0LLU, 0LLU, 0LLU, 4611686018427387904LLU} };
    ptr_constants[93] = Fr { {11880908025287336503LLU, 15859759903322814680LLU, 1227245850215411483LLU, 8178282100535476905LLU} };
    ptr_constants[94] = Fr { {7266316941852012378LLU, 10565516968623163312LLU, 10450080668881795661LLU, 7298141990289750207LLU} };
    ptr_constants[95] = Fr { {1717986918LLU, 1275685051326677402LLU, 5902210070027911985LLU, 347942070956130582LLU} };
    ptr_constants[96] = Fr { {7266316941852012378LLU, 10565516968623163312LLU, 10450080668881795661LLU, 7298141990289750207LLU} };
    ptr_constants[97] = Fr { {7281400039008680969LLU, 16512712590601965578LLU, 3651767361745292527LLU, 1379967604711560024LLU} };
    ptr_constants[98] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[99] = Fr { {9714325598640845LLU, 9588354716784383069LLU, 18179323589894775430LLU, 5699939055348191667LLU} };
    ptr_constants[100] = Fr { {14713856794391328083LLU, 14932931357726034187LLU, 13965172956443630316LLU, 3052945481822905272LLU} };
    ptr_constants[101] = Fr { {14944007128586298941LLU, 5264590774638420798LLU, 4125819926426475624LLU, 7295642404933108796LLU} };
    ptr_constants[102] = Fr { {12134222085601227298LLU, 10077574737094507129LLU, 15324526840460519616LLU, 6400836920638644229LLU} };
    ptr_constants[103] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[104] = Fr { {14752081712895153864LLU, 3165234369794068713LLU, 17400561878142601607LLU, 2484163743986567480LLU} };
    ptr_constants[105] = Fr { {9130929669464692807LLU, 1613390081406542218LLU, 5026025118223136742LLU, 6358456375356809693LLU} };
    ptr_constants[106] = Fr { {12607436636129346308LLU, 17617158922923327577LLU, 11125531731695496053LLU, 2453946991702092464LLU} };
    ptr_constants[107] = Fr { {11816188975588965221LLU, 14618274942397868612LLU, 10658557365204858829LLU, 5744347229805454540LLU} };
    ptr_constants[108] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[109] = Fr { {5087129422277972259LLU, 12763070724620869232LLU, 4564176169224671443LLU, 3505729411898486627LLU} };
    ptr_constants[110] = Fr { {2445474230953005551LLU, 4949057177830667505LLU, 12403151595440125967LLU, 5300243372603233751LLU} };
    ptr_constants[111] = Fr { {9299838118977267282LLU, 10757491228315820644LLU, 11657083318480169481LLU, 3921091317960530509LLU} };
    ptr_constants[112] = Fr { {16523279484935503285LLU, 15386492123599924846LLU, 13656470921393806721LLU, 5006262252981248564LLU} };
    ptr_constants[113] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[114] = Fr { {17412183119733210048LLU, 16278702331222794404LLU, 11342738154889738148LLU, 2419871463760014552LLU} };
    ptr_constants[115] = Fr { {10420580398962391355LLU, 3774305409865100669LLU, 17115667109647720733LLU, 7507156875293017590LLU} };
    ptr_constants[116] = Fr { {11863088622599852888LLU, 10084182500907397000LLU, 12636596082726259483LLU, 5329072204340165930LLU} };
    ptr_constants[117] = Fr { {15737880670422355636LLU, 13274482797209833195LLU, 8289797411624857308LLU, 3766034287411864977LLU} };
    ptr_constants[118] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[119] = Fr { {6448982959014533570LLU, 14257617645101917143LLU, 4015663230225164252LLU, 7582628906925953722LLU} };
    ptr_constants[120] = Fr { {7025934014619871096LLU, 1750024473990640575LLU, 3809125191148406860LLU, 4260268512632869008LLU} };
    ptr_constants[121] = Fr { {10992632082589485604LLU, 13632919001077939886LLU, 17183602707682000356LLU, 1295459235449235214LLU} };
    ptr_constants[122] = Fr { {17404536287215622933LLU, 7596369904827055614LLU, 10921457218542648604LLU, 5359331565108565269LLU} };
    ptr_constants[123] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[124] = Fr { {17833072322903507216LLU, 4798205563375019432LLU, 9554304892482255194LLU, 1112768285562075665LLU} };
    ptr_constants[125] = Fr { {7788241851610358742LLU, 9366263255284141064LLU, 9821829462747097141LLU, 1474594754889285509LLU} };
    ptr_constants[126] = Fr { {2374864798834289281LLU, 10156526923448625974LLU, 13911272925848120768LLU, 1549337670759712758LLU} };
    ptr_constants[127] = Fr { {11455071333126002339LLU, 6959605017335873597LLU, 14095729915794098085LLU, 901440092105086367LLU} };
    ptr_constants[128] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[129] = Fr { {1175305954718212588LLU, 7135398340165047006LLU, 4902214527071147500LLU, 7708034061129818823LLU} };
    ptr_constants[130] = Fr { {13747364925508041768LLU, 4403678705227573343LLU, 6773939819211919770LLU, 5965129014125591061LLU} };
    ptr_constants[131] = Fr { {14159043837292826868LLU, 11168293315485570956LLU, 5075745339661317508LLU, 6043192800035665823LLU} };
    ptr_constants[132] = Fr { {8326382632480074414LLU, 6954372599948057141LLU, 10968464482140757783LLU, 2044194664515477564LLU} };
    ptr_constants[133] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[134] = Fr { {681614271730023290LLU, 6275603284088407749LLU, 4691727701082505177LLU, 1421992733318302361LLU} };
    ptr_constants[135] = Fr { {13579206596006309440LLU, 17260515951619890847LLU, 15225758847843379709LLU, 7705844870896829999LLU} };
    ptr_constants[136] = Fr { {6877543057389938822LLU, 5423140637502146415LLU, 995256503145025136LLU, 6919509419942510153LLU} };
    ptr_constants[137] = Fr { {10292932700701563771LLU, 11609037333034643145LLU, 4905599838762827296LLU, 6236601661675414440LLU} };
    ptr_constants[138] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[139] = Fr { {17771810680912689119LLU, 1034464913263051107LLU, 12207804893192491018LLU, 3239409397039355342LLU} };
    ptr_constants[140] = Fr { {15668942992724396263LLU, 8884906613117008987LLU, 5680866228428638471LLU, 4520125907381911575LLU} };
    ptr_constants[141] = Fr { {17225830907687990106LLU, 6940043757051427055LLU, 8603438931958788187LLU, 2653481513530389911LLU} };
    ptr_constants[142] = Fr { {5836866015044259610LLU, 5039187548467424538LLU, 16299537293923712950LLU, 1577404521775015874LLU} };
    ptr_constants[143] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[144] = Fr { {16004116140518868000LLU, 14588956074832287693LLU, 5602168313944870275LLU, 2236587883251301266LLU} };
    ptr_constants[145] = Fr { {12227719948720320296LLU, 11654354851976665769LLU, 6623955755892704979LLU, 3771246199919161867LLU} };
    ptr_constants[146] = Fr { {11786548852893431779LLU, 14610804851112111406LLU, 18320326564938507359LLU, 4361936251821211350LLU} };
    ptr_constants[147] = Fr { {11734801714793692632LLU, 6936237698591154433LLU, 13402358895402970338LLU, 7345158835211106106LLU} };
    ptr_constants[148] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[149] = Fr { {8313233898222421099LLU, 5877352453322092374LLU, 17842794276771702675LLU, 1632759733934417187LLU} };
    ptr_constants[150] = Fr { {8367043555085003775LLU, 8911297017864296829LLU, 7677789404983894550LLU, 2104873655691923242LLU} };
    ptr_constants[151] = Fr { {7758570423781724359LLU, 6896263818014811416LLU, 7407594187306503052LLU, 2835204144750651816LLU} };
    ptr_constants[152] = Fr { {2379427580682897403LLU, 2119546924331960777LLU, 5844280793505803763LLU, 7733042190190477092LLU} };
    ptr_constants[153] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[154] = Fr { {6746502851419744858LLU, 3833091957206553936LLU, 7923582810418845561LLU, 4854122652467761987LLU} };
    ptr_constants[155] = Fr { {4776871877668111354LLU, 17838457572487932295LLU, 909618368324630784LLU, 6340727357051464071LLU} };
    ptr_constants[156] = Fr { {11419168223717834892LLU, 18380003538035881384LLU, 10395509137975509006LLU, 5769145283353350760LLU} };
    ptr_constants[157] = Fr { {3333630161447622452LLU, 8391238557458201750LLU, 1716049776707309259LLU, 64030297122550763LLU} };
    ptr_constants[158] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[159] = Fr { {6449686167666858547LLU, 7854271818706768960LLU, 629265634528919635LLU, 913810175406869829LLU} };
    ptr_constants[160] = Fr { {2685243795422041041LLU, 17410122772545312368LLU, 5578743416775395194LLU, 123692567473036274LLU} };
    ptr_constants[161] = Fr { {8775629508398307662LLU, 16863096769079521239LLU, 9028073891560092955LLU, 5870731753204655683LLU} };
    ptr_constants[162] = Fr { {10022719350892127464LLU, 9154043859474079475LLU, 16854999098052118264LLU, 7521553133559839297LLU} };
    ptr_constants[163] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[164] = Fr { {3582152270181858785LLU, 8483838903254450565LLU, 3885578185325508060LLU, 6607740388582957848LLU} };
    ptr_constants[165] = Fr { {1019345757948476162LLU, 17387069240641177212LLU, 3010441005981938293LLU, 1960011389711103586LLU} };
    ptr_constants[166] = Fr { {9797825691342668999LLU, 17420724785892189050LLU, 15150735285456259351LLU, 3477119675532079654LLU} };
    ptr_constants[167] = Fr { {313549273220518421LLU, 4417336661248365898LLU, 12369370307212091922LLU, 2649889586410310717LLU} };
    ptr_constants[168] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[169] = Fr { {13184150795594823222LLU, 1094926958030601911LLU, 5460367139289304538LLU, 2762018890597035414LLU} };
    ptr_constants[170] = Fr { {7769977276524326834LLU, 9751217146033106441LLU, 2922378298253527721LLU, 1580184950424089900LLU} };
    ptr_constants[171] = Fr { {662822318452541098LLU, 1547430706114993016LLU, 7801115309921448373LLU, 2576593573893492339LLU} };
    ptr_constants[172] = Fr { {13719381595881988470LLU, 11847102890032183888LLU, 1850146808169976954LLU, 4405178789377168158LLU} };
    ptr_constants[173] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[174] = Fr { {18074330158853697159LLU, 10171201472169255034LLU, 350600261918052973LLU, 6384779176166388301LLU} };
    ptr_constants[175] = Fr { {7260228949597756036LLU, 11525440000502360766LLU, 7652230823660942000LLU, 1083863706086968714LLU} };
    ptr_constants[176] = Fr { {5249493002154392820LLU, 10126742590447000050LLU, 4893075529162352610LLU, 6496241661632471618LLU} };
    ptr_constants[177] = Fr { {16124286385275500656LLU, 18130607304516464455LLU, 15845933217170587843LLU, 981257961830428612LLU} };
    ptr_constants[178] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[179] = Fr { {14663167693724810409LLU, 9632931188899161911LLU, 14382188379827376779LLU, 5912808777962145433LLU} };
    ptr_constants[180] = Fr { {13272935961198759575LLU, 11694844132304184172LLU, 6597657215183259020LLU, 622287759316504341LLU} };
    ptr_constants[181] = Fr { {12225561777227173693LLU, 14682435111717544408LLU, 6309521798516334625LLU, 2604888271043207084LLU} };
    ptr_constants[182] = Fr { {9447586267949797022LLU, 4247500515500976506LLU, 10711500236937337822LLU, 2437516606685183799LLU} };
    ptr_constants[183] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[184] = Fr { {11908595691063436262LLU, 4243577635590204760LLU, 8403951486948860068LLU, 4349232563948811239LLU} };
    ptr_constants[185] = Fr { {6975244144314966017LLU, 7811932934803638973LLU, 8258730134746095976LLU, 1273259639549964129LLU} };
    ptr_constants[186] = Fr { {13693818794859018574LLU, 16180737229064465873LLU, 15250831790511016854LLU, 7122502540675373382LLU} };
    ptr_constants[187] = Fr { {16337951150807180150LLU, 4820817432726876389LLU, 16603157523572970206LLU, 399513089877234964LLU} };
    ptr_constants[188] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[189] = Fr { {8463391580918902984LLU, 1714000470873244823LLU, 11912118828137540429LLU, 3031210386589794451LLU} };
    ptr_constants[190] = Fr { {10008560469715901266LLU, 17536386607960796877LLU, 34672585950623924LLU, 8066923636325277146LLU} };
    ptr_constants[191] = Fr { {11595077925902070653LLU, 4258576810279155572LLU, 3467680922131005641LLU, 3599797543931608915LLU} };
    ptr_constants[192] = Fr { {5529889422921033428LLU, 11221576295444147093LLU, 15025804565535151236LLU, 8342597497262291930LLU} };
    ptr_constants[193] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[194] = Fr { {3975663197499411501LLU, 8609891298932888009LLU, 12964086454359706822LLU, 836927102110245167LLU} };
    ptr_constants[195] = Fr { {18212646341940067142LLU, 2286082192509654898LLU, 7174031652285426370LLU, 6431057598838738297LLU} };
    ptr_constants[196] = Fr { {3500043592247466625LLU, 5766000312052779267LLU, 7284312922049115002LLU, 7035825791380063105LLU} };
    ptr_constants[197] = Fr { {10030183029599763333LLU, 2805739685774462444LLU, 10586552382973325853LLU, 1460870710683348671LLU} };
    ptr_constants[198] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[199] = Fr { {12439054263934686425LLU, 17262664511408366864LLU, 4649461961952482256LLU, 1796129477526135373LLU} };
    ptr_constants[200] = Fr { {16440662917572586892LLU, 16333531411994737938LLU, 17241318036913873593LLU, 6030541828438461422LLU} };
    ptr_constants[201] = Fr { {17855680848693645096LLU, 588222541745943205LLU, 13813688480305570743LLU, 7364871225960911789LLU} };
    ptr_constants[202] = Fr { {3262793366115211679LLU, 3581631440293010358LLU, 5724121911613052193LLU, 1633745211162376946LLU} };
    ptr_constants[203] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[204] = Fr { {17525750720949407205LLU, 6364451256295555615LLU, 1836042041866838055LLU, 2478118448173563892LLU} };
    ptr_constants[205] = Fr { {12766797493414167523LLU, 506459117937366685LLU, 11319698178540271343LLU, 2553113698322972895LLU} };
    ptr_constants[206] = Fr { {1632008284964266813LLU, 12799865708288791990LLU, 3975486484769115399LLU, 5228853771524389222LLU} };
    ptr_constants[207] = Fr { {5563257171071027772LLU, 13270078409348291239LLU, 2840602910224925532LLU, 1159128648292626522LLU} };
    ptr_constants[208] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[209] = Fr { {11781864417518927657LLU, 2788221689447245088LLU, 14059803314767454580LLU, 1199593201337356572LLU} };
    ptr_constants[210] = Fr { {4744786663666354034LLU, 17006290501971951972LLU, 16066990836633113278LLU, 6943441285013112250LLU} };
    ptr_constants[211] = Fr { {10921201263192957273LLU, 12548310600496354077LLU, 15603972334910430623LLU, 4434763397389194113LLU} };
    ptr_constants[212] = Fr { {7754654649346698087LLU, 5469570104777099504LLU, 17667075858710231290LLU, 4332262717671734080LLU} };
    ptr_constants[213] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[214] = Fr { {8066302411480075744LLU, 7840027377685721682LLU, 3769093733591644354LLU, 5852367846150696344LLU} };
    ptr_constants[215] = Fr { {3080678924476578907LLU, 2212933361394550882LLU, 13863515397797537910LLU, 8153116433349902305LLU} };
    ptr_constants[216] = Fr { {5297357625024239652LLU, 13972797088274686856LLU, 17303126000034670398LLU, 5431476851493074421LLU} };
    ptr_constants[217] = Fr { {11208843846932749903LLU, 12043217065587526262LLU, 16777866640768647424LLU, 1713071071721686801LLU} };
    ptr_constants[218] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[219] = Fr { {2741598439261926750LLU, 15475915551426417492LLU, 16837786546331264262LLU, 1693974622175344717LLU} };
    ptr_constants[220] = Fr { {13203893720423071423LLU, 10645128500353874422LLU, 11393738849382738020LLU, 6117990731248649411LLU} };
    ptr_constants[221] = Fr { {1619714563944193081LLU, 6702857135521901265LLU, 4281236571374791698LLU, 8114177466294364837LLU} };
    ptr_constants[222] = Fr { {15083093025611679889LLU, 4764307283694170783LLU, 4229080239030928622LLU, 1132704314797640411LLU} };
    ptr_constants[223] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[224] = Fr { {18121255368918026871LLU, 4857895665407752411LLU, 13629224772634825388LLU, 2193811732256288780LLU} };
    ptr_constants[225] = Fr { {7613701908814699655LLU, 13105982223062212577LLU, 13307137890259347980LLU, 6327552502114166491LLU} };
    ptr_constants[226] = Fr { {1791162257257255056LLU, 7552011898859756668LLU, 7680560103104691821LLU, 3070576343671578922LLU} };
    ptr_constants[227] = Fr { {11258241512032192676LLU, 7155797239585492259LLU, 965757846338217730LLU, 1562085516145157685LLU} };
    ptr_constants[228] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[229] = Fr { {13501681960366340620LLU, 10143805367443044561LLU, 13551870156651670273LLU, 101101620226210612LLU} };
    ptr_constants[230] = Fr { {6743130083310631719LLU, 4159621102906783069LLU, 16909372106383037043LLU, 275105067646589848LLU} };
    ptr_constants[231] = Fr { {5767081072974827166LLU, 13351226112875063333LLU, 13808823188030747757LLU, 7474345124325409016LLU} };
    ptr_constants[232] = Fr { {7515825071574641519LLU, 17015721731775116578LLU, 17706482569037568318LLU, 7783540540946092843LLU} };
    ptr_constants[233] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[234] = Fr { {3824191402665565151LLU, 13497134828422119728LLU, 7128754266437361721LLU, 6470434094242418267LLU} };
    ptr_constants[235] = Fr { {10680123891660017818LLU, 2707680394453832203LLU, 9593668520566525244LLU, 1249510545339278797LLU} };
    ptr_constants[236] = Fr { {13153650165756864045LLU, 345407776466483167LLU, 3224140222610354243LLU, 3211698861149853110LLU} };
    ptr_constants[237] = Fr { {371362182411043706LLU, 7594695119693436785LLU, 8878863045364319993LLU, 7577964674714832959LLU} };
    ptr_constants[238] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[239] = Fr { {14227796843065160996LLU, 7265284196610865436LLU, 14215471000245973021LLU, 388536328476436872LLU} };
    ptr_constants[240] = Fr { {10508052870181564457LLU, 14774099264876585353LLU, 13978285412485674575LLU, 2251373481644163033LLU} };
    ptr_constants[241] = Fr { {2274705865945939466LLU, 15111611713211557404LLU, 14662933568048081168LLU, 5862270171521459308LLU} };
    ptr_constants[242] = Fr { {6094626477886741147LLU, 13522156290499373648LLU, 3223938529562475386LLU, 7223358845394697879LLU} };
    ptr_constants[243] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[244] = Fr { {13205382080658589147LLU, 15989146395783681999LLU, 16241166560338848224LLU, 1845188989213389438LLU} };
    ptr_constants[245] = Fr { {8264531805159318022LLU, 4318962737552543847LLU, 17542834107783357628LLU, 2375313423424869757LLU} };
    ptr_constants[246] = Fr { {15019210811593772058LLU, 15436029440539407784LLU, 10740152740344452687LLU, 6100217470985284679LLU} };
    ptr_constants[247] = Fr { {6693228678014749722LLU, 14667951644554411079LLU, 2035075353071307663LLU, 4463512857445956051LLU} };
    ptr_constants[248] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[249] = Fr { {12012087478829769364LLU, 3825852175923412452LLU, 15888820275970315498LLU, 7447736855985048995LLU} };
    ptr_constants[250] = Fr { {1624711641880807261LLU, 7583769466758290511LLU, 5477974518617315195LLU, 7210339519699280315LLU} };
    ptr_constants[251] = Fr { {6607838625717294305LLU, 7841538873763198011LLU, 15329216367235738568LLU, 1984624383875933899LLU} };
    ptr_constants[252] = Fr { {8398882240184548439LLU, 16004338465372972465LLU, 6193783283359325299LLU, 2476391499295679859LLU} };
    ptr_constants[253] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[254] = Fr { {12511466272957367811LLU, 4218706072902007439LLU, 11716473739639018387LLU, 7424930178176744484LLU} };
    ptr_constants[255] = Fr { {16288778393510455773LLU, 3748817748755652187LLU, 7208251160680503195LLU, 3862484047281109992LLU} };
    ptr_constants[256] = Fr { {15941492463164108522LLU, 15923515090768231958LLU, 1309076451877278282LLU, 4260511434864318553LLU} };
    ptr_constants[257] = Fr { {9449194354281104634LLU, 9494271059947758101LLU, 5549383722097760037LLU, 6795132581418421135LLU} };
    ptr_constants[258] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[259] = Fr { {1527740836906820040LLU, 13921610089051273379LLU, 11293112822021639131LLU, 5748590682616795281LLU} };
    ptr_constants[260] = Fr { {1776963566448864142LLU, 12194277459554781940LLU, 16949717034300015267LLU, 3579311125383426559LLU} };
    ptr_constants[261] = Fr { {18140857162561251268LLU, 8556229051501867549LLU, 11196734168396521863LLU, 8195564934864849315LLU} };
    ptr_constants[262] = Fr { {11160996337593641411LLU, 12831177608914682572LLU, 9173902884257623423LLU, 7374873827883642787LLU} };
    ptr_constants[263] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[264] = Fr { {6750623450449044860LLU, 443608140918095347LLU, 17751461717740272612LLU, 6758380106432599760LLU} };
    ptr_constants[265] = Fr { {9731847446509569084LLU, 3590312569395162734LLU, 1405333859053401448LLU, 3856147710575988537LLU} };
    ptr_constants[266] = Fr { {17654598715937797091LLU, 6686010497668656436LLU, 11433050262773651631LLU, 1659836155287935604LLU} };
    ptr_constants[267] = Fr { {17561997058479684892LLU, 11936532787410789599LLU, 12130313533672474686LLU, 3743298553628436536LLU} };
    ptr_constants[268] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[269] = Fr { {2972099262790083568LLU, 4330553758078682554LLU, 10795025434959062243LLU, 5335774050168297782LLU} };
    ptr_constants[270] = Fr { {10860323480094350774LLU, 16061809646445083551LLU, 7200766714261808883LLU, 4859567065712269504LLU} };
    ptr_constants[271] = Fr { {6317220398534023903LLU, 12056101160735882571LLU, 5066281353427411235LLU, 5535306398259247913LLU} };
    ptr_constants[272] = Fr { {17181253893922920211LLU, 13373863267195532105LLU, 6440287951672407126LLU, 1645945881923626261LLU} };
    ptr_constants[273] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[274] = Fr { {12857628738718505201LLU, 15984669414098336805LLU, 1638434137234748487LLU, 6332291646033410795LLU} };
    ptr_constants[275] = Fr { {16101486811400055746LLU, 16631757154052794744LLU, 6341517402856207188LLU, 1583519541558373845LLU} };
    ptr_constants[276] = Fr { {900060220103456489LLU, 3761292786537880225LLU, 13116081518848173139LLU, 7485718303940509386LLU} };
    ptr_constants[277] = Fr { {14687157248936455200LLU, 16447584892659338398LLU, 5799464941211207678LLU, 4180535962112197770LLU} };
    ptr_constants[278] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[279] = Fr { {13863587447272193916LLU, 18174918564809419640LLU, 6182255920872423379LLU, 8213191740371303948LLU} };
    ptr_constants[280] = Fr { {6199983291321043561LLU, 7466081120760997996LLU, 16815839150749793587LLU, 5856143965220629850LLU} };
    ptr_constants[281] = Fr { {10740125302303451484LLU, 2626695697138990897LLU, 13961573729012759103LLU, 1719798535720994118LLU} };
    ptr_constants[282] = Fr { {7269606234863101085LLU, 8829647625768187893LLU, 9545993116567583735LLU, 7765780771027176098LLU} };
    ptr_constants[283] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[284] = Fr { {15488985951500494310LLU, 5907974857718084291LLU, 7605399227420034875LLU, 2340211780295550786LLU} };
    ptr_constants[285] = Fr { {561060653456291516LLU, 14788599992949870268LLU, 8527364700900971458LLU, 1807670767976047810LLU} };
    ptr_constants[286] = Fr { {776874788860103813LLU, 2285683726573677297LLU, 6970621277964201650LLU, 6958955168919407687LLU} };
    ptr_constants[287] = Fr { {789572217435102293LLU, 4431824199754095522LLU, 4568825580695262167LLU, 297341639729785617LLU} };
    ptr_constants[288] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[289] = Fr { {15489136677960210203LLU, 12565315272510176888LLU, 4911580736208000876LLU, 8009035428924307564LLU} };
    ptr_constants[290] = Fr { {465608218122710563LLU, 12287904431362980399LLU, 16716641233150235266LLU, 3034993080559488918LLU} };
    ptr_constants[291] = Fr { {10920785090751663932LLU, 7395158350710538012LLU, 10913047438927838137LLU, 3970792018286596093LLU} };
    ptr_constants[292] = Fr { {17381748346349854520LLU, 15183099088440959265LLU, 5108009957572988214LLU, 233306409928953662LLU} };
    ptr_constants[293] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[294] = Fr { {16703041027001657355LLU, 78619252118720970LLU, 15204967992223727078LLU, 158485332003071483LLU} };
    ptr_constants[295] = Fr { {3087246430293635990LLU, 2642892792637555458LLU, 18403870680794764308LLU, 4354022039496784000LLU} };
    ptr_constants[296] = Fr { {12674618722411223537LLU, 8715598641899949713LLU, 9764344565435727294LLU, 5820385308815528944LLU} };
    ptr_constants[297] = Fr { {2353633493702622277LLU, 11982698354389009304LLU, 2844691360589643216LLU, 1803959392104932994LLU} };
    ptr_constants[298] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[299] = Fr { {16152978657011991503LLU, 1617135751158245936LLU, 16931517285492193213LLU, 5769477521995414574LLU} };
    ptr_constants[300] = Fr { {15083496562770887131LLU, 13780474759908603577LLU, 16056940130746893667LLU, 6416321868701696085LLU} };
    ptr_constants[301] = Fr { {16280905623949986954LLU, 8554386186597436404LLU, 16349096772784730906LLU, 556534487603333066LLU} };
    ptr_constants[302] = Fr { {15927788640746212502LLU, 10512993163846624737LLU, 2066182782262138126LLU, 840403029911376578LLU} };
    ptr_constants[303] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[304] = Fr { {825380071778835662LLU, 4453371822650861455LLU, 15654954072275007227LLU, 3058634168462477197LLU} };
    ptr_constants[305] = Fr { {13191903985249659793LLU, 5608156952349944610LLU, 4149465588973043800LLU, 1679303738255086409LLU} };
    ptr_constants[306] = Fr { {10002511406663040518LLU, 1145097697543384285LLU, 200541640682150109LLU, 7226740081855364158LLU} };
    ptr_constants[307] = Fr { {4516746127087278697LLU, 11714173514167715711LLU, 16690126827465639527LLU, 3020635686858010763LLU} };
    ptr_constants[308] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[309] = Fr { {10357315775310363637LLU, 16123107513637714747LLU, 1117856318162181084LLU, 4511468095836271227LLU} };
    ptr_constants[310] = Fr { {7731827555253199330LLU, 4585064038005746198LLU, 4695781973741669639LLU, 3136391998150982168LLU} };
    ptr_constants[311] = Fr { {3385139459681808839LLU, 18007421290803560883LLU, 2700359599209287878LLU, 6551888596091789198LLU} };
    ptr_constants[312] = Fr { {15370064142519934667LLU, 13982758290238064426LLU, 17140325842887815825LLU, 3754810109857130371LLU} };
    ptr_constants[313] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[314] = Fr { {2130318141258646743LLU, 466162089794995932LLU, 3466390830383275149LLU, 4209531238680567724LLU} };
    ptr_constants[315] = Fr { {8979680778172649564LLU, 12643661785599397833LLU, 3493497681173550675LLU, 6172298813008833938LLU} };
    ptr_constants[316] = Fr { {1049291477864135992LLU, 8938514528186091784LLU, 320676634101186401LLU, 6568048691160007360LLU} };
    ptr_constants[317] = Fr { {7983640554172054218LLU, 532675094096075001LLU, 15073491602912765271LLU, 4698419196872488429LLU} };
    ptr_constants[318] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[319] = Fr { {12375832065809921219LLU, 17147567759874632704LLU, 2997473237671519803LLU, 2984424964499512448LLU} };
    ptr_constants[320] = Fr { {16131734905786118754LLU, 829057009376737003LLU, 7314049015935150897LLU, 3771171357212521918LLU} };
    ptr_constants[321] = Fr { {6494219708671021461LLU, 11672861536147609892LLU, 17123991683280078508LLU, 3449845297896419440LLU} };
    ptr_constants[322] = Fr { {2192034046983382784LLU, 6832633010776591573LLU, 1291021082245039155LLU, 5195817095412323163LLU} };
    ptr_constants[323] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[324] = Fr { {18076509748770025681LLU, 4584858483230573248LLU, 15592196397486485072LLU, 1782314676399619432LLU} };
    ptr_constants[325] = Fr { {7441629578666162857LLU, 17109122841853908269LLU, 6376846277799122500LLU, 6008474390032927439LLU} };
    ptr_constants[326] = Fr { {7656122648497381117LLU, 14085050549840454275LLU, 14784268926765979757LLU, 6001024574241248197LLU} };
    ptr_constants[327] = Fr { {263180562765936512LLU, 10276314170738989630LLU, 12688598484383274879LLU, 132215323145899844LLU} };
    ptr_constants[328] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[329] = Fr { {7315139421404482306LLU, 11112763214962722870LLU, 2214155316437490324LLU, 3863930391458079932LLU} };
    ptr_constants[330] = Fr { {3636633371657448434LLU, 2017955760118006676LLU, 2368047820115245848LLU, 2547658740267763003LLU} };
    ptr_constants[331] = Fr { {13115329852174680679LLU, 10394822596734243332LLU, 14775708683865517040LLU, 8229598069729539194LLU} };
    ptr_constants[332] = Fr { {8062247192808109833LLU, 2283154550960642219LLU, 8250223037651198613LLU, 1070079183378694096LLU} };
    ptr_constants[333] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[334] = Fr { {10677614014527590927LLU, 11592587521185562241LLU, 12028281093504118842LLU, 83593620407649906LLU} };
    ptr_constants[335] = Fr { {10522872798217097148LLU, 17869933458048521696LLU, 4222319743865860956LLU, 6974737403946348050LLU} };
    ptr_constants[336] = Fr { {3604676892440693364LLU, 6866354513268246340LLU, 4463008918884578226LLU, 3583495122123891980LLU} };
    ptr_constants[337] = Fr { {15336785827723523341LLU, 1822420233341690933LLU, 9502314686204095257LLU, 3309809974506318031LLU} };
    ptr_constants[338] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[339] = Fr { {14957381380453763166LLU, 9813694206331874341LLU, 4093077854349785925LLU, 622273834108291366LLU} };
    ptr_constants[340] = Fr { {18215948410638014738LLU, 16979950998773460093LLU, 11832832490524993526LLU, 7971587377328270594LLU} };
    ptr_constants[341] = Fr { {10660987110952326380LLU, 3996804299699261380LLU, 10594455405614010100LLU, 90128492760465938LLU} };
    ptr_constants[342] = Fr { {8834876829232309659LLU, 14472428540376085269LLU, 3731164194277157938LLU, 6356486194394783921LLU} };
    ptr_constants[343] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[344] = Fr { {5412576205304129533LLU, 7160492324731143844LLU, 8203295314697213644LLU, 8135866150614596187LLU} };
    ptr_constants[345] = Fr { {14318547277564884413LLU, 9467294949569554236LLU, 6237794478757951480LLU, 2038083536680606203LLU} };
    ptr_constants[346] = Fr { {8313312932900232552LLU, 17117772544122516987LLU, 9569623416123823447LLU, 1216691669362243138LLU} };
    ptr_constants[347] = Fr { {6028412552011864639LLU, 7059994390371397280LLU, 10025729896616411577LLU, 4092851069006773476LLU} };
    ptr_constants[348] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[349] = Fr { {13922094761135229036LLU, 11584034939462748091LLU, 13011029182512475279LLU, 4472908877409021743LLU} };
    ptr_constants[350] = Fr { {15684640361939059629LLU, 4278128445740181539LLU, 1731680284334702248LLU, 3437171809751567969LLU} };
    ptr_constants[351] = Fr { {9068762784323184063LLU, 15641918239731736827LLU, 12033256250361516378LLU, 2726574172543934978LLU} };
    ptr_constants[352] = Fr { {13171920551302215256LLU, 8873672030892133865LLU, 11250181223087836785LLU, 7369667752906516597LLU} };
    ptr_constants[353] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[354] = Fr { {1709176862041163955LLU, 16902340368789572429LLU, 3137835519083194205LLU, 6201103297470253643LLU} };
    ptr_constants[355] = Fr { {16419633225741640954LLU, 12930963041978004229LLU, 10936615742413251534LLU, 1634295874269082350LLU} };
    ptr_constants[356] = Fr { {9972117429239281307LLU, 2865202693712669341LLU, 13978451311944500590LLU, 4049764099524878388LLU} };
    ptr_constants[357] = Fr { {12931245275493664601LLU, 1937747821748430939LLU, 1559781182265090048LLU, 6487618000155086194LLU} };
    ptr_constants[358] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[359] = Fr { {3983564986243320LLU, 9650205745938384648LLU, 14256003256873245620LLU, 398801057276971206LLU} };
    ptr_constants[360] = Fr { {18442019619133979665LLU, 10349453243984125934LLU, 5072362703437323133LLU, 3057027382454436042LLU} };
    ptr_constants[361] = Fr { {1756832768461135430LLU, 14003235138717690688LLU, 15682862367593476850LLU, 1425039201208159985LLU} };
    ptr_constants[362] = Fr { {2384273039884611609LLU, 7972591134155790694LLU, 10445544421358408142LLU, 7552207208663469515LLU} };
    ptr_constants[363] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[364] = Fr { {9302899160970LLU, 8752269366585827446LLU, 10706793536200000237LLU, 2538418709072838430LLU} };
    ptr_constants[365] = Fr { {18446733076875289498LLU, 6247611111269315685LLU, 17331948782206644756LLU, 4878836728663888105LLU} };
    ptr_constants[366] = Fr { {18446744070846240086LLU, 4022772939025388201LLU, 2460812599093181102LLU, 7106239912452095536LLU} };
    ptr_constants[367] = Fr { {10540996614202786416LLU, 12783804640121292468LLU, 13667873977068926994LLU, 22747312306445271LLU} };
    ptr_constants[368] = Fr { {1431655765LLU, 16435357604196857515LLU, 17216337774162961064LLU, 3364409071415034087LLU} };
    ptr_constants[369] = Fr { {25769803770LLU, 688531696190609414LLU, 14746174755580473312LLU, 5219131064341958734LLU} };
    ptr_constants[370] = Fr { {18446744051375721682LLU, 1862838406462745389LLU, 15504989458184902274LLU, 4700125114425078237LLU} };
    ptr_constants[371] = Fr { {0LLU, 0LLU, 0LLU, 4611686018427387904LLU} };
    ptr_constants[372] = Fr { {1717986918LLU, 1275685051326677402LLU, 5902210070027911985LLU, 347942070956130582LLU} };

    for (int i = 0; i < DATA_SIZE; i++) {
    	ptr_preimages[i] = Fr { { 8589934590LLU, 6378425256633387010LLU, 11064306276430008309LLU, 1739710354780652911LLU } };
    }

    // Data will be migrated to kernel space
    OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_preimages}, 0/* 0 means from host*/));

    //Launch the Kernel
    OCL_CHECK(err, err = q.enqueueTask(krnl_poseidon_hash));

    // The result of the previous kernel execution will need to be retrieved in
    // order to view the results. This call will transfer the data from FPGA to
    // source_results vector
    OCL_CHECK(err, q.enqueueMigrateMemObjects({buffer_digests},CL_MIGRATE_MEM_OBJECT_HOST));

    cout << "waiting" << endl;
    OCL_CHECK(err, q.finish());
    cout << "finish" << endl;

    //Verify the result
    int match = 0;
    for (int i=0; i< (DATA_SIZE >> 1); i++) {
    	cout << " output " << ptr_digests[i].val[0] << " " << ptr_digests[i].val[1] << " " << ptr_digests[i].val[2] << " " << ptr_digests[i].val[3] << endl;

    	if (ptr_digests[i].val[0] != 8548788455249036922LLU) {
    		match = 1;
    		break;
    	}
    	if (ptr_digests[i].val[1] != 17398649214534635121LLU) {
    		match = 1;
    		break;
    	}
    	if (ptr_digests[i].val[2] != 16798001049544414347LLU) {
    		match = 1;
    		break;
    	}
    	if (ptr_digests[i].val[3] != 3287283526808376185LLU) {
    		match = 1;
    		break;
    	}
    }

    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_constants, ptr_constants));
    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_preimages, ptr_preimages));
    OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_digests, ptr_digests));
    OCL_CHECK(err, err = q.finish());

    cout << "TEST " << (match ? "FAILED" : "PASSED") << std::endl;
    return (match ? EXIT_FAILURE :  EXIT_SUCCESS);

}

Credits

Stephen Lake

Stephen Lake

1 project • 2 followers
Software engineering consultant with 18+ years of experience

Comments