Tom Simpson
Published © Apache-2.0

FFT Acceleration with Vitis

Learn how to add an FFT accelerator for Xilinx MPSoC devices using the Vitis acceleration flow.

Advanced · Full instructions provided · 3 hours · 7,095

Things used in this project

Hardware components

UltraZed-EG Starter Kit
Avnet UltraZed-EG Starter Kit
×1

Software apps and online services

Vitis 2020.1

Story

Read more

Code

fft_infc.cpp

C/C++
Custom HLS DMA to convert AXI-MM to AXI-Stream for FFT Accelerator Kernel
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         Feb. 1 2021
** File Name:           fft_infc.cpp
**
** Tool versions:       Vitis 2020.1
**
** Description:         FFT Accelerator interface kernel
**
** Revision:            Feb 1, 2020: 1.00 Initial version
**
**----------------------------------------------------------------------------*/

#include "string.h"
#include "ap_int.h"
#include "hls_stream.h"

typedef ap_uint<64>  data64_t;
typedef ap_uint<128> data128_t;

/*
 * hls_dma - moves data between memory-mapped buffers and 64-bit streams.
 *
 * Read side : fetches num_words 128-bit words from din and pushes each one to
 *             strm_out as two 64-bit beats, bits 63..0 first, then bits 127..64.
 * Write side: pops two 64-bit beats from strm_in per output word (first beat
 *             into bits 63..0, second beat into bits 127..64) and stores
 *             num_words 128-bit words to dout.
 *
 * Parameters:
 *   num_words - number of 128-bit words read from din and written to dout
 *   din       - source buffer, advanced one 128-bit word per iteration
 *   dout      - destination buffer, advanced one 128-bit word per iteration
 *   strm_in   - 64-bit stream the results are read from
 *   strm_out  - 64-bit stream the input samples are written to
 *
 * NOTE(review): strm_out/strm_in are presumably wired to the FFT core's data
 * input/output by the system linker - confirm against the link configuration.
 */
void hls_dma( int                     num_words,
              data128_t              *din,
              data128_t              *dout,
              hls::stream<data64_t>  &strm_in,
              hls::stream<data64_t>  &strm_out )
{
// NOTE(review): DATAFLOW presumably lets rd_loop and wr_loop run concurrently,
// so results can drain while input is still being sent - confirm in the HLS
// synthesis report.
#pragma HLS DATAFLOW  

  data128_t temp_in, temp_out;

  // Memory -> stream: split every 128-bit word into two 64-bit beats.
  rd_loop: for (int i = 0; i < num_words; i++)
  {
     temp_in = *din++;
     strm_out << temp_in.range(63, 0);
     strm_out << temp_in.range(127, 64);
  }

  // Stream -> memory: pack two consecutive 64-bit beats into one 128-bit word.
  wr_loop: for (int i = 0; i < num_words; i++)
  {
     temp_out.range(63, 0)   = strm_in.read();
     temp_out.range(127, 64) = strm_in.read();
    *dout++ = temp_out;
  }
}

extern "C" {
/*
 * fft_infc - top-level interface kernel.
 *
 * Writes one 24-bit configuration word to the config stream, then calls
 * hls_dma() to stream the input buffer out and the result buffer back in.
 *
 * Parameters:
 *   fft_select - written to bits 4..0 of the configuration word; also sets the
 *                transfer length (see num_128b_words below)
 *   num_fft    - number of FFT frames in the batch
 *   fft_dir    - bit 8 of the configuration word is set to 1 when this is 1,
 *                and to 0 for any other value
 *   din        - input buffer, 128-bit words
 *   dout       - output buffer, 128-bit words
 *   config     - 24-bit configuration stream
 *   strm_in    - 64-bit stream carrying results into this kernel
 *   strm_out   - 64-bit stream carrying input samples out of this kernel
 *
 * NOTE(review): bits 4..0 and bit 8 presumably correspond to the NFFT and
 * FWD/INV fields of the Xilinx FFT configuration channel - confirm against the
 * FFT IP product guide.
 */
void fft_infc( int                      fft_select,
               int                      num_fft,
               int                      fft_dir,
               data128_t               *din,
               data128_t               *dout,
               volatile ap_uint<24>    *config,
               hls::stream<data64_t>   &strm_in,
               hls::stream<data64_t>   &strm_out )
{
// config and both data streams are AXI4-Stream ports.
#pragma HLS INTERFACE axis port=config
#pragma HLS INTERFACE axis port=strm_in
#pragma HLS INTERFACE axis port=strm_out
// din and dout are AXI master ports on separate bundles (gmem1 / gmem2).
#pragma HLS INTERFACE m_axi port=din offset=slave bundle=gmem1 max_read_burst_length=256
#pragma HLS INTERFACE m_axi port=dout offset=slave bundle=gmem2 max_write_burst_length=256
// Scalar arguments and the block-level control use the AXI4-Lite interface.
#pragma HLS INTERFACE s_axilite port=fft_select
#pragma HLS INTERFACE s_axilite port=num_fft
#pragma HLS INTERFACE s_axilite port=fft_dir
#pragma HLS INTERFACE s_axilite port=return

  // Build the configuration word: direction flag in bit 8, size select in 4..0.
  ap_uint<24> tmp_config = 0;
  tmp_config[8] = (fft_dir == 1) ? 1 : 0;
  tmp_config.range(4, 0) = fft_select;
  *config = tmp_config;

  // Transfer length = num_fft * 2^(fft_select-1) 128-bit words.
  // NOTE(review): this presumably assumes 2^fft_select 64-bit complex samples
  // per frame (two samples per 128-bit word) - confirm against the host code.
  // fft_select must be at least 1, otherwise the shift count is negative.
  int num_128b_words = (num_fft << (fft_select-1));
  hls_dma(num_128b_words, din, dout, strm_in, strm_out);

}
}

kernel.xml

XML
<!-------------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** 
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ---------------------------------------------------------------------------->

<?xml version="1.0" encoding="UTF-8"?>
<root versionMajor="1" versionMinor="0">
  <!-- Kernel description for the packaged FFT IP. The kernel name "fft" is the
       same name passed to package_xo with the kernel_name option. -->
  <!-- NOTE(review): hwControlProtocol="ap_ctrl_none" presumably declares a
       free-running kernel with no start/done handshake; confirm against the
       Vitis RTL kernel documentation. -->
  <kernel name="fft" language="ip_c" vlnv="xilinx.com:ip:xfft:1.0" attributes="" preferredWorkGroupSizeMultiple="0" workGroupSize="1" hwControlProtocol="ap_ctrl_none">
    <!-- Three stream ports: a 24-bit configuration input, a 64-bit data input
         and a 64-bit data output. The names match the interface names assigned
         in package_ip.tcl. -->
    <ports>
      <port name="S_AXIS_CONFIG" mode="read_only"  dataWidth="24" portType="stream"></port>
      <port name="S_AXIS_DATA"   mode="read_only"  dataWidth="64" portType="stream"></port>
      <port name="M_AXIS_DATA"   mode="write_only" dataWidth="64" portType="stream"></port>
    </ports>
    <!-- One argument per port, ids 0 to 2. The size attribute is 3 for the
         configuration argument and 8 for the two data arguments. -->
    <args>
      <arg name="config" addressQualifier="4" id="0" port="S_AXIS_CONFIG" size="3" offset="0" type="uint*" hostOffset="0" hostSize="4"></arg>
      <arg name="data"   addressQualifier="4" id="1" port="S_AXIS_DATA"   size="8" offset="0" type="uint*" hostOffset="0" hostSize="8"></arg>
      <arg name="result" addressQualifier="4" id="2" port="M_AXIS_DATA"   size="8" offset="0" type="uint*" hostOffset="0" hostSize="8"></arg>
    </args>
  </kernel>
</root>

package_xo.tcl

Tcl
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

# Package the IP in ../fft_ip, described by ../kernel.xml, as the kernel
# object ../fft.xo. One option per line for readability.
package_xo \
    -force \
    -xo_path ../fft.xo \
    -kernel_name fft \
    -kernel_xml ../kernel.xml \
    -ip_directory ../fft_ip \
    -verbose

package_ip.tcl

Tcl
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

# Create the IP project
# A scratch Vivado project named xfft is created under ./xfft and is deleted
# again by the close_project call at the end of this script.
# NOTE(review): the board part below is ultra96v2 while the project text lists
# the UltraZed-EG Starter Kit - confirm that this part/board pair is intended.
create_project xfft ./xfft -part xczu3eg-sbva484-1-i
set_property board_part avnet.com:ultra96v2:part0:1.1 [current_project]

# Add the FFT IP to the block design and configure
# Configuration applied below: 16384-point maximum transform length with
# run-time configurable length, pipelined streaming I/O, floating-point data,
# non-real-time throttling and natural output ordering.
create_bd_design "xfft"
update_compile_order -fileset sources_1
startgroup
create_bd_cell -type ip -vlnv xilinx.com:ip:xfft:9.1 xfft_0
endgroup
set_property -dict [list CONFIG.data_format.VALUE_SRC USER] [get_bd_cells xfft_0]
set_property -dict [list CONFIG.transform_length {16384} CONFIG.target_clock_frequency {600} CONFIG.implementation_options {pipelined_streaming_io} CONFIG.run_time_configurable_transform_length {true} CONFIG.data_format {floating_point} CONFIG.throttle_scheme {nonrealtime} CONFIG.output_ordering {natural_order} CONFIG.phase_factor_width {24} CONFIG.complex_mult_type {use_mults_performance} CONFIG.number_of_stages_using_block_ram_for_data_and_phase_factors {7}] [get_bd_cells xfft_0]
# Bring the clock and the three AXI4-Stream interfaces out of the block design.
startgroup
make_bd_pins_external  [get_bd_pins xfft_0/aclk]
make_bd_intf_pins_external  [get_bd_intf_pins xfft_0/S_AXIS_DATA] [get_bd_intf_pins xfft_0/S_AXIS_CONFIG]
endgroup
startgroup
make_bd_intf_pins_external  [get_bd_intf_pins xfft_0/M_AXIS_DATA]
endgroup
# Drop the automatic _0 suffixes so the external names match the port names
# used in kernel.xml, and rename the clock to ap_clk.
set_property name S_AXIS_DATA [get_bd_intf_ports S_AXIS_DATA_0]
set_property name S_AXIS_CONFIG [get_bd_intf_ports S_AXIS_CONFIG_0]
set_property name ap_clk [get_bd_ports aclk_0]
set_property name M_AXIS_DATA [get_bd_intf_ports M_AXIS_DATA_0]

# Create the HDL wrapper
make_wrapper -files [get_files {./xfft/xfft.srcs/sources_1/bd/xfft/xfft.bd}] -top
add_files -norecurse ./xfft/xfft.srcs/sources_1/bd/xfft/hdl/xfft_wrapper.v

# Package the IP
# The packaged core is written to ../fft_ip, the directory that package_xo.tcl
# passes as its IP directory. All three stream interfaces are associated with
# ap_clk.
ipx::package_project -root_dir ../fft_ip -vendor user.org -library user -taxonomy /UserIP -import_files -set_current false
ipx::unload_core ../fft_ip/component.xml
ipx::edit_ip_in_project -upgrade true -name tmp_edit_project -directory ../fft_ip ../fft_ip/component.xml
update_compile_order -fileset sources_1
ipx::associate_bus_interfaces -busif M_AXIS_DATA -clock ap_clk [ipx::current_core]
ipx::associate_bus_interfaces -busif S_AXIS_CONFIG -clock ap_clk [ipx::current_core]
ipx::associate_bus_interfaces -busif S_AXIS_DATA -clock ap_clk [ipx::current_core]
# The commented-out commands below (and the three further down) are disabled in
# the original script and are kept for reference.
#ipx::add_bus_parameter FREQ_HZ [ipx::get_bus_interfaces ap_clk -of_objects [ipx::current_core]]
#set_property value_resolve_type user [ipx::get_bus_parameters -of [::ipx::get_bus_interfaces -of [ipx::current_core] *clk*]
set_property previous_version_for_upgrade user.org:user:xfft_wrapper:1.0 [ipx::current_core]
set_property core_revision 1 [ipx::current_core]
#set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} [ipx::current_core]
#set_property sdx_kernel true [ipx::current_core]
#set_property sdx_kernel_type rtl [ipx::current_core]
# Regenerate GUI files and checksums, save, check and archive the core, then
# delete the scratch project.
# NOTE(review): the archive is named xilinx.com_ip_xfft_1.0.zip although the
# core is packaged with vendor user.org and library user - confirm the naming.
ipx::create_xgui_files [ipx::current_core]
ipx::update_checksums [ipx::current_core]
ipx::save_core [ipx::current_core]
ipx::check_integrity -quiet [ipx::current_core]
ipx::archive_core ../fft_ip/xilinx.com_ip_xfft_1.0.zip [ipx::current_core]
close_project -delete

create_ip_xo.sh

SH
#!/bin/bash
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

# Stop at the first failing command. Without this, a failed 'cd' or a failed
# IP packaging run would still go on to run package_xo on missing or stale
# output.
set -e

# Step 1: package the FFT block design as an IP core (writes ../fft_ip).
mkdir -p vivado
cd vivado
vivado -mode batch -source ../package_ip.tcl
cd ..

# Step 2: wrap the packaged IP into a kernel object (writes ../fft.xo).
mkdir -p build
cd build
vivado -mode batch -source ../package_xo.tcl

get_compile_fftw.sh

SH
#!/bin/bash
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

# The Vitis settings script must have been sourced before running this script.
if [[ -z "$XILINX_VITIS" ]]; then
  echo "ERROR: The Vitis environment is not set up correctly." >&2
  echo "       Please source settings64.sh located in the Vitis install" >&2
  echo "       directory, and then rerun this script." >&2
  # Report the failure to the caller (a bare 'exit' would return status 0).
  exit 1
fi

# Stop at the first failing step so that a failed download, extraction or
# configure run is not followed by a build in the wrong directory.
set -e

# Navigate to the app directory in the project structure and create an FFTW directory
mkdir -p ~/uz3eg_fft/app/fftw
cd ~/uz3eg_fft/app/fftw

# Download and extract the FFTW library tarball
wget "ftp://ftp.fftw.org/pub/fftw/fftw-3.3.8.tar.gz"
tar -xvzf fftw-3.3.8.tar.gz
cd fftw-3.3.8

# Compile the FFTW library for ARM A53 architecture
# (single precision, NEON enabled, installed under ./build/install)
./configure \
    --enable-single \
    --enable-neon \
    --host=aarch64-linux-gnu \
    --with-slow-timer \
    ARM_CPU_TYPE=cortex-a53 \
    --prefix="$PWD/build/install"

make -j"$(nproc)"

make install

compile_app.sh

SH
#!/bin/bash
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

# Cross-compile the host application (src/*.cpp) into fft_test.exe against the
# platform sysroot and the locally built FFTW library.
SYSROOT=~/uz3eg_fft/platform/UZ3EG_IOCC/sw/UZ3EG_IOCC/PetaLinux/sysroot/aarch64-xilinx-linux

# Fail early with a clear message instead of a long list of missing headers.
if [[ ! -d "$SYSROOT" ]]; then
  echo "ERROR: sysroot not found at $SYSROOT" >&2
  exit 1
fi

# An unset XILINX_VIVADO silently turns the include path below into /include.
if [[ -z "$XILINX_VIVADO" ]]; then
  echo "WARNING: XILINX_VIVADO is not set; the Vivado include directory will not be found." >&2
fi

# All expansions are quoted so that paths containing spaces stay one argument.
# The flag order is unchanged from the original command line.
aarch64-linux-gnu-g++ \
  -O2 \
  --sysroot="$SYSROOT" \
  -D__USE_XOPEN2K8 \
  -I./src \
  -I./fftw/fftw-3.3.8/build/install/include \
  -I"$SYSROOT/usr/include/xrt" \
  -I"$XILINX_VIVADO/include" \
  -I"$SYSROOT/usr/include" \
  -fmessage-length=0 \
  -std=c++14 \
  src/*.cpp \
  -lxilinxopencl \
  -lpthread \
  -lrt \
  -lstdc++ \
  -lgmp \
  -lxrt_core \
  -lfftw3f \
  -L"$SYSROOT/usr/lib" \
  -L./fftw/fftw-3.3.8/build/install/lib \
  -o fft_test.exe

fft_test.cpp

C/C++
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         January 9, 2018
** File Name:           fft_test.cpp
**
** Tool versions:       Vitis 2020.1
**
** Description:         Single FFT size evaluation application
**
** Revision:            Jan. 09, 2018: Initial version
**                      Jan. 15, 2020: Ported from SDSoC to Vitis
**
**----------------------------------------------------------------------------*/

#include <iostream>
#include <complex>
#include <cstring>
#include <stdint.h>
#include <math.h>
#include "fft_test.h"
#include "lnx_time.h"

#define SQR(x) ((x) * (x))

using namespace std;

void fft_test(int p_fft_select)
{
  /* Declare variables */
  complex<float> *data;
  fftwf_complex *in, *out;
  fftwf_plan p;
  int fft_size;
  int fft_select;
  int num_fft;
  lnx_timer fftw_ctr, fftip_ctr;
  float fftw_plan_time;
  float fftw_exec_time;
  float xfft_init_time;
  float xfft_exec_time;
  char filename[80];

  /* Initialize timers */
  fftw_ctr.reset();
  fftip_ctr.reset();

  /* Query the user for FFT size/direction */
  if (p_fft_select == 0)
  {
    cout << "Select the FFT size to test: " << endl;
    cout << "  3  =     8-point FFT " << endl;
    cout << "  4  =    16-point FFT " << endl;
    cout << "  5  =    32-point FFT " << endl;
    cout << "  6  =    64-point FFT " << endl;
    cout << "  7  =   128-point FFT " << endl;
    cout << "  8  =   256-point FFT " << endl;
    cout << "  9  =   512-point FFT " << endl;
    cout << "  10 =  1024-point FFT " << endl;
    cout << "  11 =  2048-point FFT " << endl;
    cout << "  12 =  4096-point FFT " << endl;
    cout << "  13 =  8192-point FFT " << endl;
    cout << "  14 = 16384-point FFT " << endl;
    cin >> fft_select;
  }
  else
  {
    fft_select = p_fft_select;
  }

  /* Range check user input */
  if ((fft_select > 14) || (fft_select < 3))
  {
    cout << "Error: FFT size " << (1 << fft_select) << " is not supported." << endl;
    return;
  }

  fft_size = (1 << fft_select);
  num_fft = BUFF_SIZE / (fft_size * sizeof(complex<float>));

  /* Allocate data arrays based on user inputs */
  cout << "Allocating arrays" << endl;
  data  = (complex<float> *)malloc(sizeof(complex<float>) * fft_size);
  fftw_ctr.start();
  in    = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * num_fft * fft_size);
  out   = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * num_fft * fft_size);
  fftw_ctr.stop();

  /* Create test data pattern - complex LFM chirp */
  cout << "Loading input array" << endl;

  for (int i = 0; i < num_fft; i++)
  {
    complex_exp(FS, FREQ + 0.25e6 * i, fft_size, data);
    for (int j = 0; j < fft_size; j++)
    {
      in[i*fft_size+j][0] = data[j].real();
      in[i*fft_size+j][1] = data[j].imag();
    }
  }

  /* Perform FFTW processing */
  cout << "Creating FFTW plan for " << fft_size << "-point FFT" << endl;
  fftw_ctr.start();
  p = fftwf_plan_dft_1d(fft_size, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
  fftw_ctr.stop();
  fftw_plan_time = fftw_ctr.secs();

  cout << "Executing FFTW plan" << endl;
  fftw_ctr.reset();
  fftw_ctr.start();

  for (int i = 0; i < num_fft; i++)
  {
    fftwf_execute_dft(p, &in[i*fft_size], &out[i*fft_size]);
  }

  fftw_ctr.stop();
  fftw_exec_time = fftw_ctr.secs();

  cout << "Destroying FFTW plan" << endl;
  fftw_ctr.reset();
  fftw_ctr.start();
  fftwf_destroy_plan(p);
  fftw_ctr.stop();
  fftw_plan_time += fftw_ctr.secs();

  /* Perform XFFT processing */
  cout << "Initilizing XFFT" << endl;
  fftip_ctr.start();
  xfft fft_accel;
  fft_accel.init(fft_select, num_fft, xfft::FWD_FFT);
  fftip_ctr.stop();
  xfft_init_time = fftip_ctr.secs();

  cout << "Performing batch FFT acceleration with " << num_fft << " frames" << endl;
  memcpy(fft_accel.in_buffer(), in, sizeof(complex<float>)*num_fft*fft_size);
  fftip_ctr.reset();
  fftip_ctr.start();
  fft_accel.execute();
  fftip_ctr.stop();

  xfft_exec_time = fftip_ctr.secs();

  float speedup = fftw_exec_time / xfft_exec_time;
  cout << "FFTW configuration time was " << fftw_plan_time * 1.0e6f << " us" << endl;
  cout << "Average FFTW plan execution time was " << fftw_exec_time / (float)num_fft * 1.0e6f << " us" << endl << endl;
  cout << "XFFT configuration time was " << xfft_init_time * 1.0e6f << " us" << endl;
  cout << "Average XFFT execution time was " << xfft_exec_time / (float)num_fft * 1.0e6f << " us" << endl << endl;
  cout << "Average speedup for a " << fft_size << "-point FFT was " << speedup << "x" << endl << endl;

  /* Compare FFTW results to XFFT */
	cout << "Comparing FFTW outputs with XFFT outputs" << endl;
	complex<float> *xfft_out = (complex<float> *)fft_accel.out_buffer();
	complex<float> *fftw_out = (complex<float> *)&out[0][0];
	double sqr_err = 0.0;
	for (int i = 0; i < num_fft*fft_size; i++)
	{
    sqr_err += fabs(SQR(fftw_out[i] - xfft_out[i]));
	}

	double mse = sqr_err / (double)(num_fft * fft_size);

	cout << "Mean Square Error (MSE) = " << (float)mse << endl << endl;

  cout << "Releasing memory" << endl << endl;
  fftwf_free(in);
  fftwf_free(out);
  free(data);

  cout << "Done" << endl << endl;

}

fft_test.h

C/C++
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         January 9, 2018
** File Name:           fft_test.h
**
** Tool versions:       Vitis 2020.1
**
** Description:         FFT test header file
**
** Revision:            Jan. 09, 2018: Initial version
**                      Jan. 15, 2020: Ported from SDSoC to Vitis
**
**----------------------------------------------------------------------------*/

#ifndef FFT_TEST_H_
#define FFT_TEST_H_

#include <iostream>
#include <complex>
#include <math.h>
#include <stdlib.h>
#include "fftw3.h"
#include "xfft.hpp"

/* FFT parameters */
#define BUFF_SIZE (128*1024*1024) // 128 MB Buffer

/* Test pattern parameters */
#define FS   100.0e6
#define FREQ  10.0e6

void fft_test(int p_fft_select);

/*
 * complex_exp - fill data[0..num_samples-1] with a complex exponential tone:
 *
 *     data[i] = cos(phase_i) + j*sin(phase_i),
 *     phase_i = 2*pi * frequency * i / sample_rate
 *
 * sample_rate : sampling rate, in the same unit as frequency
 * frequency   : tone frequency
 * num_samples : number of samples to generate; nothing is written when <= 0
 * data        : caller-provided output array of at least num_samples entries
 *
 * The phase and the sine/cosine are evaluated in double precision and only the
 * final sample is rounded to float, so the tone stays accurate at large sample
 * indices where a single-precision phase would lose several digits.
 */
static inline void complex_exp( double               sample_rate,
                                double               frequency,
                                int                  num_samples,
                                std::complex<float> *data)
{
  /* Phase advance per sample, in radians */
  const double phase_step = 2.0 * M_PI * frequency / sample_rate;

  for (int i = 0; i < num_samples; i++)
  {
    const double phase = phase_step * i;
    data[i] = std::complex<float>((float)cos(phase), (float)sin(phase));
  }

}

#endif /* FFT_TEST_H_ */

lnx_time.h

C/C++
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         July 16, 2018
** File Name:           lnx_time.h
**
** Tool versions:       SDSoC, Vitis
**
** Description:         Class for Linux time measurement
**
** Revision:            July 16, 2018: Initial version
**                      Jan. 15, 2020: Updated copyright date
**
**----------------------------------------------------------------------------*/

#ifndef LNX_TIME_H
#define LNX_TIME_H

#include <stdint.h>
#include <ctime>

/*
 * lnx_timer - accumulating interval timer based on CLOCK_MONOTONIC.
 *
 * Usage: reset(), then one or more start()/stop() pairs. secs() returns the
 * accumulated time of all pairs since the last reset, avg_secs() the mean
 * time per start() call.
 *
 * Times are kept as integer ticks, where one tick is one clock resolution
 * step as reported by clock_getres().
 */
class lnx_timer
{
  private:
    /* Clock resolution in nanoseconds; never returns 0 so it is safe to
       divide by the result. */
    static uint64_t lnx_clock_resolution_ns()
    {
      timespec c = {};
      clock_getres(CLOCK_MONOTONIC, &c);

      const uint64_t res = (uint64_t)c.tv_sec * 1000000000ULL + (uint64_t)c.tv_nsec;

      return (res == 0) ? 1 : res;
    }

    /* Current monotonic time in ticks. Integer arithmetic is used so that no
       precision is lost once the nanosecond count exceeds 2^53. */
    uint64_t lnx_clock_counter()
    {
      timespec t = {};
      clock_gettime(CLOCK_MONOTONIC, &t);

      const uint64_t nsecs = (uint64_t)t.tv_sec * 1000000000ULL + (uint64_t)t.tv_nsec;

      return nsecs / lnx_clock_resolution_ns();
    }

    /* Number of ticks per second. */
    uint64_t lnx_clock_frequency()
    {
      return 1000000000ULL / lnx_clock_resolution_ns();
    }

  public:
    /* cnt: tick count at the last start(); tot: accumulated ticks;
       calls: number of start() calls since the last reset() */
    uint64_t cnt, tot, calls;
    lnx_timer() : cnt(0), tot(0), calls(0) {}
    void reset() { tot = cnt = calls = 0; }
    void start() { cnt = lnx_clock_counter(); calls++; }
    void stop() { tot += (lnx_clock_counter() - cnt); }
    /* Mean ticks per start() call; 0 when start() was never called (avoids an
       integer division by zero). */
    uint64_t avg() { return (calls == 0) ? 0 : (tot / calls); }
    float secs() { return (float)((double)tot / (double)lnx_clock_frequency()); }
    float avg_secs() { return (float)((double)avg() / (double)lnx_clock_frequency()); }
};

#endif

main.cpp

C/C++
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         January 9, 2018
** File Name:           main.cpp
**
** Tool versions:       Vitis 2020.1
**
** Description:         Top-level application file for FFT testing
**
** Revision:            Jan. 09, 2018: Initial version
**                      Jan. 15, 2020: Ported from SDSoC to Vitis
**
**----------------------------------------------------------------------------*/

#include <iostream>
#include "fft_test.h"

using namespace std;

/*
 * Entry point: asks which test to run on stdin.
 *   0            - a single FFT size; fft_test() prompts for the size
 *   anything else - benchmark every size selection from 3 to 14 in turn
 * The command line arguments are not used.
 */
int main ( int p_argc, char *p_argv[] )
{
  int mode = 0;

  cout << "What test would you like to run (0 = single FFT size, 1 = FFT benchmark)" << endl;
  cin >> mode;

  /* Single-size run */
  if (mode == 0)
  {
    fft_test(0);
    return 0;
  }

  /* Benchmark sweep over all supported size selections */
  const char *rule = "----------------------------------------------------------";

  for (int select = 3; select < 15; ++select)
  {
    cout << rule << endl;
    cout << "Performing benchmark for " << (1 << select) << "-point FFT" << endl;
    cout << rule << endl << endl;
    fft_test(select);
  }

  return 0;
}

xcl2.cpp

C/C++
/**********
Copyright (c) 2020, Xilinx, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********/

#include "xcl2.hpp"
#include <climits>
#include <sys/stat.h>
#if defined(_WINDOWS)
#include <io.h>
#else
#include <unistd.h>
#endif

namespace xcl {
// Returns the accelerator devices of the OpenCL platform whose
// CL_PLATFORM_NAME equals `vendor_name`.
//
// The process is terminated with EXIT_FAILURE when no platform matches, or
// (through OCL_CHECK) when any OpenCL query fails.
std::vector<cl::Device> get_devices(const std::string &vendor_name) {
  cl_int err;
  std::vector<cl::Platform> platforms;
  OCL_CHECK(err, err = cl::Platform::get(&platforms));

  // Walk the platform list until one reports the requested name.
  cl::Platform platform;
  bool matched = false;
  for (size_t idx = 0; idx < platforms.size() && !matched; ++idx) {
    platform = platforms[idx];
    OCL_CHECK(err, std::string platformName =
                       platform.getInfo<CL_PLATFORM_NAME>(&err));
    matched = (platformName == vendor_name);
  }

  if (!matched) {
    std::cout << "Error: Failed to find Xilinx platform" << std::endl;
    exit(EXIT_FAILURE);
  }

  // Only accelerator-type devices are of interest to the callers.
  std::vector<cl::Device> devices;
  OCL_CHECK(err,
            err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
  return devices;
}

// Convenience wrapper: accelerator devices of the platform named "Xilinx".
std::vector<cl::Device> get_xil_devices() {
  const std::string xilinx_platform("Xilinx");
  return get_devices(xilinx_platform);
}

// Reads the whole file `xclbin_file_name` into memory and returns its bytes.
//
// The file is opened exactly once, in binary mode, through an std::ifstream
// whose destructor closes it, so no descriptor is left open after the call.
// If the file cannot be opened, its size cannot be determined, or the read
// comes up short, an error is printed and the process exits with
// EXIT_FAILURE.
std::vector<unsigned char>
read_binary_file(const std::string &xclbin_file_name) {
  std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
  if (!bin_file.is_open()) {
    printf("ERROR: %s xclbin not available please build\n",
           xclbin_file_name.c_str());
    exit(EXIT_FAILURE);
  }

  // Determine the file size by seeking to the end; tellg() yields -1 on
  // failure, which must not reach the vector constructor as a huge size.
  bin_file.seekg(0, bin_file.end);
  const std::streamoff nb = bin_file.tellg();
  if (nb < 0) {
    printf("ERROR: unable to determine size of %s\n",
           xclbin_file_name.c_str());
    exit(EXIT_FAILURE);
  }
  bin_file.seekg(0, bin_file.beg);

  // Loading XCL Bin into char buffer
  std::vector<unsigned char> buf(static_cast<size_t>(nb));
  if (nb > 0 &&
      !bin_file.read(reinterpret_cast<char *>(buf.data()), nb)) {
    printf("ERROR: failed to read %s\n", xclbin_file_name.c_str());
    exit(EXIT_FAILURE);
  }
  return buf;
}

// True when the XCL_EMULATION_MODE environment variable is set to anything
// (including an empty string); false when it is absent.
bool is_emulation() {
  return getenv("XCL_EMULATION_MODE") != nullptr;
}

// True only when the XCL_EMULATION_MODE environment variable is set and its
// value is exactly "hw_emu".
bool is_hw_emulation() {
  const char *mode = getenv("XCL_EMULATION_MODE");
  if (mode == nullptr) {
    return false;
  }
  return strcmp(mode, "hw_emu") == 0;
}

// True when `device_name` contains the substring "xpr" anywhere.
// `device_name` must be a valid NUL-terminated string.
bool is_xpr_device(const char *device_name) {
  return strstr(device_name, "xpr") != nullptr;
}
}; // namespace xcl

xcl2.hpp

C/C++
/**********
Copyright (c) 2018, Xilinx, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********/

#pragma once

#define CL_HPP_CL_1_2_DEFAULT_BUILD
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS

// OCL_CHECK(error, call): expands `call` as a statement, then tests `error`.
// If `error` is not CL_SUCCESS it prints the file, line, the text of `call`
// and the error code, and terminates the process with EXIT_FAILURE.
//
// The expansion is deliberately NOT wrapped in its own scope: `call` may be a
// declaration, and the declared name stays visible after the macro. As a
// consequence the macro expands to two statements and must not be used as the
// unbraced body of an if/else/loop.
//
// OCL_CHECK doesn't work if call has templatized function call
// (presumably because a comma inside a template argument list would be
// treated as a macro argument separator - confirm before relying on it).
#define OCL_CHECK(error, call)                                                 \
  call;                                                                        \
  if (error != CL_SUCCESS) {                                                   \
    printf("%s:%d Error calling " #call ", error code is: %d\n", __FILE__,     \
           __LINE__, error);                                                   \
    exit(EXIT_FAILURE);                                                        \
  }

#include <CL/cl2.hpp>
#include <CL/cl_ext_xilinx.h>
#include <fstream>
#include <iostream>
// Page-aligned (4096-byte) allocator for host buffers handed to OpenCL.
//
// When creating a buffer with a user pointer (CL_MEM_USE_HOST_PTR), under the
// hood the user pointer is used if and only if it is properly aligned (page
// aligned). When it is not aligned, the runtime has no choice but to create
// its own host-side buffer that backs the user pointer. This in turn implies
// that all operations that move data to and from the device incur an extra
// memcpy between the runtime's own host buffer and the user pointer. So it is
// recommended to use this allocator when creating a Buffer/Memory Object with
// CL_MEM_USE_HOST_PTR, so that the user buffer is aligned to the page
// boundary and is used directly.
//
// The allocator is stateless: every instance can free memory obtained from
// any other instance, which is what the equality operators below express.
template <typename T> struct aligned_allocator {
  using value_type = T;

  aligned_allocator() noexcept {}

  aligned_allocator(const aligned_allocator &) noexcept {}

  template <typename U>
  aligned_allocator(const aligned_allocator<U> &) noexcept {}

  // Allocates storage for `num` objects of type T, aligned to 4096 bytes.
  // Throws std::bad_alloc if `num * sizeof(T)` is not representable or (on
  // non-Windows builds) if the underlying allocation fails. On Windows builds
  // an allocation failure prints a message and exits the process.
  T *allocate(std::size_t num) {
    // Reject requests whose byte count would wrap around; otherwise a
    // too-small buffer would be returned silently.
    if (num > static_cast<std::size_t>(-1) / sizeof(T))
      throw std::bad_alloc();

    void *ptr = nullptr;

#if defined(_WINDOWS)
    {
      ptr = _aligned_malloc(num * sizeof(T), 4096);
      if (ptr == NULL) {
        std::cout << "Failed to allocate memory" << std::endl;
        exit(EXIT_FAILURE);
      }
    }
#else
    {
      if (posix_memalign(&ptr, 4096, num * sizeof(T)))
        throw std::bad_alloc();
    }
#endif
    return reinterpret_cast<T *>(ptr);
  }

  // Releases storage previously returned by allocate(). The element count is
  // part of the allocator interface but is not needed here.
  void deallocate(T *p, std::size_t /*num*/) noexcept {
#if defined(_WINDOWS)
    _aligned_free(p);
#else
    free(p);
#endif
  }

  // Stateless allocators always compare equal, whatever their value_type.
  template <typename U>
  bool operator==(const aligned_allocator<U> &) const noexcept {
    return true;
  }

  template <typename U>
  bool operator!=(const aligned_allocator<U> &) const noexcept {
    return false;
  }
};

namespace xcl {
// Accelerator devices of the platform named "Xilinx".
std::vector<cl::Device> get_xil_devices();
// Accelerator devices of the platform whose CL_PLATFORM_NAME equals
// `vendor_name`; exits the process if no such platform exists.
std::vector<cl::Device> get_devices(const std::string &vendor_name);
// Contents of the file `xclbin_file_name` as raw bytes; exits the process if
// the file cannot be opened.
std::vector<unsigned char>
read_binary_file(const std::string &xclbin_file_name);
// True when the XCL_EMULATION_MODE environment variable is set.
bool is_emulation();
// True when XCL_EMULATION_MODE is set to exactly "hw_emu".
bool is_hw_emulation();
// True when `device_name` contains the substring "xpr".
bool is_xpr_device(const char *device_name);
// Holder for the Xilinx streaming extension entry points. The static
// function pointers are filled in by init(), which looks each one up by name
// on the given platform; a lookup result is stored as-is, without checking it.
class Stream {
public:
  static decltype(&clCreateStream) createStream;
  static decltype(&clReleaseStream) releaseStream;
  static decltype(&clReadStream) readStream;
  static decltype(&clWriteStream) writeStream;
  static decltype(&clPollStreams) pollStreams;

  // Resolves all five stream extension functions for `platform`.
  static void init(const cl_platform_id &platform) {
    createStream = reinterpret_cast<decltype(&clCreateStream)>(
        clGetExtensionFunctionAddressForPlatform(platform, "clCreateStream"));
    releaseStream = reinterpret_cast<decltype(&clReleaseStream)>(
        clGetExtensionFunctionAddressForPlatform(platform, "clReleaseStream"));
    readStream = reinterpret_cast<decltype(&clReadStream)>(
        clGetExtensionFunctionAddressForPlatform(platform, "clReadStream"));
    writeStream = reinterpret_cast<decltype(&clWriteStream)>(
        clGetExtensionFunctionAddressForPlatform(platform, "clWriteStream"));
    pollStreams = reinterpret_cast<decltype(&clPollStreams)>(
        clGetExtensionFunctionAddressForPlatform(platform, "clPollStreams"));
  }
};
// Holder for the Xilinx memory-object file-descriptor extension entry
// points. init() looks both functions up by name on the given platform and
// stores the results unchecked.
class P2P {
public:
  static decltype(&xclGetMemObjectFd) getMemObjectFd;
  static decltype(&xclGetMemObjectFromFd) getMemObjectFromFd;

  // Resolves both extension functions for `platform`.
  static void init(const cl_platform_id &platform) {
    getMemObjectFd = reinterpret_cast<decltype(&xclGetMemObjectFd)>(
        clGetExtensionFunctionAddressForPlatform(platform,
                                                 "xclGetMemObjectFd"));
    getMemObjectFromFd = reinterpret_cast<decltype(&xclGetMemObjectFromFd)>(
        clGetExtensionFunctionAddressForPlatform(platform,
                                                 "xclGetMemObjectFromFd"));
  }
};
// Holder for the xclGetComputeUnitInfo extension entry point. init() looks
// the function up by name on the given platform and stores the result
// unchecked.
class Ext {
public:
  static decltype(&xclGetComputeUnitInfo) getComputeUnitInfo;

  // Resolves the extension function for `platform`.
  static void init(const cl_platform_id &platform) {
    getComputeUnitInfo = reinterpret_cast<decltype(&xclGetComputeUnitInfo)>(
        clGetExtensionFunctionAddressForPlatform(platform,
                                                 "xclGetComputeUnitInfo"));
  }
};
}

xfft.hpp

C/C++
/*----------------------------------------------------------------------------
**      _____
**     *     *
**    *____   *____
**   * *===*   *==*
**  *___*===*___**  AVNET
**       *======*
**        *====*
**----------------------------------------------------------------------------
**
** This design is the property of Avnet.  Publication of this
** design is not authorized without written consent from Avnet.
**
** Disclaimer:
**    Avnet, Inc. makes no warranty for the use of this code or design.
**    This code is provided  "As Is". Avnet, Inc assumes no responsibility for
**    any errors, which may appear in this code, nor does it make a commitment
**    to update the information contained herein. Avnet, Inc specifically
**    disclaims any implied warranties of fitness for a particular purpose.
**                     Copyright(c) 2020 Avnet, Inc.
**                             All rights reserved.
**
**----------------------------------------------------------------------------
**
** Create Date:         January 15, 2020
** File Name:           xfft.hpp
**
** Tool versions:       Vitis 2020.1
**
** Description:         Class to encapsulate OpenCL code that executes a Xilinx 
**                      FFT accelerator in programmable logic.
**
** Revision:            Jan. 15, 2020: Initial version
**
**----------------------------------------------------------------------------*/

#ifndef XFFT_H_
#define XFFT_H_

#include <complex>
#include <vector>
#include "xcl2.hpp"

class xfft
{
  public:
   
    xfft()
    {
      /* Open device */
      auto devices = xcl::get_xil_devices();
      auto device  = devices[0];
     
      /* Create command queue */
      OCL_CHECK(err, context = cl::Context({device}, NULL, NULL, NULL, &err));
      OCL_CHECK(err, q = cl::CommandQueue(context, {device}, CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err));
      
      /* Load kernel */
      auto xclbin_file = xcl::read_binary_file("fft.xclbin");
      cl::Program::Binaries bins{{xclbin_file.data(), xclbin_file.size()}};
      OCL_CHECK(err, cl::Program program(context, {device}, bins, NULL, &err));

      if (err != CL_SUCCESS)
      {
        std::cout << "Failed to program device with xclbin file!\n";
        exit(EXIT_FAILURE);
      }
      else
      {
        OCL_CHECK(err, krnl_fft = cl::Kernel(program, "fft_infc", &err));
      }
    }

    ~xfft()
    {
      /* Unmap memory objects */
      OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_in[0], wr_buff));
      OCL_CHECK(err, err = q.enqueueUnmapMemObject(buffer_out[0], rd_buff));
      q.finish();
      buffer_in.clear();
      buffer_out.clear();
    }

    void init( int fft_select, int num_fft, int fft_dir )
    {
      l_buffer_size = sizeof(std::complex<float>) * (num_fft << fft_select);

      /* Allocate device memory */
      OCL_CHECK(err, buffer_in.push_back(cl::Buffer(context, CL_MEM_READ_ONLY, l_buffer_size, NULL, &err)));
      OCL_CHECK(err, buffer_out.push_back(cl::Buffer(context, CL_MEM_WRITE_ONLY, l_buffer_size, NULL, &err)));
      OCL_CHECK(err, wr_buff = (cfloat_t *)q.enqueueMapBuffer(buffer_in[0], CL_TRUE, CL_MAP_WRITE, 0, l_buffer_size, NULL, NULL, &err));
      OCL_CHECK(err, rd_buff = (cfloat_t *)q.enqueueMapBuffer(buffer_out[0], CL_TRUE, CL_MAP_READ, 0, l_buffer_size, NULL, NULL, &err));      
      
      /* Store parameters */
      l_fft_select = fft_select;
      l_num_fft    = num_fft;
      l_fft_dir    = fft_dir;
    }

    void* in_buffer()
    {
      return (void *)wr_buff;
    }

    void* out_buffer()
    {
      return (void *)rd_buff;
    }

    void execute()
    {
      /* Setup kernel parameters */
      OCL_CHECK(err, err = krnl_fft.setArg(0, l_fft_select));
      OCL_CHECK(err, err = krnl_fft.setArg(1, l_num_fft));
      OCL_CHECK(err, err = krnl_fft.setArg(2, l_fft_dir));
      OCL_CHECK(err, err = krnl_fft.setArg(3, buffer_in[0]));
      OCL_CHECK(err, err = krnl_fft.setArg(4, buffer_out[0]));
      
      /* Enqueue the kernel task */
      cl::Event event_sp;
      OCL_CHECK(err, err = q.enqueueTask(krnl_fft, NULL, &event_sp));
      
      /* Wait for completion */
      clWaitForEvents(1, (const cl_event *)&event_sp);
    }

    typedef std::complex<float> cfloat_t;

    const static int FWD_FFT = 1;
    const static int INV_FFT = 0;

  private:
    
    int l_fft_select;
    int l_num_fft;
    int l_fft_dir;
    cfloat_t *wr_buff;
    cfloat_t *rd_buff;

    size_t l_buffer_size;
    cl_int err;
    cl::Context context;
    cl::Kernel krnl_fft;
    cl::CommandQueue q;

    std::vector<cl::Buffer> buffer_in;
    std::vector<cl::Buffer> buffer_out;    
};

#endif // XFFT_H_

connections.cfg

Tex
# ----------------------------------------------------------------------------
#
#        ** **        **          **  ****      **  **********  ********** ®
#       **   **        **        **   ** **     **  **              **
#      **     **        **      **    **  **    **  **              **
#     **       **        **    **     **   **   **  *********       **
#    **         **        **  **      **    **  **  **              **
#   **           **        ****       **     ** **  **              **
#  **  .........  **        **        **      ****  **********      **
#     ...........
#                                     Reach Further™
#
# ----------------------------------------------------------------------------
#
#  This design is the property of Avnet.  Publication of this
#  design is not authorized without written consent from Avnet.
#
#  For product information and support questions:
#     https://www.element14.com/community/community/designcenter/zedboardcommunity
#
#  Disclaimer:
#     Avnet, Inc. makes no warranty for the use of this code or design.
#     This code is provided  "As Is". Avnet, Inc assumes no responsibility for
#     any errors, which may appear in this code, nor does it make a commitment
#     to update the information contained herein. Avnet, Inc specifically
#     disclaims any implied warranties of fitness for a particular purpose.
#                      Copyright(c) 2020 Avnet, Inc.
#                              All rights reserved.
#
# ----------------------------------------------------------------------------

[clock]
#########################
# clock id  0 = 150 MHz #
# clock id  1 = 300 MHz #
# clock id  2 =  75 MHz #
# clock id  3 = 100 MHz #
# clock id  4 = 200 MHz #
# clock id  5 = 400 MHz #
# clock id  6 = 600 MHz #
#########################

id=1:fft_infc_1.ap_clk
id=1:fft_1.ap_clk

[connectivity]
################################
# AXI-MM Interfaces            #
################################
sp=fft_infc_1.m_axi_gmem1:HP0
sp=fft_infc_1.m_axi_gmem2:HP1

######################################
# Kernel-to-kernel Stream Interfaces #
######################################
sc=fft_infc_1.config:fft_1.S_AXIS_CONFIG
sc=fft_infc_1.strm_out:fft_1.S_AXIS_DATA
sc=fft_1.M_AXIS_DATA:fft_infc_1.strm_in

[advanced]
misc=:solution_name=link
param=compiler.addOutputTypes=sd_card

[vivado]
prop=run.impl_1.strategy=Performance_Explore

Credits

Tom Simpson

Tom Simpson

5 projects • 71 followers
DSP & Machine Learning specialist at Avnet

Comments