This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

TMDSEVM6678: is there any tutorial explaining OMPDSP/fftlib/src/fft_omp_sp_1d_c2c/fft_omp_sp_1d_c2c_d.c ?

Part Number: TMDSEVM6678
Other Parts Discussed in Thread: FFTLIB, TEST

Hi All,

Is there any tutorial or something similar that explains the example below? I know C/C++ pretty well and a little bit of hardware architecture (I understand the concepts to some extent but haven't programmed the hardware myself, i.e. EDMA, L1/L2 cache, shared memory, etc.). I need to fully understand how to take the FFT of a signal very fast, so I am studying the unit tests developed for the fftlib.

Unit Test Example: OMPDSP/fftlib/src/fft_omp_sp_1d_c2c/fft_omp_sp_1d_c2c_d.c ?

Does anyone have a tutorial or a step-by-step manual that explains the steps of the fftlib unit tests in more detail? (Of course I can follow the function documentation and read here and there.)

For example: what do fft_omp_assign_edma_resources and fft_omp_free_edma_resources do?

Thanks so much,

Mike

    /* --------------------------------------------------------------------- */
    /* intialize hardware timers                                             */
    /* ---------------------------------------------------float------------ */
    TSCL=0;TSCH=0;

    /* initalize callout functions */
    plan_fxns.memoryRequest   = NULL;
    plan_fxns.memoryRelease   = NULL;
    plan_fxns.ecpyRequest     = fft_omp_assign_edma_resources;
    plan_fxns.ecpyRelease     = fft_omp_free_edma_resources;

here is the whole code 

/* ======================================================================= */
/*  TEXAS INSTRUMENTS, INC.                                                */
/*                                                                         */
/*  FFTLIB  FFT Library                                                    */
/*                                                                         */
/* Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/  */ 
/*                                                                         */
/*                                                                         */
/*  Redistribution and use in source and binary forms, with or without     */
/*  modification, are permitted provided that the following conditions     */
/*  are met:                                                               */
/*                                                                         */
/*    Redistributions of source code must retain the above copyright       */
/*    notice, this list of conditions and the following disclaimer.        */
/*                                                                         */
/*    Redistributions in binary form must reproduce the above copyright    */
/*    notice, this list of conditions and the following disclaimer in the  */
/*    documentation and/or other materials provided with the               */
/*    distribution.                                                        */
/*                                                                         */
/*    Neither the name of Texas Instruments Incorporated nor the names of  */
/*    its contributors may be used to endorse or promote products derived  */
/*    from this software without specific prior written permission.        */
/*                                                                         */
/*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    */
/*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      */
/*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  */
/*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT   */
/*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  */
/*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       */
/*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  */
/*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  */
/*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT    */
/*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  */
/*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   */
/*                                                                         */
/* ======================================================================= */

#include <xdc/std.h>
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <c6x.h>

#include <ti/fftlib/src/common/omp/omp_config.h>
#include <ti/fftlib/src/common/fft_common_d.h>
#include "fft_omp_sp_1d_c2c.h"
#include <ti/dsplib/src/DSPF_sp_fftSPxSP/DSPF_sp_fftSPxSP.h>
#include <ti/runtime/openmp/omp.h>

/* calculate radix for 1D array */
/*
 * calculate_rad -- split N into the two sub-lengths used by the 2-D
 * decomposition of the 1-D FFT.
 *
 * N is factored as (power of two) * 3^a * 5^b.  When the power-of-two part
 * is at least 64, the exponents of 2, 3 and 5 are divided between the two
 * dimensions (the first dimension receives the larger half of each), so
 * that (*n1) * (*n2) == N and *n1 >= *n2.
 *
 *   N          length to factor
 *   n1, n2     out: lengths of the first / second dimension
 *   fft_para1  out: N_p2 (power-of-two part), s_r3, s_r5 of the first dim
 *   fft_para2  out: same fields for the second dimension
 *   use_bs     out: 0 when N was factored, 1 when N is not supported
 *
 * When *use_bs is left at 1 none of the other outputs are written; callers
 * must not use them in that case.
 */
static void calculate_rad (int N, int *n1, int *n2, fft_para_mix_bs_t *fft_para1,
                    fft_para_mix_bs_t *fft_para2, int *use_bs)
{
  int i, j;
  int s_r3, s_r5, s_r3_2, s_r5_2, dum;

  *use_bs = 1;

  /* 0 is divisible by 3 any number of times, so the factor loops below
   * would never terminate for it; negative lengths are never supported. */
  if (N <= 0)
    return;

  s_r3 = 0;
  s_r5 = 0;

  dum = N;

  /* strip every factor of 3, then every factor of 5 */
  while (dum % 3 == 0) {
    s_r3++;
    dum /= 3;
  }

  while (dum % 5 == 0) {
    s_r5++;
    dum /= 5;
  }

  /* _norm is a TI C6000 intrinsic; for dum > 0 this is presumably
   * floor(log2(dum)) -- the exact power-of-two test below relies on it. */
  j = 30 - _norm(dum);

  if ((dum < 64) || (dum != (1 << j))) {
    //not supported, will trigger a plan failure at this point
    return;
  }

  *use_bs = 0;

  /* first dimension takes the larger half of every exponent */
  *n1 = 1 << (j-j/2);
  *n2 = 1 << (j/2);

  s_r5_2 = s_r5/2;
  s_r3_2 = s_r3/2;
  s_r3   = s_r3 - s_r3_2;
  s_r5   = s_r5 - s_r5_2;

  fft_para1->N_p2 = *n1;
  for (i = 0; i < s_r3; i++)
    *n1 *= 3;
  for (i = 0; i < s_r5; i++)
    *n1 *= 5;

  fft_para2->N_p2 = *n2;
  for (i = 0; i < s_r3_2; i++)
    *n2 *= 3;
  for (i = 0; i < s_r5_2; i++)
    *n2 *= 5;

  /* No swap is needed here: every exponent of n1 is >= the matching
   * exponent of n2, hence *n1 >= *n2 by construction. */

  fft_para1->s_r3 = s_r3;
  fft_para1->s_r5 = s_r5;

  fft_para2->s_r3 = s_r3_2;
  fft_para2->s_r5 = s_r5_2;

} /*calculate_rad ()*/

/* calculate twiddle size, Bn buffer size and workbuffer size */
/*
 * calculate_mem_size -- twiddle-table size for one dimension of length N.
 *
 *   N       length of this dimension
 *   s_r3    number of radix-3 stages
 *   s_r5    number of radix-5 stages
 *   N_p2    power-of-two part of N (accepted for interface symmetry,
 *           not used by the computation)
 *   twsize  out: computed twiddle size
 *
 * A pure power-of-two length yields the fixed value 2.  Otherwise each
 * radix-3 stage adds a third of the running span and each radix-5 stage a
 * fifth of it, the span being doubled before the radix-5 stages.
 */
static void calculate_mem_size(int N, int s_r3, int s_r5, int N_p2, int *twsize)
{
  int acc, span, stage;

  (void) N_p2;

  /* power of 2 only: no mixed-radix twiddles */
  if ((s_r3 == 0) && (s_r5 == 0)) {
    *twsize = 2;
    return;
  }

  acc  = 0;
  span = 4*N;

  for (stage = s_r3; stage > 0; stage--) {
    acc  += span/3;
    span /= 3;
  }

  /* radix-5 has 8 twiddles where radix-3 has 4 */
  span *= 2;

  for (stage = s_r5; stage > 0; stage--) {
    acc  += span/5;
    span /= 5;
  }

  *twsize = 2 + acc;
} /* calculate_mem_size()*/

void tw_gen_cn (float *w, int n);
void dft_sp (int N, float x[], float y[], int N1);

/* ======================================================================== */
/*  Kernel-specific section placement and alignment                         */
/*                                                                          */
/*  The input/output/twiddle arrays are assigned to the ".mem_ddr" section  */
/*  and the scratch buffer local_work to ".mem_l2".  Where those sections   */
/*  end up physically is decided by the linker command file, which is not   */
/*  part of this source.                                                    */
/* ======================================================================== */
#pragma DATA_SECTION(x_i, ".mem_ddr");
#pragma DATA_SECTION(y_i, ".mem_ddr");
#pragma DATA_SECTION(w_i, ".mem_ddr");
#pragma DATA_SECTION(x_cn, ".mem_ddr");
#pragma DATA_SECTION(y_cn, ".mem_ddr");
#pragma DATA_SECTION(w_cn, ".mem_ddr");

/* Alignment of 8 for all data arrays (presumably bytes, i.e. one          */
/* interleaved complex float -- confirm against the compiler manual).      */
#pragma DATA_ALIGN(x_i,  8);
#pragma DATA_ALIGN(x_cn, 8);

#pragma DATA_ALIGN(w_i,  8);
#pragma DATA_ALIGN(w_cn, 8);

#pragma DATA_ALIGN(y_i,  8);
#pragma DATA_ALIGN(y_cn, 8);

#pragma DATA_SECTION(local_work, ".mem_l2");

/* The scratch buffer gets a stricter alignment of 64. */
#pragma DATA_ALIGN(local_work,  64);

/* ======================================================================== */
/*  Parameters of fixed dataset.                                            */
/*                                                                          */
/*  Exactly one of FFT_MEM_MODEL_LG / _MED / _SM must be defined by the     */
/*  build; otherwise compilation stops with #error.                         */
/*    MAXN  upper bound of N in the power-of-two test loop of main()        */
/*    M_i   only used below to size local_work                              */
/* ======================================================================== */

#ifdef FFT_MEM_MODEL_LG
#define MAXN  (1024*1024)
#define M_i   (8*1024)
#else
# ifdef FFT_MEM_MODEL_MED
# define MAXN  (512*512)
# define M_i   (8*512)
# else
#   ifdef FFT_MEM_MODEL_SM
#   define MAXN  (512*512)
#   define M_i   (4*512)
#   else
#     error "Unsupported MEM MODEL!"
#   endif
# endif
#endif
/* M: floats per data array -- two floats (re, im) per complex sample.     */
#define M     (2*MAXN)
/* PAD: guard floats before and after each array; currently none.          */
#define PAD   (0)

/* ======================================================================== */
/*  Test arrays.  They carry no initializers here; main() fills them at     */
/*  run time before every test size.                                        */
/*    x_i  / y_i  / w_i    buffers handed to the FFTLIB plan                */
/*    x_cn / y_cn / w_cn   buffers used by the reference computation        */
/* ======================================================================== */

float x_i [M + 2 * PAD];
float x_cn[M + 2 * PAD];

float y_i [M + 2 * PAD];
float y_cn[M + 2 * PAD];

float w_i [2*2048 + 4 + 2 * PAD];
float w_cn[M + 2 * PAD];

/* Scratch buffer assigned to the plan's `local` field in main(); its size */
/* depends on the selected memory model.                                   */
#ifdef FFT_MEM_MODEL_LG
float local_work [16432*2 + M_i*10 + 1024*2 + 2 * PAD];
#else
# ifdef FFT_MEM_MODEL_MED
  float local_work [M_i*10 + 1024*2 + 2 * PAD];
# else
#   ifdef FFT_MEM_MODEL_SM
    float local_work [M_i*10 + 1024*2 + 2 * PAD];
#   else
#     error "Unsupported MEM MODEL!"
#   endif
# endif
#endif

/* ======================================================================== */
/*  Generate pointers to skip beyond array padding                          */
/*  (with PAD == 0 each pointer equals the start of its array)              */
/* ======================================================================== */
float *const ptr_x_i  = x_i  + PAD;
float *const ptr_x_cn = x_cn + PAD;

float *const ptr_w_i  = w_i  + PAD;
float *const ptr_w_cn = w_cn + PAD;

float *const ptr_y_i  = y_i  + PAD;
float *const ptr_y_cn = y_cn + PAD;

float *const ptr_local_work  = local_work + PAD;

/* ======================================================================== */
/*  MAIN -- Top level driver for the test.                                  */
/* ======================================================================== */

int main ()
{
    int    i, j, N, rad_cn;
    int    n1, n2;
    int    int_tw_size, ext_tw_size, twsize, localsize;
    int    use_bs = 0;
    int    s_r3, s_r5, N_p2;

    /* BaseN calculation based upon number of cores & cache lines for 4step fft */
    int     baseN = FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS;
    int     rad3 = 1, rad5 = 1, rad15 = 1;

    clock_t t_start, t_stop, t_overhead, t_opt;
    float  diff, max_diff = 0;
    fft_plan_t p;
    fft_callout_t plan_fxns;
    size_t  l2_SRAM_size_orig;
    uint32_t *temp;
    lib_memdscr_t **fft_mem_handle = fftGetMemHandle();

    /* --------------------------------------------------------------------- */
    /* intialize hardware timers                                             */
    /* ---------------------------------------------------float------------ */
    TSCL=0;TSCH=0;

    /* initalize callout functions */
    plan_fxns.memoryRequest   = NULL;
    plan_fxns.memoryRelease   = NULL;
    plan_fxns.ecpyRequest     = fft_omp_assign_edma_resources;
    plan_fxns.ecpyRelease     = fft_omp_free_edma_resources;
    
    omp_set_num_threads (OMP_MAX_NUM_CORES);
    fft_config_memory (&l2_SRAM_size_orig);
    temp = (uint32_t *)lib_smem_falloc(fft_mem_handle, 256*6*sizeof(uint32_t), 3);
#if 1
    if(temp==NULL) {
        printf("Memory allocation error!\n");
        return;
    }
#endif

    /* initialize ECPY */
    #pragma omp parallel
    {
      fft_assert( (lib_emt_init() == LIB_EMT_SUCCESS), DNUM, "lib_emt_init() return error!");
      fftEdmaState[DNUM] = FFT_EDMA_STATE_INIT;
    }
#if 1
    /* radix 2&4 testing */
    for (N = 4096; N <= MAXN; N = N*2)
    {

        memset (x_i,  0x55, sizeof (x_i) );
        memset (x_cn, 0x55, sizeof (x_cn));

        /* ---------------------------------------------------------------- */
        /* Initialize input vector temporarily.                             */
        /* ---------------------------------------------------------------- */

        for (i = 0; i < N; i++) {
          x_cn[PAD + 2*i  ] = sin (2 * 3.1415 * 50 * i / (double) N);
          x_cn[PAD + 2*i+1] = sin (2 * 3.1415 * 60 * i / (float) N);
        }
        for (j = 0; j < 2*N; j++) {
          x_i[PAD + j] = x_cn[PAD + j];
        }
        
        /* ---------------------------------------------------------------- */
        /* Force uninitialized arrays to fixed values.                      */
        /* ---------------------------------------------------------------- */
        memset (y_i,  0xA5, sizeof (y_i) );
        memset (y_cn, 0xA5, sizeof (y_cn));

        /* ---------------------------------------------------------------- */
        /* Generate twiddle factors.                                        */
        /* ---------------------------------------------------------------- */
        j = 0;
        for (i = 0; i <= 31; i++)
            if ((N & (1 << i)) == 0)
                j++;
            else
                break;

        if (j % 2 == 0) {
            rad_cn = 4;
        }
        else {
            rad_cn = 2;
        }

        tw_gen_cn (ptr_w_cn, N);

//        dft_sp (N, ptr_x_cn, ptr_y_cn, N);
        DSPF_sp_fftSPxSP (N, ptr_x_cn, ptr_w_cn, ptr_y_cn, NULL, rad_cn, 0, N);

        /* ARM part of plan */
        /* determine the rad for the first  & second dimensions */
        calculate_rad(N, &n1, &n2, &p.u.sp_1d_c2c_e.para1, &p.u.sp_1d_c2c_e.para2, &use_bs);
        p.u.sp_1d_c2c_e.n1 = n1;
        p.u.sp_1d_c2c_e.n2 = n2;
        /* not supported, plan failed */
        if (use_bs == 1) {
        }

        /* Calculate mem size for 2 dimensions for ECPY */
        /* evaluate the  1st dimension */
        s_r3 = p.u.sp_1d_c2c_e.para1.s_r3;
        s_r5 = p.u.sp_1d_c2c_e.para1.s_r5;
        N_p2 = p.u.sp_1d_c2c_e.para1.N_p2;
        calculate_mem_size(n1, s_r3, s_r5, N_p2, &twsize);
        p.u.sp_1d_c2c_e.para1.twsize = twsize;
        int_tw_size = ext_tw_size = twsize + 2*n2;

        /* evaluate the 2nd dimension */
        s_r3 = p.u.sp_1d_c2c_e.para2.s_r3;
        s_r5 = p.u.sp_1d_c2c_e.para2.s_r5;
        N_p2 = p.u.sp_1d_c2c_e.para2.N_p2;
        calculate_mem_size(n2, s_r3, s_r5, N_p2, &twsize);
        p.u.sp_1d_c2c_e.para2.twsize = twsize;
        int_tw_size  = (int_tw_size > twsize) ? int_tw_size : twsize;
        ext_tw_size += twsize;

        /* calculate local memory requirements */
#ifdef FFT_MEM_MODEL_LG
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS + int_tw_size);
#else
# ifdef FFT_MEM_MODEL_MED
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS);
# else
#   ifdef FFT_MEM_MODEL_SM
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS);
#   else
#     error "Unsupported MEM MODEL!"
#   endif
# endif
#endif
        printf("n1 = %d, n2 = %d, localsize = %d, ext_tw_size = %d\n", n1, n2, localsize, ext_tw_size);

        fft_omp_sp_plan_1d_c2c (N, FFT_ECPY, plan_fxns, &p, ptr_x_i ,ptr_y_i, ptr_w_i);

        /* ---------------------------------------------------------------- */
        /* Compute the overhead of allocating and freeing EDMA              */
        /* ---------------------------------------------------------------- */
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        t_start = _itoll(TSCH, TSCL);
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        t_stop  = _itoll(TSCH, TSCL);
        t_overhead = t_stop - t_start;

        /* ---------------------------------------------------------------------- */
        /* Set the number of cores used                                           */
        /* ---------------------------------------------------------------------- */
        p.actualCoreNum = OMP_MAX_NUM_CORES;

        /***************************************
         * ecpy fft test
         ***************************************/
        t_start = _itoll(TSCH, TSCL);
        p.local = local_work;
        fft_execute (p);
        t_stop = _itoll(TSCH, TSCL);
//        fft_destroy_plan (p);
        t_opt  = (t_stop - t_start) - t_overhead;

        /* ---------------------------------------------------------------- */
        /* compute difference and track max difference                      */  
        /* ---------------------------------------------------------------- */
        diff = 0; max_diff = 0;
        for(i=0; i<2*N; i++) {
          diff = _fabs(ptr_y_cn[i] - ptr_x_i[i]);
          if (diff > max_diff) max_diff = diff;
        }
        
        printf("fft_omp_sp_1d_c2c_ecpy\tsize= %d\n", N);
        printf("max_diff = %f", max_diff);
        printf("\tN = %d\tCycle: %d\n\n", N, t_opt);
    }
#endif
/* Setup initial starting points for MC ECPY */
while (rad3*baseN  < FFT_OMP_SP_1D_C2C_4STEP_MIN_SIZE) rad3*=3;
while (rad5*baseN  < FFT_OMP_SP_1D_C2C_4STEP_MIN_SIZE) rad5*=5;
while (rad15*baseN < FFT_OMP_SP_1D_C2C_4STEP_MIN_SIZE) rad15*=15;
    /* radix 3 & 5 testing */
    while ( rad15*baseN <= 100000)
    {
      /*generate a sequence of numbers guaranteed to be radix 3, 5 or 3 & 5 */
      if ( rad3*baseN < 50000){
        N = baseN*rad3;
        rad3*=3;
      } else if ( rad5*baseN < 50000) {
        N = baseN*rad5;
        rad5*=5;
      } else {
        N = baseN*rad15;
        rad15*=15;
      }
        while (N < 1024)
          N = N*3*2;

        memset (x_i,  0x55, sizeof (x_i) );
        memset (x_cn, 0x55, sizeof (x_cn));
        memset (w_i,  0x00, sizeof (w_i) );

        /* ---------------------------------------------------------------- */
        /* Initialize input vector temporarily.                             */
        /* ---------------------------------------------------------------- */

        for (i = 0; i < N; i++) {
          x_cn[PAD + 2*i  ] = sin (2 * 3.1415 * 50 * i / (float) N);
          x_cn[PAD + 2*i+1] = sin (2 * 3.1415 * 60 * i / (float) N);
        }
        for (j = 0; j < 2*N; j++) {
          x_i[PAD + j] = x_cn[PAD + j];
        }
        
        /* ---------------------------------------------------------------- */
        /* Force uninitialized arrays to fixed values.                      */
        /* ---------------------------------------------------------------- */
        memset (y_i,  0xA5, sizeof (y_i) );
        memset (y_cn, 0xA5, sizeof (y_cn));

        dft_sp (N, ptr_x_cn, ptr_y_cn, N);

        printf("done DFT calculation \n");

        /* ARM part of plan */
        /* determine the rad for the first  & second dimensions */
        calculate_rad(N, &n1, &n2, &p.u.sp_1d_c2c_e.para1, &p.u.sp_1d_c2c_e.para2, &use_bs);
        p.u.sp_1d_c2c_e.n1 = n1;
        p.u.sp_1d_c2c_e.n2 = n2;
        /* not supported, plan failed */
        if (use_bs == 1) {
        }

        /* Calculate mem size for 2 dimensions for ECPY */
        /* evaluate the  1st dimension */
        s_r3 = p.u.sp_1d_c2c_e.para1.s_r3;
        s_r5 = p.u.sp_1d_c2c_e.para1.s_r5;
        N_p2 = p.u.sp_1d_c2c_e.para1.N_p2;
        calculate_mem_size(n1, s_r3, s_r5, N_p2, &twsize);
        p.u.sp_1d_c2c_e.para1.twsize = twsize;
        int_tw_size = ext_tw_size = twsize + 2*n2;

        /* evaluate the 2nd dimension */
        s_r3 = p.u.sp_1d_c2c_e.para2.s_r3;
        s_r5 = p.u.sp_1d_c2c_e.para2.s_r5;
        N_p2 = p.u.sp_1d_c2c_e.para2.N_p2;
        calculate_mem_size(n2, s_r3, s_r5, N_p2, &twsize);
        p.u.sp_1d_c2c_e.para2.twsize = twsize;
        int_tw_size  = (int_tw_size > twsize) ? int_tw_size : twsize;
        ext_tw_size += twsize;

        /* calculate local memory requirements */
#ifdef FFT_MEM_MODEL_LG
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS + int_tw_size);
#else
# ifdef FFT_MEM_MODEL_MED
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS);
# else
#   ifdef FFT_MEM_MODEL_SM
        localsize = sizeof(float)*(1024*2 + 10*n1*FFT_OMP_SP_1D_C2C_NUMOFLINEBUFS);
#   else
#     error "Unsupported MEM MODEL!"
#   endif
# endif
#endif

        printf("n1 = %d, n2 = %d, localsize = %d, ext_tw_size = %d\n", n1, n2, localsize, ext_tw_size);

        fft_omp_sp_plan_1d_c2c (N, FFT_ECPY, plan_fxns, &p, ptr_x_i ,ptr_y_i, ptr_w_i);

        /* ---------------------------------------------------------------- */
        /* Compute the overhead of allocating and freeing EDMA              */
        /* ---------------------------------------------------------------- */
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        t_start = _itoll(TSCH, TSCL);
        p.edmaState = (*p.fftcout.ecpyRequest)(temp, FFT_NUM_EDMA_CH*FFT_MAX_EDMA_LINKS_3D*sizeof(float));
        (*p.fftcout.ecpyRelease)(p.edmaState);
        t_stop  = _itoll(TSCH, TSCL);
        t_overhead = t_stop - t_start;

        /* ---------------------------------------------------------------------- */
        /* Set the number of cores used                                           */
        /* ---------------------------------------------------------------------- */
        p.actualCoreNum = OMP_MAX_NUM_CORES;

        /***************************************
         * ecpy fft test
         ***************************************/
        t_start = _itoll(TSCH, TSCL);
        p.local = local_work;
        fft_execute (p);
        t_stop = _itoll(TSCH, TSCL);
//        fft_destroy_plan (p);
        t_opt  = (t_stop - t_start) - t_overhead;

        /* ---------------------------------------------------------------- */
        /* compute difference and track max difference                      */  
        /* ---------------------------------------------------------------- */
        diff = 0; max_diff = 0;
        for(i=0; i<2*N; i++) {
          diff = _fabs(ptr_y_cn[i] - ptr_x_i[i]);
          if (diff > max_diff) max_diff = diff;
        }
        
        printf("fft_omp_sp_1d_c2c_ecpy\tsize= %d\n", N);
        printf("max_diff = %f", max_diff);
        printf("\tN = %d\tCycle: %d\n\n", N, t_opt);
    }
}

/* Function for generating Specialized sequence of twiddle factors */
void tw_gen_cn (float *w, int n)
{
    int i, j, k;
    const double PI = 3.141592654;

    for (j = 1, k = 0; j <= n >> 2; j = j << 2)
    {
        for (i = 0; i < n >> 2; i += j)
        {
#ifdef _LITTLE_ENDIAN
            w[k]     = (float) sin (2 * PI * i / n);
            w[k + 1] = (float) cos (2 * PI * i / n);
            w[k + 2] = (float) sin (4 * PI * i / n);
            w[k + 3] = (float) cos (4 * PI * i / n);
            w[k + 4] = (float) sin (6 * PI * i / n);
            w[k + 5] = (float) cos (6 * PI * i / n);
#else
            w[k]     = (float)  cos (2 * PI * i / n);
            w[k + 1] = (float) -sin (2 * PI * i / n);
            w[k + 2] = (float)  cos (4 * PI * i / n);
            w[k + 3] = (float) -sin (4 * PI * i / n);
            w[k + 4] = (float)  cos (6 * PI * i / n);
            w[k + 5] = (float) -sin (6 * PI * i / n);
#endif
            k += 6;
        }
    }
}

/* Function for calculating any size DFT */
void dft_sp (int N, float x[], float y[], int N1)
{
    int k, i, index;
    const float PI = 3.14159265358979323846;
    float *p_x;
    float arg, fx_0, fx_1, fy_0, fy_1, co, si;

    for (k = 0; k < N1; k++)
    {
        p_x = x;

        fy_0 = 0;
        fy_1 = 0;
        for (i = 0; i < N; i++)
        {
            fx_0 = p_x[0];
            fx_1 = p_x[1];
            p_x += 2;
            index = (i * k) % N;
            arg = 2 * PI * index / N;
            co = cos (arg);
            si = -sin (arg);
            fy_0 += ((fx_0 * co) - (fx_1 * si));
            fy_1 += ((fx_1 * co) + (fx_0 * si));
        }
        y[2 * k] = fy_0;
        y[2 * k + 1] = fy_1;
    }
}

/* ======================================================================== */
/*  End of file:  fft_omp_sp_1d_c2c_d.c                                     */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2013 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */

  • Mike,

    Other than the FFTLIB documentation and test code, there is currently no tutorial for this package. The library was part of our high-performance compute offering, which is now in maintenance mode, so there is no active development on this package.

    Regards,

    Rahul