

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <string.h>
#include <math.h>

#include <CL/cl_ext.h>
#include <CL/cl.h>

#include "cblas.h"

/* Local boolean emulation: bool is a plain int here, not C99 _Bool. */
#define bool    int
#define true    1
#define false   0

/*
 * Test configuration. The values marked "Set here!!!" are the knobs meant to
 * be edited before building.
 */

/* Scalars of C = alpha * A * B + beta * C; main() prints them and passes them
 * to __cblas_sgemm(). */
float alpha                         = 1.0f;
float beta                          = 1.0f;
/* Storage order; in the visible code main() only uses it for its log line and
 * MatmulHost_ATLAS() receives the order as a parameter instead. */
enum CBLAS_ORDER order              = CblasRowMajor;//CblasColMajor;//Set here!!!
/* Transpose selector; presumably N = not transposed and T = transposed for
 * the A/B pair -- TODO confirm, no visible code interprets these values. */
typedef enum cblas_tp
{
    TP_NN = 0,
    TP_NT,
    TP_TN,
    TP_TT,
} CBLAS_TP;

/* NOTE(review): not referenced in the visible part of this file, and OPA/OPB
 * below are not derived from it -- keep them in sync by hand. */
CBLAS_TP transpose = TP_NN;			//Set here!!!
/* Transpose flags that MatmulHost_ATLAS() forwards to cblas_sgemm(). */
enum CBLAS_TRANSPOSE OPA = CblasNoTrans;
enum CBLAS_TRANSPOSE OPB = CblasNoTrans;

/* NOTE(review): check and calc_check are not referenced in the visible code. */
bool check                          =true;//false	//Set here!!!
/* When true main() fills A, B and C from rand(); otherwise it uses a fixed
 * index-based pattern. */
bool random_in                      = false;//Set here!!!
bool calc_check                     = false;

/* When true main() loads A and B from /tmp/A.txt and /tmp/B.txt and zeroes C. */
bool read_txt                         =false;//true	//Set here!!!

/* Problem size: A is M x K, B is K x N, C is M x N. */
int M                               = 16;		//Set here!!!
int N                               = 200704;//80000;;		//Set here!!!
int K                               = 27;		//Set here!!!

/*
 * Routines defined outside the visible part of this file.
 */

/* Buffer allocator used by dsp_make_array(), which treats a NULL result as
 * failure and copies n floats into the returned pointer. cl_buf presumably
 * receives the backing OpenCL handle -- TODO confirm against the definition. */
float *__dsp_malloc(int n, cl_mem_flags type, void** cl_buf);
/* Counterpart of __dsp_malloc(); dsp_free() calls it with the pointer and the
 * cl_buf handle. */
void __dsp_free(float *x_dsp, void* cl_buf);
/* SGEMM entry point under test. main() calls it with TA = TB = 0 and the
 * leading dimensions lda = K, ldb = N, ldc = N. */
void __cblas_sgemm(int TA, int TB, int M, int N, int K, float ALPHA, 
        float* A, int lda, 
        float* B, int ldb,
        const float BETA,
        float* C, int ldc);
/* Called by main() once at shutdown and once at startup respectively; the
 * meaning of dsp_init_opencl()'s return value is not visible here. */
void dsp_release_opencl();
int dsp_init_opencl();


/*
 * Read whitespace-separated floats from a text file into a new buffer.
 *
 * file_name       path of the text file to read.
 * data            out: receives a heap buffer owned by the caller (release it
 *                 with free()), or NULL when nothing could be loaded.
 * read_data_size  maximum number of values to load. A value that is not
 *                 positive, or that exceeds the number of tokens in the file,
 *                 loads one value per token.
 * data_size       out: number of whitespace-separated tokens found in the
 *                 file (0 on failure). It may exceed the number of values
 *                 actually loaded when read_data_size caps the buffer.
 *
 * The buffer is zero-initialised. Parsing stops at the first token that is
 * not a number, so every element from that point on stays 0.
 */
void read_float(char* file_name, float** data, int read_data_size, int* data_size)
{
    FILE *file;
    float *cursor;
    float value = 0;
    int token_count = 0;
    int capacity;

    /* Give the caller a well-defined result on every exit path. */
    *data = NULL;
    *data_size = 0;

    printf("\n Reading %s ... \n", file_name);

    /* Read-only access is enough; "r+" would fail on read-only files. */
    file = fopen(file_name, "r");
    if (!file)
    {
        printf("Cannot open %s\n", file_name);
        return;
    }

    /*
     * Count the tokens. "%*s" skips one token without storing it, so a token
     * of any length is safe; fscanf() returns 0 per token and EOF once only
     * whitespace (or nothing) is left.
     */
    while (fscanf(file, "%*s") != EOF)
        token_count++;

    *data_size = token_count;
    printf("\ndata num is %d\n", *data_size);

    if (token_count == 0)
        goto End;

    /* Honour the caller's cap only when it is usable. */
    if (read_data_size > 0 && read_data_size <= token_count)
        capacity = read_data_size;
    else
        capacity = token_count;

    cursor = *data = calloc((size_t)capacity, sizeof **data);
    if (!(*data))
        goto End;

    rewind(file);

    /*
     * Stop on EOF and on a matching failure alike: a token that is not a
     * number is never consumed by "%f", so retrying would only repeat the
     * previous value.
     */
    while (capacity > 0 && fscanf(file, "%f", &value) == 1)
    {
        *cursor++ = value;
        capacity--;
    }

End:
    fclose(file);
}

/*
 * Obtain a buffer for n floats from __dsp_malloc() and, when x is non-NULL,
 * copy n floats from x into it.
 *
 * type and cl_buf are forwarded to __dsp_malloc() unchanged. Returns the new
 * buffer, or NULL (after printing a message) when the allocation failed.
 */
float *dsp_make_array(float *x, int n, cl_mem_flags type, void** cl_buf)
{
    const size_t byte_count = sizeof(float) * n;
#if 1//ndef USE_LINALG
    float *buffer = __dsp_malloc(n, type, cl_buf);
#else
    float *buffer = (float *) __malloc_ddr(byte_count);
#endif

    if (buffer == NULL)
    {
        printf("OpenCL malloc failed\n");
        return NULL;
    }

    /* Seeding is optional: a NULL source leaves the buffer as allocated. */
    if (x != NULL)
        memcpy(buffer, x, byte_count);

    return buffer;
}

/*
 * Release a buffer obtained from dsp_make_array().
 * A NULL x_dsp is accepted and makes the call a no-op.
 */
void dsp_free(float *x_dsp, void* cl_buf)
{
    if (!x_dsp)
        return;

#if 1//ndef USE_LINALG
    __dsp_free(x_dsp, cl_buf);
#else
    __free_ddr(x_dsp);
#endif
}

/*
 * Fill dst[0..count) with the floats stored in a text file.
 *
 * read_float() hands back a buffer holding at most count values; when the
 * file provides fewer, the remaining elements of dst are set to 0 instead of
 * being read past the end of that buffer.
 * Returns 0 on success, -1 when nothing could be loaded.
 */
static int load_matrix(char *file_name, float *dst, int count)
{
    float *data = NULL;
    int data_size = 0;
    int available;

    read_float(file_name, &data, count, &data_size);
    if (!data)
    {
        printf("Failed to load %s\n", file_name);
        return -1;
    }

    available = data_size < count ? data_size : count;
    memcpy(dst, data, (size_t)available * sizeof *dst);
    memset(dst + available, 0, (size_t)(count - available) * sizeof *dst);

    free(data);
    return 0;
}

/*
 * Test driver: builds A (M x K), B (K x N) and C (M x N) on the host, either
 * from /tmp/A.txt and /tmp/B.txt or from generated data, and runs
 * __cblas_sgemm() on them once.
 *
 * The host buffers are handed to __cblas_sgemm() directly; dsp_make_array()
 * and dsp_free() are available for staging the operands in DSP buffers.
 *
 * Returns 0 on success and EXIT_FAILURE when an allocation or an input file
 * could not be obtained.
 */
int main(void)
{
    int VALRANGE = 17;
    const int a_count = M * K;
    const int b_count = K * N;
    const int c_count = M * N;
    int status = EXIT_FAILURE;
    float *A;
    float *B;
    float *C;

    printf("first line for the project!!!!\n");

    /* The result is deliberately ignored, as before: its success convention
     * is not visible from this file -- TODO check it once it is known. */
    (void)dsp_init_opencl();

    A = malloc((size_t)a_count * sizeof *A);
    B = malloc((size_t)b_count * sizeof *B);
    C = malloc((size_t)c_count * sizeof *C);
    if (!A || !B || !C)
    {
        printf("Host malloc failed\n");
        goto cleanup;
    }

    if (read_txt)
    {
        if (load_matrix("/tmp/A.txt", A, a_count) != 0)
            goto cleanup;
        if (load_matrix("/tmp/B.txt", B, b_count) != 0)
            goto cleanup;
        memset(C, 0, (size_t)c_count * sizeof *C);
    }
    else
    {
        printf("Generating Input Data ...");
        /* Either random values in [1, VALRANGE] or a fixed, index-derived
         * pattern so that runs are reproducible. */
        for (int i = 0; i < a_count; ++i)
            A[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 7);
        for (int i = 0; i < b_count; ++i)
            B[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 11);
        for (int i = 0; i < c_count; ++i)
            C[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 5);
    }

    printf("C[%d,%d] = alpha * A[%d,%d] * B[%d,%d] + beta * C[%d,%d], %s\n",
           M, N, M, K, K, N, M, N,
           (order == CblasRowMajor ? "use row-major storage"
                                   : "use col-major storage"));
    printf("alpha=%f, beta=%f\n\n", alpha, beta);

    /* Leading dimensions K, N, N match row-major, non-transposed operands. */
    __cblas_sgemm(0, 0, M, N, K, alpha, A, K, B, N, beta, C, N);

    status = 0;

cleanup:
    free(A);
    free(B);
    free(C);

    dsp_release_opencl();

    return status;
}

void MatmulHost_ATLAS(enum CBLAS_ORDER mem_order,
                const float*A, const float *B, float *C, int M, int N, int K,
                float alpha, float beta)
{
#ifndef _TI_RTOS
    if (mem_order == CblasRowMajor)
    {
        cblas_sgemm(mem_order, OPA, OPB,
                    M, N, K, alpha,
                    A, /* lda = */ K,
                    B, /* ldb = */ N,
                    beta,
                    C, /* ldc = */ N
                   );
    } else {
        cblas_sgemm(mem_order, OPA, OPB,
                    M, N, K, alpha,
                    A, /* lda = */ M,
                    B, /* ldb = */ K,
                    beta,
                    C, /* ldc = */ M
                   );
    }
#endif
}




