/*
 * main.c
 */
#include <ti/sysbios/BIOS.h>
#include <ti/sysbios/knl/Task.h>
#include "Profile.h"


/* Image dimensions: number of rows and columns of every image array in this file. */
#define height 62
#define width 62
/* NOTE(review): L is not referenced in the visible code; presumably the number
 * of intensity levels - confirm before relying on it. Because it is a macro,
 * any identifier spelled exactly "L" later in the file is replaced by 256. */
#define L 256

/*
 * smooth - scalar 3x3 weighted smoothing of a height x width image.
 *
 * The input is first copied into a scratch image that carries a one-element
 * zero border on every side, so edge pixels see zeros for the neighbours that
 * fall outside the image. For each pixel the nine neighbourhood values are
 * multiplied by the matching entries of ker, summed in an int, and the sum is
 * divided by 9 (integer division) before being written to c_output.
 *
 * c_input   source image (only read)
 * c_output  destination image (every element is overwritten)
 * ker       3x3 weights, ker[0][0] applies to the upper-left neighbour
 *
 * Always returns 0.
 */
int smooth(int c_input[height][width], int c_output[height][width], int ker[3][3])
{
	/* Zero-filled scratch image; the interior is overwritten just below. */
	int padded[height + 2][width + 2] = {{0}};
	int row, col, kr, kc, acc;

	for (row = 0; row < height; row++)
		for (col = 0; col < width; col++)
			padded[row + 1][col + 1] = c_input[row][col];

	for (row = 0; row < height; row++) {
		for (col = 0; col < width; col++) {
			acc = 0;
			/* padded[row][col] is the upper-left neighbour of pixel (row, col). */
			for (kr = 0; kr < 3; kr++)
				for (kc = 0; kc < 3; kc++)
					acc += padded[row + kr][col + kc] * ker[kr][kc];
			c_output[row][col] = acc / 9;
		}
	}

	return 0;
}

/*
 * smoothening_neon - NEON version of the 3x3 weighted smoothing filter.
 *
 * The input is copied into a zero-filled scratch image with a one-element
 * border, then each output row is produced eight pixels at a time: nine
 * 8-lane loads (one per kernel position) are multiplied by the broadcast
 * kernel weights, accumulated, and divided by 9.
 *
 * n_input   source image (only read)
 * kernal    3x3 weights, kernal[0][0] applies to the upper-left neighbour
 * n_output  destination image; exactly height x width elements are written
 *
 * Products and sums are kept in 16-bit lanes (vmulq_s16 / vmlaq_s16), so the
 * result wraps when a weighted sum does not fit in int16_t. The scalar
 * smooth() accumulates in int and does not share this limit.
 *
 * Always returns 0.
 */
int smoothening_neon( int16_t n_input[height][width],int16_t kernal[3][3],int16_t n_output[height][width])
{
	enum {
		LANES = 8,
		/*
		 * Scratch row length: the image width rounded up to a whole number
		 * of 8-lane blocks, plus the two border columns. The rounding keeps
		 * the loads of the last (partial) block of a row inside the row.
		 */
		PAD_COLS = ((width + LANES - 1) / LANES) * LANES + 2
	};

	int16_t vbuff[height + 2][PAD_COLS];
	int16_t tail[LANES];
	int16x8_t k00, k01, k02, k10, k11, k12, k20, k21, k22;
	int16x8_t acc;
	const int16_t *r0, *r1, *r2;
	int i, j, c, n;

	/* Zero everything first: border and right-hand padding must read as 0. */
	for (i = 0; i < height + 2; i++)
		for (j = 0; j < PAD_COLS; j++)
			vbuff[i][j] = 0;

	for (i = 0; i < height; i++)
		for (j = 0; j < width; j++)
			vbuff[i + 1][j + 1] = n_input[i][j];

	/* Broadcast every kernel weight across all eight lanes once. */
	k00 = vdupq_n_s16(kernal[0][0]);
	k01 = vdupq_n_s16(kernal[0][1]);
	k02 = vdupq_n_s16(kernal[0][2]);
	k10 = vdupq_n_s16(kernal[1][0]);
	k11 = vdupq_n_s16(kernal[1][1]);
	k12 = vdupq_n_s16(kernal[1][2]);
	k20 = vdupq_n_s16(kernal[2][0]);
	k21 = vdupq_n_s16(kernal[2][1]);
	k22 = vdupq_n_s16(kernal[2][2]);

	for (i = 0; i < height; i++) {
		for (j = 0; j < width; j += LANES) {
			/* Rows above, at, and below output row i, starting at the
			 * upper-left neighbour of output column j. */
			r0 = &vbuff[i][j];
			r1 = &vbuff[i + 1][j];
			r2 = &vbuff[i + 2][j];

			acc = vmulq_s16(vld1q_s16(r0), k00);
			acc = vmlaq_s16(acc, vld1q_s16(r0 + 1), k01);
			acc = vmlaq_s16(acc, vld1q_s16(r0 + 2), k02);
			acc = vmlaq_s16(acc, vld1q_s16(r1), k10);
			acc = vmlaq_s16(acc, vld1q_s16(r1 + 1), k11);
			acc = vmlaq_s16(acc, vld1q_s16(r1 + 2), k12);
			acc = vmlaq_s16(acc, vld1q_s16(r2), k20);
			acc = vmlaq_s16(acc, vld1q_s16(r2 + 1), k21);
			acc = vmlaq_s16(acc, vld1q_s16(r2 + 2), k22);

			/* Element-wise divide; relies on the compiler accepting the '/'
			 * operator on NEON vector types (GCC/Clang vector extensions). */
			acc = acc / 9;

			n = width - j;
			if (n >= LANES) {
				vst1q_s16(&n_output[i][j], acc);
			} else {
				/* Partial block at the end of the row: store through a
				 * temporary so nothing is written past column width-1. */
				vst1q_s16(tail, acc);
				for (c = 0; c < n; c++)
					n_output[i][j + c] = tail[c];
			}
		}
	}

	return 0;
}

/*
 * taskMain - runs the scalar and the NEON smoothing filter once each on the
 * same test image, bracketing each run with the Start_Profile/End_Profile
 * hooks.
 *
 * The test image holds the values 1, 2, 3, ... in row-major order and the
 * kernel is all ones. The filter outputs are not inspected here.
 */
void taskMain(void)
{
	int c_input[height][width];
	int c_output[height][width];
	/* NOTE(review): sint16 is declared outside this file; it is assumed to be
	 * the same 16-bit signed type as int16_t, which smoothening_neon() takes. */
	sint16 n_input[height][width];
	sint16 n_output[height][width];
	int ker[3][3]={
			{1,1,1},
			{1,1,1},
			{1,1,1}};
	/* Same weights in the element type the NEON routine's signature requires;
	 * passing the int kernel there would be an incompatible pointer type. */
	int16_t ker16[3][3]={
			{1,1,1},
			{1,1,1},
			{1,1,1}};
	int val=1, i,j;

	init_Profile();

	for(i=0;i<height;i++)
	{
		for(j=0;j<width;j++)
		{
			c_input[i][j]=val;
			n_input[i][j]=val;
			val=val+1;
		}
	}

	/* Both filters always return 0, so the return values are dropped. */
	Start_Profile(Smooth_c);
	(void)smooth(c_input,c_output,ker);
	End_Profile(Smooth_c);

	Start_Profile(Smooth_neon);
	(void)smoothening_neon(n_input,ker16,n_output);
	End_Profile(Smooth_neon);
}



/*
 * Program entry point: hands control to BIOS_start() and reports 0 if that
 * call ever returns.
 */
int main(void)
{
	BIOS_start();

	return 0;
}
