This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

CCS/TMS320C6748: Are there any restrictions on calling linear assembly functions from C?

Part Number: TMS320C6748

Tool/software: Code Composer Studio

I wrote an IIR high-pass filter (HPF) in C, as below:

/*
 * One output sample of the second-order IIR difference equation:
 *
 *   out = (x_cur - x_prev1) - (x_prev1 - x_prev2)
 *       + y_prev1
 *       + ((y_prev1 >> 10) * factor1 - (y_prev2 >> 10) * factor2)
 *
 * The additions are grouped exactly as written above.
 */
static int iir_2nd_sample(int x_cur, int x_prev1, int x_prev2,
                          int y_prev1, int y_prev2,
                          int factor1, int factor2)
{
    int second_diff = (x_cur - x_prev1) - (x_prev1 - x_prev2);
    int feedback    = (y_prev1 >> 10) * factor1 - (y_prev2 >> 10) * factor2;

    return second_diff + y_prev1 + feedback;
}

/*
 * Second-order IIR filter over one channel of a strided sample buffer.
 *
 *   d_x, d_y   two-element histories of the previous inputs / outputs
 *              ([0] = older, [1] = newer); read on entry, rewritten on exit
 *              with the last two samples visited in x / y.
 *   x, y       input and output buffers of 32-bit samples.
 *   factor1/2  feedback coefficients applied to (y >> 10).
 *   ch_offset  index of the first sample to process.
 *   step       distance, in samples, between consecutive processed samples.
 *   nbytes     buffer size in bytes; nbytes / 4 is the exclusive index limit.
 *
 * The samples at ch_offset and ch_offset + step are always processed,
 * regardless of nbytes.
 */
void iir_2nd_filter(int *d_x, int *d_y, int *x, int *y,
                int factor1, int factor2, int ch_offset, int step, int nbytes)
{
    const int first    = ch_offset;
    const int second   = ch_offset + step;
    const int nsamples = nbytes / 4;
    int idx;

    /* First sample: both history taps come from the saved state. */
    y[first] = iir_2nd_sample(x[first], d_x[1], d_x[0],
                              d_y[1], d_y[0], factor1, factor2);

    /* Second sample: the newer tap is in the buffer, the older one in the state. */
    y[second] = iir_2nd_sample(x[second], x[first], d_x[1],
                               y[first], d_y[1], factor1, factor2);

    /* Remaining samples: both taps come from the buffers themselves. */
    idx = second + step;
    while (idx < nsamples) {
        const int prev1 = idx - step;
        const int prev2 = prev1 - step;

        y[idx] = iir_2nd_sample(x[idx], x[prev1], x[prev2],
                                y[prev1], y[prev2], factor1, factor2);
        idx += step;
    }

    /* Save the last two visited inputs / outputs for the next call. */
    d_x[0] = x[idx - 2 * step];
    d_x[1] = x[idx - step];

    d_y[0] = y[idx - 2 * step];
    d_y[1] = y[idx - step];
}

I then wrote the same function in linear assembly:

;extern void iir_2nd_Assembly(int *d_x, int *d_y, int *x, int *y,
;                			  int factor1, int factor2, int ch_offset, int step, int nbytes);
;
; Second-order IIR high-pass filter (TMS320C6000 linear assembly).
;
;   d_x, d_y    two-word histories ([0] = older, [1] = newer); read on entry,
;               rewritten on exit with the last two inputs / outputs processed
;   x, y        input / output buffers of 32-bit samples
;   factor1/2   feedback coefficients applied to (y >> 10)
;   ch_offset   index of the first sample to process (in samples)
;   step        distance between processed samples (in samples)
;   nbytes      buffer size in bytes; (nbytes / 4) is the exclusive index limit
;
; Each output is
;   y[n] = (x[n] - x[n-1]) - (x[n-1] - x[n-2]) + y[n-1]
;        + ((y[n-1] >> 10) * factor1 - (y[n-2] >> 10) * factor2)
;
; The samples at ch_offset and ch_offset + step are always processed; further
; samples are processed while their index is below nbytes / 4.  The previous
; version counted (nbytes / 4 - 1) loop passes regardless of step and
; ch_offset, which runs past the buffers unless ch_offset = 0 and step = 1.
;
; NOTE(review): the loop now ends on a compare instead of a down-counter;
; the optimizer may schedule it differently -- re-benchmark after this change.
    .global		iir_2nd_Assembly

iir_2nd_Assembly:	.cproc d_x, d_y, x, y, factor1, factor2, ch_offset, step, nbytes  ; start of linear assembly procedure
; Symbolic registers.
; Relative to the sample being produced: xn2 = current input,
; xn1 / xn = previous two inputs, yn1 / yn = previous two outputs.
	.reg	xn,xn1,xn2,yn,yn1
	.reg	sum,tmp1,tmp2
	.reg	pos,limit,more

	SHL		ch_offset, 2, ch_offset	;ch_offset = ch_offset*4 (sample index -> byte offset)
	SHL		step, 2, step			;step = step*4 (sample stride -> byte stride)

	SHR		nbytes, 2, limit		;limit = nbytes/4 (number of samples)
	SHL		limit, 2, limit			;limit = byte offset just past the last whole sample
	ADD		ch_offset, step, pos	;pos = byte offset of the sample the first loop pass handles

	LDW		*d_x++, xn	;xn  = d_x[0] (older input)
	LDW		*d_x, xn1	;xn1 = d_x[1] (newer input); d_x is left pointing at d_x[1]
	LDW		*d_y++, yn	;yn  = d_y[0] (older output)
	LDW		*d_y, yn1	;yn1 = d_y[1] (newer output); d_y is left pointing at d_y[1]

; ---- first sample (at ch_offset), always processed ----
	ADD		x, ch_offset, x	;x = &x[ch_offset]
	LDW		*x, xn2		;xn2 = current input

	SUB		xn2, xn1, tmp1		;tmp1 = xn2 - xn1
	SUB		xn1, xn, tmp2		;tmp2 = xn1 - xn
	SUB		tmp1, tmp2, sum		;sum = (xn2-xn1) - (xn1-xn)
	ADD		sum, yn1, sum		;sum = (xn2-xn1) - (xn1-xn) + yn1

	SHR		yn1, 10, tmp1		;tmp1 = yn1>>10
	MPY32	tmp1, factor1, tmp1	;tmp1 = (yn1>>10)*factor1

	SHR		yn, 10, tmp2		;tmp2 = yn>>10
	MPY32	tmp2, factor2, tmp2	;tmp2 = (yn>>10)*factor2
	MV		yn1,  yn			;shift output history: yn = previous yn1

	SUB		tmp1, tmp2, tmp1	;tmp1 = (yn1>>10)*factor1 - (yn>>10)*factor2
	ADD		sum, tmp1, yn1		;yn1 = new output

	ADD		y, ch_offset, y		;y = &y[ch_offset]
	STW		yn1, *y				;store the new output

	MV		xn1,  xn	;shift input history
	MV		xn2,  xn1

; ---- remaining samples; the first pass (at ch_offset + step) is unconditional ----
LOOP3:	.trip	1		;loop body executes at least once
	ADD		x, step, x			;advance to the next input sample
	LDW		*x, xn2		;xn2 = current input

	SUB		xn2, xn1, tmp1		;tmp1 = xn2 - xn1
	SUB		xn1, xn, tmp2		;tmp2 = xn1 - xn
	SUB		tmp1, tmp2, sum		;sum = (xn2-xn1) - (xn1-xn)
	ADD		sum, yn1, sum		;sum = (xn2-xn1) - (xn1-xn) + yn1

	SHR		yn1, 10, tmp1		;tmp1 = yn1>>10
	MPY32	tmp1, factor1, tmp1	;tmp1 = (yn1>>10)*factor1

	SHR		yn, 10, tmp2		;tmp2 = yn>>10
	MPY32	tmp2, factor2, tmp2	;tmp2 = (yn>>10)*factor2
	MV		yn1,  yn			;shift output history: yn = previous yn1

	SUB		tmp1, tmp2, tmp1	;tmp1 = (yn1>>10)*factor1 - (yn>>10)*factor2
	ADD		sum, tmp1, yn1		;yn1 = new output

	ADD		y, step, y			;advance to the next output sample
	STW		yn1, *y				;store the new output

	MV		xn1,  xn	;shift input history
	MV		xn2,  xn1

	ADD		pos, step, pos		;pos = byte offset of the next candidate sample
	CMPLT	pos, limit, more	;more = (pos < limit), signed like the C loop test
 [more]	B		LOOP3				;continue while the next sample is inside the buffer

; ---- save the filter state for the next call (d_x / d_y point at element [1]) ----
	STW		xn1, *d_x--			;d_x[1] = last input processed
	STW		xn,  *d_x			;d_x[0] = input before that
	STW		yn1, *d_y--			;d_y[1] = last output produced
	STW		yn,  *d_y			;d_y[0] = output before that

    .endproc											; end of linear assembly procedure

The two functions are called from a C file as below:

#define Tn  1024

int IIR_In[Tn+4]; int IIR_Out[Tn+4]; int delay_x[2] = {0,0}; int delay_y[2] = {0,0};
extern void iir_2nd_Assembly(int *d_x, int *d_y, int *x, int *y,
int factor1, int factor2, int ch_offset, int step, int nbytes);

int iir_lowcut_test(void) { unsigned int bytes_read = 0, bytes_writed = 0; int res = FR_OK; res = f_open(&rec_file_ch1,"FileIn.wav", FA_READ); res |= f_lseek(&rec_file_ch1, 44); //pionte to the first data if (FR_OK != res) { return res; } res |= rec_wav_file_create("FileOut.wav", &rec_file_ch2, 48000, 32, 1); if (FR_OK != res) { return res; } while(!f_eof(&rec_file_ch1)) { res = f_read(&rec_file_ch1, IIR_In, Tn*4, &bytes_read); #if 1 iir_2nd_filter(delay_x, delay_y, IIR_In, IIR_Out, 967, 969, 0, 1, Tn*4); #else iir_2nd_Assembly(delay_x, delay_y, IIR_In, IIR_Out, 967, 969, 0, 1, Tn*4); #endif res |= f_write(&rec_file_ch2, IIR_Out, Tn*4, &bytes_writed); if (FR_OK != res) { break; } } f_close(&rec_file_ch1); rec_wav_file_close(&rec_file_ch2); return res; }

Note: FileIn.wav is an audio file with one channel, 48 kHz / 32-bit.

967 and 969, are the IIR HPF parameters for Fs = 48kHz, Fc = 300Hz.

In the function iir_lowcut_test(), calling iir_2nd_filter() and calling iir_2nd_Assembly() produce the same result.

BUT, an error occurs when I call iir_2nd_Assembly() in a SWI thread, while calling iir_2nd_filter() there works fine.

Are there any restrictions on calling linear assembly functions from C?

  • Hi, Shide,

    This is a generic C programming question and not related to TI software. There is a lot of discussion about it on the internet. Please see if you can find answers there. Here are just a few links that I got from googling the subject:

    https://stackoverflow.com/questions/13901261/calling-assembly-function-from-c

    https://www.devdungeon.com/content/how-mix-c-and-assembly

    http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.100748_0606_00_en/lmi1470147220260.html

    Rex

  • Shide Lu said:
    IS there any restrictions on calling linear assembly functions in C?

    No.

    Shide Lu said:
    In the function iir_lowcut_test(),Calling for iir_2nd_filter() and iir_2nd_Assembly() have the same result.

    That's a good start.  Do you have benchmarks for the two cases?  Was your linear assembly version any faster?  Have you tried using the "restrict" keyword in conjunction with your C code?  For example, let's take this variable from your function declaration:

    int *d_x

    If none of the other pointers can point to anywhere in that same array, then you can declare it as:

    int * restrict d_x

    That will allow the compiler to do a much better job with the optimization if it knows the relationships among the pointers.  I suspect that this one change might make the difference where your C code actually performs better than your linear assembly and renders the entire question irrelevant.

    Shide Lu said:
    BUT,  there is ERR occur when I call iir_2nd_Assembly() in a SWI thread, while call  iir_2nd_filter() is all right.

    What was the error?  Was it an exception?  What was the address of the exception?

    Also, as an experiment, can you try disabling interrupts prior to calling the function and then restoring them after?

    What compiler options did you use to compile your serial assembly?  (It looks strange to talk about compiling assembly code, but it is actually correct with respect to linear assembly since it gets "scheduled" by the compiler.)

  • Thanks a lot for your reply, Brad.

    1.Linear Assembly version is about 20% faster than C version.

    Speed is important to my program.I want to optimize other C functions with Linear Assembly as well.

    2.I'm not sure the ERR is an exception or not, because my hardware does not support debug at present.

    But the calling of iir_2nd_Assembly() cause the Error of SD card writing.

    3. I use the default options for the assembler (no options added).

    Optimization Options as below:

    I worry that disabling / restoring interrupts will cause other issues, because there are other higher priority transactions in the program.

    I will try another way, Create a static library containing the IIR algorithm. If any progress is made, I will post it here.

    If you have a better solution, please let me know.

    BR.

    Shide Lu

  • There is a similar post on the E2E Chinese forum.

    The member QINGTIAN HU wrote an FFT program in linear assembly. It works well when debugged on its own, but when it was added to his program it did not work well.

    He thinks that system interrupts have an impact on linear assembly.

  • Another question: is it necessary to maintain the stack, interrupt flags, etc. in linear assembly functions?

  • Why do you have compiler optimization (--opt_level) set to "off"?  I suggest setting it to -o2.

    In my opinion/experience, the "linear assembly" code is not the best way to improve your code.  My recommendation is:

    1. Start by simply using -o2 to see if the performance is sufficient.
    2. If performance is still not good enough, you can use "restrict" keyword as appropriate as well as pragmas like MUST_ITERATE to give the compiler additional info to help it better optimize the code.
    3. Finally if specific instructions are needed (this is especially the case for "packed math" SIMD operations) then you can use compiler intrinsics to directly utilize a specific instruction from C.

    If you've not read it, I highly recommend the following app note:

    Hand-Tuning Loops and Control Code on the TMS320C6000
    http://www.ti.com/lit/spra666

    Shide Lu said:
    I worry that disabling / restoring interrupts will cause other issues, because there are other higher priority transactions in the program.

    Please try it just so we can understand if interrupts are part of the issue, or if it is something else entirely.  Though to my earlier point, I recommend trying to improve your performance without having to use linear assembly.

    Best regards,

    Brad

  • Shide Lu said:

    Another question,is that need to maintain the stack, interrupt flags, etc. in linear assembly functions?

    Stack management is handled for you in the context of .cproc/.endproc segment of code.  I wouldn't expect anything interrupt-related to be done in the code, i.e. that would be the responsibility of the ISR that pre-empts the code.

  • Brad Griffis said:

    Why do you have compiler optimization (--opt_level) set to "off"?  I suggest setting to to -o2.

    In my opinion/experience, the "linear assembly" code is not the best way to improve your code.  My recommendation is:

    1. Start by simply using -o2 to see if the performance is sufficient.

    I set compiler optimization to -O2, and the speed of iir_2nd_filter() increased by more than 60%, as did that of iir_2nd_Assembly(). The performance is sufficient.

    Maybe I should give up linear assembly.

    Thanks again.

    Best regards.

    Shide