This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

Compiler/F28M35H52C: Convert inline extended asm to standalone asm

Part Number: F28M35H52C

Tool/software: TI C/C++ Compiler

Hi,

 I would like to ask for help to convert inline assembler code that uses Extended Asm to a standalone assembler file, as Extended Asm isn’t supported from the ARM compiler (see spnu151i, page 116).

It is a part from a big number multiply library, which provide a part of the code as C and Extended Asm. With the C code everything runs fine but a bit slow. So I want replace the this C code with the assembler code that I need to convert since Extended Asm isn’t supported

Used CPU is F28M35H52C

This is the function from the library :

void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b )					
{
    mbedtls_mpi_uint c = 0, t = 0;


    for( ; i >= 16; i -= 16 )
    {
        MULADDC_INIT
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE

        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
	MULADDC_STOP
    }

    for( ; i >= 8; i -= 8 )
    {
        MULADDC_INIT
        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE

        MULADDC_CORE   MULADDC_CORE
        MULADDC_CORE   MULADDC_CORE
        MULADDC_STOP
    }

    for( ; i > 0; i-- )
    {
        MULADDC_INIT
        MULADDC_CORE
        MULADDC_STOP
    }

    t++;

    do {
        *d += c; c = ( *d < c ); d++;
    }
    while( c != 0 );
}

( mbedtls_mpi_uint is a uint32_t )

This is the Extended Asm code:

#define MULADDC_INIT                                    \
		asm (																								\
            "ldr    r0, %3                      \n\t"   \
            "ldr    r1, %4                      \n\t"   \
            "ldr    r2, %5                      \n\t"   \
            "ldr    r3, %6                      \n\t"


#define MULADDC_CORE                                    \
            "ldr    r4, [r0], #4                \n\t"   \
            "mov    r5, #0                      \n\t"   \
            "ldr    r6, [r1]                    \n\t"   \
            "umlal  r2, r5, r3, r4              \n\t"   \
            "adds   r7, r6, r2                  \n\t"   \
            "adc    r2, r5, #0                  \n\t"   \
            "str    r7, [r1], #4                \n\t"


#define MULADDC_STOP                                    \
            "str    r2, %0                      \n\t"   \
            "str    r1, %1                      \n\t"   \
            "str    r0, %2                      \n\t"   \
         : "=m" (c),  "=m" (d), "=m" (s)        \
         : "m" (s), "m" (d), "m" (c), "m" (b)   \
         : "r0", "r1", "r2", "r3", "r4", "r5",  \
           "r6", "r7", "cc"                     \
         );

This is the C code:

#define MULADDC_INIT                    \
{                                       \
    mbedtls_mpi_uint s0, s1, b0, b1;              \
    mbedtls_mpi_uint r0, r1, rx, ry;              \
    b0 = ( b << biH ) >> biH;           \
    b1 = ( b >> biH );

#define MULADDC_CORE                    \
    s0 = ( *s << biH ) >> biH;          \
    s1 = ( *s >> biH ); s++;            \
    rx = s0 * b1; r0 = s0 * b0;         \
    ry = s1 * b0; r1 = s1 * b1;         \
    r1 += ( rx >> biH );                \
    r1 += ( ry >> biH );                \
    rx <<= biH; ry <<= biH;             \
    r0 += rx; r1 += (r0 < rx);          \
    r0 += ry; r1 += (r0 < ry);          \
    r0 +=  c; r1 += (r0 <  c);          \
    r0 += *d; r1 += (r0 < *d);          \
    c = r1; *(d++) = r0;

#define MULADDC_STOP                    \
}

The first step is to try to replace the last part. So I defined the needed variables as global to use them in the assembler code.

 Changed last part of the C code:

    s_glo = s;
    d_glo = d;
    b_glo = b;
    c_glo = c;
    for( ; i > 0; i-- )
    {
    	test_funktion();
    }
    b = b_glo;
    c = c_glo;

Changed assembler code:

.text



				.global s_glo
s_glo_add			.uword  s_glo
				.global d_glo

				.global b_glo
b_glo_add			.uword  b_glo
				.global c_glo
c_glo_add			.uword  c_glo

				.global test_funktion


test_funktion:		PUSH	{R4-R11}
					ldr	 R0, s_glo_add
					ldr	 R1, d_glo_add
					ldr	 R2, c_glo_add
					ldr	 R3, b_glo_add
					; Start MULADDC_CORE
					ldr    r4, [r0], #4
					mov    r5, #0
					ldr    r6, [r1]
					umlal  r2, r5, r3, r4
					adds   r7, r6, r2
					adc    r2, r5, #0
					str    r7, [r1], #4
					; End MULADDC_CORE
					ldr	 R8, c_glo_add
					str R2, [R8]
					ldr	 R8, d_glo_add
					str R1, [R8]
					ldr	 R8, s_glo_add
					str R0, [R8]
					POP	{R4-R11}
					BX LR

d_glo_add			.uword  d_glo

Unfortunately the code don’t work as intended (Result from C Code and assembler code is completely different)  and as I don’t have any experience with assembler, I don’t have any idea left what is wrong.

 

Hopefully someone can help me with this.