This thread has been locked.
If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.
Hi:
I need to port this code to the TI 5.2.3 C compiler. The inline assembler code was written for another compiler, and I have no previous experience with assembler. Its purpose is to speed up some operations in my C code. Can someone point me to an example of converting these simple assembler instructions to TI compiler syntax?
//-----------------------------------------------------------------------------------------
// Count the leading zero bits of x by issuing the ARM `clz` instruction
// through GCC-style extended inline assembly.
// Returns whatever the instruction writes to its destination register.
static __inline int CLZ(int x)
{
int numZeros;
// %0 = numZeros (write-only register output), %1 = x (register input).
// "cc" declares the condition flags as clobbered.
__asm__ ("clz %0, %1" : "=r" (numZeros) : "r" (x) : "cc");
return numZeros;
}
//-----------------------------------------------------------------------------------------
// Overlays a Word64 value with two 32-bit halves so that each half can be
// passed to the inline assembly as a separate register operand.
// Word64 is declared elsewhere (presumably a 64-bit signed integer -- confirm).
typedef union _U64 {
Word64 w64; // the value viewed as a whole
struct {
// ARM ADS = little endian
unsigned int lo32; // first member: the low word under the little-endian layout noted above
signed int hi32; // second member: the high word, kept signed
} r;
} U64;
// 64-bit multiply-accumulate using the ARM `smlal` instruction via GCC-style
// extended inline assembly: the signed product of x and y is added into sum64.
// Returns the updated 64-bit sum.
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
U64 u;
// Split the accumulator into lo32/hi32 so each half can be a register operand.
u.w64 = sum64;
// %0/%1 = lo32/hi32 ("+&r": read-write, early-clobber registers),
// %2/%3 = x/y (register inputs); "cc" declares the condition flags as clobbered.
__asm__ volatile ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc");
return u.w64;
}
//--------------------------------------------------------------------------------------------
// Signed 32x32 -> 64-bit multiply using the ARM `smull` instruction via
// GCC-style extended inline assembly; only the high result word is returned.
static __inline__ int MULSHIFT32(int x, int y)
{
// Receives the low result word (operand %0); it is never read afterwards.
int zlow;
// %0 = zlow (early-clobber output), %1 = y (output, receives the high word),
// %2 = x (input), %3 = y as input, tied to the same register as %1 by the "1" constraint.
// "cc" declares the condition flags as clobbered.
__asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y) : "cc");
return y;
}
//-----------------------------------------------------------------------------------------
Jose.
It probably makes more sense to change these functions to use intrinsics. Intrinsics act like a function call, but are (usually) implemented in one assembly instruction. See the section titled ARM Instruction Intrinsics in the ARM compiler manual.
Thanks and regards,
-George
Hi George, thanks for your tip.
I made some progress using intrinsics: I got two of them working, but still have one left:
//----------------------------------------------------------------
// Multiply x by y with the TI ARM compiler's `_smmul` intrinsic and return
// its result (the SMMUL instruction yields the most significant 32 bits of
// the signed 64-bit product).
static __inline int MULSHIFT32(int x, int y)
{
    return _smmul(x, y);
}
//----------------------------------------------------------------
// Count the leading zero bits of x using the TI ARM compiler's `__clz`
// intrinsic and return that count.
static __inline int CLZ(int x)
{
    int leadingZeros = __clz(x);
    return leadingZeros;
}
//----------------------------------------------------------------
// 64-bit multiply-accumulate: returns sum64 + (long long)x * y.
//
// Replaces the GCC inline-asm `smlal` version, for which no direct intrinsic
// was found. Written in plain C so the compiler can choose the instruction
// itself.
//
//   sum64  running 64-bit accumulator
//   x, y   signed 32-bit factors
static __inline long long MADD64(long long sum64, int x, int y)
{
    // Widen both factors before multiplying so the product is formed in
    // 64 bits rather than overflowing a 32-bit int.
    return sum64 + ((long long)x * (long long)y);
}
Thanks, anyway.
Jose.
Agree that the TI ARM compiler v5.2.2 doesn't have an intrinsic for smlal.
Escalator said:
static __inline long long MADD64(long long sum64, int x, int y)
{
// __asm__ volatile ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc");//exact equivalent intrinsics for this not found by now
//I need one that makes: ///sum64 = (sum64 + ((long long)x * y));
return sum64;
}
Compiling for a Cortex-A8 (AM3352) I found the following function causes the compiler to generate a smlal instruction:
// 64-bit multiply-accumulate in plain C: adds the signed 64-bit product of
// x and y to sum64 and returns the result. Both factors are widened to
// long long before the multiply.
static __inline__ long long MADD64(long long sum64, int x, int y)
{
    return sum64
         + ((long long) x * (long long) y);
}
Perhaps the compiler documentation should be updated to mention how to get a smlal instruction generated, or the compiler changed to add a specific intrinsic for smlal.