This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

Fast 24-bit integer to float conversion

We're working on a project where we have a set of 24-bit bipolar ADC reads the MCU needs to convert to floats.

There's a SWI that I  had written that is too slow for the speed we need. While the SWI itself contains plentiful opportunities for optimization, for the purpose of this post however, I'd like to focus on speeding up the float conversion:

void fUInt24_to_float(float *dest, Uint32 *src, Uint32 length){
	register int32 iTemp;
	register float fTemp;
	register int32 twoTo23= (1<<23);//TWOto23RD;
	register int32 twoTo24= (1<<24);//TWOto24TH;

	while(length){
		iTemp=*src;
		if(iTemp & twoTo23){
			iTemp-=twoTo24;
		}
		fTemp=iTemp;
		fTemp/=(float)twoTo23;
		*dest=fTemp;
		dest++;src++;length--;
	}
}

Any suggestions like IEEE 754 ninja bit-math would be greatly appreciated.

  • Dean,

    You don't say what processor you are using, but I assume it is a C2000 since you posted in the C2000 forum.  The floating-point C2000 devices have hardware support for int to float conversion, specifically the I32TOF32 instruction.  So, if you are using a C2000 device with the FPU (e.g., F28335, F28069, F28M36x, F28M35x) you can write a simple C-callable assembly instruction to do the conversion very fast.

    Regards,

    David

  • Yes, I'm using the F28335. I've been looking over the asm code generated by the compiler. It's been enlightening and I've been able to optimize my code a bit further:

    void fUInt24_to_float(float *dest, Uint32 *src, Uint32 length){
    	register int32 iTemp;
    	register float fTemp;
    	register int32 twoTo24= 16777216; //(1<<24) (Oops! I missed the compiler warning!)
    	register float fTwoTo23=(float)8388608;//(1<<23) (Same here!)
    	register Uint32 length2=length;
    
    	while(length2){
    		iTemp=*src;
    		if(iTemp >>23){
    			iTemp-=twoTo24;
    		}
    		fTemp=iTemp;
    		fTemp/=fTwoTo23; //Oops! I hadn't caught this!
    		*dest=fTemp;
    		dest++;src++;length2--;
    	}
    }
    

  • Here's my try at implementing an uint32 to IEEE 754 float conversion without a FPU. I have not compiled this.

    float uint_to_float(uint32 i)
    {
      register int    e = 0;
      register uint32 m = i;
      union // Some compilers might complain about this.
      {
        uint32 u;
        float  f;
      } x;
    
      if (m == 0)
      {
        x.u = 0x00000000;
        return x.f;
      }
    
      /* Must create a 24 bit number where the MSB bit is 1. */
      if (m > 0x00FFFFFF)
      {
        /* Number is too big to fit into mantissa. Normalize. */
        /* Shift right until topmost 8 bits are clear. */
        do
        {
          e++;
          m >>= 1;
        }
        while(m & 0xFF000000);
      }
      else if (m < 0x00FFFFFF)
      {
        /* Number is too small to fill mantissa. Normalize. */
        /* Shift left until something appears in the 23rd bit */
        do
        {
          e--;
          m <<= 1;
        }
        while((m & 0x00800000)==0);
      }
    
      e  += 127;        /* Apply bias. */
      e <<= 23;         /* Move the exponent into position. */
      m  &= ~0x00800000;/* Clear off the leading 1. */
    
      x.u = e | m;
    
      return x.f;
      
    }
    

  • Note to self. Test code. Previous code compiled but resultd in an infinite loop. I've tested this code with VC++. I would expect that just castting to float would be faster.  On second thought, are you tryiing to convert a 24 bit integer scaled to fit between 0.0 and 1.0?

    float uitof(uint32 i)
    {
      register int    e = 0;
      register uint32 m = i;
      union // Some compilers might complain about this.
      {
        uint32 u;
        float  f;
      } x;
      if (m == 0)
      {
        x.u = 0x00000000;
        return x.f;
      }
      /* Must create a 24 bit number where the MSB bit is 1. */
      if (m > 0x00FFFFFF)
      {
        /* Number is too big to fit into mantissa. Normalize. */
        /* Shift right until topmost 8 bits are clear. */
        /* Precision is lost. */
        do
        {
          e++;
          m >>= 1;
        }
        while(m & 0xFF000000);
      }
      else
      {
        /* Number is too small to fill mantissa. Normalize. */
        /* Shift left until something appears in the 23rd bit */
        while((m & 0x00800000)==0)
        {
          e--;
          m <<= 1;
        }
      }
      e  += (23+127);   /* Moved point up from 0 to 23 and apply bias. */
      e <<= 23;         /* Move the exponent into position. */
      m  &= ~0x00800000;/* Clear off the leading 1. */
      x.u = e | m;
      return x.f;
      
    }

  • Some code that used the compiler's int to float conversion and some bit hacks to scale the resulting floating point value.

    /* 0x00000000 -> 0.000000 */
    /* 0x00FFFFFF -> 0.999999 */
    float uitofn24_cast(uint32 i)
    {
      register uint32 e;
      register uint32 m;
      union
      {
        uint32 u;
        float  f;
      } x;
    
      x.f = (float)i;      /* Use compiler to convert int to float. */
      if(i==0) return(x.f);/* Handle special case of zero. */
    
      m   = x.u;           /* Get integer representation */
      e   = x.u;           /* Get integer representation */
    
      m  &= 0x007FFFFF;    /* Extract mantissa. */
    
      e >>= 23;            /* Shift down to do math */
      e  -= 24;            /* Divide by base by decrementing exponent. */
      e <<= 23;            /* Shift back up */
    
      x.u = e | m;         /* Put exp and mantissa back together. */
    
      return x.f;
    }

  • That's exactly what I asked for, but might not be what I need. When you break it down in the assembler, it gets ugly.

    For reference, this is the Assembly output of both functions with a bit of clean up. To insert Mr. Wong's code, I'd replace lines 729 to 733 inclusive. The unknown variable is how big the floating point division function (FS$DIV) is.

    More testing is needed.

    ;***************************************************************
    ;* FNAME: _fUInt24_to_float             FR SIZE:  12           *
    ;*                                                             *
    ;* FUNCTION ENVIRONMENT                                        *
    ;*                                                             *
    ;* FUNCTION PROPERTIES                                         *
    ;*                            0 Parameter,  6 Auto,  6 SOE     *
    ;***************************************************************
    
    _fUInt24_to_float:
            MOVL      *SP++,XAR1            ; Saving clobbered registers 
            MOVL      *SP++,XAR2            ; 
            MOV32     *SP++,R4H             ; 
            ADDB      SP,#6                 ; Adjusting stack accordingly
    ;* AR6   assigned to _iTemp
    ;* R0HL  assigned to _fTemp
    ;* AR2   assigned to _twoTo24
    ;* R4HL  assigned to _fTwoTo23
    ;* AR1   assigned to _length2
    ;----------------------------------------------------------------------
    ; 720 | void fUInt24_to_float(float *dest, Uint32 *src, Uint32 length){        
    ; 721 | register int32 iTemp;                                                  
    ; 722 | register float fTemp;                                                  
    ;----------------------------------------------------------------------
            MOVL      *-SP[6],ACC           ; Length
            MOVL      *-SP[4],XAR5          ; Source pointer
            MOVL      *-SP[2],XAR4          ; Destination pointer
    ;----------------------------------------------------------------------
    ; 723 | register int32 twoTo24= 16777216;                                      
    ;----------------------------------------------------------------------
            MOV       ACC,#512 << 15        ; ACC = 2**24
            MOVL      XAR2,ACC              ; XAR2 = ACC (2**24)
    ;----------------------------------------------------------------------
    ; 724 | register float fTwoTo23=(float)8388608;                                
    ;----------------------------------------------------------------------
            MOVIZ     R4H,#19200            ; R4H = 2**23 
    ;----------------------------------------------------------------------
    ; 725 | register Uint32 length2=length;                                        
    ;----------------------------------------------------------------------
            MOVL      XAR1,*-SP[6]          ; Move length from stack to register
    ;----------------------------------------------------------------------
    ; 727 | while(length2){                                                        
    ;----------------------------------------------------------------------
            TEST      ACC                   ; Check length
            BF        $C$L7,EQ              ; Jump to end if already zero
    $C$L5:    
    ;----------------------------------------------------------------------
    ; 728 | iTemp=*src;                                                            
    ;----------------------------------------------------------------------
            MOVL      XAR4,*-SP[4]          ; XAR4 = Source Pointer
            MOVL      ACC,*+XAR4[0]         ; ACC = Source Data
            MOVL      XAR6,ACC              ; XAR6 = Source Data
    ;----------------------------------------------------------------------
    ; 729 | if(iTemp>>23){                                                         
    ;----------------------------------------------------------------------
    										; This checks the high bit
            MOV       T,#23                 ; T = 23
            ASRL      ACC,T                 ; Bitshift right data by 23
            BF        $C$L6,EQ              ; Skip if zero (data is positive)
    ;----------------------------------------------------------------------
    ; 730 | iTemp-=twoTo24;                                                        
    ;----------------------------------------------------------------------
            MOVL      ACC,XAR6              ; ACC= iTemp (Source Data)
            SUBL      ACC,XAR2              ; Convert 2's compliment
            MOVL      XAR6,ACC              ; Store back in iTemp
    ;----------------------------------------------------------------------
    ; 732 | fTemp=iTemp;                                                           
    ;----------------------------------------------------------------------
            MOV32     R0H,XAR6              ; R0H = iTemp
            NOP       ;						; Clear pipeline
            NOP       ;
            NOP       ;
            NOP       ;
            I32TOF32  R0H,R0H               ; Convert to float
    ;----------------------------------------------------------------------
    ; 733 | fTemp/=fTwoTo23;                                                       
    ;----------------------------------------------------------------------
    										; Scale down for -1.0 to 1.0
            MOV32     R1H,R4H               ; R1H = 2**23
            LCR       #FS$$DIV              ; Call Floating Point Division function
    ;----------------------------------------------------------------------
    ; 734 | *dest=fTemp;                                                           
    ;----------------------------------------------------------------------
            MOVL      XAR4,*-SP[2]          ; XAR4=Destination Pointer
            MOV32     *+XAR4[0],R0H         ; Store data into pointer
    ;----------------------------------------------------------------------
    ; 735 | dest++;src++;length2--;                                                
    ;----------------------------------------------------------------------
            MOVB      ACC,#2                ; ACC = 2
            ADDL      *-SP[2],ACC           ; Increment Destination Pointer
            ADDL      *-SP[4],ACC           ; Increment Source Pointer
            SUBB      XAR1,#1               ; Decrement length
            MOVL      ACC,XAR1              ; ACC= Length
            BF        $C$L5,NEQ             ; Loop if not Zero
    ;End of while loop
    $C$L7:    
            SUBB      SP,#6                 ; Repair stack
            MOV32     R4H,*--SP             ; Restore clobbered registers
            MOVL      XAR2,*--SP            ; 
            MOVL      XAR1,*--SP            ; 
            LRETR							; Return to sender
    
    ;************************************************************************
    		
    	.sect	".text"
    	.global	_uitofn24_cast
    ;----------------------------------------------------------------------
    ; 739 | float uitofn24_cast(Uint32 i)                                          
    ;----------------------------------------------------------------------
    
    ;***************************************************************
    ;* FNAME: _uitofn24_cast                FR SIZE:   6           *
    ;*                                                             *
    ;* FUNCTION ENVIRONMENT                                        *
    ;*                                                             *
    ;* FUNCTION PROPERTIES                                         *
    ;*                            0 Parameter,  6 Auto,  0 SOE     *
    ;***************************************************************
    
    _uitofn24_cast:
            ADDB      SP,#6                 ; [CPU_U] 
    ;* PL    assigned to _m
    ;----------------------------------------------------------------------
    ; 741 | register Uint32 e;                                                     
    ; 742 | register Uint32 m;                                                     
    ; 743 | union                                                                  
    ; 745 |   Uint32 u;                                                            
    ; 746 |   float  f;                                                            
    ; 747 | } x;                                                                   
    ;----------------------------------------------------------------------
            MOVL      *-SP[2],ACC           ; Push i onto stack
    ;----------------------------------------------------------------------
    ; 749 | x.f = (float)i;      /* Use compiler to convert int to float. */       
    ;----------------------------------------------------------------------
            UI32TOF32 R0H,*-SP[2]           ; Convert i to float
            NOP       						; Clear Pipeline
            MOV32     *-SP[4],R0H           ; x = (float)i
    ;----------------------------------------------------------------------
    ; 750 | if(i==0) return(x.f);/* Handle special case of zero. */                
    ;----------------------------------------------------------------------
            MOVL      ACC,*-SP[2]           ; ACC = i
            BF        $C$L8,EQ              ; Jump to end if zero
    ;----------------------------------------------------------------------
    ; 752 | m   = x.u;           /* Get integer representation */                  
    ;----------------------------------------------------------------------
            MOVL      P,*-SP[4]             ; P = m = x
    ;----------------------------------------------------------------------
    ; 753 | e   = x.u;           /* Get integer representation */                  
    ;----------------------------------------------------------------------
            MOVL      ACC,*-SP[4]           ; ACC = x
            MOVL      *-SP[6],ACC           ; e = ACC
    ;----------------------------------------------------------------------
    ; 755 | m  &= 0x007FFFFF;    /* Extract mantissa. */                           
    ;----------------------------------------------------------------------
            AND       PH,#127               ; AND m upper half against 0x007F
    ;----------------------------------------------------------------------
    ; 757 | e >>= 23;            /* Shift down to do math */                       
    ;----------------------------------------------------------------------
            MOV       T,#23                 ; T=23
            MOVL      ACC,*-SP[6]           ; ACC = e
            LSRL      ACC,T                 ; Shift ACC Right by 23
            MOVL      *-SP[6],ACC           ; e = ACC 
    ;----------------------------------------------------------------------
    ; 758 | e  -= 24;            /* Divide by base by decrementing exponent. */    
    ;----------------------------------------------------------------------
            MOVL      ACC,*-SP[6]           ; ACC = e 
            SUBB      ACC,#24               ; ACC (e) -=24
            MOVL      *-SP[6],ACC           ; e = ACC
    ;----------------------------------------------------------------------
    ; 759 | e <<= 23;            /* Shift back up */                               
    ;----------------------------------------------------------------------
            MOVL      ACC,*-SP[6]           ; ACC = e
            LSLL      ACC,T                 ; Shift ACC (e) Left by 23
            MOVL      *-SP[6],ACC           ; e = ACC
    ;----------------------------------------------------------------------
    ; 761 | x.u = e | m;         /* Put exp and mantissa back together. */         
    ;----------------------------------------------------------------------
            MOVZ      AR6,*-SP[6]           ; AR6H = 0, AR6=e.lo (16-bit)
            MOV       AL,PL                 ; AL = m.lo (16-bit)
            MOV       AH,*-SP[5]            ; AH = e.hi
            OR        AL,AR6                ; AL |= e.lo 
            MOV       PL,AL                 ; m.lo = AL (m.lo OR'ed with e.lo)
            MOV       AL,PH                 ; AL = m.hi 
            OR        AL,AH                 ; AL (m.hi) |= AH (e.hi) 
            MOV       PH,AL                 ; m.hi = AL (m.hi OR'ed with e.hi) 
            MOVL      *-SP[4],P             ; x = m
    ;----------------------------------------------------------------------
    ; 763 | return x.f;                                                            
    ;----------------------------------------------------------------------
            MOV32     R0H,*-SP[4]           ; R0H =  x
    $C$L8:									; Jump destination from earlier
            SUBB      SP,#6                 ; Repair stack 
            LRETR     						; Return to Sender
    

  • Guesses on my part. Never used the C28X processor. Odd that the "register" attribute has no effect. The math variables seem to be on the stack when they should in registers. I suspect your optimization is set to none. Perhaps try combining all the calculations onto one line. The compiler should keep a calculation in registers knowing you cannot set a break point in the middle of line. Trying to convince the compiler with line by line register level operations does not seem to work here. You might have to write it in assembly to get absolute best speed. Also avoids the entire union thing that is difficult to get rid of in C. Especially if you want to stay in registers.

    /* 0x00000000 -> 0.000000 */
    /* 0x00FFFFFF -> 0.999999 */
    float uitofn24_cast(uint32 i)
    {
      union
      {
        uint32 u;
        float  f;
      } x;
      x.f = (float)i;      /* Use compiler to convert int to float. */
      if(i==0) return(x.f);/* Handle special case of zero. */
    
      /* The next line does this: */
      /* 1) Breaks apart the exponent and mantissa. */
      /* 2) Shift down exponent by 23 to do math */
      /* 3) Divide by base by decrementing exponent by 24. */
      /* 4) Shift expoenent back up by 23. */
      /* 5) Put exp and mantissa back together. */ 
    
      /*       Exponent part           | Mantissa part */
      x.f = (((x.u >> 23) - 24) << 23) | (x.u & 0x007FFFFF);
    
      return x.f;
    }
    

  • Another thought. Instead of division maybe try multiplying by the reciprocal. Seems counter intuitive that floating point ops would be faster. Probably consumes more power and there may some round-off problems.

    void fUInt24_to_float(float *dest, Uint32 *src, Uint32 length){
        register int32 iTemp;
        register float fTemp;
        register int32 twoTo24= 16777216; //(1<<24) (Oops! I missed the compiler warning!)
        register float fTwoTo23=(float)(1.0/8388608.0);//(1<<23) (Same here!)
        register Uint32 length2=length;
        while(length2){
            iTemp=*src;
            if(iTemp >>23){
                iTemp-=twoTo24;
            }
            fTemp=iTemp;
            fTemp*=fTwoTo23; //Oops! I hadn't caught this!
            *dest=fTemp;
            dest++;src++;length2--;
        }
    }

  • Penultimate post here. The harbinger to the end.

    I got to playing with the bit math. You're going to love this. (Note: I tested that wacky value (i.e. 23 << 7) with several extreme and in-between values. It works in all cases.)

    float itofn24_cast(int32 i)
    {
      register union
      {
        struct{
        	Uint16 lo;
        	Uint16 hi;
        }u;
        float  f;
      } x;
    
      x.f = (float)i;      /* Use compiler to convert int to float. */
      if(i==0) return(x.f);/* Handle special case of zero. */
      x.u.hi-=0x0B80;			//Bam. That just happened.
      return x.f;
    }
    

    And in the assembly code:

    	.sect	".text"
    	.global	_itofn24_cast
    
    
    ;***************************************************************
    ;* FNAME: _itofn24_cast                 FR SIZE:   4           *
    ;*                                                             *
    ;* FUNCTION ENVIRONMENT                                        *
    ;*                                                             *
    ;* FUNCTION PROPERTIES                                         *
    ;*                            0 Parameter,  4 Auto,  0 SOE     *
    ;***************************************************************
    
    _itofn24_cast:
            ADDB      SP,#4                 ;Expand stack 
            MOVL      *-SP[2],ACC           ;Push i onto stack
            I32TOF32  R0H,*-SP[2]           ;Convert i to a float
            NOP       						;
            MOV32     *-SP[4],R0H           ;Store float in x
            MOVL      ACC,*-SP[2]           ;Copy i back from stack (for branch test)
            BF        $C$L8,EQ              ;Branch if zero
            SUB       *-SP[3],#2944         ;Subtract magic number from x.u.hi
            MOV32     R0H,*-SP[4]           ;Move new value back into float
    $C$L8:    
            SUBB      SP,#4                 ;Repair stack
            LRETR     						;Return to sender
    

  • Good catch. I did not consider that

      x.u = (((x.u >> 23) - 24) << 23) | (x.u & 0x007FFFFF);

    reduces to

      x.u -= (24<<23);

    as long as the exponent is 24 or greater. Any less and there will a integer sign change thats messed with floating point sign bit. With well behaved floating point numbers, that is ensured with the exponent bias of 127. I can see that it also works with signed integers as the floating point sign bit is left alone.

    Is your ADC putting out a 24-bit signed two-complement number? I tried your code in VC++ but I don't get  normalized range of -1.0 to 1.0 for 0x800000 to 0x7FFFFF. I get 0.0 to 2.0. Are you sign extending the 24 bits to 32 bits? I need to sign extend for your function to produce -1.0 to 1.0. An alternative to sign extending is to shift the 24-bit values up by 8 to create a 32 -bit number. Then use the magic number of 0x0F80 instead of 0x0B80.

    EDIT: Fix code.

  • Here we go. Two functions for converting arrays:

    typedef union{
    	struct{
    		Uint16 lo;
    		Uint16 hi;
    	}u16;
    	Uint32 u32;
    	int32 i32;
    	float  f;
    }Versatype32;
    
    //Bipolar version (-1.0 to ~1.0)
    void fInt24_to_float(float *dest, int32 *src, Uint32 length){
    	register Versatype32 xTemp;
    	while(length--){
    		xTemp.u32=*src;
    		//Check if Negative by right shifting 8
    		xTemp.u32<<=8; //(If it's a negative, we'll know)  (Props to Norman Wong)
    		//Convert to float
    		xTemp.f=(float)xTemp.i32;
    		//Skip divide down if zero
    		if(xTemp.u32!=0){
    			//Divide by (1<<31 or 2^31)
    			xTemp.u16.hi-=0x0F80;	//BAM! Bitmagic!
    		}
    		*dest=xTemp.f;
    		//Move to next set
    		dest++;
    		src++;
    	}	//Are we done yet?
    	//Yes!
    	return;
    }
    
    //Unipolar version (0.0 to ~1.0)
    void fUInt24_to_float(float *dest, Uint32 *src, Uint32 length){
    	register Versatype32 xTemp;
    
    	while(length--){
    		xTemp.u32=*src;
    		//Convert to float
    		xTemp.f=(float)xTemp.u32;
    		//Skip divide down if zero
    		if(xTemp.u32!=0){
    			//Divide by (1<<24 or 2^24)
    			xTemp.u16.hi-=0x0C00;	//BAM! Bitmagic! (Props to Norman Wong)
    		}
    		*dest=xTemp.f;
    		//Move to next set
    		dest++;
    		src++;
    	}	//Are we done yet?
    	//Yes!
    	return;
    }
    

    I ran it under these conditions on my F28335:

    Uint32 myUintArray1[9]={
    	0x00800000,			//-1.0
    	0x00C00000,			//-0.5
    	0x00E00000,			//-0.25
    	0x00FFFFFF,			//-0.0000001
    	0x00000000,			// 0.0
    	0x00000001,			//+0.0000001
    	0x00200000,			//+0.25
    	0x00400000,			//+0.5
    	0x007FFFFF			//+0.9999999
    };
    
    Uint32 myUintArray2[9]={
    	0x00000000,			// 0.0
    	0x00000001,			//+0.00000005
    	0x00200000,			//+0.125
    	0x00400000,			//+0.25
    	0x007FFFFF,			//+0.4999999
    	0x00800000,			//0.5
    	0x00C00000,			//0.75
    	0x00E00000,			//0.875
    	0x00FFFFFF			//0.99999999
    };
    
    float myFloatArray1[9];
    float myFloatArray2[9];
    
    
    /*
     *  ======== main ========
     */
    void main() {
        InitCodeBlahBlahBlah();
        //.
        //.
        //.
        fInt24_to_float(myFloatArray1,myUintArray1, 9);
        fUInt24_to_float(myFloatArray2,myUintArray2, 9);
        ConvenientFunctionToSetBreakPoint();
        //.
        //.
        //.
    }

    And I was able to observe the proper values. The only thing that could make these better is to add byte-swapping for when using with the MCBSP/DMA. But that's trivial so I'll leave them as they are for the benefit of future readers.

    EDIT: Did I really just misspell Bipolar?