This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

_pmuGetEventCount_() does not work using TMS570LC4357/HDK, GCC 5.4 and HALCoGen -> TI-Code vs. GNU -Code?

Other Parts Discussed in Thread: TMS570LC4357, HALCOGEN, RM46L852

Hi,

I use the Hercules HDK /Cortex R5F /  TMS570LC4357.

As i have Code generated for the TI compiler and for the GNU toolchain, set up two projects for the same task - performance measurement.

The code for the ti Compiler worked, but the same C-Code returned only zeros as measured time.

after comparing HL_sys_pmu.asm (TI) and HL_sys_pmu.s (GNU gas), the Assembler Code differs.

i.e.  the function _pmuGetEventCount_ misses the "lsr   r0,  r0, #1 " part...

See Code created by HALCoGen:

/*-------------------------------------------------------------------------------*/
@ Get Event Counter Count Value

    .weak     _pmuGetEventCount_
    .type     _pmuGetEventCount_, %function
    
_pmuGetEventCount_:

        mcr   p15, #0, r0, c9, c12, #5 @ select counter
        mrc   p15, #0, r0, c9, c13, #2 @ read event counter
        bx    lr

After Modification:

/*-------------------------------------------------------------------------------*/
@ Get Event Counter Count Value

    .weak     _pmuGetEventCount_
    .type     _pmuGetEventCount_, %function
    
_pmuGetEventCount_:

lsr   r0,  r0, #1 @ missed - TODO: Bug Report mcr p15, #0, r0, c9, c12, #5 @ select counter mrc p15, #0, r0, c9, c13, #2 @ read event counter bx lr

a complete diff is here (sorry - Code was cleaned up before by unusable trailing spaces and tabs...):

--- ../../BenchR5F-gcc/source/HL_sys_pmu.s	2016-08-03 16:36:00.000000000 +0200
+++ HL_sys_pmu.s	2016-08-03 17:02:08.000000000 +0200
@@ -2,8 +2,8 @@
  HL_sys_pmu.s
 
  Copyright (C) 2009-2016 Texas Instruments Incorporated - www.ti.com 
- 
- 
+
+
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions 
   are met:
@@ -44,11 +44,12 @@
 @ Initialize Pmu
 @ Note: It will reset all counters
 
-    .weak _pmuInit_
-    .type _pmuInit_, %function
+    .weak     _pmuInit_
+    .type     _pmuInit_, %function
 
 _pmuInit_:
 
+        stmfd sp!, {r0}
         @ set control register
         mrc   p15, #0, r0, c9, c12, #0 
         orr   r0,  r0, #(1 << 4) + 6 + 1
@@ -72,6 +73,7 @@
         mcr   p15, #0, r0, c9, c12, #5 @ select counter
         mov   r0,  #0x11
         mcr   p15, #0, r0, c9, c13, #1 @ select event
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
@@ -80,12 +82,14 @@
 
     .weak     _pmuEnableCountersGlobal_
     .type     _pmuEnableCountersGlobal_, %function
-    
+
 _pmuEnableCountersGlobal_:
 
+        stmfd sp!, {r0}
         mrc   p15, #0, r0, c9, c12, #0 
         orr   r0,  r0, #7
         mcr   p15, #0, r0, c9, c12, #0
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
@@ -93,12 +97,14 @@
 
     .weak     _pmuDisableCountersGlobal_
     .type     _pmuDisableCountersGlobal_, %function
-    
+
 _pmuDisableCountersGlobal_:
 
+        stmfd sp!, {r0}
         mrc   p15, #0, r0, c9, c12, #0 
         bic   r0,  r0, #1
         mcr   p15, #0, r0, c9, c12, #0
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
@@ -106,12 +112,14 @@
 
     .weak     _pmuResetCycleCounter_
     .type     _pmuResetCycleCounter_, %function
-    
+
 _pmuResetCycleCounter_:
 
+        stmfd sp!, {r0}
         mrc   p15, #0, r0, c9, c12, #0 
         orr   r0,  r0, #4
         mcr   p15, #0, r0, c9, c12, #0
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
@@ -119,25 +127,29 @@
 
     .weak     _pmuResetEventCounters_
     .type     _pmuResetEventCounters_, %function
-    
+
 _pmuResetEventCounters_:
 
+        stmfd sp!, {r0}
         mrc   p15, #0, r0, c9, c12, #0 
         orr   r0,  r0, #2
         mcr   p15, #0, r0, c9, c12, #0
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
-@ Reset Cycle Counter abd Event Counters [0..2]
+@ Reset Cycle Counter and Event Counters [0..2]
 
-    .weak     _pmuResetCounters_ 
+    .weak     _pmuResetCounters_
     .type     _pmuResetCounters_, %function
-    
+
 _pmuResetCounters_:
 
+        stmfd sp!, {r0}
         mrc   p15, #0, r0, c9, c12, #0 
         orr   r0,  r0, #6
         mcr   p15, #0, r0, c9, c12, #0
+        ldmfd sp!, {r0}
         bx    lr
 
 /*-------------------------------------------------------------------------------*/
@@ -145,7 +157,7 @@
 
     .weak     _pmuStartCounters_
     .type     _pmuStartCounters_, %function
-    
+
 _pmuStartCounters_:
 
         mcr   p15, #0, r0, c9, c12, #1
@@ -156,7 +168,7 @@
 
     .weak     _pmuStopCounters_
     .type     _pmuStopCounters_, %function
-    
+
 _pmuStopCounters_:
 
         mcr   p15, #0, r0, c9, c12, #2
@@ -166,8 +178,8 @@
 @ Set Count event
 
     .weak     _pmuSetCountEvent_
-    .type     _pmuSetCountEvent_, %function    
-    
+    .type     _pmuSetCountEvent_, %function
+
 _pmuSetCountEvent_:
  
         mcr   p15, #0, r0, c9, c12, #5 @ select counter
@@ -179,7 +191,7 @@
 
     .weak     _pmuGetCycleCount_
     .type     _pmuGetCycleCount_, %function
-    
+
 _pmuGetCycleCount_:
 
         mrc   p15, #0, r0, c9, c13, #0
@@ -190,9 +202,10 @@
 
     .weak     _pmuGetEventCount_
     .type     _pmuGetEventCount_, %function
-    
+
 _pmuGetEventCount_:
 
+        lsr   r0,  r0, #1
         mcr   p15, #0, r0, c9, c12, #5 @ select counter
         mrc   p15, #0, r0, c9, c13, #2 @ read event counter
         bx    lr
@@ -202,7 +215,7 @@
 
     .weak     _pmuGetOverflow_
     .type     _pmuGetOverflow_, %function
-    
+
 _pmuGetOverflow_:
 
         mrc   p15, #0, r0, c9, c12, #3 @ read overflow

  • Hi Bernd,

      Could you tell me if the PMU is working after you added  lsr   r0,  r0, #1? I try to generate both the TI compiler and GNU compiler code and I do see that the lsr   r0,  r0, #1 is missing in the GNU code.

  • Bernd Singer95 said:
    i.e.  the function _pmuGetEventCount_ misses the "lsr   r0,  r0, #1 " part...

    In the thread Errors in setting the Performance Counter Selection Register in HALCoGen 04.03.00? I suggested removing the initial lsr instruction which left-shifts the counter parameter by one bit before writing to the Performance Counter Selection Register, since the instruction meant the _pmuSetCountEvent_  and _pmuGetEventCount_  functions didn't operate as expected.

    However, the thread RM48 HDK pmu is the report from another user that removing the initial lsr instruction was an incorrect modification to the HALCoGen code.

    Bernd Singer95 said:
    As i have Code generated for the TI compiler and for the GNU toolchain, set up two projects for the same task - performance measurement.

    The code for the ti Compiler worked, but the same C-Code returned only zeros as measured time.

    Can you show your code which calls the PMU functions?

    I will try and find my original test code.

  • hi,
    after removing the described line in _pmuGetEventCount_ only zeros are returned.

    here is the calling Code compiled by gcc 5.4:

    52 int main(void) {
    main():
    0000965c: E92D4010 push {r4, lr}
    55 _pmuInit_();
    00009660: EB000313 bl #0xa2b4
    56 _pmuEnableCountersGlobal_();
    00009664: EB000327 bl #0xa308
    57 _pmuSetCountEvent_(pmuCOUNTER0, PMU_CYCLE_COUNT); // PMU_CYCLE_COUNT > PMU_INST_ARCH_EXECUTED
    00009668: E3A01011 mov r1, #0x11
    0000966c: E3A00001 mov r0, #1
    00009670: EB000346 bl #0xa390
    59 hetInit();
    00009674: EBFFEE8B bl hetInit
    61 spiInit();
    00009678: EBFFFBC7 bl spiInit
    initLeds():
    0000967c: E30B38FF movw r3, #0xb8ff
    00009680: E34F3FF7 movt r3, #0xfff7
    00009684: E3A02021 mov r2, #0x21
    00009688: E34A2A06 movt r2, #0xaa06
    0000968c: E50320B3 str r2, [r3, #-0xb3]
    64 initUdp();
    EB00076A bl initUdp
    70 _pmuResetCounters_();
    00009694: EB000333 bl #0xa368
    71 _pmuStartCounters_(pmuCOUNTER0);
    00009698: E3A00001 mov r0, #1
    0000969c: EB000337 bl #0xa380
    73 uint32_t tStart = _pmuGetEventCount_(pmuCOUNTER0);
    000096a0: E3A00001 mov r0, #1
    000096a4: EB00033E bl #0xa3a4
    000096a8: E1A04000 mov r4, r0
    74 uint32_t tCorrection = _pmuGetEventCount_(pmuCOUNTER0);
    000096ac: E3A00001 mov r0, #1
    000096b0: EB00033B bl #0xa3a4
    75 if ((tCorrection == 0 ) && (tStart == 0)) {
    000096b4: E1943000 orrs r3, r4, r0
    000096b8: 1A000003 bne #0x96cc
    76 printUdp( "\n Error using the performance Counter - Zero Values detected.");
    000096bc: E3030AEC movw r0, #0x3aec

    plain C Code:
    int main(void) {
    _pmuInit_();
    _pmuEnableCountersGlobal_();
    _pmuSetCountEvent_(pmuCOUNTER0, PMU_CYCLE_COUNT); // PMU_CYCLE_COUNT > PMU_INST_ARCH_EXECUTED
    /** - Configure NHET */
    hetInit();
    /** - Configure SPI */
    spiInit();
    initLeds( );
    /** Initialize minimal UDP */
    initUdp();

    /* check presets for Benchmark... */
    _pmuResetCounters_();
    _pmuStartCounters_(pmuCOUNTER0);
    // get time needed to call _pmuGetEventCount_() since counters were started, expect it is the same time as between two calls
    uint32_t tStart = _pmuGetEventCount_(pmuCOUNTER0);
    uint32_t tCorrection = _pmuGetEventCount_(pmuCOUNTER0);
    if ((tCorrection == 0 ) && (tStart == 0)) {
    printUdp( "\n Error using the performance Counter - Zero Values detected.");
    return -1;
    } else {
    runBench();
    }
    return 0;
    }

    Best Regards,
    Bernd
  • Hi,
    i read the Posts you mentioned.
    now, i see - the User was correct. The function was not implemented as the Interface description said.

    But the modification was not done completely.
    => HL_sys_pmu.h was not adapted to the change.
    Please adapt the Values for
    pmuCOUNTER0,
    pmuCOUNTER1,
    pmuCOUNTER2

    Best Regards,
    Bernd
  • Hi Bernd, Chester,

    I'm looking at the below code from your output. The pmuCOUNTER0 has #define pmuCOUNTER0 0x00000001U. This is why a #1 is moved to r0 register to begin with.

    73 uint32_t tStart = _pmuGetEventCount_(pmuCOUNTER0);
    000096a0: E3A00001 mov r0, #1
    000096a4: EB00033E bl #0xa3a4
    000096a8: E1A04000 mov r4, r0

    Once you are in the  _pmuGetEventCount_() the r0 will go through lsr   r0,  r0, #1 which is a logical shift right by one bit which will make r0 with zero again.

     According to the Cortex-R TRM excerpt below to select counter 0 you need b0000 in the r0. So having lsr   r0,  r0, #1 is correct.

    Can you please elaborate the below comments. I'm not clear what needs to adapt in the HL_sys_pmu.h.

    => HL_sys_pmu.h was not adapted to the change.
    Please adapt the Values for 
    pmuCOUNTER0,
    pmuCOUNTER1, 
    pmuCOUNTER2

  • according to HL_sys_pmu.h, the function is described as
    /** @fn uint32 _pmuGetEventCount_(uint32 counter)
    * @brief Returns current event counter value
    * @param[in] counter - Counter select 0..2
    *
    * @return event counter count.
    */

    according to HL_sys_pmu.h, the values (pmuCOUNTER0, ...) are defined as:

    /** @def pmuCOUNTER0
    * @brief pmu event counter 0
    *
    * Alias for pmu event counter 0
    */
    #define pmuCOUNTER0 0x00000001U

    /** @def pmuCOUNTER1
    * @brief pmu event counter 1
    *
    * Alias for pmu event counter 1
    */
    #define pmuCOUNTER1 0x00000002U

    /** @def pmuCOUNTER2
    * @brief pmu event counter 2
    *
    * Alias for pmu event counter 2
    */
    #define pmuCOUNTER2 0x00000004U

    Therefore, if defining the Interface to the pmu functions as values in Range of [0..2], the values above seem to be wrong or not applicable for these functions ...

    On the other Hand - if they not intended to be used for this, i dislike to uses Magic Numbers...
    Best Regards,
    Bernd
  • Hi Bernd,

     The three counters are defined as 0x00000001U, 0x00000002U, and 0x00000004U. As you have already found that if the lsr   r0,  r0, #1 is inserted in the _pmuGetEventCount_() then these three counter values after right shift will become 0, 1 and 2 which are the correct values to select the respective counter registers according to the below definition.

  • Bernd Singer95 said:
    Therefore, if defining the Interface to the pmu functions as values in Range of [0..2], the values above seem to be wrong or not applicable for these functions ...

    On the other Hand - if they not intended to be used for this, i dislike to uses Magic Numbers...
    Best Regards,

    Looking at the sys_pmu.h in a RM46L852 Cortex-R4F project created by HALCoGen 04.05.02 shows that pmuCOUNTER1, pmuCOUNTER2 and pmuCYCLE_COUNTER macros are bit-masks intended to be used in calls to the _pmuStartCounters_ and _pmuStopCounters_ functions:

    /** @def pmuCOUNTER0
    *   @brief pmu event counter 0 mask
    *
    *   Alias for pmu event counter 0 mask
    *
    *	@Note: Use this macro as a parameter 'counters' in APIs _pmuStartCounters_ and _pmuStopCounters_
    */
    #define pmuCOUNTER0 0x00000001U
    
    /** @def pmuCOUNTER1
    *   @brief pmu event counter 1 mask
    *
    *   Alias for pmu event counter 1 mask
    *
    *	@Note: Use this macro as a parameter 'counters' in APIs _pmuStartCounters_ and _pmuStopCounters_
    */
    #define pmuCOUNTER1 0x00000002U
    
    /** @def pmuCOUNTER2
    *   @brief pmu event counter 2 mask
    *
    *   Alias for pmu event counter 2 mask
    *
    *	@Note: Use this macro as a parameter 'counters' in APIs _pmuStartCounters_ and _pmuStopCounters_
    */
    #define pmuCOUNTER2 0x00000004U
    
    /** @def pmuCYCLE_COUNTER
    *   @brief pmu cycle counter mask
    *
    *   Alias for pmu event counter mask
    *
    *	@Note: Use this macro as a parameter 'counters' in APIs _pmuStartCounters_ and _pmuStopCounters_
    */
    #define pmuCYCLE_COUNTER 0x80000000U
    

    Whereas _pmuGetEventCount_ is documented as taking a counter number in the range 0..2:

    /** @fn uint32 _pmuGetEventCount_(uint32 counter)
    *   @brief Returns current event counter value
    *   @param[in] counter - Counter select 0..2
    *
    *   @return event counter count.
    */
    uint32 _pmuGetEventCount_(uint32 counter);
    

    I think the confusion is that in HALCoGen 04.05.02 the HL_sys_pmu.h which gets used for TMS570LC4357 Cortex-R5F has less informative comments for the pmuCOUNTER0, pmuCOUNTER1, pmuCOUNTER2 and pmuCYCLE_COUNTER macros, where the comments don't indicate the macros are bit-masks only for use with the _pmuStartCounters_ and _pmuStopCounters_ functions:

    /** @def pmuCOUNTER0
    *   @brief pmu event counter 0
    *
    *   Alias for pmu event counter 0
    */
    #define pmuCOUNTER0 0x00000001U
    
    /** @def pmuCOUNTER1
    *   @brief pmu event counter 1
    *
    *   Alias for pmu event counter 1
    */
    #define pmuCOUNTER1 0x00000002U
    
    /** @def pmuCOUNTER2
    *   @brief pmu event counter 2
    *
    *   Alias for pmu event counter 2
    */
    #define pmuCOUNTER2 0x00000004U
    
    /** @def pmuCYCLE_COUNTER
    *   @brief pmu cycle counter
    *
    *   Alias for pmu event counter
    */
    #define pmuCYCLE_COUNTER 0x80000000U
    

  • Bernd Singer95 said:
    after comparing HL_sys_pmu.asm (TI) and HL_sys_pmu.s (GNU gas), the Assembler Code differs.

    i.e.  the function _pmuGetEventCount_ misses the "lsr   r0,  r0, #1 " part...

    Which version of HALCoGen are you using?

    Looking in HALCoGen 04.05.02 (currently the "latest" version), shows that the _pmuGetEventCount_ is the following files are all the same, in that none of them have the  "lsr   r0,  r0, #1 " part:

    - The sys_pmu.asm used for a Cortex-R4F device (RM46L852) and the TI compiler

    - The sys_pmu.s used for a Cortex-R4F device (RM46L852) and the GCC compiler

    - The HL_sys_pmu.asm used for a Cortex-R5F device (TMS570LC4357) and the TI compiler

    - The HL_sys_pmu.s used for a Cortex-R5F device (TMS570LC4357) and the GCC compiler

  • Chester,
    thanks for looking into this. I just generated code using halcogen 04.05.02 and I do see that the lsr r0, r0, #1 is missing. In order for this to work, either the macro for the pmuCOUNTER0/1/2 need to change or the lsr r0, r0, #1 be inserted. I will open a CQ ticket to our HalCoGen team.
  • Charles Tsai said:
    In order for this to work, either the macro for the pmuCOUNTER0/1/2 need to change or the lsr r0, r0, #1 be inserted.

    If the macros for pmuCOUNTER0/1/2 are changed then the macros wouldn't be usable as inputs for the _pmuStartCounters_ and _pmuStopCounters_ functions.

    I think we need to understand that:

    a) The _pmuStartCounters_ and _pmuStopCounters_ functions take a bit-mask which can specify one or more PMU counters to operate on.

    b) The _pmuGetCycleCount_ function takes a counter select which specifies a single PMU counter to read.

    If the lsr r0, r0, #1 is inserted into the  _pmuGetCycleCount_ function then now realize the pmuCOUNTER0/1/2 macros can be used as the _pmuGetCycleCount_ counter parameters, but that is only valid due to having three PMU counters in the current Cortex-R4 and Cortex-R5 devices.

    E.g. if a future device had 4 PMU counters then you would need a new macro like #define pmuCOUNTER3 0x00000008U for use an input to the _pmuStartCounters_ and _pmuStopCounters_ functions. However pmuCOUNTER3 left-shifted by one would be the value 4, which wouldn't map to the correct counter selection of 3.

  • Hi Chester,

     _pmuGetEventCount function takes input as 0, 1, or 2.  I know it is little confusing since mask is not used.. But we have to do this way to maintain compatibility.  There is already a ticket raised on this few years back, after some internal discussion we decided to keep it this way. 

    I took this to the developer's notice already.  Please do let us know if you have further questions. Thanks.

  • Hi,

    The functions _pmuGetEventCount_ and _pmuSetCountEvent_ take in the input parameter counter select like 0,1 or 2.

    /** @fn uint32 _pmuGetEventCount_(uint32 counter)
    *   @brief Returns current event counter value
    *   @param[in] counter - Counter select 0..2
    *
    *   @return event counter count.
    */
    uint32 _pmuGetEventCount_(uint32 counter);

    The macro is not expected to be given as the parameter for these functions. The macros are given as parameter where the the functions expects a counter mask as in the function _pmuStartCounters_. It may be difficult to move it the other way since that might affect the compatibility.

    Thanks and Regards,

    Veena

  • Thanks!
    This is exactly what i thougt. I never knew the other file...

    Adding the word "mask" or the word "number" would help.

    For programming safety software, names should be taken carefully to reduce the risk of misunderstanding.

    In this case it is a Mask, but is named pmu_COUNTER0 ...
    Therefore, as a beginner for TMS570 CPUs i was not aware of the bug in my Colleagues Code (using the Macro as Counter Number..

    for me it is solved, but you may take this into account on next Versions of HALCoGen..


    Best Regards,

    Bernd

  • You are correct. all i did was using a new HALCoGen (4.05.02) with old Code using the pmuCOUNTER0 as definition for Counter Number 0.
    (This Code was created by a prevous Colleague of mine... - just as a Prototype to see, how TS570 on HDK works...)

    Due to the missing "mask" in this definition, i did not find this bug.

    Secondly, _pmuGetEventCount_() was changed to fulfill it's interface. after that, _pmuGetEventCount_(pmuCOUNTER0) did not work
    (not surprising...)

    all in all it was a mixture of changes on Interfaces, misunderstandable Names, me new to TMS570, HDK, HALCoGen...

    At last i suggest to prevent Users from errors by renaming "pmuCOUNTER0" to see from the name, that this is a mask.