This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

AM3358: PRU CGT and packed attribute

Part Number: AM3358


I have a header file containing a structure definition used by the PRU and shared with ARM. The PRU does not have alignment constraints. As a result, I would expect the compiler to generate a packed struct by default. The ARM core, however, does have alignment constraints, so I give ARM a hint by adding __attribute__((packed)) to my struct definition.

Doing this results in the PRU pulling data byte by byte to reassemble individual values. I understand why this can happen, and on certain architectures it absolutely makes sense. However, given that the PRU couldn't care less about alignment, shouldn't the compiler just ignore the directive?

I feel like a shared header file is a fairly common scenario. If someone isn't paying attention they are going to take a pretty significant performance hit on memory access. Granted I can #ifdef __KERNEL__ my way out of this issue quite easily. I am just curious if this is really the desired compiler behavior when specifying __attribute__((packed))?

  • Hello Andrew,

    I will check with the compiler team. What is being generated when the PRU is pulling data byte by byte?

    Regards,
    Nick
  • Hello Andrew,

    Let me know what the output looks like when the PRU is pulling data byte by byte, and I'll pass it on to the compiler team.

    Regards,
    Nick
  • Sorry that it took so long for me to respond. I have been out of the office and attempting to catch back up. Here is a simple example that demonstrates the behavior. Define some arbitrary data structures.

    struct __attribute__ ((__packed__)) packed_struct {
    	uint8_t a8;
    	uint16_t b16;
    	uint32_t c32;
    };
    
    struct unpacked_struct {
    	uint8_t a8;
    	uint16_t b16;
    	uint32_t c32;
    };
    
    typedef struct {
    	volatile struct packed_struct packed;
    	volatile struct unpacked_struct unpacked;
    } packing_test;
    
    volatile __far packing_test test __attribute__((cregister("PRU_ARM_SMEM", near), peripheral));

    Then do the following in your test program:

    	test.packed.c32 = test.unpacked.c32;
    	test.unpacked.c32 = test.packed.c32;
    	test.packed.b16 = test.unpacked.b16;
    	test.unpacked.b16 = test.packed.b16;

    The compiler generates the following:

    	.dwpsn	file "test.c",line 378,column 2,is_stmt,isa 0
            LDI32     r0, ||test||+3        ; [ALU_PRU] |378| test
            LBCO      &r1, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+10), 4 ; [ALU_PRU] |378| test
            LSR       r14, r1, 0x08         ; [ALU_PRU] |378| 
            SBBO      &r1.b0, r0, 0, 1      ; [ALU_PRU] |378| 
            SBBO      &r14.b0, r0, 1, 1     ; [ALU_PRU] |378| 
            LSR       r14, r1, 0x10         ; [ALU_PRU] |378| 
            LSR       r1, r1, 0x18          ; [ALU_PRU] |378| 
            SBBO      &r14.b0, r0, 2, 1     ; [ALU_PRU] |378| 
            SBBO      &r1.b0, r0, 3, 1      ; [ALU_PRU] |378| 
    	.dwpsn	file "test.c",line 379,column 2,is_stmt,isa 0
            LBBO      &r1.b0, r0, 1, 1      ; [ALU_PRU] |379| 
            LSL       r1, r1.b0, 0x08       ; [ALU_PRU] |379| 
            LBBO      &r14.b0, r0, 0, 1     ; [ALU_PRU] |379| 
            OR        r14, r14.b0, r1       ; [ALU_PRU] |379| 
            LBBO      &r1.b0, r0, 2, 1      ; [ALU_PRU] |379| 
            LSL       r1, r1.b0, 0x10       ; [ALU_PRU] |379| 
            OR        r1, r14, r1           ; [ALU_PRU] |379| 
            LBBO      &r0.b0, r0, 3, 1      ; [ALU_PRU] |379| 
            LSL       r0, r0.b0, 0x18       ; [ALU_PRU] |379| 
            OR        r0, r1, r0            ; [ALU_PRU] |379| 
            SBCO      &r0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+10), 4 ; [ALU_PRU] |379| test
    	.dwpsn	file "test.c",line 380,column 2,is_stmt,isa 0
            LDI32     r0, ||test||+1        ; [ALU_PRU] |380| test
            LBCO      &r14.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+8), 2 ; [ALU_PRU] |380| test
            MOV       r1, r14.w0            ; [ALU_PRU] |380| 
            SBBO      &r1.b0, r0, 0, 1      ; [ALU_PRU] |380| 
            LSR       r1, r14.w0, 0x08      ; [ALU_PRU] |380| 
            SBBO      &r1.b0, r0, 1, 1      ; [ALU_PRU] |380| 
    	.dwpsn	file "test.c",line 381,column 2,is_stmt,isa 0
            LBBO      &r1.b0, r0, 1, 1      ; [ALU_PRU] |381| 
            LSL       r1, r1.b0, 0x08       ; [ALU_PRU] |381| 
            LBBO      &r0.b0, r0, 0, 1      ; [ALU_PRU] |381| 
            OR        r0, r0.b0, r1         ; [ALU_PRU] |381| 
            SBCO      &r0.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+8), 2 ; [ALU_PRU] |381| test

    If you drop the packed attribute from the packed_struct definition, you get the following:

    	.dwpsn	file "test.c",line 378,column 2,is_stmt,isa 0
            LBCO      &r0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+10), 4 ; [ALU_PRU] |378| test
            SBCO      &r0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+3), 4 ; [ALU_PRU] |378| test
    	.dwpsn	file "test.c",line 379,column 2,is_stmt,isa 0
            LBCO      &r0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+3), 4 ; [ALU_PRU] |379| test
            SBCO      &r0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+10), 4 ; [ALU_PRU] |379| test
    	.dwpsn	file "test.c",line 380,column 2,is_stmt,isa 0
            LBCO      &r0.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+8), 2 ; [ALU_PRU] |380| test
            SBCO      &r0.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+1), 2 ; [ALU_PRU] |380| test
    	.dwpsn	file "test.c",line 381,column 2,is_stmt,isa 0
            LBCO      &r0.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+1), 2 ; [ALU_PRU] |381| test
            SBCO      &r0.w0, __PRU_CREG_PRU_ARM_SMEM, $CSBREL(||test||+8), 2 ; [ALU_PRU] |381| test

  • Just to clarify, the shared memory is located in PRU0 Data RAM.
  • Hello Andrew,

    Great, thank you for bringing this up. The compiler team says this is a gap in the compiler: you are right, the PRU assembly for a packed and an unpacked structure should not differ that much. I have submitted a bug report, so it should get addressed sometime down the road. For now, your #ifdef workaround sounds reasonable.

    Regards,
    Nick