This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

TDA4VM: 9G and 2G MAC-to-MAC, A72 hang

Part Number: TDA4VM

hi:

  • Test environment: two network ports (9G and 2G switch) connected MAC-to-MAC.
  • A72 -> 9G -> 2G -> MCU (TDA4 R5), UDP packets, length 400, 400 Hz. After about 5 minutes the A72 hangs: it stops printing, and there is no abnormal printout. Ping between the A72 and the MCU fails. MCU serial port printing is normal.
  • With smaller packets and a higher frequency, the result is the same as in item 2.
  • Reducing the frequency and packet size lets it run longer, but it eventually hangs as well.

A72 log (no abnormal printout):

Test program:

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#define CONF_FILE_PATH    "Config.ini"

/* Per-port bookkeeping handed to the RX and TX threads of one UDP port. */
typedef struct {
    int dstport;   /* UDP port thread_func_tx sends to (host byte order) */
    int sockfd;    /* socket descriptor created and bound in main() */
    int port;      /* local UDP port the socket is bound to */
}wm_t;

/* Header placed at the start of every datagram payload. */
typedef struct {
    unsigned long   cnt;   /* sequence counter written by the sender */
    struct timespec tx;    /* CLOCK_REALTIME timestamp taken just before sending */
}header_t;

/* Per-port state; main() fills in only the first three entries. */
static wm_t s_wm_info[100];
/* NOTE(review): `{-1}` sets only element 0 to -1, the remaining nine are 0;
 * this array is not referenced anywhere in the visible code. */
static int s_socket_fd[10] = {-1};
/*
 * Store the current CLOCK_REALTIME time in *tm.
 *
 * The clock is read into a zero-initialised temporary and then copied
 * field by field; the return value of clock_gettime() is not checked.
 */
static void vti_pf_gettm(struct timespec *tm)
{
    struct timespec now = {0};

    (void)clock_gettime(CLOCK_REALTIME, &now);

    tm->tv_sec  = now.tv_sec;
    tm->tv_nsec = now.tv_nsec;
}
/*
 * Receiver thread for one UDP port.
 *
 * Reads datagrams from the socket stored in the wm_t passed as `arg`,
 * tracks gaps in the sender's sequence counter, and whenever the received
 * counter is a multiple of 100 prints the running loss count together with
 * the difference between the local receive time and the header's tx
 * timestamp (in microseconds; negative if the clocks are skewed).
 *
 * arg     pointer to the wm_t entry for this port (sockfd and port are read)
 * return  NULL if the socket descriptor is invalid; otherwise never returns
 */
static void *thread_func(void *arg){
    wm_t *p = (wm_t *)arg;
    int server_fd = p->sockfd;
    char buf[1500];
    unsigned long lost = 0;
    unsigned long last_cnt = 0;
    bool have_last = false;   /* false until the first valid header is seen */

    if (server_fd < 0){
        printf("error sockfd %d\n", server_fd);
        return NULL;
    }

    while(1)
    {
        struct sockaddr_in clent_addr;
        socklen_t len = sizeof(clent_addr);
        struct timespec rx;
        header_t hdr;
        long long delay_us;

        /* recvfrom blocks until a datagram is available */
        ssize_t count = recvfrom(server_fd, buf, sizeof(buf), 0,
                                 (struct sockaddr*)&clent_addr, &len);
        if(count == -1)
        {
            printf("recieve data fail!\n");
            continue;
        }

        /* Timestamp as close to reception as possible. */
        vti_pf_gettm(&rx);

        /* A datagram shorter than the header carries nothing to evaluate. */
        if ((size_t)count < sizeof(hdr)){
            continue;
        }

        /* Copy out of the byte buffer instead of casting it: buf has no
         * alignment guarantee for header_t. */
        memcpy(&hdr, buf, sizeof(hdr));

        if (!have_last){
            have_last = true;
            last_cnt = hdr.cnt;
        } else if (hdr.cnt > last_cnt){
            /* Every skipped sequence number counts as one lost sample. */
            lost += hdr.cnt - last_cnt - 1;
            last_cnt = hdr.cnt;
        }
        /* hdr.cnt <= last_cnt: duplicate or reordered datagram, not a loss. */

        /* Signed full-range difference: no modulo on the seconds, so there
         * is no wrap-around when the two timestamps straddle a boundary. */
        delay_us = (long long)(rx.tv_sec - hdr.tx.tv_sec) * 1000000LL
                 + (long long)(rx.tv_nsec / 1000)
                 - (long long)(hdr.tx.tv_nsec / 1000);

        if(hdr.cnt % 100 == 0)
            printf("port :%d lost %lu  delay us: %lld \n", p->port, lost, delay_us);
    }
}

/*
 * Transmitter thread for one UDP port.
 *
 * Sends a 400-byte datagram to 192.168.1.4:<dstport> every 3000 us through
 * the socket stored in the wm_t passed as `arg`. Each datagram starts with
 * a header_t holding a sequence counter and the send timestamp; the rest of
 * the payload is zero. The counter advances only after a successful send.
 *
 * arg     pointer to the wm_t entry for this port (sockfd and dstport are read)
 * return  NULL if the socket descriptor is invalid; otherwise never returns
 */
static void *thread_func_tx(void *arg){
    enum { PAYLOAD_LEN = 400, SEND_PERIOD_US = 3000 };
    wm_t *p = (wm_t *)arg;
    int client_fd = p->sockfd;
    unsigned long counter = 0;
    char buf[1500];
    struct sockaddr_in ser_addr;

    if(client_fd < 0)
    {
        printf("socket error!\n");
        return NULL;
    }

    memset(&ser_addr, 0, sizeof(ser_addr));
    ser_addr.sin_family = AF_INET;
    ser_addr.sin_addr.s_addr = inet_addr("192.168.1.4");
    ser_addr.sin_port = htons(p->dstport);  /* convert to network byte order */

    /* Only the header changes between datagrams, so zero the payload once. */
    memset(buf, 0, sizeof(buf));

    while(1){
        header_t hdr;
        ssize_t rc;

        /* Build the header in a properly aligned object, then copy it into
         * the byte buffer; zero it first so padding bytes go out as zero. */
        memset(&hdr, 0, sizeof(hdr));
        hdr.cnt = counter;
        vti_pf_gettm(&hdr.tx);
        memcpy(buf, &hdr, sizeof(hdr));

        rc = sendto(client_fd, buf, PAYLOAD_LEN, 0,
                    (struct sockaddr *)&ser_addr, sizeof(ser_addr));
        if (rc < 0){
            printf("send error %d\n", errno);
        } else {
            counter++;
        }

        /* Sleep on the error path too, so a persistent send failure does
         * not turn into a busy loop flooding the console. */
        usleep(SEND_PERIOD_US);
    }
}

/*
 * Test entry point.
 *
 * Creates three UDP sockets bound to local ports 5000..5002 on INADDR_ANY,
 * records them in s_wm_info[], then starts one receiver thread
 * (thread_func) and one transmitter thread (thread_func_tx) per port and
 * blocks forever.
 *
 * return  -1 if a socket cannot be created or bound, or a thread cannot be
 *         started; does not return otherwise
 */
int main(int argc, const char **argv){
    enum { NUM_PORTS = 3, BASE_PORT = 5000 };
    int i;

    (void)argc;
    (void)argv;

    for(i = 0; i < NUM_PORTS; i++){
        struct sockaddr_in ser_addr;
        int server_fd;

        s_wm_info[i].port = BASE_PORT + i;
        s_wm_info[i].dstport = BASE_PORT + i;

        server_fd = socket(AF_INET, SOCK_DGRAM, 0);
        if(server_fd < 0)
        {
            printf("create socket fail!\n");
            return -1;
        }

        memset(&ser_addr, 0, sizeof(ser_addr));
        ser_addr.sin_family = AF_INET;
        ser_addr.sin_addr.s_addr = htonl(INADDR_ANY);
        ser_addr.sin_port = htons(s_wm_info[i].port);

        if(bind(server_fd, (struct sockaddr*)&ser_addr, sizeof(ser_addr)) < 0)
        {
            printf("socket bind fail!\n");
            close(server_fd);   /* do not leak the descriptor on failure */
            return -1;
        }
        s_wm_info[i].sockfd = server_fd;
    }

    for (i = 0; i < NUM_PORTS; i++){
        pthread_t rx_thread;
        pthread_t tx_thread;

        printf("[udp] port:%d  socketfd:%d\n",
               s_wm_info[i].port, s_wm_info[i].sockfd);

        /* A thread that fails to start is an error: report a non-zero
         * exit status instead of 0. */
        if (pthread_create(&rx_thread, NULL, thread_func, &s_wm_info[i]) != 0){
            printf("thread create failed!\n");
            return -1;
        }

        if (pthread_create(&tx_thread, NULL, thread_func_tx, &s_wm_info[i]) != 0){
            printf("thread create failed!\n");
            return -1;
        }
    }

    /* Keep the process alive for the worker threads without polling. */
    for (;;){
        pause();
    }
}

Please give us some advice, thanks!

  • Hi,

    I will check and get back to you.

    Regards

    Vineet

  • Hi,

    I tried looking at the socket application, but there are no comments or descriptions inside. To expedite things, can you provide the network diagram and a network capture file so I can quickly reproduce the issue?

    Regards

    Vineet

  • mcu.part01.rar mcu.part02.rar

    I used tcpdump to capture the network traffic.

  • Hi,

    I tried to recreate the scenario with iperf + UDP. Here is my setup

    Test 1 (CPSW 9G) :

    PC (server) -----> EVM (client)

    EVM (client) ------> PC (server)

    Commands:

    Server : iperf -s -u

    Client : iperf -c <server IP> -u -b 4000K -l 400 -t 1800 -i 2

    Test 2 : Same as above, but with CPSW 2G

    You can see the bandwidth output on PC showing bidirectional traffic

    The bandwidth is higher than what is generated by your socket application.

    No crash seen after running for 30 min. In fact the CPU load is less than 2% with all this running.

    Regards

    Vineet

  • Please provide the kernel crash log (if any) and the output of 'top' command just before the crash.

    Regards

    Vineet

  • Hi Vineet:

    Thanks for helping us reproduce this issue. On the customer board, only A72 -> 9G -> 2G -> MCU (TDA4 R5) shows the A72 crash. With PC -> 9G -> 2G -> MCU (TDA4 R5) or PC -> 9G -> A72, the customer cannot reproduce the A72 crash.

    Attached is the A72 crash kernel log; I hope it gives you some clues about the A72 side.

    Best Regards!

    Han tao

    ARPING 192.168.1.58 from 192.168.1.12 eth0
    64 bytes from 192.168.1.60: seq=5800 ttl=128 time=2.907 ms
    Sent 1 probe(s) (0 broadcast(s))
    Received 0 response(s) (0 request(s), 0 broadcast(s))
    ARPING 192.168.1.59 from 192.168.1.12 eth0
    64 bytes from 192.168.1.60: seq=5801 ttl=128 time=2.936 ms
    Unhandled Exception in EL3.
    x30            = 0x00000000700054c0
    x0             = 0xffff000849df3000
    x1             = 0xffff800015b2fb78
    x2             = 0x0000000000000003
    x3             = 0x0000000000000001
    x4             = 0x0000000000000001
    x5             = 0x000000000000000b
    x6             = 0xffff800010864230
    x7             = 0xffff000844006e80
    x8             = 0xffff00084d8fa440
    x9             = 0xffff800015b2f870
    x10            = 0x00000000000009e0
    x11            = 0x0000000000000000
    x12            = 0x0000000000000001
    x13            = 0x0000000000000000
    x14            = 0x0000000000000000
    x15            = 0x0000000000000000
    x16            = 0x0000000000000000
    x17            = 0x0000000000000000
    x18            = 0x0000000000000000
    x19            = 0xffff800015b2fb78
    x20            = 0xffff000849df3000
    x21            = 0xffff00084c5b0680
    x22            = 0x0000000000000000
    x23            = 0x0000000000000000
    x24            = 0xffff000849df3001
    x25            = 0x0000000000000020
    x26            = 0xffff000849df3000
    x27            = 0xffff800015b2fa84
    x28            = 0x0000000000000019
    x29            = 0xffff800015b2f9a0
    scr_el3        = 0x000000000000073d
    sctlr_el3      = 0x0000000030cd183f
    cptr_el3       = 0x0000000000000000
    tcr_el3        = 0x0000000080803520
    daif           = 0x00000000000002c0
    mair_el3       = 0x00000000004404ff
    spsr_el3       = 0x0000000080000005
    elr_el3        = 0xffff800010864250
    ttbr0_el3      = 0x0000000070010b00
    esr_el3        = 0x0000000092000016
    far_el3        = 0xffff00084c5b06a0
    spsr_el1       = 0x0000000040000005
    elr_el1        = 0xffff800010086de8
    spsr_abt       = 0x0000000000000000
    spsr_und       = 0x0000000000000000
    spsr_irq       = 0x0000000000000000
    spsr_fiq       = 0x0000000000000000
    sctlr_el1      = 0x0000000034d4d91d
    actlr_el1      = 0x0000000000000000
    cpacr_el1      = 0x0000000000300000
    csselr_el1     = 0x0000000000000000
    sp_el1         = 0xffff800015b2f9a0
    esr_el1        = 0x0000000056000000
    ttbr0_el1      = 0x00000008cc710a00
    ttbr1_el1      = 0x016c000082e00000
    mair_el1       = 0x0000bbff440c0400
    amair_el1      = 0x0000000000000000
    tcr_el1        = 0x00000034f5507510
    tpidr_el1      = 0xffff80086ec30000
    tpidr_el0      = 0x0000ffff9912f8e0
    tpidrro_el0    = 0x0000000000000000
    par_el1        = 0x0000000000000000
    mpidr_el1      = 0x0000000080000001
    afsr0_el1      = 0x0000000000000000
    afsr1_el1      = 0x0000000000000000
    contextidr_el1 = 0x0000000000000000
    vbar_el1       = 0xffff800010081800
    cntp_ctl_el0   = 0x0000000000000005
    cntp_cval_el0  = 0x0000011699bb651d
    cntv_ctl_el0   = 0x0000000000000000
    cntv_cval_el0  = 0x0000000000000000
    cntkctl_el1    = 0x00000000000000e6
    sp_el0         = 0xffff00084d8f9a00
    isr_el1        = 0x0000000000000040
    dacr32_el2     = 0x0000000000000000
    ifsr32_el2     = 0x0000000000000000
    cpuectlr_el1   = 0x0000001b00000040
    cpumerrsr_el1  = 0x0000000000000000
    l2merrsr_el1   = 0x0000000000000000
    

  • Hi,

    Thanks. I will try to run iperf between 9G and 2G.

    Regards

    Vineet

  • Hi,

    I connected CPSW 9G and 2G and ran iperf in both directions simultaneously with the commands mentioned previously but did not see any issues.

    Regards

    Vineet

  • Hi Vineet:

    Thanks for your help. This problem was triggered by the MCU domain SPI driver. The customer resolved it by changing the MCU domain SPI driver.

    I will close this issue.

    Best Regards!

    Han Tao