This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

AM625: UART with DMA takes longer in continuous sending

Part Number: AM625

Tool/software:  SDK-10.00.07.04

When configured with DMA, the serial port of AM62x exhibits lower efficiency in continuous data transmission compared to when not using DMA.

The detailed steps are as follows:

1.Configuration in DTS

uart4==>/dev/ttyS6

uart5==>/dev/ttyS7

/*
 * main_uart4 (appears as /dev/ttyS6 per the mapping listed above).
 * The dmas/dma-names properties are commented out, so this node requests
 * no DMA channels; it serves as the non-DMA port in the comparison.
 */
&main_uart4 {
	bootph-all;
	status = "okay";
	pinctrl-names = "default";
	pinctrl-0 = <&main_uart4_pins_default>;
	/* DMA intentionally left disabled on this port: */
	//dmas = <&main_pktdma 0x4404 0>, <&main_pktdma 0xc404 0>;
	//dma-names = "rx", "tx";
};

/*
 * main_uart5 (appears as /dev/ttyS7 per the mapping listed above).
 * This node requests an "rx" and a "tx" DMA channel from main_pktdma; it
 * serves as the DMA-enabled port in the comparison.
 */
&main_uart5 {
	bootph-all;
	status = "okay";
	pinctrl-names = "default";
	pinctrl-0 = <&main_uart5_pins_default>;
	/* First specifier is named "rx", second "tx" (see dma-names below). */
	dmas = <&main_pktdma 0x4405 0>, <&main_pktdma 0xc405 0>;
    dma-names = "rx", "tx";
};

2.Test code

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>        //exit()
#include <string.h>
#include <termios.h>
#include <time.h>
#include <unistd.h>

#include <sys/stat.h>
#include <sys/types.h>

#define SEND_PKG_SIZE 10000

#define BUF_SIZE 70

int baudSetOpt(int fd,int nSpeed,int nBits,char nEvent,int nStop);

/*
 * UART transmit stress test.
 *
 * Usage: ./main <tty-device> <baudrate>      e.g. ./main /dev/ttyS7 460800
 *
 * Opens the tty in non-blocking mode, configures it as 8 data bits, no
 * parity, 1 stop bit at the requested baud rate, then writes SEND_PKG_SIZE
 * packets of BUF_SIZE bytes each.
 *
 * When write() returns -1 because the driver cannot take data right now
 * (EAGAIN/EWOULDBLOCK) or the call was interrupted (EINTR), the failure is
 * counted, the program sleeps 10 ms and retries. Any other write error aborts
 * the test instead of being retried forever.
 *
 * On completion the number of failed writes and the elapsed time in whole
 * seconds are printed.
 *
 * Returns 0 on success, -1 on bad arguments or on any open, configuration or
 * unrecoverable write error.
 */
int main(int argc, char *argv[])
{
	int uart_fd;
	int baudrate;
	int sendSize = BUF_SIZE;
	int count;
	int i;
	ssize_t ret;
	long long err_sum = 0;		/* number of write() calls that returned -1 */
	time_t start_time;
	time_t end_time;
	unsigned char wBuf[BUF_SIZE];

	if(argc != 3)
	{
		printf("param error !\n");
		printf("please input: ./main /dev/ttyS7 460800\n");
		return -1;
	}

	/* Fill the packet with an incrementing byte pattern. */
	for(i = 0; i < BUF_SIZE; i++)
	{
		wBuf[i] = i & 0xFF;
	}

	baudrate = atoi(argv[2]);

	uart_fd = open(argv[1], O_RDWR | O_NONBLOCK);
	if(uart_fd < 0)
	{
		printf("%s open fail !\n", argv[1]);
		return -1;
	}

	if(baudSetOpt(uart_fd, baudrate, 8, 'N', 1) == -1)
	{
		printf("%s set baudrate fail !\n", argv[1]);
		close(uart_fd);		/* do not leak the descriptor on failure */
		return -1;
	}

	/* Record the start timestamp (1 s resolution). */
	start_time = time(NULL);

	for(i = 0; i < SEND_PKG_SIZE; i++)
	{
		count = 0;
		while(count != sendSize)
		{
			ret = write(uart_fd, &wBuf[count], (size_t)(sendSize - count));
			if(ret >= 0)
			{
				/* Partial writes are normal in non-blocking mode. */
				count += (int)ret;
				continue;
			}

			if(errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
			{
				/* Hard error: retrying would loop forever. */
				perror("write");
				close(uart_fd);
				return -1;
			}

			/* Output queue is full: count it, back off 10 ms, retry. */
			err_sum++;
			usleep(10000);
		}
	}

	/* Record the end timestamp. */
	end_time = time(NULL);
	printf(" error times %lld, elapsed time %lld s\n",
	       err_sum, (long long)(end_time - start_time));

	close(uart_fd);
	return 0;
}

/*
 * Put a serial port into raw mode with the requested line settings.
 *
 * fd      open tty file descriptor
 * nSpeed  baud rate as a plain number; 300, 600, 1200, 2400, 4800, 9600,
 *         19200, 38400, 57600, 115200 and 460800 are recognised, any other
 *         value selects 9600
 * nBits   data bits: 5, 6 or 7; any other value selects 8
 * nEvent  parity: 'o'/'O' odd, 'n'/'N' none; any other character selects even
 * nStop   stop bits: 1 or 2; any other value leaves CSTOPB untouched
 *
 * Pending input is flushed before the new settings are applied immediately
 * (TCSANOW). Returns 0 if tcsetattr() succeeds, -1 otherwise.
 */
int baudSetOpt(int fd,int nSpeed,int nBits,char nEvent,int nStop)
{
	/* Numeric baud rate -> termios speed constant. */
	static const struct {
		int     rate;
		speed_t code;
	} speed_map[] = {
		{    300, B300    },
		{    600, B600    },
		{   1200, B1200   },
		{   2400, B2400   },
		{   4800, B4800   },
		{   9600, B9600   },
		{  19200, B19200  },
		{  38400, B38400  },
		{  57600, B57600  },
		{ 115200, B115200 },
		{ 460800, B460800 },
	};
	struct termios tio;
	speed_t speed = B9600;		/* fallback for unrecognised rates */
	size_t idx;

	memset(&tio, 0, sizeof(tio));
	cfmakeraw(&tio);
	tcflush(fd, TCIFLUSH);

	/* No hardware flow control; ignore modem lines and enable the receiver. */
	tio.c_cflag &= ~CRTSCTS;
	tio.c_cflag |= CLOCAL | CREAD;

	/* Character size. */
	tio.c_cflag &= ~CSIZE;
	if (nBits == 7)
		tio.c_cflag |= CS7;
	else if (nBits == 6)
		tio.c_cflag |= CS6;
	else if (nBits == 5)
		tio.c_cflag |= CS5;
	else
		tio.c_cflag |= CS8;

	/* Parity: odd, none, or even (even is also the default). */
	if (nEvent == 'o' || nEvent == 'O') {
		tio.c_cflag |= PARENB;
		tio.c_cflag |= PARODD;
	} else if (nEvent == 'n' || nEvent == 'N') {
		tio.c_cflag &= ~PARENB;
	} else {
		tio.c_cflag |= PARENB;
		tio.c_cflag &= ~PARODD;
	}

	/* Baud rate, same value for both directions. */
	for (idx = 0; idx < sizeof(speed_map) / sizeof(speed_map[0]); idx++) {
		if (speed_map[idx].rate == nSpeed) {
			speed = speed_map[idx].code;
			break;
		}
	}
	cfsetispeed(&tio, speed);
	cfsetospeed(&tio, speed);

	/* Stop bits. */
	if (nStop == 1)
		tio.c_cflag &= ~CSTOPB;
	else if (nStop == 2)
		tio.c_cflag |= CSTOPB;

	/* No output post-processing. */
	tio.c_oflag = 0;

	tio.c_cc[VTIME] = 1;	/* unit: 1/10 second */
	tio.c_cc[VMIN] = 1;	/* minimum number of characters for a read */

	return (tcsetattr(fd, TCSANOW, &tio) != 0) ? -1 : 0;
}
 

aarch64-oe-linux-gcc -mbranch-protection=standard --sysroot=/opt/arago-2023.10/sysroots/aarch64-oe-linux main.c -o main

3. Test results

Send 700,000 bytes at 460,800bps.

root@am62xx-evm:/mnt/nfs/am62x# ./main /dev/ttyS6 460800
error times 1500, elapsed time 15 s

root@am62xx-evm:/mnt/nfs/am62x# ./main /dev/ttyS7 460800
error times 31604, elapsed time 318 s

"error times" is the number of times write() failed and returned -1. Each time a write fails, the counter is incremented by 1, the program sleeps for 10 ms, and then retries the write.

The UART using DMA takes much longer; please help analyze this.

  • Hi Fuyi,

    It is known that DMA doesn't perform well with UART on AM62x. Why do you need DMA in your UART use case?

  • Hi Bin Liu,

    We need to continuously send/receive a large amount of data through the serial port. In the absence of DMA, when working simultaneously with other peripherals (such as LCD, SDIO WiFi, SPI, I2C, USB, NET, etc.), the serial port can generate a significant number of overruns/errors (OE), leading to abnormal device functionality. We encountered this issue when developing the AM335x kernel version 3.2. We require UART DMA.

  • Hi Bin,

    UART is very important for industrial use, especially for medical applications. Since this is a known issue, is it under analysis, and is there a chance it will be fixed?

    Is there explanation regarding the result of DMA?

  • Hi Fuyi,

    DMA in general is to off load CPU, but it doesn't mean to be transferring data faster, so it shouldn't have impact on issues such as overruns. The UART performance on AM625x should have greater difference than that on AM335x, because of different SoC architecture, ARM families and number of cores, and clock speed, etc.

    Have you run into any issue on AM625x UART without DMA?

    Tony,

    We haven't done any deep analysis on the DMA performance on AM62x UART. We had several customers initially ran into different UART issues with or without DMA, but most of them (if not all) eventually resolved without using DMA. So it isn't urgently required to analyze how exactly DMA performs on UART.

  • Hi Bin,

    #1. The application needs to run at up to 4Mbps, possibly on 3 ports at 4Mbps simultaneously; the customer thinks that without DMA this will cause high CPU load and dropped data.

    #2. They said even on i.MX8M Plus, also drop data if not use DMA at 4Mbps.

    #3. From experience on AM335x, DMA can help to resolve UART data drop issue.

  • Tony,

    Let me re-evaluate the case then discuss with the dev team. I might not have an immediate update, since I am working on a release coming in two weeks. 

  • Customer did experiments without DMA get abnormal result as below:

    Short UART_TX to UART_RX on the UART under test to perform an external loopback test.

    Run command as below to loop 40 times, 10,000byte data file, 4Mbps, loopback mode.

    time serialcheck -b 4000000 -d /dev/ttyS3 -f ./binary_1w_ttyS3 -l 40 -m d

    In theory it should take ~1 second to transfer 400,000 bytes at 4Mbps, but the test took 6m 57.35s.

  • Hi Tony,

    kernel does tasks scheduling, so the real transfer time would be slower than the theory time, but 7 minutes seem to be way too slow. Has the customer measured the UART output that it is indeed transmitted at 4Mbps? I don't think the TRM has 4M baud listed, and not sure how '-b 4000000' parameter is configured to the UART controller.

  • Hi Bin,

    Measured 3Mbps and 4Mbps separately, it follows the configuration in command:

    3Mbps:

    4Mbps:

  • Hi Tony,

    I did a similar test on my EVM and found that baud 4Mbps is really slow, as you said, but baud 3Mbps or 2Mbps would finish within a couple of seconds.

    Checking the TRM, Table 12-113 "UART Baud Rate Settings", the highest baud rate is 3.6884Mbps, which is when DLH,DLL register is set to 1. So I don't think the UART controller is configured properly when setting baud to 4Mbps, but I run out of time today to understand how exactly DLH,DLL is set with 4Mbps. I will be out next week.

  • Bin,

    Yes, some baud rate behave abnormal. test with serialcheck with SDK10.0.

    Baud rate channel test result
    4Mbps 1 abnormal
    3Mbps 3 normal
    3.5Mbps 2 normal
    2Mbps 3 abnormal
    1.5Mbps 3 abnormal
  • Tony, Bin is OoO this week. Please expect delayed response till next week.

  • Hi Tony,

    As I explained above, we already know 4Mbps cannot be reached. What is "abnormal" with 2Mbps and 1.5Mbps, slow transfer or something else?

  • Hi Bin,

    Abnormal means slow transfer. 

    we already know 4Mbps cannot be reached.

    But captured 4Mbps waveform from upper picture 2, and data is correct. 

  • Hi Tony,

    I don't see slow transfer with 2Mbps and 1.5Mbps.

    root@am62xx-evm:~# time serialcheck -d /dev/ttyS5 -f data-10k.dat -l 40 -m d -b 3000000
    [  100.423427] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    [  100.429282] omap8250 2850000.serial: mdr1 mode 0, div 0x0001
    Needed 38 reads 1 writes loops 40 / 40
    [  101.980038] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    cts: 0 dsr: 0 rng: 0 dcd: 0 rx: 400000 tx: 400000 frame 0 ovr 0 par: 0 brk: 0 buf_ovrr: 0
    real    0m 1.59s
    user    0m 0.00s
    sys     0m 0.09s
    root@am62xx-evm:~# time serialcheck -d /dev/ttyS5 -f data-10k.dat -l 40 -m d -b 2000000
    [  113.447662] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    [  113.453514] omap8250 2850000.serial: mdr1 mode 3, div 0x0002
    Needed 38 reads 1 writes loops 40 / 40
    [  115.905578] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    cts: 0 dsr: 0 rng: 0 dcd: 0 rx: 400000 tx: 400000 frame 0 ovr 0 par: 0 brk: 0 buf_ovrr: 0
    real    0m 2.49s
    user    0m 0.01s
    sys     0m 0.14s
    root@am62xx-evm:~# time serialcheck -d /dev/ttyS5 -f data-10k.dat -l 40 -m d -b 1500000
    [  128.485086] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    [  128.490943] omap8250 2850000.serial: mdr1 mode 0, div 0x0002
    Needed 38 reads 1 writes loops 40 / 40
    [  131.350892] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    
    cts: 0 dsr: 0 rng: 0 dcd: 0 rx: 400000 tx: 400000 frame 0 ovr 0 par: 0 brk: 0 buf_ovrr: 0
    real    0m 2.89s
    user    0m 0.00s
    sys     0m 0.16s

    I added the following patch to the kernel uart driver to see how the baud rate is configured.

    diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
    index bef71a627a94..6b795bbce680 100644
    --- a/drivers/tty/serial/8250/8250_omap.c
    +++ b/drivers/tty/serial/8250/8250_omap.c
    @@ -275,6 +275,7 @@ static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud,
                    priv->mdr1 = UART_OMAP_MDR1_13X_MODE;
                    priv->quot = div_13;
            }
    +       dev_info(port->dev, "mdr1 mode %d, div 0x%04x\n", priv->mdr1, priv->quot);
     }
     
     static void omap8250_update_scr(struct uart_8250_port *up,

    Please see the baud rate table 12-113 in the TRM: the max baud is 3.6884Mbps, with DLH/DLL configured to 1 and using the 13x baud multiple. It is impossible to configure 4Mbps. You can see the failure in the stty command when trying 4Mbps:

    root@am62xx-evm:~# stty -F /dev/ttyS5 3000000
    [  836.673468] omap8250 2850000.serial: mdr1 mode 3, div 0x0181
    [  836.679525] omap8250 2850000.serial: mdr1 mode 0, div 0x0001
    root@am62xx-evm:~# 
    root@am62xx-evm:~# stty -F /dev/ttyS5 4000000                                                                                                                                                                                                                                                                                                                                                                                             
    [  842.230615] omap8250 2850000.serial: mdr1 mode 0, div 0x0001
    [  842.236556] omap8250 2850000.serial: mdr1 mode 0, div 0x0001
    stty: /dev/ttyS5: unable to perform all requested operations
    

    and likely the UART module is in a wrong state after this point, then your 2Mbps and 1.5Mbps speed test failed? Please try 2M and 1.5M right after rebooted the board.

  • Hi Bin,

    1.Main Test Item: Simultaneously operating 4 serial ports at 460800 baud and 1 serial port at 115200 baud

    serialcheck -b 115200 -d /dev/ttyS3 -f ./binary_1w_ttyS3 -l 696729 -m d &
    serialcheck -b 460800 -d /dev/ttyS4 -f ./binary_1w_ttyS4 -l 2786918 -m d &
    serialcheck -b 460800 -d /dev/ttyS6 -f ./binary_1w_ttyS6 -l 2786918 -m d &
    serialcheck -b 460800 -d /dev/ttyS7 -f ./binary_1w_ttyS7 -l 2786918 -m d &
    serialcheck -b 460800 -d /dev/ttyS8 -f ./binary_1w_ttyS8 -l 2786918 -m d &

    2.Interference Test Item: Toggling the wired network interface card (NIC)

    ./eth_up_down.sh &

    #!/bin/sh
    # Toggle eth0 down and up forever (1 s in each state) as interference
    # for the UART test. Runs until killed.
    
    # "while true" is POSIX sh; the previous "for (( ; ; ))" loop is a
    # bash-only construct and fails when /bin/sh is not bash.
    while true
    do
    	ifconfig eth0 down
    	sleep 1
    
    	ifconfig eth0 up
    	sleep 1
    done

    3.Test Result: Test failed

  • Hi Fuyi,

    The issue likely is due to interrupt latency in Linux. Do you use RT-Linux? RT-Linux should provide better latency performance.

    If the issue still happens in RT-Linux, you can try to use irq affinity if the AM625x device you use has multiple A53 cores.

    By default, Linux use CPU0 to handle irq, which would have latency problem if a lot of interrupts happens at the same time. irq affinity moves some of irq handling to other CPUs which could improve latency performance. For example, the following procedure moves uart0 irq handling to CPU3.

    root@am62xx-evm:~# grep serial /proc/interrupts
    238:       1498          0          0          0     GICv3 210 Level     2800000.serial
    239:          0          0          0          0   pinctrl 456 Edge      2800000.serial:wakeup
    root@am62xx-evm:~# echo 3 >  /proc/irq/238/smp_affinity_list
    root@am62xx-evm:~# grep serial /proc/interrupts
    238:       1656          0          0         11     GICv3 210 Level     2800000.serial
    239:          0          0          0          0   pinctrl 456 Edge      2800000.serial:wakeup

  • Hi Fuyi,

    Please check the post linked below, reducing the RX FIFO TRIGGER from 48 bytes to 16 bytes resolves the data loss issue for this customer.

    https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1488925/am625-how-to-enable-uart-dma-in-rt-linux/5743531#5743531

  • Given that this thread's topic is 4Mbps support, and the customer accepts that 4Mbps is not supported according to the TRM, let's start another thread to discuss the remaining UART issues.

  • Hi Bin,

    1.Reduce the RX FIFO TRIGGER from 48 bytes to 16 bytes.

    2.Assign the serial port interrupt to another CPU.

    3.Test with RT-Linux.

    None of these three methods work.

  • Hi Fuyi,

    Could you please remind me what exactly the problem we are discussing on this thread?

    In your first post Initially you said UART DMA transfer takes long time to finish, then I said to not use UART DMA.

    Then Tony said you tested at 4Mbps baud without DMA and it was slow; I said 4Mbps baud is not supported.

    Finally about 3 weeks ago, you tested multiple uart simultaneously along with toggling network and the test failed (you didn't explain what the failure was).

    Tony also created a new e2e thread (linked below). It is for your project, right?

    https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1502417/am625-run-multiple-serialcheck-hang-over-half-hours

    I think I am kind of lost now. Please clarify what UART problem do you have and what we should be focusing on. Thanks.

  • Hi Fuyi,

    Tony and I sync'd up offline and now I have a better understanding of your project.

    I found a kernel hack patch I created some time back for the UART DMA problem. Can you please test it with all UART DMA enabled to see if it fixes your original DMA slow problem?

    diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
    index 0b9c47172bc8..0952f08622c9 100644
    --- a/drivers/tty/serial/8250/8250_omap.c
    +++ b/drivers/tty/serial/8250/8250_omap.c
    @@ -1042,6 +1042,10 @@ static int omap_8250_tx_dma(struct uart_8250_port *p)
            unsigned int    skip_byte = 0;
            int ret;
     
    +       /* HACK: do not use DMA for TX */
    +       ret = -EINVAL;
    +       goto err;
    +
            if (dma->tx_running)
                    return 0;
            if (uart_tx_stopped(&p->port) || uart_circ_empty(xmit)) {

  • Hi,Bin

    Sorry.

    I sorted it out.

    1.https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1472428/am625-uart-with-dma-takes-longer-in-continues-sending/5652059
    The current link only addresses the slow DMA performance issue in UART.

    2. https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1502417/am625-run-multiple-serialcheck-hang-over-half-hours
    The issue discussed in this link is the same one I'm encountering: the serial port lacks DMA configuration.

    3. https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1504677/am625-run-multiple-serial-ports-simultaneously-and-toggle-the-wired-nic-to-introduce-interference 
    This link discusses the simultaneous operation of multiple serial ports with NIC toggling issues.

  • Hi Fuyi,

    Thanks for the clarification. Please test the kernel patch above and let me know if it fixes the DMA slow problem you originally have.

  • Hi,Bin
    After adding these two lines of code, the time consumption for sending data via DMA appears to be normal. I am currently testing the serial port DMA functionality using the method outlined in this TI forum thread:

    https://e2e.ti.com/support/processors-group/processors/f/processors-forum/1504677/am625-run-multiple-serial-ports-simultaneously-and-toggle-the-wired-nic-to-introduce-interference

  • Hi Fuyi,

    Thanks for the update. Looking forward to the next test result.

  • Hi Tony,

    It appears you edited this thread a couple of times yesterday, but I cannot tell what exactly you modified. Please let me know if there is any important new information I should be aware of.