Hi,
we are trying to set up a fast TCP Ethernet connection with the AM3359 using the lwIP stack and TI RTOS. We need both a fast response (round trip within a few hundred microseconds) and a high data rate (> 20 Mbit/s) in both directions.
We are having problems getting the CPSW CPDMA to run as described in the AM335x TRM (spruh73o.pdf), chapters 14.3 and 14.4: packets that are appended to the descriptor queue while the CPDMA is running are not sent.
C:\ti\am335x_sysbios_ind_sdk_1.1.0.8\sdk\starterware\third_party\lwip-1.4.0\ports\cpsw\netif\cpswif.c:
static err_t
cpswif_transmit(struct netif *netif, struct pbuf *pbuf) {
....
log(flag = 0)
flag = 1;
/* For the first time, write the HDP with the filled bd */
if(txch->send_tail == NULL) {
CPSWCPDMATxHdrDescPtrWrite(cpswinst->cpdma_base,
(u32_t)(bd_to_send), 0);
} else {
/**
* Chain the bd's. If the DMA engine, already reached the end of the chain,
* the EOQ will be set. In that case, the HDP shall be written again.
*/
curr_bd = txch->send_tail;
curr_bd->next = bd_to_send;
if(curr_bd->flags_pktlen & CPDMA_BUF_DESC_EOQ) {
/* Write the Header Descriptor Pointer and start DMA */
CPSWCPDMATxHdrDescPtrWrite(cpswinst->cpdma_base,
(u32_t)(bd_to_send), 0);
flag = 3;
}
}
log(flag = 1 | 3);
txch->send_tail = bd_end;
return ERR_OK;
}
The packet is not sent if it is appended but the HDP is not written (because the DMA is still running). Instead, the TX interrupt handler retries:
void
cpswif_tx_inthandler(u32_t inst_num) {
log(flag = 2)
....
/* Check for correct start of packet */
while((curr_bd->flags_pktlen) & CPDMA_BUF_DESC_SOP) {
/* Make sure that the transmission is over */
while(((curr_bd->flags_pktlen & CPDMA_BUF_DESC_OWNER)
== CPDMA_BUF_DESC_OWNER) && ((--cnt) != 0));
/* If CPDMA failed to transmit, give it a chance once more */
if(0 == cnt) {
log(flag = 4);
CPSWCPDMATxHdrDescPtrWrite(cpswinst->cpdma_base,
(u32_t)(curr_bd), 0);
return;
}
The while loop counting down from cnt = 0xffff takes about 16 ms - half an eternity - which is not acceptable. (That is about 200 cycles for each read of the descriptor memory @ 0x4a102000 ff. !?)
(There is also another case, where the tx_inthandler waits for a packet not yet enqueued by the transmit function (head > tail), a bug with pointers?)
Why is it necessary to 'give it (the CPDMA) a chance once more'?
We also had a look at the Linux driver code (cpsw.c); there is a similar piece of code that writes the TX HDP again:
/*
 * Submit one packet on the TX channel of `priv`.
 *
 * Steps, in order:
 *   1. flush the data cache over the packet buffer (length rounded up
 *      via ALIGN(length, PKTALIGN)),
 *   2. call cpdma_process() on the TX channel repeatedly for as long as
 *      it returns >= 0, bounded by CPDMA_TIMEOUT iterations,
 *   3. hand the packet to cpdma_submit().
 *
 * Returns -ETIMEDOUT if the iteration budget was used up in step 2,
 * otherwise the return value of cpdma_submit().
 */
static int _cpsw_send(struct cpsw_priv *priv, void *packet, int length)
{
/* Outputs of cpdma_process(); not used any further in this function. */
void *buffer;
int len;
int timeout = CPDMA_TIMEOUT;
flush_dcache_range((unsigned long)packet,
(unsigned long)packet + ALIGN(length, PKTALIGN));
/*
 * first reap completed packets
 *
 * The loop ends either when cpdma_process() returns a negative value
 * or when `timeout--` evaluates to 0. Only in the latter case is
 * `timeout` left at -1 by the post-decrement.
 */
while (timeout-- &&
(cpdma_process(priv, &priv->tx_chan, &buffer, &len) >= 0))
;
/* -1 means the budget ran out while cpdma_process() kept returning >= 0. */
if (timeout == -1) {
printf("cpdma_process timeout\n");
return -ETIMEDOUT;
}
return cpdma_submit(priv, &priv->tx_chan, packet, length);
}
with
if (status & CPDMA_DESC_OWNER) {
if (chan_read(chan, hdp) == 0) {
if (desc_read(desc, hw_mode) & CPDMA_DESC_OWNER)
chan_write(chan, hdp, desc);
}
return -EBUSY;
}
within cpdma_process().
So this leads to the question: Does it work reliably if packets are appended on a queue while CPDMA is running? Or are only the 'misqueued' packets sent at all?
Some more questions concerning the TX interrupts: The TRM states that a TX interrupt is asserted for each packet sent (there may be more packets in the queue) and that it is not deasserted if the completion pointer written to the TX CP register differs from that of the CPDMA. Does this mean the RTOS Hwi is triggered again and again until the right value is written? Does CPSWCPDMAEndOfIntVectorWrite(cpswinst->cpdma_base, CPSW_EOI_TX_PULSE); matter in this case?
What if the CPDMA has continued to the next pointer between read and write of the CP by the CPU?
Below you can find logged data of the failed transmissions.
Any hints? (All things must be done well with interrupts, busy waits are not acceptable, the CPU has other things to do.)
Thanks,
Frank
Descriptor table and log of transfers (breakpoint in tx_inthandler if the packet is not sent, i.e. if (cnt == 0)):
error 1: appended packet is not sent:
descriptors @ 0x4a102348 ff:
4A10235C 9E535F8E 000005EA 100005EA 9E535F7C
4A102370 9E536596 000005EA 100005EA 9E536584
4A102384 9E536B9E 000005EA 100005EA 9E536B8C
4A102398 9E5371A6 000005EA 100005EA 9E537194
4A1023AC 9E5377AE 000005EA 100005EA 9E53779C
4A1023C0 9E53537E 000005EA 100005EA 9E53536C
4A1023D4 9E537DB6 000005EA 100005EA 9E537DA4
00000000 9E5383BE 00000176 E0000176 9E5383AC
4A1023FC 00000000 00000000 00000000 00000000
log of cpsw_transmit, cpswif_tx_inthandler, time in cpu cycles:
flag: 0 xmit before queueing, 1,3 xmit after queueing (without / with write HDP), 2 begin of isr, 4 isr if loop cnt == 0 (before try again)
time flag head|next headflags end|next endflag tail tailflag hdp cp dmastate
good 'misqueued' packets:
ED5B4CF7 00000003 23840000 E00005EA 23840000 E00005EA 4A102370 D00005EA 4A102384 4A102370 00000000
ED5B617D 00000002 235C2370 D00005EA 00000000 00000000 4A102384 E00005EA 4A102384 4A102370 00000000
ED5F1A96 00000000 23980000 E00005EA 23980000 E00005EA 4A102384 100005EA 00000000 4A102384 80000000
ED5F2384 00000003 23980000 E00005EA 23980000 E00005EA 4A102384 100005EA 4A102398 4A102384 00000000
ED5F4768 00000000 23AC0000 E00005EA 23AC0000 E00005EA 4A102398 D00005EA 00000000 4A102398 80000000
ED5F58F9 00000003 23AC0000 E00005EA 23AC0000 E00005EA 4A102398 D00005EA 4A1023AC 4A102398 00000000
ED5F71FA 00000002 239823AC D00005EA 00000000 00000000 4A1023AC E00005EA 4A1023AC 4A102398 00000000
ED604412 00000000 23C00000 E00005EA 23C00000 E00005EA 4A1023AC 100005EA 00000000 4A1023AC 80000000
ED604CF2 00000003 23C00000 E00005EA 23C00000 E00005EA 4A1023AC 100005EA 4A1023C0 4A1023AC 00000000
packet enqueued without writing TX_HDP:
ED606604 00000000 23D40000 E0000176 23D40000 E0000176 4A1023C0 E00005EA 4A1023C0 4A1023AC 00000000
ED606DFC 00000001 23D40000 E0000176 23D40000 E0000176 4A1023C0 E00005EA 00000000 4A1023C0 80000000
... is not sent:
ED608AF3 00000002 23C023D4 D00005EA 00000000 00000000 4A1023D4 E0000176 00000000 4A1023C0 80000000
EE21D302 00000004 23D40000 E0000176 00000000 00000000 4A1023D4 E0000176 00000000 4A1023C0 80000000
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
error 2: head > tail (pointer bug?)
free_head = send_head = 0x22bc, send_tail = 22a8
@ 4A1021E0 ff:
4A1021F4 9E53537E 00000046 10000046 9E53536C
4A102208 9E535986 00000046 10000046 9E535974
4A10221C 9E53537E 00000046 10000046 9E53536C
4A102230 9E535986 00000046 10000046 9E535974
4A102244 9E53537E 0000004E 1000004E 9E53536C
4A102258 9E535986 00000046 10000046 9E535974
4A10226C 9E53537E 00000262 10000262 9E53536C
4A102280 9E5355FE 000005EA 100005EA 9E5355EC
4A102294 9E535C06 000005EA 100005EA 9E535BF4
4A1022A8 9E53620E 000005EA 100005EA 9E5361FC
00000000 9E536816 000005EA D00005EA 9E536804
4A1022D0 00000000 00000000 A00005EA 00000000
4A1022E4 00000000 00000000 00000000 00000000
log:
flag: 0 xmit before queueing, 1,3 xmit after queueing (without / with write HDP), 2 tx_int, 4 tx_int loop cnt = 0
time flag head|next headflags end|next endflag tail tailflag hdp cp dmastate
6D16E72C 00000002 221C0000 D0000046 00000000 00000000 4A102208 10000046 00000000 4A10221C 80000000
0490434A 00000000 22300000 E000004E 22300000 E000004E 4A10221C 10000046 00000000 4A10221C 80000000
04904CC7 00000003 22300000 E000004E 22300000 D000004E 4A10221C 10000046 00000000 4A102230 80000000
049060FB 00000002 22300000 D000004E 00000000 00000000 4A10221C 10000046 00000000 4A102230 80000000
0490AB4F 00000000 22440000 E0000046 22440000 E0000046 4A102230 1000004E 00000000 4A102230 80000000
0490B455 00000003 22440000 E0000046 22440000 D0000046 4A102230 1000004E 00000000 4A102244 80000000
0490C5CC 00000002 22440000 D0000046 00000000 00000000 4A102230 1000004E 00000000 4A102244 80000000
04AD2217 00000000 22580000 E0000262 22580000 E0000262 4A102244 10000046 00000000 4A102244 80000000
04AD2BBA 00000003 22580000 E0000262 22580000 E0000262 4A102244 10000046 4A102258 4A102244 00000000
04AD49BF 00000002 22580000 D0000262 00000000 00000000 4A102258 D0000262 00000000 4A102258 80000000
063B2358 00000000 226C0000 E00005EA 226C0000 E00005EA 4A102258 10000262 00000000 4A102258 80000000
063B2CB8 00000003 226C0000 E00005EA 226C0000 E00005EA 4A102258 10000262 4A10226C 4A102258 00000000
063B6352 00000002 226C0000 D00005EA 00000000 00000000 4A10226C D00005EA 00000000 4A10226C 80000000
063B7F03 00000000 22800000 E00005EA 22800000 E00005EA 4A10226C 100005EA 00000000 4A10226C 80000000
063B8819 00000003 22800000 E00005EA 22800000 E00005EA 4A10226C 100005EA 4A102280 4A10226C 00000000
063BE1B2 00000000 22940000 E00005EA 22940000 E00005EA 4A102280 100005EA 00000000 4A102280 80000000
063BEAAC 00000003 22940000 E00005EA 22940000 E00005EA 4A102280 100005EA 4A102294 4A102280 00000000
063C1033 00000000 22A80000 E00005EA 22A80000 E00005EA 4A102294 D00005EA 00000000 4A102294 80000000
063C22B6 00000002 22940000 D00005EA 00000000 00000000 4A102294 D00005EA 00000000 4A102294 80000000
063C38D4 00000003 22A80000 E00005EA 22A80000 E00005EA 4A102294 100005EA 4A1022A8 4A102294 00000000
waiting for a packet not enqueued:
063C69BF 00000002 22BC22D0 A00005EA 00000000 00000000 4A1022A8 D00005EA 00000000 4A1022A8 80000000
07084D5D 00000004 22BC22D0 A00005EA 00000000 00000000 4A1022A8 D00005EA 00000000 4A1022A8 80000000
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
code that generates the dumps above:
/*
 * One record of the TX trace buffer. Records are stored in the
 * cpsw_log_tx[] array, which holds CPSW_LOGSIZE entries.
 */
struct cpsw_log_tx {
    unsigned long time;     /* timestamp taken when the record is written */
    unsigned long flag;     /* marker identifying the logging call site */
    unsigned long head;     /* descriptor address (<< 16) | low 16 bits of its next pointer */
    unsigned long headflag; /* flags_pktlen word of that descriptor */
    unsigned long end;      /* "end|next" column of the dump, packed like head */
    unsigned long endflag;  /* flags word belonging to the end column */
    unsigned long tail;     /* value of txch->send_tail */
    unsigned long tailflag; /* flags_pktlen of send_tail, or a sentinel when it is NULL */
    unsigned long hdp;      /* TX_HDP(0) register content */
    unsigned long cp;       /* TX_CP(0) register content */
    unsigned long dma;      /* DMASTATUS register content */
} cpsw_log_tx[CPSW_LOGSIZE];
/*
 * Trace snippet (shown here as used at the "flag 4" site in the TX
 * interrupt handler): appends one record to cpsw_log_tx[] with the
 * current descriptor, send_tail and the CPDMA TX_HDP / TX_CP / DMASTATUS
 * registers. Compiled in only when LOG_CPSW_ISRS is non-zero.
 *
 * Interrupts are disabled with Hwi_disable() while the record is written
 * and restored afterwards. The write index wraps to 0 at CPSW_LOGSIZE,
 * so the buffer always holds the most recent records.
 */
log() .... {
#if LOG_CPSW_ISRS
hwikey = Hwi_disable();
cpsw_log_tx[cpsw_log_xinx].time = TimestampProvider_get32();
cpsw_log_tx[cpsw_log_xinx].flag = 4;
/* Low 16 bits of the descriptor address in the upper half-word,
 * low 16 bits of its next pointer in the lower half-word. */
cpsw_log_tx[cpsw_log_xinx].head = ((uint32_t)curr_bd << 16) |
((uint32_t)curr_bd->next & 0xffff);
cpsw_log_tx[cpsw_log_xinx].headflag = (uint32_t)curr_bd->flags_pktlen;
/* No end descriptor is recorded at this call site; the columns are zeroed. */
cpsw_log_tx[cpsw_log_xinx].end = ((uint32_t)0 << 16) |
((uint32_t)0 & 0xffff);
cpsw_log_tx[cpsw_log_xinx].endflag = (uint32_t)0;
cpsw_log_tx[cpsw_log_xinx].tail = (uint32_t)txch->send_tail;
/* All-ones sentinel when there is no tail descriptor. The constant is
 * exactly 32 bits wide (the previous 0xfffffffff had nine hex digits
 * and relied on implicit truncation). */
cpsw_log_tx[cpsw_log_xinx].tailflag = txch->send_tail == NULL ?
0xffffffffUL : (uint32_t)txch->send_tail->flags_pktlen;
cpsw_log_tx[cpsw_log_xinx].hdp =
HWREG(cpswinst->cpdma_base + CPSW_CPDMA_TX_HDP(0));
cpsw_log_tx[cpsw_log_xinx].cp =
HWREG(cpswinst->cpdma_base + CPSW_CPDMA_TX_CP(0));
cpsw_log_tx[cpsw_log_xinx].dma =
HWREG(cpswinst->cpdma_base + CPSW_CPDMA_DMASTATUS);
/* Second timestamp, kept in a separate array under the same index. */
cpsw_log_txmit[cpsw_log_xinx] = ts64toul64();
if ( ++cpsw_log_xinx >= CPSW_LOGSIZE ) {
cpsw_log_xinx = 0;
}
Hwi_restore(hwikey);
#endif
}