Hi all.
I have an important use case that highlights a strange behavior over a TCP/IP connection.
The scenario is:
- CPU#1 board with TIVAC129XNCZAD (client) with IP 192.168.0.100
- CPU#2 board with TIVAC129XNCZAD (client) with IP 192.168.0.103
- PC Dell W10 (server) with IP 192.168.0.2
- Private command-response protocol using “send” and “recv” (from NDK/socket.h) for transferring files
- Compiler TI v.18.12.7.LTS
- TI RTOS 2.16.1.14
- NDK v.2.25.00.09
- XDC 3.32.00.06
- TivaWare C_Series 2.1.4.178
- CCS 9.3.0.00012
- Wireshark 3.6.7 (installed on PC dell W10)
During normal operation of a TCP/IP session, where each packet sent by the client is verified by the server with a response, one of the two CPU boards sometimes receives error code 35 (EWOULDBLOCK).
The private application tries to recover from this situation with a do-while loop in which the client attempts another receive but, after an application timeout period, the socket is closed because no valid data has been received.
The following analysis was done using Wireshark.
The weird thing is that sometimes, after the client sends a packet (#1006), the response sent by the server is not received by the client (#1008), so after a timeout period (specified with “setsockopt”) the “recv” function returns -1, and querying the cause with the “fdError” function yields the code EWOULDBLOCK.
Using a timestamp reference from a serial debug interface, we can correlate the time at which this behavior happens with another weird thing: a TCP spurious retransmission of the last packet that was not verified (#1009).
It seems that TI's TCP/IP stack autonomously resends the last packet that was not verified (#1009) and the server responds with a duplicate ACK (see #1010). I'm quite sure about this because in my private application the send counter stops at the last packet sent.
The question is: is there a particular reason why the application gets the EWOULDBLOCK error?
Before writing to this forum, I read several articles and tried out several proposed fixes.
The fixes I've tried are:
- Configuring the socket in both BLOCKING and NON-BLOCKING mode
- Increase stack and priority for the task
- Treating the error as a non-fault in my application (disaster…)
- Increase application timeout and “setsockopt” timeout
The forums I’ve read are:
static Bool CommNetRemote_Receive( void ) { Bool ret = TRUE; Bool bFinish = TRUE; uint16_t pckt_size = 0; int16_t chunk_received = 0; int rc = 0; commNetRemoteHeaderS header; CommNetRemotePktDataS *payload = (CommNetRemotePktDataS*)&commNetRemoteBuffer[SESSION_HEADER_COUNT]; commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_REQ_ACK_GENERIC_OK; commNetRemoteCtrl.flags.recvErr = FALSE; commNetRemoteCtrl.flags.timeout = FALSE; // uncomment if want blocking mode //setsockopt( commNetRemoteCtrl.clientfd, SOL_SOCKET, SO_BLOCKING, &rc, sizeof(rc)); do { rc = recv( commNetRemoteCtrl.clientfd, (BYTE*)commNetRemoteBuffer, REMOTE_BUFFER_LEN, 0); if (rc > 0) { ret = TRUE; // fill-up the header structure, due to aligned with 3 bytes header.IC = commNetRemoteBuffer[0]; header.PS_low = commNetRemoteBuffer[1]; header.PS_high = commNetRemoteBuffer[2]; pckt_size = (header.PS_high << 8) + header.PS_low; // First check, if size of packet is the same of the one declared into header if( rc != (pckt_size + SESSION_HEADER_COUNT) ) { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_PACKET_SIZE; OS_Error("Receive: error packet size"); ret = FALSE; } else { switch( header.IC ) { case SESSION_ACK_IC: if( CommNetRemote_CompareData( payload->sessionAck.serial, (BYTE*)appMain.config.serialNumber, 10 ) == FALSE ) { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_SERIAL_WRONG; OS_Error("Receive: serial number wrong"); ret = FALSE; } else { // Header ok } break; case SESSION_REQACK_IC: commNetRemoteCtrl.chunk_resp++; commNetRemoteCtrl.recvErr = payload->requestAck.retCode; break; case SESSION_ABORT_IC: commNetRemoteCtrl.recvErr = payload->abortReturn.retCode; break; case SESSION_SEND_CHUNK_IC: if( CommNetRemote_CompareData( (BYTE*)payload->sendChunk.GUID, (BYTE*)commNetRemoteCtrl.GUID, REMOTE_GUID_LEN) == TRUE ) { chunk_received = payload->sendChunk.chunk_index[1]; chunk_received *= 256; // equivale shift di 8 bit chunk_received += payload->sendChunk.chunk_index[0]; if( chunk_received == 
commNetRemoteCtrl.chunk_index ) { // Packet OK commNetRemoteCtrl.iBytesToSend = pckt_size - (SESSION_SEND_CHUNK_PAYLOAD_COUNT - 1); } else { // Packet KO commNetRemoteCtrl.iBytesToSend = 0; commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_WRONG_CHUNK; } } else { commNetRemoteCtrl.iBytesToSend = 0; commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_WRONG_GUID; } break; default: commNetRemoteCtrl.iBytesToSend = 0; commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_UNDEFINED; break; } } } else { bFinish = TRUE; ret = FALSE; rc = fdError(); if( rc == 0 ) { // close communication: error managed by upper layer // commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_RECV_ZERO_BYTE; } else { commNetRemoteCtrl.recvErr = rc; if( (rc == EWOULDBLOCK) || (rc == EAGAIN) ) { // The timeout is started by upper layer. when the time is reached the flag commNetRemoteCtrl.flags.timeout is updated // if( commNetRemoteCtrl.flags.timeout == FALSE ) { bFinish = FALSE; OS_Sleep( OS_HUNDRED_MILLISECONDS ); } else { // Timeout reached : error managed by upper layer } } else { // Other error codes: error managed by upper layer } } OS_Error( "Receive: err socket (%ld) @ %ld.", rc, OS_GetTimerTick() ); } } while ( bFinish != TRUE ); return (ret); } void CommNetRemote_ClientProcess(TASK_ARG_T arg0, TASK_ARG_T arg1) { int status = 0; int32_t err = 0; int16_t iWaitCount = 0; struct timeval to; CommNetRemotePktDataS *payload = (CommNetRemotePktDataS*)&commNetRemoteBuffer[SESSION_HEADER_COUNT]; commNetRemoteCtrl.enabled = TRUE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_INIT_WAIT; commNetRemoteCtrl.flags.all = 0; commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_REQ_ACK_GENERIC_OK; commNetRemoteCtrl.sendErr = COMM_NET_REMOTE_REQ_ACK_GENERIC_OK; // Create a TCP stream socket. 
commNetRemoteCtrl.clientfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (commNetRemoteCtrl.clientfd < 0) { commNetRemoteCtrl.flags.quit = TRUE; OS_Error("CommNet_RemoteClientProcess: socket failed."); } else { // Setup the Server IP address and port memset((char *) &commNetRemoteCtrl.servAddr, 0, sizeof(commNetRemoteCtrl.servAddr)); commNetRemoteCtrl.servAddr.sin_family = AF_INET; commNetRemoteCtrl.servAddr.sin_port = htons(REMOTE_SERVER_PORT); commNetRemoteCtrl.servAddr.sin_addr.s_addr = inet_addr(REMOTE_SERVER_IP ); to.tv_sec = REMOTE_SOCKET_TIMEOUT; to.tv_usec = 0; setsockopt( commNetRemoteCtrl.clientfd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof( to ) ); setsockopt( commNetRemoteCtrl.clientfd, SOL_SOCKET, SO_SNDTIMEO, &to, sizeof( to ) ); // Connect to the server status = connect(commNetRemoteCtrl.clientfd, (struct sockaddr *) &commNetRemoteCtrl.servAddr, sizeof(commNetRemoteCtrl.servAddr)); if (status < 0) { fdClose( commNetRemoteCtrl.clientfd ); commNetRemoteCtrl.flags.quit = TRUE; OS_Error("CommNet_RemoteClientProcess: client connect failed. 
(%d).",fdError() ); } else { // Init the Error_Block Error_init(&commNetRemoteCtrl.eb); commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_OPEN_SESSION; commNetRemoteCtrl.flags.quit = FALSE; OS_SysDbg( "RemoteControlClientProcess: start clientfd = 0x%x.", commNetRemoteCtrl.clientfd ); } } // Loop while we receive data while( commNetRemoteCtrl.flags.quit == FALSE ) { switch( commNetRemoteCtrl.state ) { case COMM_NET_REMOTE_STS_IDLE: // // controlla le condizioni di uscita if( appMain.diagnostic.general.flags.cableDisconnected == TRUE ) { commNetRemoteCtrl.flags.quit = TRUE; } else { if( Network_IsLANConfigured() == FALSE) { commNetRemoteCtrl.flags.quit = TRUE; } } if( commNetRemoteCtrl.flags.close == TRUE ) { commNetRemoteCtrl.flags.quit = TRUE; } // Check if request flag is active , then close session if( commNetRemoteCtrl.flags.quit == FALSE ) { if( commNetRemoteCtrl.flags.request == TRUE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_CLOSE_SESSION; } else { OS_Sleep(OS_TEN_MILLISECONDS); } } break; case COMM_NET_REMOTE_STS_WAIT_RESP: if( CommNetRemote_Receive() == FALSE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_RECV_ERROR; } else { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; } break; case COMM_NET_REMOTE_STS_OPEN_SESSION: // Compose header // Compose payload OS_SysDbg("Open session: %ld ", OS_GetTimerTick() ); if( CommNetRemote_Send( commNetRemoteBuffer, SESSION_OPEN_HEADER_PAYLOAD) == TRUE ) { // decide il prossimo stato commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_WAIT_RESP; } else { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_NONE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_SEND_ERROR; iWaitCount = 0; } break; case COMM_NET_REMOTE_STS_CLOSE_SESSION: // Compose the header // Compose payload // Get return code CommNetRemote_GetLastErr(&err); payload->sessionClose.retcode = err; OS_SysDbg("Close session: %ld ", OS_GetTimerTick() ); CommNetRemote_Send( commNetRemoteBuffer, SESSION_CLOSE_HEADER_PAYLOAD); // decide il prossimo stato 
OS_SetPeriodicTask(commNetRemoteCtrl.timer, OS_ONE_SECOND); OS_StartPeriodicTask(commNetRemoteCtrl.timer); commNetRemoteCtrl.flags.request = FALSE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; break; case COMM_NET_REMOTE_STS_PUBLISH_FILE: // Compose the header // Compose payload if( CommNetRemote_Send( commNetRemoteBuffer, SESSION_PUBLISH_HEADER_PAYLOAD) == TRUE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_WAIT_RESP; } else { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_NONE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_SEND_ERROR; iWaitCount = 0; } break; case COMM_NET_REMOTE_STS_PUBLISH_CHUNK: // Compose the header // Compose payload if( CommNetRemote_Send( commNetRemoteBuffer, (commNetRemoteCtrl.iBytesToSend + SESSION_HEADER_COUNT) ) == TRUE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_WAIT_RESP; } else { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_NONE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_SEND_ERROR; iWaitCount = 0; } break; case COMM_NET_REMOTE_STS_GET_FILE: // Compose the header // Compose payload if( CommNetRemote_Send( commNetRemoteBuffer, SESSION_GET_HEADER_PAYLOAD) == TRUE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_WAIT_RESP; } else { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_NONE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_SEND_ERROR; iWaitCount = 0; } break; case COMM_NET_REMOTE_STS_GET_CHUNK: // Compose the header if( CommNetRemote_Send( commNetRemoteBuffer, SESSION_GET_CHUNK_HEADER_PAYLOAD ) == TRUE ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_WAIT_RESP; } else { commNetRemoteCtrl.recvErr = COMM_NET_REMOTE_SEND_ERR_NONE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_SEND_ERROR; iWaitCount = 0; } break; case COMM_NET_REMOTE_STS_SEND_ERROR: // Wait for error management with flag commNetRemoteCtrl.flags.sendErr in order to be sncronized with upper layer // // Starvation avoid: after 200 times return idle. 
// iWaitCount++; if( iWaitCount > 200 ) { iWaitCount = 0; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; } else { if( TRUE == commNetRemoteCtrl.flags.sendErr ) { commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; } else { OS_Sleep(OS_HUNDRED_MILLISECONDS); } } break; case COMM_NET_REMOTE_STS_RECV_ERROR: commNetRemoteCtrl.flags.recvErr = TRUE; commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; break; default: commNetRemoteCtrl.state = COMM_NET_REMOTE_STS_IDLE; break; } } // while( commNetRemoteCtrl.flags.quit == FALSE ) // The process is closed // if( commNetRemoteCtrl.clientfd ) { fdClose( commNetRemoteCtrl.clientfd ); OS_SysDbg("Close socket."); } // Clean all variables OS_SysDbg("CommNetRemote_ClientProcess: quit."); commNetRemoteCtrl.enabled = FALSE; commNetRemoteCtrl.flags.all = 0; OS_DeleteTask(commNetRemoteCtrl.taskHandle); commNetRemoteCtrl.taskHandle = OS_TASK_ERROR; return; }