compiler optimization issue

Tuotuo Li

Prodigy 100 points

Other Parts Discussed in Thread: TDA2, SYSBIOS

Hi, I would like to report a potential compiler optimization bug.

The problem appears on the TI Vayu EVM but not on earlier platforms such as the AT70.

When optimization is turned on, the compiler generates code that produces incorrect results.

The problem seems to be inside a doubly nested loop that contains an if/else statement.

Is this a known issue?

Will there be any fix in the near future?

Thanks!

over 10 years ago

0 Raja over 10 years ago

TI__Guru* 81335 points

Moved this thread to correct forum for faster response.

Thank you.

0 Archaeologist over 10 years ago

TI__Guru* 84225 points

I'm sorry, we'll need a test case to determine whether this is a known bug.

See this wiki page

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

I have written one.

Given the same input, the function foo generates incorrect and inconsistent results.

Please note that this is only seen on TI Vayu evm.

#include "stdlib.h"
#include "stdio.h"

//  This is the function that has the problem.
//  If line 21 (the asm(" ") statement inside the inner loop) is uncommented,
//  which disables the optimization, then the result will be correct.
//
//  Processes b[0..n-1] in place. For each i, the value stored at b[indx[i]]
//  becomes the starting sum and b[i] is copied into b[indx[i]]. Until a
//  nonzero sum has been seen nothing is subtracted; the index of the first
//  nonzero sum is remembered in ii. On every later iteration the products
//  a[i][j] * b[j] for j in [ii, i) are subtracted from the sum. The final
//  sum is written to b[i].
//
//  a     array of row pointers; row i is read at columns ii..i-1
//  n     number of elements to process
//  indx  indx[i] is the index into b used as source/destination in step i
//  b     input/output vector, overwritten in place
void foo(double **a, int n, int *indx, double b[])
{
  int   i, ip, j;
  int ii = -1;          // -1 until the first nonzero sum has been seen
  double sum, tmp;      // tmp is never used

  for (i = 0; i<n; i++)
  {
    // Take the starting value from b[ip] and move b[i] into its place.
    ip = indx[i];
    sum = b[ip];
    b[ip] = b[i];
    if (ii >= 0)
    {
      // This inner loop is the one reported to give inconsistent results
      // when compiled with optimization.
      for (j = ii; j < i; j++)
      {
        // asm(" ");
        sum -= a[i][j] * b[j];
      }
    }
    else if (sum)
      ii = i;           // first nonzero sum: later iterations start at this column
    b[i] = sum;
  }
}

/*
 * Allocates a dim x dim matrix of doubles as an array of row pointers and
 * stores it in *a. The caller releases it with freedim2().
 *
 * a    out-parameter that receives the array of dim row pointers
 * dim  number of rows and columns; for dim <= 0 nothing is allocated and
 *      *a is set to NULL
 *
 * An allocation failure is reported on stderr and the program is aborted, so
 * a failed malloc cannot be mistaken for a wrong numerical result. The check
 * is an explicit test rather than assert(), so it stays active in builds
 * that define NDEBUG.
 */
void allocdim2(double***a, int dim)
{
  int i;

  *a = NULL;
  if (dim <= 0)
    return;

  *a = (double**)malloc((size_t)dim * sizeof **a);
  if (*a == NULL)
  {
    fprintf(stderr, "allocdim2: out of memory (%d row pointers)\n", dim);
    abort();
  }

  for (i = 0; i < dim; ++i)
  {
    (*a)[i] = (double*)malloc((size_t)dim * sizeof *(*a)[i]);
    if ((*a)[i] == NULL)
    {
      fprintf(stderr, "allocdim2: out of memory (row %d of %d)\n", i, dim);
      abort();
    }
  }
}

/*
 * Releases a matrix stored as an array of row pointers: each of the dim rows
 * is freed first, then the array of row pointers itself.
 *
 * a    array of dim row pointers
 * dim  number of rows to free
 */
void freedim2(double** a, int dim)
{
  int row = 0;

  while (row < dim)
  {
    free(a[row]);
    row++;
  }

  free(a);
}

/*
 * Fills the dim x dim matrix a with pseudo-random values taken from rand().
 * Each element is (rand() % 32) / (0.41 + rand() % 1234).
 *
 * a    array of dim row pointers, each row holding at least dim doubles
 * dim  number of rows and columns to fill
 *
 * The two rand() calls are made in separate statements, numerator first.
 * When both calls sit in one expression the C standard leaves their order
 * unspecified, so different compilers (or optimization levels) could build
 * different matrices from the same seed.
 */
void initlizeMat(double ** a, int dim)
{
  int i, j;
  for (i = 0; i < dim; ++i)
  {
    for (j = 0; j < dim; ++j)
    {
      int numer = rand() % 32;
      int denom = rand() % 1234;
      a[i][j] = numer / (0.41 + denom);
    }
  }
}



/*
 * Compares the first dim elements of a and b for exact equality.
 *
 * Returns 1 when every pair a[k], b[k] compares equal (also when dim <= 0),
 * and 0 as soon as one pair differs.
 */
int isEqual(double*a, double *b, int dim)
{
  int k = 0;

  /* Advance while the elements match; stop at the first mismatch. */
  while (k < dim && a[k] == b[k])
    ++k;

  /* The whole range was walked only if no mismatch stopped the scan. */
  return k >= dim;
}

/*
 * Runs foo() twice on identical inputs of size dim and reports whether the
 * two result vectors compare equal.
 *
 * A dim x dim matrix, a right-hand-side vector and an index vector are filled
 * from rand(); the vector is duplicated into res1 and res2, foo() is applied
 * to each copy, and "good!" or "BAD!!!!!!!" is printed depending on isEqual().
 *
 * dim  problem size (matrix is dim x dim, vectors have dim elements)
 *
 * Every allocation is verified before use; if one failed, "allocation
 * failed" is printed and the comparison is skipped, so running out of heap
 * is not reported as a numerical mismatch.
 */
void VerifyPipeLineError(int dim)
{
  double **a = NULL, *res1 = NULL, *res2 = NULL;
  int *index = NULL;
  int i, ok;

  allocdim2(&a, dim);
  res1 = (double*)malloc((size_t)dim * sizeof *res1);
  res2 = (double*)malloc((size_t)dim * sizeof *res2);
  index = (int*)malloc((size_t)dim * sizeof *index);

  /* With dim <= 0 nothing is dereferenced, so NULL pointers are harmless. */
  ok = (dim <= 0) || (a && res1 && res2 && index);
  for (i = 0; ok && i < dim; ++i)
    ok = (a[i] != NULL);

  if (!ok)
  {
    printf("allocation failed\n");
  }
  else
  {
    initlizeMat(a, dim);

    for (i = 0; i < dim; ++i)
    {
      /* Separate statements fix the order of the two rand() calls, which
         is unspecified when both appear in one expression. */
      int numer = rand() % 23;
      int denom = rand() % 1231;
      res1[i] = numer / (0.41 + denom);
      res2[i] = res1[i];
      index[i] = rand() % dim;
    }

    /* Same matrix, same index vector, identical copies of the vector. */
    foo(a,  dim,  index, res1);
    foo(a,  dim,  index, res2);

    if (!isEqual(res1, res2, dim))
      printf("BAD!!!!!!!\n");
    else
      printf("good!\n");
  }

  /* Free each row only when the row-pointer array itself exists. */
  if (a != NULL)
    freedim2(a, dim);
  free(res1);
  free(res2);
  free(index);
}

/*
 * Test driver: seeds rand() with 13 and runs VerifyPipeLineError() for the
 * sizes 100, 104, ..., 196, printing the size before each run.
 *
 * Declared as int main(void), the standard form, and returns 0 on completion.
 */
int main(void)
{
  int i;
  srand(13);
  for (i = 100; i < 200; i+=4)
  {
    printf("Dim :%d, ", i);
    VerifyPipeLineError(i);
  }
  return 0;
}

0 Archaeologist over 10 years ago in reply to Tuotuo Li

TI__Guru* 84225 points

So far I have not been able to reproduce the problem.

Please make sure your program was able to allocate memory:

/*
 * Allocates a dim x dim matrix of doubles as an array of row pointers and
 * stores it in *a.
 *
 * a    out-parameter that receives the array of dim row pointers
 * dim  number of rows and columns; for dim <= 0 nothing is allocated and
 *      *a is set to NULL
 *
 * An allocation failure is reported on stderr and the program is aborted.
 * The check is an explicit test rather than assert(), because assert() is
 * compiled out when NDEBUG is defined and needs <assert.h>.
 */
void allocdim2(double***a, int dim)
{
  int i;

  *a = NULL;
  if (dim <= 0)
    return;

  *a = (double**)malloc((size_t)dim * sizeof **a);
  if (*a == NULL)
  {
    fprintf(stderr, "allocdim2: out of memory (%d row pointers)\n", dim);
    abort();
  }

  for (i = 0; i < dim; ++i)
  {
    (*a)[i] = (double*)malloc((size_t)dim * sizeof *(*a)[i]);
    if ((*a)[i] == NULL)
    {
      fprintf(stderr, "allocdim2: out of memory (row %d of %d)\n", i, dim);
      abort();
    }
  }
}

Please post the complete command line options, including linker options, and the version of the compiler you are using (this is not the same as the CCS version).

Please post the output of the program when the problem is encountered. Your test requires strict equality of double values; when the values are not strictly equal, how far apart are they?

I'm a compiler guy - I'm not familiar with the terms "Vayu" or "AT70". Do you know what CPU is used in these devices? Do they have floating-point hardware?

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

I am sorry that the test code is not perfect, might be buggy and meaningless,

but that is not the point, I just want to reproduce the problem.

Please post the output of the program when the problem is encountered.

Your test requires strict equality of double values; when the values are not strictly equal, how far apart are they?

Are they allowed to be different when we use exactly the same inputs in consecutive runs?

the processor is xc5777, which looks like this, http://www.ti.com/lit/ds/symlink/tda2.pdf

It is a so-called ADAS system with multiple processors. I believe they do have floating-point hardware.

currently we only see this on this particular platform

The compiler version is 7.3.15.

c:/ti/c6000_cgt_7.3.15/bin/cl6x -c -Dcmdebugging -DcmNoMTSupport -D_DEBUG -DcmBuildingDLLs -DcmExport_cvtprea -DcmLibraryName=cvtprea -DTI_NDK_MAJOR_VERSION=2 -DTI_NDK_MINOR_VERSION=24 -D_TMS320C6x_PLUS_ -DcmTIUseNDK -DcmUseNetViaRPC -D_INCLUDE_IPv6_CODE -D_INCLUDE_NIMU_CODE -Dcmvayu -DcmNoImports -DcmLinkstatic -DcmBiosMajorVer=6 -DcmNoAsm -DcmNoInlineAsm -Ip:/5050/cvt/defs/ci_rtl/ti -I. -Ic:/ti/c6000_cgt_7.3.15/include -Ic:/VISION_SDK_02_03_00_00/ti_components/os_tools/windows/xdctools_3_25_05_94/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/os_tools/bios_6_37_03_30/packages -Ip:/5050/cvt/src/sys/sys/os/sysbios/xdc/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/networking/ndk_2_24_00_11/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/networking/ndk_2_24_00_11/packages/ti/ndk/inc -Ip:/5050/cvt/defs -Ip:/5050/../modapi -DcmTiSysBios -DcmTINDKVersion=200 -DcmExpertSensor -DcmNoExcept -D_DEBUG -mv6600 -q -rtti -ma -pden -pds694 -pds77 -pds177 -pdse230 -pdse1107 -pds179 -pds552 -pds97 -pds383 -pdse1 -pdse9 -pdse43 -pdse69 -pdse169 -pdse188 -pdse225 -pdse303 --abi=eabi -g -@ ****filenames****

Is there any more information you need ??

0 Tuotuo Li over 10 years ago in reply to Tuotuo Li

Prodigy 100 points

Sorry, I just posted the debugging version of the compiling command.

The release version is c:/ti/c6000_cgt_7.3.15/bin/cl6x -c -Dcmrelease -DNDEBUG -DcmNoMTSupport -DTI_NDK_MAJOR_VERSION=2 -DTI_NDK_MINOR_VERSION=24 -D_TMS320C6x_PLUS_ -DcmTIUseNDK -DcmUseNetViaRPC -D_INCLUDE_IPv6_CODE -D_INCLUDE_NIMU_CODE -Dcmvayu -DcmNoImports -DcmLinkstatic -DcmBiosMajorVer=6 -DcmNoAsm -DcmNoInlineAsm -Ip:/5050/cvt/defs/ci_rtl/ti -I. -Ic:/ti/c6000_cgt_7.3.15/include -Ic:/VISION_SDK_02_03_00_00/ti_components/os_tools/windows/xdctools_3_25_05_94/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/os_tools/bios_6_37_03_30/packages -Ip:/5050/cvt/src/sys/sys/os/sysbios/xdc/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/networking/ndk_2_24_00_11/packages -Ic:/VISION_SDK_02_03_00_00/ti_components/networking/ndk_2_24_00_11/packages/ti/ndk/inc -Ip:/5050/cvt/defs -Ip:/5050/../modapi -DcmTiSysBios -DcmTINDKVersion=200 -DcmExpertSensor -DcmNoExcept -DNDEBUG -mv6600 -q -rtti -ma -pden -pds694 -pds77 -pds177 -pdse230 -pdse1107 -pds179 -pds552 -pds97 -pds383 -pdse1 -pdse9 -pdse43 -pdse69 -pdse169 -pdse188 -pdse225 -pdse303 --abi=eabi --opt_level=2 -@ ***file name****

0 Archaeologist over 10 years ago in reply to Tuotuo Li

TI__Guru* 84225 points

Tuotuo Li said:

I am sorry that the test code is not perfect, might be buggy and meaningless,

but that is not the point, I just want to reproduce the problem.

It doesn't have to be perfect, I just wanted to make sure there wasn't something misleading going on, like writing through a NULL pointer.

Tuotuo Li said:

Please post the output of the program when the problem is encountered.

Your test requires strict equality of double values; when the values are not strictly equal, how far apart are they?

Are they allowed to be different when we use exactly the same inputs in consecutive runs?

Certain optimizations can make subtle differences in floating-point behavior, especially in --fp_mode=relaxed, which you are not using. I wanted to know what sort of difference there is in the float values so that I could start to guess where the problem might be occurring. If it's a very small difference, I would suspect over-aggressive optimizations. If it's a wild difference, I might look at array bounds reads or things like that. I don't really know exactly what I'm looking for until I can reproduce the problem. Unfortunately, I'm still not able to reproduce the problem with your options. I think we're going to need to find someone at TI with this hardware to try it.

One more question: are there interrupts occurring during this computation?

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

The problem is inside the following inner loop.
// Processes b[0..n-1] in place. For each i, the value stored at b[indx[i]]
// becomes the starting sum and b[i] is copied into b[indx[i]]. Until a
// nonzero sum has been seen nothing is subtracted; the index of the first
// nonzero sum is remembered in ii. On every later iteration the products
// a[i][j] * b[j] for j in [ii, i) are subtracted from the sum. The final
// sum is written to b[i].
void foo(double **a, int n, int *indx, double b[])
{ 
  int   i, ip, j; 
  // ii stays -1 until the first nonzero sum has been seen.
  int ii = -1; 
  // tmp is never used.
  double sum, tmp; 
  for (i = 0; i<n; i++) 
  { 
    // Take the starting value from b[ip] and move b[i] into its place.
    ip = indx[i]; 
    sum = b[ip]; 
    b[ip] = b[i]; 
    if (ii >= 0) 
    { 
      // This inner loop is the one reported to give inconsistent results
      // when compiled with optimization.
      for (j = ii; j < i; j++) 
      { 
        // asm(" "); 
        sum -= a[i][j] * b[j]; 
      } 
    } 
    else if (sum) 
      ii = i; 
    b[i] = sum; 
  } 
}
We have several different platforms, and the problem is only seen on this particular platform.
I am well aware that there are numerical approximation problems when performing floating-point operations.
But I believe if you are calling a function twice, with the same input, the result should at least be consistent.
I understand that this test must look strange to you
  foo(a,  dim,  index, res1); 
  foo(a,  dim,  index, res2);
You can also add a copy of the function named fooNoOpt, uncomment the // asm(" "); line in it, and compare the results.
 
One more question: are there interrupts occurring during this computation?
No, there is no interruption.

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

Archaeologist said:

Unfortunately, I'm still not able to reproduce the problem with your options.

I think we're going to need to find someone at TI with this hardware to try it.

I told you many times that we were only able to reproduce this problem on the Vayu platform!

Please do that, and ignore the buggy and problematic test that I wrote

and focus only on the loops inside the foo function.

Please let me know what you are going to do with the problem ASAP.

Thanks!!

0 Archaeologist over 10 years ago in reply to Tuotuo Li

TI__Guru* 84225 points

I have created defect report SDSCM00051127 to track this issue. I can't give any sort of estimate as to when it might be resolved until we understand what the problem is, and we most likely will not know what the problem is until we can make a reproducible test case.

I don't see anything wrong with the assembly code. This is not surprising, since you said it worked on one platform. However, if we simply show the assembly code to a Vayu expert, they will most likely agree that they can't see anything wrong with it. We will most likely need an executable test case.

The questions I have asked are to make sure we can generate an executable test case that demonstrates the problem, and also to try to characterize the failure mode. We can wait for a Vayu expert to become available, but we might be able to get a head start with some more details about how it fails.

Depending on the manner in which it fails, we might know where to start looking. We know that the values are unequal, but we don't know much else about it. Does the program work correctly if you single-step through the SPLOOP? How many of the results in the array are wrong? Are they at one end of the array or the other? Are they off by only a little or a lot? The more detail you can provide, the more likely we can reach a speedy resolution.

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

I can certainly send you the executable I compiled, if you need that.

I suspect that some pipeline optimization in that loop is messed up.

For example, there should be 100 additions in the loop, but only 96 take place.

We have also seen a problem in another code segment that does something like this:

/* Prints element idx of the array pointer on its own line using "%lf". */
void PrintElt(double* pointer, int idx)
{
  const double value = *(pointer + idx);

  printf("%lf\n", value);
}

// Second reproduction case. For every (ii, jj) in 0..9 x 0..9 the 100-element
// array vals is cleared, element ii+jj*10 is set to 255.0, and that same
// element is printed through PrintElt() once for each mdx in {-1, 0} that
// satisfies ii + mdx >= 0 (once when ii == 0, twice otherwise).
void foo()
{
double vals[100];
int i;
int ii,jj;
int mdx;

for (ii = 0; ii < 10; ++ii)
{
for (jj = 0; jj < 10; ++jj)
{
// Reset the whole array before marking a single element.
for (i = 0; i < 100; ++i)
{
vals[i] = 0.0;
}

vals[ii+jj*10] = 255.0;

// mdx takes the values -1 and 0.
for (mdx = -1; mdx <= 0; ++mdx)
{
if (ii + mdx >= 0)
{
// The element printed is the one that was just set to 255.0.
PrintElt(vals, ii+jj*10);
}
}
}
}
}

0 Archaeologist over 10 years ago in reply to Tuotuo Li

TI__Guru* 84225 points

Tuotuo Li said:
I can certainly send you the executable I compiled, if you need that.

Yes, that would be very helpful.

Tuotuo Li said:
there should be 100 add in the loop, but only 96 take place.

Okay, that's suspicious. I don't know of any known problems that are consistent with the conditions of your test case, but there are some other early-exit issues. I'll check them against your executable.

0 Tuotuo Li over 10 years ago in reply to Archaeologist

Prodigy 100 points

6443.testinv.c

Fullscreen

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#include "stdlib.h"
#include "stdio.h"
//  This is the function that have problem.
//  if uncomment line 21, then the result will be correct.
void foo(double **a, int n, int *indx, double b[])
{
  int   i, ip, j;
  int ii = -1;
  double sum, tmp;
  for (i = 0; i<n; i++)
  {
    ip = indx[i];
    sum = b[ip];
    b[ip] = b[i];
    if (ii >= 0)
    {
      for (j = ii; j < i; j++)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#include "stdlib.h"
#include "stdio.h"

//  This is the function that has the problem.
//  If line 21 (the asm(" ") statement inside the inner loop) is uncommented,
//  then the result will be correct.
//
//  Processes b[0..n-1] in place. For each i, the value stored at b[indx[i]]
//  becomes the starting sum and b[i] is copied into b[indx[i]]. Until a
//  nonzero sum has been seen nothing is subtracted; the index of the first
//  nonzero sum is remembered in ii. On every later iteration the products
//  a[i][j] * b[j] for j in [ii, i) are subtracted from the sum. The final
//  sum is written to b[i].
//
//  a     array of row pointers; row i is read at columns ii..i-1
//  n     number of elements to process
//  indx  indx[i] is the index into b used as source/destination in step i
//  b     input/output vector, overwritten in place
void foo(double **a, int n, int *indx, double b[])
{
  int   i, ip, j;
  int ii = -1;          // -1 until the first nonzero sum has been seen
  double sum, tmp;      // tmp is never used

  for (i = 0; i<n; i++)
  {
    // Take the starting value from b[ip] and move b[i] into its place.
    ip = indx[i];
    sum = b[ip];
    b[ip] = b[i];
    if (ii >= 0)
    {
      // This inner loop is the one reported to give inconsistent results
      // when compiled with optimization.
      for (j = ii; j < i; j++)
      {
        // asm(" ");
        sum -= a[i][j] * b[j];
      }
    }
    else if (sum)
      ii = i;           // first nonzero sum: later iterations start at this column
    b[i] = sum;
  }
}

/*
 * Allocates a dim x dim matrix of doubles as an array of row pointers and
 * stores it in *a. The caller releases it with freedim2().
 *
 * a    out-parameter that receives the array of dim row pointers
 * dim  number of rows and columns; for dim <= 0 nothing is allocated and
 *      *a is set to NULL
 *
 * An allocation failure is reported on stderr and the program is aborted, so
 * a failed malloc cannot be mistaken for a wrong numerical result. The check
 * is an explicit test rather than assert(), so it stays active in builds
 * that define NDEBUG.
 */
void allocdim2(double***a, int dim)
{
  int i;

  *a = NULL;
  if (dim <= 0)
    return;

  *a = (double**)malloc((size_t)dim * sizeof **a);
  if (*a == NULL)
  {
    fprintf(stderr, "allocdim2: out of memory (%d row pointers)\n", dim);
    abort();
  }

  for (i = 0; i < dim; ++i)
  {
    (*a)[i] = (double*)malloc((size_t)dim * sizeof *(*a)[i]);
    if ((*a)[i] == NULL)
    {
      fprintf(stderr, "allocdim2: out of memory (row %d of %d)\n", i, dim);
      abort();
    }
  }
}

/*
 * Releases a matrix stored as an array of row pointers: each of the dim rows
 * is freed first, then the array of row pointers itself.
 *
 * a    array of dim row pointers
 * dim  number of rows to free
 */
void freedim2(double** a, int dim)
{
  int row = 0;

  while (row < dim)
  {
    free(a[row]);
    row++;
  }

  free(a);
}

/*
 * Fills the dim x dim matrix a with pseudo-random values taken from rand().
 * Each element is (rand() % 32) / (0.41 + rand() % 1234).
 *
 * a    array of dim row pointers, each row holding at least dim doubles
 * dim  number of rows and columns to fill
 *
 * The two rand() calls are made in separate statements, numerator first.
 * When both calls sit in one expression the C standard leaves their order
 * unspecified, so different compilers (or optimization levels) could build
 * different matrices from the same seed.
 */
void initlizeMat(double ** a, int dim)
{
  int i, j;
  for (i = 0; i < dim; ++i)
  {
    for (j = 0; j < dim; ++j)
    {
      int numer = rand() % 32;
      int denom = rand() % 1234;
      a[i][j] = numer / (0.41 + denom);
    }
  }
}



/*
 * Compares the first dim elements of a and b for exact equality and prints
 * the difference a[k]-b[k] for every pair that does not match.
 *
 * Returns 1 when no pair differs (also when dim <= 0), otherwise 0. Unlike an
 * early-exit comparison, the whole range is always scanned so that every
 * mismatch is reported.
 */
int isEqual(double*a, double *b, int dim)
{
  int all_same = 1;
  int k = 0;

  while (k < dim)
  {
    if (!(a[k] == b[k]))
    {
      all_same = 0;
      printf("diff: %lf\n", a[k]-b[k]);
    }
    ++k;
  }

  return all_same;
}

/*
 * Runs foo() twice on identical inputs of size dim and reports whether the
 * two result vectors compare equal.
 *
 * A dim x dim matrix, a right-hand-side vector and an index vector are filled
 * from rand(); the vector is duplicated into res1 and res2, foo() is applied
 * to each copy, and "good!" or "BAD!!!!!!!" is printed depending on isEqual().
 *
 * dim  problem size (matrix is dim x dim, vectors have dim elements)
 *
 * Every allocation is verified before use; if one failed, "allocation
 * failed" is printed and the comparison is skipped, so running out of heap
 * is not reported as a numerical mismatch.
 */
void VerifyPipeLineError(int dim)
{
  double **a = NULL, *res1 = NULL, *res2 = NULL;
  int *index = NULL;
  int i, ok;

  allocdim2(&a, dim);
  res1 = (double*)malloc((size_t)dim * sizeof *res1);
  res2 = (double*)malloc((size_t)dim * sizeof *res2);
  index = (int*)malloc((size_t)dim * sizeof *index);

  /* With dim <= 0 nothing is dereferenced, so NULL pointers are harmless. */
  ok = (dim <= 0) || (a && res1 && res2 && index);
  for (i = 0; ok && i < dim; ++i)
    ok = (a[i] != NULL);

  if (!ok)
  {
    printf("allocation failed\n");
  }
  else
  {
    initlizeMat(a, dim);

    for (i = 0; i < dim; ++i)
    {
      /* Separate statements fix the order of the two rand() calls, which
         is unspecified when both appear in one expression. */
      int numer = rand() % 23;
      int denom = rand() % 1231;
      res1[i] = numer / (0.41 + denom);
      res2[i] = res1[i];
      index[i] = rand() % dim;
    }

    /* Same matrix, same index vector, identical copies of the vector. */
    foo(a,  dim,  index, res1);
    foo(a,  dim,  index, res2);

    if (!isEqual(res1, res2, dim))
      printf("BAD!!!!!!!\n");
    else
      printf("good!\n");
  }

  /* Free each row only when the row-pointer array itself exists. */
  if (a != NULL)
    freedim2(a, dim);
  free(res1);
  free(res2);
  free(index);
}

/*
 * Test driver: seeds rand() with 13 and runs VerifyPipeLineError() for the
 * sizes 100, 104, ..., 196, printing the size before each run.
 *
 * Declared as int main(void), the standard form, and returns 0 on completion.
 */
int main(void)
{
  int i;
  srand(13);
  for (i = 100; i < 200; i+=4)
  {
    printf("Dim :%d, ", i);
    VerifyPipeLineError(i);
  }
  return 0;
}

8640.testinv.7z

please see attachment

0 Archaeologist over 10 years ago in reply to Tuotuo Li

TI__Guru* 84225 points

We have confirmed that this is the same issue as SDSCM00051143, a silicon defect. The defect can occur when an SPLOOP containing multi-cycle instructions (such as floating-point instructions) is interrupted. TI is developing a compiler work-around for this issue.

Code Composer Studio™︎

Code Composer Studio forum

compiler optimization issue