Other Parts Discussed in Thread: SHA-256
Hello,
the compilation of a module that implements the SHA-256 algorithm takes an incredibly long time with CGT 5.2.15. The same source code could be compiled within seconds when using CGT 6.4.1 or GCC (and x86 as a target).
With CGT 5.2.15, the compilation finishes after approximately 2 hours when the algorithm is wrapped in a for loop. When the algorithm was fully unrolled, the compilation had still not finished after 30 hours, at which point I had to turn off the computer.
The code follows:
/**
 * @file
 * @brief Basic types definitions
 * @par "File:" types.h
 * @par "Charset:" UTF-8
 */
//---------------------------------------------------------------------------
// EC1 H1 Protection against multiple header inclusion
//---------------------------------------------------------------------------
#ifndef TYPES_H
#define TYPES_H
//---------------------------------------------------------------------------
// EC1 H1.1 System headers inclusions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H1.2 User headers inclusions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H2 Global symbolic constants definitions
//---------------------------------------------------------------------------
// Byte-order tags. They are only defined here when the toolchain has not
// already provided them, so an existing system definition always wins.
#if !defined(LITTLE_ENDIAN)
# define LITTLE_ENDIAN (0x3412u)
#endif
#if !defined(BIG_ENDIAN)
# define BIG_ENDIAN (0x1234u)
#endif
// BYTE_ORDER selects the endianness-dependent code paths of the users of this
// header. If it stayed undefined, "#if (BYTE_ORDER == X)" would silently
// evaluate as "0 == X" and different files could pick contradictory branches,
// so it is derived from the compiler's predefined macros where possible.
#if !defined(BYTE_ORDER)
# if defined(__TMS320C2000__)
#  define BYTE_ORDER LITTLE_ENDIAN
# elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#  define BYTE_ORDER LITTLE_ENDIAN
# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#  define BYTE_ORDER BIG_ENDIAN
# endif
#endif
//---------------------------------------------------------------------------
// EC1 H3 Global parametrized macros definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H4 Global types definitions
//---------------------------------------------------------------------------
typedef unsigned int U16; /**< Unsigned, 16-bits wide number */
typedef unsigned long int U32; /**< Unsigned, 32-bits wide number */
//---------------------------------------------------------------------------
// EC1 H5 Global variables declarations
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H6 Global inline functions definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H7 Global functions prototypes
//---------------------------------------------------------------------------
#endif
/**
* @file
* @brief SHA256 implementation
* @par "File:" sha256.h
* @par "Charset:" UTF-8
*/
//---------------------------------------------------------------------------
// EC1 H1 Protection against multiple header inclusion
//---------------------------------------------------------------------------
#ifndef SHA256_H
#define SHA256_H
//---------------------------------------------------------------------------
// EC1 H1.1 System headers inclusions
//---------------------------------------------------------------------------
#include <limits.h> // CHAR_BIT
#include <stdlib.h> // size_t
//---------------------------------------------------------------------------
// EC1 H1.2 User headers inclusions
//---------------------------------------------------------------------------
#include "types.h"
//---------------------------------------------------------------------------
// EC1 H2 Global symbolic constants definitions
//---------------------------------------------------------------------------
// Number of U32 words in SHA256_CONTEXT::hash
#define SHA256_HASH_WORDS (8u)
// Number of U32 words in SHA256_CONTEXT::buffer (one input block)
#define SHA256_BUFFER_LEN (16u)
// Size of the input block in octets. sizeof() counts in units of char, so the
// result is scaled by CHAR_BIT / 8 to stay correct on targets whose char is
// wider than 8 bits.
#define SHA256_BUFFER_LEN_BYTE ((SHA256_BUFFER_LEN * sizeof(U32)) * ((size_t) CHAR_BIT / 8u))
#define SHA256_DATA_LEN_BYTE (8u) // data length is stored as a 64-bit big endian word
//---------------------------------------------------------------------------
// EC1 H3 Global parametrized macros definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H4 Global types definitions
//---------------------------------------------------------------------------
/**
* @brief SHA-256 context
*
* State shared by SHA256_init(), SHA256_update() and SHA256_finalize().
*/
typedef struct {
U32 hash[SHA256_HASH_WORDS]; /**< Running hash value; every processed block is added into it */
U32 buffer[SHA256_BUFFER_LEN]; /**< Input block currently being assembled */
U32 total_len; /**< Sum of all @c len values passed to SHA256_update(), in octets */
U32 buffer_ptr; /**< Fill level of @c buffer; compared against SHA256_BUFFER_LEN_BYTE, i.e. counted in octets */
} SHA256_CONTEXT;
//---------------------------------------------------------------------------
// EC1 H5 Global variables declarations
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H6 Global inline functions definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 H7 Global functions prototypes
//---------------------------------------------------------------------------
/**
* @brief Initializes SHA-256 context
* @param[out] pSC Pointer to SHA256_CONTEXT
*/
void SHA256_init(SHA256_CONTEXT *pSC);
/**
* @brief Processes input data
* @param[in,out] pSC Pointer to SHA256_CONTEXT
* @param[in] pData Pointer to data
* @param[in] len Length of data as a number of octets
*/
void SHA256_update(SHA256_CONTEXT *pSC, const void *pData, U32 len);
/**
* @brief Finishes the SHA256 calculation.
*
* Pads the buffered data, appends the message length and processes the
* remaining block(s); the result is accumulated in @c pSC->hash.
* @param[in,out] pSC Pointer to SHA256_CONTEXT
*/
void SHA256_finalize(SHA256_CONTEXT *pSC);
#endif
/**
* @file
* @brief SHA256 implementation
* @par "File:" sha256.c
* @par "Charset:" UTF-8
*/
//---------------------------------------------------------------------------
// EC1 C0 System headers inclusions
//---------------------------------------------------------------------------
#if !defined(__TMS320C2000__)
#include <string.h> // memcpy
#endif
#include <stdlib.h> // size_t
#include <limits.h> // CHAR_BIT
//---------------------------------------------------------------------------
// EC1 C1 User headers inclusions
//---------------------------------------------------------------------------
#include "sha256.h"
#include "types.h"
//---------------------------------------------------------------------------
// EC1 C2 Private symbolic constants definitions
//---------------------------------------------------------------------------
// Size of one input block in bits; used to dimension the padding table
#define SHA256_BLOCK_SIZE_BITS (512u)
// Number of rounds per block; also the number of entries in K[]
#define SHA256_ROUNDS (64u)
// Initial hash value, loaded into SHA256_CONTEXT::hash[0..7] by SHA256_init()
#define H0 (0x6A09E667u)
#define H1 (0xBB67AE85u)
#define H2 (0x3C6EF372u)
#define H3 (0xA54FF53Au)
#define H4 (0x510E527Fu)
#define H5 (0x9B05688Cu)
#define H6 (0x1F83D9ABu)
#define H7 (0x5BE0CD19u)
//---------------------------------------------------------------------------
// EC1 C3 Private parametrized macros definitions
//---------------------------------------------------------------------------
// Logical right shift of x (converted to U32) by c bits.
#define SHR32(x, c) ((U32)(x) >> (c))
// Rotate x right by c bits within the width of U32 (sizeof(U32) * CHAR_BIT).
// c must be greater than 0 and smaller than that width, otherwise the left
// shift count would be out of range.
#define ROTR32(x, c) (((U32)(x) >> (c)) | (U32)((U32)(x) << (((size_t) sizeof(U32) * CHAR_BIT) - (c))))
// Bitwise "choose": takes the bit of y where x is 1 and the bit of z where x is 0.
#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
// Bitwise "majority" of x, y and z.
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
// Mixing functions applied to the working variables in every round (see F).
#define SIGMA0(x) (ROTR32((x), 2u) ^ ROTR32((x), 13u) ^ ROTR32((x), 22u))
#define SIGMA1(x) (ROTR32((x), 6u) ^ ROTR32((x), 11u) ^ ROTR32((x), 25u))
// Mixing functions applied to the message schedule words (see M).
#define sigma0(x) (ROTR32((x), 7u) ^ ROTR32((x), 18u) ^ SHR32((x), 3u))
#define sigma1(x) (ROTR32((x), 17u) ^ ROTR32((x), 19u) ^ SHR32((x), 10u))
// Replaces one schedule word in place:
//   w0 = sigma1(w14) + w9 + sigma0(w1) + w0
// The expansion is a complete statement (it carries its own ';').
#define M(w0, w14, w9, w1) w0 = sigma1(w14) + (w9) + sigma0(w1) + (w0);
// Advances all sixteen schedule words w0..w15 by one generation. The order of
// the M() invocations matters: later ones read words already updated by
// earlier ones. Expects locals named w0..w15 in the invoking scope.
#define EXPAND \
M(w0 , w14, w9 , w1 ) \
M(w1 , w15, w10, w2 ) \
M(w2 , w0 , w11, w3 ) \
M(w3 , w1 , w12, w4 ) \
M(w4 , w2 , w13, w5 ) \
M(w5 , w3 , w14, w6 ) \
M(w6 , w4 , w15, w7 ) \
M(w7 , w5 , w0 , w8 ) \
M(w8 , w6 , w1 , w9 ) \
M(w9 , w7 , w2 , w10) \
M(w10, w8 , w3 , w11) \
M(w11, w9 , w4 , w12) \
M(w12, w10, w5 , w13) \
M(w13, w11, w6 , w14) \
M(w14, w12, w7 , w15) \
M(w15, w13, w8 , w0 )
// One compression round using schedule word w and round constant K[jj].
// Expects locals a..h, T1, T2 and jj in the invoking scope and increments jj.
// Expands to several statements, so it must not be used as the unbraced body
// of an if/else/loop.
#define F(w) \
T1 = h + SIGMA1(e) + Ch(e, f, g) + K[jj] + (w); \
T2 = SIGMA0(a) + Maj(a, b, c); \
h = g; \
g = f; \
f = e; \
e = d + T1; \
d = c; \
c = b; \
b = a; \
a = T1 + T2; \
jj++;
//---------------------------------------------------------------------------
// EC1 C4 Private types definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 C5A Global variables definitions
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
// EC1 C5B Private variables definitions
//---------------------------------------------------------------------------
// Padding pattern of one full block (SHA256_BLOCK_SIZE_BITS bits): a single
// 0x80 octet followed by zeros. SHA256_finalize() copies a prefix of it behind
// the buffered data. The initializer depends on BYTE_ORDER so that the 0x80
// octet is the first octet of the array in memory on either byte order.
static const U32 padding[SHA256_BLOCK_SIZE_BITS / (sizeof(U32) * CHAR_BIT)] = {
#if (BYTE_ORDER == BIG_ENDIAN)
0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
#else
0x00000080u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
#endif
};
// Round constants, one per round; indexed by jj inside the F() macro.
// The values are the SHA-256 constants K0..K63 as listed in FIPS 180-4.
static const U32 K[SHA256_ROUNDS] = {
0x428A2F98u, 0x71374491u, 0xB5C0FBCFu, 0xE9B5DBA5u,
0x3956C25Bu, 0x59F111F1u, 0x923F82A4u, 0xAB1C5ED5u,
0xD807AA98u, 0x12835B01u, 0x243185BEu, 0x550C7DC3u,
0x72BE5D74u, 0x80DEB1FEu, 0x9BDC06A7u, 0xC19BF174u,
0xE49B69C1u, 0xEFBE4786u, 0x0FC19DC6u, 0x240CA1CCu,
0x2DE92C6Fu, 0x4A7484AAu, 0x5CB0A9DCu, 0x76F988DAu,
0x983E5152u, 0xA831C66Du, 0xB00327C8u, 0xBF597FC7u,
0xC6E00BF3u, 0xD5A79147u, 0x06CA6351u, 0x14292967u,
0x27B70A85u, 0x2E1B2138u, 0x4D2C6DFCu, 0x53380D13u,
0x650A7354u, 0x766A0ABBu, 0x81C2C92Eu, 0x92722C85u,
0xA2BFE8A1u, 0xA81A664Bu, 0xC24B8B70u, 0xC76C51A3u,
0xD192E819u, 0xD6990624u, 0xF40E3585u, 0x106AA070u,
0x19A4C116u, 0x1E376C08u, 0x2748774Cu, 0x34B0BCB5u,
0x391C0CB3u, 0x4ED8AA4Au, 0x5B9CCA4Fu, 0x682E6FF3u,
0x748F82EEu, 0x78A5636Fu, 0x84C87814u, 0x8CC70208u,
0x90BEFFFAu, 0xA4506CEBu, 0xBEF9A3F7u, 0xC67178F2u
};
//---------------------------------------------------------------------------
// EC1 C6 Private inline functions definitions
//---------------------------------------------------------------------------
/**
 * @brief Converts a 32-bit value between native and big-endian byte order
 *
 * On a big-endian build the value is returned untouched; otherwise its four
 * octets are reversed. The conversion is its own inverse.
 * @param[in] data Value to convert
 * @return Converted value
 */
static inline U32 switchEndianity(U32 data)
{
#if (BYTE_ORDER == BIG_ENDIAN)
    return data;
#else
    const U32 top_to_bottom = data >> 24u;
    const U32 bottom_to_top = (U32)(data << 24u);
    const U32 upper_mid_down = (data & 0x00FF0000u) >> 8u;
    const U32 lower_mid_up = (U32)((data & 0x0000FF00u) << 8u);

    return top_to_bottom | bottom_to_top | (upper_mid_down | lower_mid_up);
#endif
}
//---------------------------------------------------------------------------
// EC1 C7 Private functions prototypes
//---------------------------------------------------------------------------
static void SHA256_count(SHA256_CONTEXT *pSC);
//---------------------------------------------------------------------------
// EC1 C8A Global functions definitions
//---------------------------------------------------------------------------
/**
 * @brief Initializes SHA-256 context
 *
 * Loads the initial hash value H0..H7 and resets the buffer fill level and
 * the running message length. The contents of the block buffer are left as
 * they are.
 * @param[out] pSC Pointer to SHA256_CONTEXT
 */
void SHA256_init(SHA256_CONTEXT *pSC)
{
    static const U32 initial_hash[SHA256_HASH_WORDS] = {
        H0, H1, H2, H3, H4, H5, H6, H7
    };
    U16 idx;

    for(idx = 0u; idx < SHA256_HASH_WORDS; idx++)
    {
        pSC->hash[idx] = initial_hash[idx];
    }

    pSC->total_len = 0u;
    pSC->buffer_ptr = 0u;
}
/**
 * @brief Processes input data
 *
 * Appends the data to the block buffer and runs the compression function each
 * time the buffer becomes full. May be called repeatedly with consecutive
 * pieces of one message.
 *
 * @note pSC->buffer_ptr is an offset in octets, so the destination is always
 *       addressed octet-wise from the start of pSC->buffer (never as a U32
 *       word index).
 * @note On the C2000 path the source position is tracked in a U16, so a
 *       single call is presumably limited to 65535 octets there - confirm
 *       against the callers.
 *
 * @param[in,out] pSC Pointer to SHA256_CONTEXT
 * @param[in] pData Pointer to data
 * @param[in] len Length of data as a number of octets
 */
void SHA256_update(SHA256_CONTEXT *pSC, const void *pData, U32 len)
{
#if defined(__TMS320C2000__)
    U16 consumed = 0u; /* octets of pData already copied into the buffer */
#else
    const unsigned char *pSrc = (const unsigned char *) pData;
#endif

    pSC->total_len += len;

    while(len != 0u)
    {
        /* Current fill level and the room left in the block, in octets. */
        const U16 offset = (U16) pSC->buffer_ptr;
        const U16 bfree = (U16)(SHA256_BUFFER_LEN_BYTE - offset);
        /* Take what fits: either the rest of the input or the rest of the block. */
        const U16 chunk = (bfree > len) ? (U16) len : bfree;
#if defined(__TMS320C2000__)
        U16 ii;

        /* No octet-addressable memory on this target: copy via __byte(). */
        for(ii = 0u; ii < chunk; ii++)
        { __byte((int *) pSC->buffer, offset + ii) = __byte((int *) pData, consumed + ii); }
        consumed += chunk;
#else
        (void) memcpy(&((unsigned char *) pSC->buffer)[offset], pSrc, (size_t) chunk);
        pSrc += chunk;
#endif
        len -= chunk;

        if(chunk == bfree)
        {
            /* Block is complete: hash it and start filling from the beginning. */
            pSC->buffer_ptr = 0u;
            SHA256_count(pSC);
        }
        else
        {
            /* Input exhausted before the block filled up; len is 0 now. */
            pSC->buffer_ptr += (U32) chunk;
        }
    }
}
/**
 * @brief Finishes the SHA256 calculation.
 *
 * Appends the padding (0x80 followed by zeros) and the message length in bits
 * as a 64-bit big-endian number, then processes the final block. If the
 * buffered data leaves no room for the length field, the padded block is
 * processed first and the length goes into an additional, otherwise zero
 * block. The result is accumulated in pSC->hash[0..7].
 *
 * @note Both halves of the 64-bit bit length are stored, so the value stays
 *       correct when total_len * 8 no longer fits into 32 bits. This assumes
 *       U32 is exactly 32 bits wide, as its typedef documents.
 *
 * @param[in,out] pSC Pointer to SHA256_CONTEXT
 */
void SHA256_finalize(SHA256_CONTEXT *pSC)
{
    /* Bit length = total_len * 8, split into two big-endian 32-bit words. */
    const U32 bitlenLoBE = switchEndianity((U32)(pSC->total_len << 3u));
    const U32 bitlenHiBE = switchEndianity(pSC->total_len >> 29u);
    /* Octet offset of the 64-bit length field inside the block. */
    const U16 lenFieldPos = (U16)(SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE);
    const U16 offset = (U16) pSC->buffer_ptr;
    /* Pad up to the length field when it still fits, else up to the block end. */
    const U16 padLen = (offset < lenFieldPos) ? (U16)(lenFieldPos - offset)
                                              : (U16)(SHA256_BUFFER_LEN_BYTE - offset);
#if defined(__TMS320C2000__)
    U16 ii;

    for(ii = 0u; ii < padLen; ii++)
    { __byte((int *) pSC->buffer, offset + ii) = __byte((int *) padding, ii); }
#else
    (void) memcpy(&((unsigned char *) pSC->buffer)[offset], padding, (size_t) padLen);
#endif

    if(offset >= lenFieldPos)
    {
        /* No room for the length: hash the padded block, then clear the part
           of the next block that precedes the length field. */
        SHA256_count(pSC);
#if defined(__TMS320C2000__)
        for(ii = 0u; ii < lenFieldPos; ii++)
        { __byte((int *) pSC->buffer, ii) = 0; }
#else
        (void) memset(pSC->buffer, 0, (size_t) lenFieldPos);
#endif
    }

    pSC->buffer[SHA256_BUFFER_LEN - 2u] = bitlenHiBE;
    pSC->buffer[SHA256_BUFFER_LEN - 1u] = bitlenLoBE;
    SHA256_count(pSC);
}
//---------------------------------------------------------------------------
// EC1 C8B Private functions definitions
//---------------------------------------------------------------------------
/**
 * @brief Runs the SHA-256 compression function on the buffered block
 *
 * Reads the sixteen words of pSC->buffer as big-endian values, performs the
 * 64 rounds (4 passes of 16 rounds, re-expanding the message schedule before
 * every pass except the first) and adds the resulting working variables into
 * pSC->hash. pSC->buffer itself is not modified.
 *
 * The words are always loaded through switchEndianity(), which is the
 * identity on big-endian builds; this keeps the byte-order decision in one
 * place instead of repeating the BYTE_ORDER test here.
 *
 * @param[in,out] pSC Pointer to SHA256_CONTEXT
 */
static void SHA256_count(SHA256_CONTEXT *pSC)
{
    /* Rolling 16-word message schedule, named w0..w15 for EXPAND / F. */
    U32 w0 = switchEndianity(pSC->buffer[ 0]);
    U32 w1 = switchEndianity(pSC->buffer[ 1]);
    U32 w2 = switchEndianity(pSC->buffer[ 2]);
    U32 w3 = switchEndianity(pSC->buffer[ 3]);
    U32 w4 = switchEndianity(pSC->buffer[ 4]);
    U32 w5 = switchEndianity(pSC->buffer[ 5]);
    U32 w6 = switchEndianity(pSC->buffer[ 6]);
    U32 w7 = switchEndianity(pSC->buffer[ 7]);
    U32 w8 = switchEndianity(pSC->buffer[ 8]);
    U32 w9 = switchEndianity(pSC->buffer[ 9]);
    U32 w10 = switchEndianity(pSC->buffer[10]);
    U32 w11 = switchEndianity(pSC->buffer[11]);
    U32 w12 = switchEndianity(pSC->buffer[12]);
    U32 w13 = switchEndianity(pSC->buffer[13]);
    U32 w14 = switchEndianity(pSC->buffer[14]);
    U32 w15 = switchEndianity(pSC->buffer[15]);
    /* Working variables, seeded from the current hash value. */
    U32 a = pSC->hash[0];
    U32 b = pSC->hash[1];
    U32 c = pSC->hash[2];
    U32 d = pSC->hash[3];
    U32 e = pSC->hash[4];
    U32 f = pSC->hash[5];
    U32 g = pSC->hash[6];
    U32 h = pSC->hash[7];
    U32 T1;
    U32 T2;
    U16 ii; /* pass counter, 16 rounds per pass */
    U16 jj; /* round counter, index into K[]; incremented by F() */

    for(ii = 0u, jj = 0u; ii < 4u; ii++)
    {
        /* The first pass uses the input words as they are. */
        if(ii != 0u)
        {
            EXPAND
        }
        F(w0)
        F(w1)
        F(w2)
        F(w3)
        F(w4)
        F(w5)
        F(w6)
        F(w7)
        F(w8)
        F(w9)
        F(w10)
        F(w11)
        F(w12)
        F(w13)
        F(w14)
        F(w15)
    }

    /* Fold the result of this block into the running hash. */
    pSC->hash[0] += a;
    pSC->hash[1] += b;
    pSC->hash[2] += c;
    pSC->hash[3] += d;
    pSC->hash[4] += e;
    pSC->hash[5] += f;
    pSC->hash[6] += g;
    pSC->hash[7] += h;
}
# Root of the code generation tools installation. Swap the commented-out line
# with the active one to build with the other tool version.
# CCDIR=C:\Program Files (x86)\Texas Instruments\C2000 Code Generation Tools 6.4.1
CCDIR=C:\Program Files (x86)\Texas Instruments\C2000 Code Generation Tools 5.2.15
# Paths below are quoted because CCDIR contains spaces.
C2000_INCLUDE_DIR="$(CCDIR)\include"
C2000_LIBRARY_DIR="$(CCDIR)\lib"
CC="$(CCDIR)\bin\cl2000.exe"
H2000="$(CCDIR)\bin\hex2000.exe"
# Interpreter used only to print the start/finish timestamps of the build.
PERL=perl
SRCS=sha256.c
# Object and assembly file names derived from SRCS; both are removed by "clean".
OBJS=$(SRCS:.c=.obj)
ASMS=$(SRCS:.c=.asm)
# Compiler switches; see the compiler user's guide for the meaning of each one.
CFLAGS=-c -pdr -pdv -as -g -ss -os -pr -O0 -ml -v28 --float_support=fpu32
# Header search path passed to the compiler.
INCLUDE=-I${C2000_INCLUDE_DIR}
# Library search path; not referenced by the rules of this makefile.
INCLIBS=-I${C2000_LIBRARY_DIR}
# Default target: compile all sources.
all: $(OBJS)

# Compile the sources and print wall-clock timestamps before and after, so the
# compilation time can be read from the build log.
# Every recipe line must start with a TAB character.
$(OBJS): $(SRCS)
	@$(PERL) -e "@list=($$sec,$$min,$$hour,$$day,$$mon,$$year,$$wday,$$yday,$$isdst)=localtime(time);$$year+=1900;printf \"Compilation started at %%4d-%%02d-%%02d %%d:%%02d:%%02d\n\", $$year, $$mon + 1, $$day, $$hour, $$min, $$sec"
	$(CC) $(CFLAGS) $(INCLUDE) $^
	@$(PERL) -e "@list=($$sec,$$min,$$hour,$$day,$$mon,$$year,$$wday,$$yday,$$isdst)=localtime(time);$$year+=1900;printf \"Compilation finished at %%4d-%%02d-%%02d %%d:%%02d:%%02d\n\", $$year, $$mon + 1, $$day, $$hour, $$min, $$sec"

# "all" and "clean" do not produce files of the same name.
.PHONY: all clean

# Remove generated object and assembly files; errors from "del" are discarded.
clean:
	@del $(OBJS) >NUL 2>&1
	@del $(ASMS) >NUL 2>&1