Hello,
the compilation of a module that implements the SHA-256 algorithm takes an incredibly long time with CGT 5.2.15. The same source code could be compiled within seconds when using CGT 6.4.1 or GCC (and x86 as a target).
With CGT 5.2.15, the compilation finishes after about 2 hours when the algorithm is wrapped in a for loop. When the algorithm was fully unrolled, the compilation had not finished after 30 hours, at which point I had to turn off the computer.
The code follows:
/**
 * @file
 * @brief Basic types definitions
 * @par "File:" types.h
 * @par "Charset:" UTF-8
 *
 * Provides the project's fixed-width unsigned integer aliases and the
 * LITTLE_ENDIAN / BIG_ENDIAN / BYTE_ORDER macros used for conditional
 * compilation.
 */

//---------------------------------------------------------------------------
// EC1 H1 Protection against multiple header inclusion
//---------------------------------------------------------------------------
#ifndef TYPES_H
#define TYPES_H

//---------------------------------------------------------------------------
// EC1 H1.1 System headers inclusions
//---------------------------------------------------------------------------
#include <limits.h> // UINT_MAX, ULONG_MAX

//---------------------------------------------------------------------------
// EC1 H1.2 User headers inclusions
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
// EC1 H2 Global symbolic constants definitions
//---------------------------------------------------------------------------
// The endianness tags are only defined when the platform headers included
// so far have not provided their own.
#if !defined(LITTLE_ENDIAN)
#  define LITTLE_ENDIAN (0x3412u)
#endif

#if !defined(BIG_ENDIAN)
#  define BIG_ENDIAN (0x1234u)
#endif

// BYTE_ORDER must expand to one of the two tags above. If it stays
// undefined, every "#if (BYTE_ORDER == ...)" test silently compares against 0.
#if !defined(BYTE_ORDER)
#  if defined(__TMS320C2000__)
#    define BYTE_ORDER LITTLE_ENDIAN
#  elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
     // GCC-compatible compilers publish the target byte order themselves.
#    if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#      define BYTE_ORDER BIG_ENDIAN
#    else
#      define BYTE_ORDER LITTLE_ENDIAN
#    endif
#  endif
#endif

//---------------------------------------------------------------------------
// EC1 H3 Global parametrized macros definitions
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
// EC1 H4 Global types definitions
//---------------------------------------------------------------------------
/**
 * Unsigned number based on `unsigned int`: 16 bits wide on the TMS320C2000,
 * possibly wider on targets whose `unsigned int` is larger.
 */
typedef unsigned int U16;

/**
 * Unsigned, exactly 32-bits wide number.
 * `unsigned long` is kept wherever it is 32 bits wide; on targets where it
 * is wider, `unsigned int` is used instead so that sizeof(U32) based
 * arithmetic stays valid.
 */
#if (ULONG_MAX == 0xFFFFFFFFu)
typedef unsigned long int U32;
#elif (UINT_MAX == 0xFFFFFFFFu)
typedef unsigned int U32;
#else
#  error "types.h: no exactly 32-bit wide unsigned integer type available for U32"
#endif

//---------------------------------------------------------------------------
// EC1 H5 Global variables declarations
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
// EC1 H6 Global inline functions definitions
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
// EC1 H7 Global functions prototypes
//---------------------------------------------------------------------------

#endif
/** * @file * @brief SHA256 implementation * @par "File:" sha256.h * @par "Charset:" UTF-8 */ //--------------------------------------------------------------------------- // EC1 H1 Protection against multiple header inclusion //--------------------------------------------------------------------------- #ifndef SHA256_H #define SHA256_H //--------------------------------------------------------------------------- // EC1 H1.1 System headers inclusions //--------------------------------------------------------------------------- #include <limits.h> // CHAR_BIT #include <stdlib.h> // size_t //--------------------------------------------------------------------------- // EC1 H1.2 User headers inclusions //--------------------------------------------------------------------------- #include "types.h" //--------------------------------------------------------------------------- // EC1 H2 Global symbolic constants definitions //--------------------------------------------------------------------------- #define SHA256_HASH_WORDS (8u) #define SHA256_BUFFER_LEN (16u) #define SHA256_BUFFER_LEN_BYTE ((SHA256_BUFFER_LEN * sizeof(U32)) * ((size_t) CHAR_BIT / 8u)) #define SHA256_DATA_LEN_BYTE (8u) // data length is stored as a 64-bit big endian word //--------------------------------------------------------------------------- // EC1 H3 Global parametrized macros definitions //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // EC1 H4 Global types definitions //--------------------------------------------------------------------------- /** * @brief SHA-256 context */ typedef struct { U32 hash[SHA256_HASH_WORDS]; U32 buffer[SHA256_BUFFER_LEN]; U32 total_len; U32 buffer_ptr; } SHA256_CONTEXT; //--------------------------------------------------------------------------- // EC1 H5 Global variables declarations 
//--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // EC1 H6 Global inline functions definitions //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // EC1 H7 Global functions prototypes //--------------------------------------------------------------------------- /** * @brief Initializes SHA-256 context * @param[out] Pointer to SHA256_CONTEXT */ void SHA256_init(SHA256_CONTEXT *pSC); /** * @brief Processes input data * @param[in,out] pSC Pointer to SHA256_CONTEXT * @param[in] pData Pointer to data * @param[in] len Length of data as a number of octets */ void SHA256_update(SHA256_CONTEXT *pSC, const void *pData, U32 len); /** * @brief Finishes the SHA256 calculation. * @param[in,out] pSC Pointer to SHA256_CONTEXT */ void SHA256_finalize(SHA256_CONTEXT *pSC); #endif
/** * @file * @brief SHA256 implementation * @par "File:" sha256.c * @par "Charset:" UTF-8 */ //--------------------------------------------------------------------------- // EC1 C0 System headers inclusions //--------------------------------------------------------------------------- #if !defined(__TMS320C2000__) #include <string.h> // memcpy #endif #include <stdlib.h> // size_t #include <limits.h> // CHAR_BIT //--------------------------------------------------------------------------- // EC1 C1 User headers inclusions //--------------------------------------------------------------------------- #include "sha256.h" #include "types.h" //--------------------------------------------------------------------------- // EC1 C2 Private symbolic constants definitions //--------------------------------------------------------------------------- #define SHA256_BLOCK_SIZE_BITS (512u) #define SHA256_ROUNDS (64u) #define H0 (0x6A09E667u) #define H1 (0xBB67AE85u) #define H2 (0x3C6EF372u) #define H3 (0xA54FF53Au) #define H4 (0x510E527Fu) #define H5 (0x9B05688Cu) #define H6 (0x1F83D9ABu) #define H7 (0x5BE0CD19u) //--------------------------------------------------------------------------- // EC1 C3 Private parametrized macros definitions //--------------------------------------------------------------------------- #define SHR32(x, c) ((U32)(x) >> (c)) #define ROTR32(x, c) (((U32)(x) >> (c)) | (U32)((U32)(x) << (((size_t) sizeof(U32) * CHAR_BIT) - (c)))) #define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) #define SIGMA0(x) (ROTR32((x), 2u) ^ ROTR32((x), 13u) ^ ROTR32((x), 22u)) #define SIGMA1(x) (ROTR32((x), 6u) ^ ROTR32((x), 11u) ^ ROTR32((x), 25u)) #define sigma0(x) (ROTR32((x), 7u) ^ ROTR32((x), 18u) ^ SHR32((x), 3u)) #define sigma1(x) (ROTR32((x), 17u) ^ ROTR32((x), 19u) ^ SHR32((x), 10u)) #define M(w0, w14, w9, w1) w0 = sigma1(w14) + (w9) + sigma0(w1) + (w0); #define EXPAND \ M(w0 , w14, w9 , w1 ) \ M(w1 , w15, 
w10, w2 ) \ M(w2 , w0 , w11, w3 ) \ M(w3 , w1 , w12, w4 ) \ M(w4 , w2 , w13, w5 ) \ M(w5 , w3 , w14, w6 ) \ M(w6 , w4 , w15, w7 ) \ M(w7 , w5 , w0 , w8 ) \ M(w8 , w6 , w1 , w9 ) \ M(w9 , w7 , w2 , w10) \ M(w10, w8 , w3 , w11) \ M(w11, w9 , w4 , w12) \ M(w12, w10, w5 , w13) \ M(w13, w11, w6 , w14) \ M(w14, w12, w7 , w15) \ M(w15, w13, w8 , w0 ) #define F(w) \ T1 = h + SIGMA1(e) + Ch(e, f, g) + K[jj] + (w); \ T2 = SIGMA0(a) + Maj(a, b, c); \ h = g; \ g = f; \ f = e; \ e = d + T1; \ d = c; \ c = b; \ b = a; \ a = T1 + T2; \ jj++; //--------------------------------------------------------------------------- // EC1 C4 Private types definitions //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // EC1 C5A Global variables definitions //--------------------------------------------------------------------------- //--------------------------------------------------------------------------- // EC1 C5B Private variables definitions //--------------------------------------------------------------------------- static const U32 padding[SHA256_BLOCK_SIZE_BITS / (sizeof(U32) * CHAR_BIT)] = { #if (BYTE_ORDER == BIG_ENDIAN) 0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u #else 0x00000080u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u #endif }; static const U32 K[SHA256_ROUNDS] = { 0x428A2F98u, 0x71374491u, 0xB5C0FBCFu, 0xE9B5DBA5u, 0x3956C25Bu, 0x59F111F1u, 0x923F82A4u, 0xAB1C5ED5u, 0xD807AA98u, 0x12835B01u, 0x243185BEu, 0x550C7DC3u, 0x72BE5D74u, 0x80DEB1FEu, 0x9BDC06A7u, 0xC19BF174u, 0xE49B69C1u, 0xEFBE4786u, 0x0FC19DC6u, 0x240CA1CCu, 0x2DE92C6Fu, 0x4A7484AAu, 0x5CB0A9DCu, 
0x76F988DAu, 0x983E5152u, 0xA831C66Du, 0xB00327C8u, 0xBF597FC7u, 0xC6E00BF3u, 0xD5A79147u, 0x06CA6351u, 0x14292967u, 0x27B70A85u, 0x2E1B2138u, 0x4D2C6DFCu, 0x53380D13u, 0x650A7354u, 0x766A0ABBu, 0x81C2C92Eu, 0x92722C85u, 0xA2BFE8A1u, 0xA81A664Bu, 0xC24B8B70u, 0xC76C51A3u, 0xD192E819u, 0xD6990624u, 0xF40E3585u, 0x106AA070u, 0x19A4C116u, 0x1E376C08u, 0x2748774Cu, 0x34B0BCB5u, 0x391C0CB3u, 0x4ED8AA4Au, 0x5B9CCA4Fu, 0x682E6FF3u, 0x748F82EEu, 0x78A5636Fu, 0x84C87814u, 0x8CC70208u, 0x90BEFFFAu, 0xA4506CEBu, 0xBEF9A3F7u, 0xC67178F2u }; //--------------------------------------------------------------------------- // EC1 C6 Private inline functions definitions //--------------------------------------------------------------------------- #if (BYTE_ORDER == BIG_ENDIAN) static inline U32 switchEndianity(U32 data) { return data; } #else static inline U32 switchEndianity(U32 data) { return (data >> 24u) | (U32)(data << 24u) | (((data & 0x00FF0000u) >> 8u) | (U32)((data & 0x0000FF00u) << 8u)); } #endif //--------------------------------------------------------------------------- // EC1 C7 Private functions prototypes //--------------------------------------------------------------------------- static void SHA256_count(SHA256_CONTEXT *pSC); //--------------------------------------------------------------------------- // EC1 C8A Global functions definitions //--------------------------------------------------------------------------- /** * @brief Initializes SAH-256 context * @param[out] Pointer to SHA256_CONTEXT */ void SHA256_init(SHA256_CONTEXT *pSC) { pSC->hash[0] = H0; pSC->hash[1] = H1; pSC->hash[2] = H2; pSC->hash[3] = H3; pSC->hash[4] = H4; pSC->hash[5] = H5; pSC->hash[6] = H6; pSC->hash[7] = H7; pSC->buffer_ptr = 0u; pSC->total_len = 0u; } /** * @brief Processes input data * @param[in,out] pSC Pointer to SHA256_CONTEXT * @param[in] pData Pointer to data * @param[in] len Length of data as a number of octets */ void SHA256_update(SHA256_CONTEXT *pSC, const void *pData, U32 
len) { U16 bfree; #if defined(__TMS320C2000__) U16 bfreeacc = 0u; #endif pSC->total_len += len; while(len != 0u) { bfree = (U16)(SHA256_BUFFER_LEN_BYTE - pSC->buffer_ptr); if(bfree > len) { #if defined(__TMS320C2000__) U16 ii; for(ii = 0u; ii < len; ii++) { __byte((int *) &pSC->buffer[pSC->buffer_ptr], ii) = __byte((int *) pData, bfreeacc + ii); } #else (void) memcpy(&pSC->buffer[pSC->buffer_ptr], pData, (size_t) len); #endif pSC->buffer_ptr += (U32) len; len = 0u; // this will end the while-loop } else { #if defined(__TMS320C2000__) U16 ii; for(ii = 0u; ii < bfree; ii++) { __byte((int *) &pSC->buffer[pSC->buffer_ptr], ii) = __byte((int *) pData, bfreeacc + ii); } bfreeacc += bfree; #else (void) memcpy(&pSC->buffer[pSC->buffer_ptr], pData, bfree); pData = &((U8 *) pData)[bfree]; #endif pSC->buffer_ptr = 0u; SHA256_count(pSC); len -= bfree; } } } /** * @brief Finishes the SHA256 calculation. * @param[in,out] pSC Pointer to SHA256_CONTEXT */ void SHA256_finalize(SHA256_CONTEXT *pSC) { U32 bitlenBE = switchEndianity(pSC->total_len * 8u); // length in bits as a big-endian number if(pSC->buffer_ptr < (SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE)) { #if defined(__TMS320C2000__) U16 ii; for(ii = 0u; ii < ((SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE) - (size_t) pSC->buffer_ptr); ii++) { __byte((int *) &pSC->buffer, pSC->buffer_ptr + ii) = __byte((int *) padding, ii); } #else (void) memcpy(&((U8 *) pSC->buffer)[pSC->buffer_ptr], padding, ((SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE) - (size_t) pSC->buffer_ptr)); #endif } else { #if defined(__TMS320C2000__) U16 ii; for(ii = 0u; ii < (SHA256_BUFFER_LEN_BYTE - (size_t) pSC->buffer_ptr); ii++) { __byte((int *) &pSC->buffer, pSC->buffer_ptr + ii) = __byte((int *) padding, ii); } #else (void) memcpy(&((U8 *) pSC->buffer)[pSC->buffer_ptr], padding, SHA256_BUFFER_LEN_BYTE - (size_t) pSC->buffer_ptr); #endif SHA256_count(pSC); #if defined(__TMS320C2000__) for(ii = 0u; ii < (SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE); 
ii++) { __byte((int *) pSC->buffer, ii) = 0; } #else (void) memset(pSC->buffer, 0, SHA256_BUFFER_LEN_BYTE - SHA256_DATA_LEN_BYTE); #endif } pSC->buffer[SHA256_BUFFER_LEN - 2u] = 0u; pSC->buffer[SHA256_BUFFER_LEN - 1u] = bitlenBE; SHA256_count(pSC); } //--------------------------------------------------------------------------- // EC1 C8B Private functions definitions //--------------------------------------------------------------------------- /** * @brief */ static void SHA256_count(SHA256_CONTEXT *pSC) { #if (BYTE_ORDER == LITTLE_ENDIAN) U32 w0 = switchEndianity(pSC->buffer[ 0]); U32 w1 = switchEndianity(pSC->buffer[ 1]); U32 w2 = switchEndianity(pSC->buffer[ 2]); U32 w3 = switchEndianity(pSC->buffer[ 3]); U32 w4 = switchEndianity(pSC->buffer[ 4]); U32 w5 = switchEndianity(pSC->buffer[ 5]); U32 w6 = switchEndianity(pSC->buffer[ 6]); U32 w7 = switchEndianity(pSC->buffer[ 7]); U32 w8 = switchEndianity(pSC->buffer[ 8]); U32 w9 = switchEndianity(pSC->buffer[ 9]); U32 w10 = switchEndianity(pSC->buffer[10]); U32 w11 = switchEndianity(pSC->buffer[11]); U32 w12 = switchEndianity(pSC->buffer[12]); U32 w13 = switchEndianity(pSC->buffer[13]); U32 w14 = switchEndianity(pSC->buffer[14]); U32 w15 = switchEndianity(pSC->buffer[15]); #else U32 w0 = pSC->buffer[ 0]; U32 w1 = pSC->buffer[ 1]; U32 w2 = pSC->buffer[ 2]; U32 w3 = pSC->buffer[ 3]; U32 w4 = pSC->buffer[ 4]; U32 w5 = pSC->buffer[ 5]; U32 w6 = pSC->buffer[ 6]; U32 w7 = pSC->buffer[ 7]; U32 w8 = pSC->buffer[ 8]; U32 w9 = pSC->buffer[ 9]; U32 w10 = pSC->buffer[10]; U32 w11 = pSC->buffer[11]; U32 w12 = pSC->buffer[12]; U32 w13 = pSC->buffer[13]; U32 w14 = pSC->buffer[14]; U32 w15 = pSC->buffer[15]; #endif U32 a = pSC->hash[0]; U32 b = pSC->hash[1]; U32 c = pSC->hash[2]; U32 d = pSC->hash[3]; U32 e = pSC->hash[4]; U32 f = pSC->hash[5]; U32 g = pSC->hash[6]; U32 h = pSC->hash[7]; U32 T1; U32 T2; U16 ii; U16 jj; for(ii = 0u, jj = 0u; ii < 4u; ii++) { if(ii != 0u) { EXPAND } F(w0) F(w1) F(w2) F(w3) F(w4) F(w5) F(w6) F(w7) F(w8) 
F(w9) F(w10) F(w11) F(w12) F(w13) F(w14) F(w15) } pSC->hash[0] += a; pSC->hash[1] += b; pSC->hash[2] += c; pSC->hash[3] += d; pSC->hash[4] += e; pSC->hash[5] += f; pSC->hash[6] += g; pSC->hash[7] += h; }
# Builds sha256.obj with the TI C2000 code generation tools and prints a
# timestamp before and after the compiler run.
# Swap the two CCDIR lines to select the other compiler version.
# CCDIR=C:\Program Files (x86)\Texas Instruments\C2000 Code Generation Tools 6.4.1
CCDIR=C:\Program Files (x86)\Texas Instruments\C2000 Code Generation Tools 5.2.15
C2000_INCLUDE_DIR="$(CCDIR)\include"
C2000_LIBRARY_DIR="$(CCDIR)\lib"
CC="$(CCDIR)\bin\cl2000.exe"
H2000="$(CCDIR)\bin\hex2000.exe"
PERL=perl

SRCS=sha256.c
# Headers included by the sources; listed so that editing them triggers a rebuild
HDRS=sha256.h types.h
OBJS=$(SRCS:.c=.obj)
ASMS=$(SRCS:.c=.asm)

CFLAGS=-c -pdr -pdv -as -g -ss -os -pr -O0 -ml -v28 --float_support=fpu32
INCLUDE=-I${C2000_INCLUDE_DIR}
INCLIBS=-I${C2000_LIBRARY_DIR}

.PHONY: all clean

all: $(OBJS)

# $< is the source file only, so the headers are never passed to the compiler
%.obj: %.c $(HDRS)
	@$(PERL) -e "@list=($$sec,$$min,$$hour,$$day,$$mon,$$year,$$wday,$$yday,$$isdst)=localtime(time);$$year+=1900;printf \"Compilation started at %%4d-%%02d-%%02d %%d:%%02d:%%02d\n\", $$year, $$mon + 1, $$day, $$hour, $$min, $$sec"
	$(CC) $(CFLAGS) $(INCLUDE) $<
	@$(PERL) -e "@list=($$sec,$$min,$$hour,$$day,$$mon,$$year,$$wday,$$yday,$$isdst)=localtime(time);$$year+=1900;printf \"Compilation finished at %%4d-%%02d-%%02d %%d:%%02d:%%02d\n\", $$year, $$mon + 1, $$day, $$hour, $$min, $$sec"

clean:
	@del $(OBJS) >NUL 2>&1
	@del $(ASMS) >NUL 2>&1