This thread has been locked.
If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.
Hi,
I want to use the combination of DMA and MPY32 of MSP430F5529 for matrix multiplication. I have achieved the task and the code gives correct outputs for smaller dimensions of input matrices like (1x3)*(3x2). But for my application, I want to do the dot product of (1x128) and (128x5) matrix. In this case, the outputs are not as expected and I get only one element out of the 5 which is erroneous. I'm using the MPY32 in MACS mode for 8x8 signed multiplication. Please find the code below. It would be of great help if someone can find the bug in the code. Thanks in advance!
// Input vector (operand 1) passed to MACS_8X8 as pHidden.
int8_t a[128] = {3,8,-1,7,9,12,3,10,13,11,4,10,2,12,10,2,-3,7,11,13,4,7,8,7,9,6,9,7,2,7,6,-2,9,0,13,3,5,11,-6,12,5,8,3,11,11,10,1,9,11,1,9,4,7,1,7,-2,7,11,14,11,3,9,5,8,9,9,-3,7,8,12,9,11,10,9,9,13,10,7,3,-5,11,12,11,7,7,4,2,9,2,6,12,9,7,9,9,7,8,4,9,6,7,6,7,11,9,9,3,12,0,11,1,9,8,10,8,2,9,9,1,5,3,13,5,8,-1,9,12,10};
// Weights (operand 2) passed to MACS_8X8 as pWeight; the loop below advances
// through this array in steps of hiddenDimenson elements per output.
int8_t beta[640]={-9,-4,-8,4,5,8,8,5,-9,-9,-9,-8,7,5,7,1,-4,2,1,4,8,8,8,-6,-8,8,-2,-2,8,3,-6,-9,-5,2,2,-3,-3,-1,7,3,7,7,-9,4,1,2,-1,-2,-3,-8,-8,-6,3,0,-9,2,3,0,-5,9,8,-6,-9,3,4,9,-8,9,-8,-9,2,-2,-8,-9,2,4,6,0,9,-2,-6,7,2,-9,-4,-6,-7,-3,1,-8,-1,1,5,-1,3,4,5,9,4,-1,-1,-8,-9,5,6,5,-3,7,9,-9,6,3,-6,-6,-2,6,-4,2,9,-7,5,9,0,-3,-2,-1,-5,6,8,6,-5,3,9,6,-4,8,-3,7,-5,1,4,-6,0,3,-3,-7,3,1,-5,-9,7,-6,4,8,-7,-1,-8,-9,2,8,6,4,-3,5,9,-9,-7,8,-9,7,2,7,-8,-9,-2,4,0,7,9,7,-2,8,5,9,6,0,-6,-3,-4,0,5,-6,8,9,6,-3,-9,1,-6,-2,9,1,6,6,2,-7,-3,-5,9,2,-5,-4,-4,6,-4,-2,-6,-8,8,1,-7,0,-2,7,7,-9,0,0,0,-8,8,1,-6,1,-1,-4,-6,-8,-6,2,-6,8,0,5,9,1,6,4,4,2,4,5,-9,8,-7,-7,-5,8,6,4,3,0,-4,-7,4,4,1,-5,7,6,-4,-5,-7,9,-9,-2,7,9,1,8,-1,8,0,-5,9,1,-4,7,-8,3,9,-2,-7,-1,9,6,-2,-8,-8,2,9,-7,-5,9,0,3,-4,-3,-8,-2,3,3,7,0,-4,-2,-1,9,-7,8,-3,0,1,7,0,-2,0,-8,4,-6,8,7,7,-5,-1,-3,-3,-2,5,9,-4,-4,6,9,5,4,-9,1,-4,0,-7,0,4,-3,0,2,-5,9,5,2,5,5,1,-6,0,-8,-2,7,-1,-9,-5,8,-6,4,4,-5,-1,4,7,5,0,0,3,6,6,8,-1,2,6,3,0,-5,5,8,-5,6,0,-4,2,2,5,-6,5,2,0,2,-9,-7,7,-8,2,-5,-6,7,-4,-8,-8,-2,-9,-2,5,-1,-2,4,1,3,3,8,2,0,3,-4,4,-6,2,4,-3,-7,3,5,-2,5,-8,3,-6,0,2,3,5,6,9,-8,4,-4,-9,2,-8,-3,1,-6,-9,-4,-3,-1,-6,-1,3,-7,-6,-7,-2,2,-5,-1,-4,9,-5,-3,-3,4,8,9,3,-1,4,-1,4,2,5,2,-8,-9,-5,7,4,3,-9,-8,-8,-4,0,0,6,1,4,-9,8,-5,4,-4,6,-7,-5,-3,0,-8,4,-5,-5,-9,3,2,-1,-1,0,-1,0,9,-2,-5,6,-9,4,0,-7,2,6,-8,6,8,-8,-3,1,3,-9,-5,1,-5,8,-1,5,-6,-4,8,0,-1,-2,-6,-9,-2,0,7,-2,3,9,5,-8,-5,5,-6,0,6,7,-9,-1,4,-1,2,7,1,-5,9,9,5,-9,5,-6,4,2,1,-6,-8,-3,-8,-2,5,-4,-4,-2,-4,8,-7,-3,7,0,-8,4,9,-2,9,2,7,-3,0,2,-1,-8,-7,-9,-1,-1,-6,-5,1,-7,-9,-9,-5,7,1,7,1,8,-6,1,-5,4,-4,-7,-2};
uint16_t hid_dim = 128;  // length of a[]; passed as hiddenDimenson
uint8_t outputDim = 5;   // number of results; passed as outputDimenson
int16_t g[5];            // result buffer passed as pOutput

// Entry point: stops the watchdog and runs one matrix-vector product.
// NOTE(review): MACS_8X8 and initDMAsforMPY8X8 are called before any
// declaration is visible in this snippet — presumably prototypes exist
// elsewhere; confirm.
int main(void) {
    WDTCTL = WDTPW | WDTHOLD; // Stop watchdog timer
    MACS_8X8(a,beta,g,hid_dim,outputDim); //operand 1, operand 2, result, no. of rows in operand 2, no. of elements in result
}

// Computes outputDimenson results using DMA channels 0-2 and the MPY32
// multiplier in MACS mode.
//   pHidden         - input vector, handed to initDMAsforMPY8X8 as pInput
//   pWeight         - weights; advanced by hiddenDimenson for each further output
//   pOutput         - destination for the 16-bit results
//   hiddenDimenson  - number of elements transferred per result
//   outputDimenson  - number of results
// The first result is started inside initDMAsforMPY8X8; this loop re-arms the
// DMA for the remaining outputDimenson-1 results.
// NOTE(review): nothing in this loop waits for the previous DMA sequence to
// finish before the addresses and result registers are rewritten and the
// channels re-enabled. The reply in this thread attributes the wrong results
// to a timing issue and adds a wait on DMA1CTL's DMAIFG.
void MACS_8X8(int8_t *pHidden, int8_t *pWeight, int16_t *pOutput, uint16_t hiddenDimenson, uint8_t outputDimenson) {
    initDMAsforMPY8X8(hiddenDimenson, pHidden, pWeight, pOutput);
    int i;
    for (i=0; i<outputDimenson-1; i++) {
        pOutput += 1; // result is 16-bit, so the int16_t pointer advances by one element
        pWeight += hiddenDimenson;
        __data16_write_addr((unsigned short) &DMA2DA,(unsigned long) pOutput);
        __data16_write_addr((unsigned short) &DMA0SA, (unsigned long) pWeight);
        // restart DMA
        RESHI=0;
        RESLO=0;
        MACS_L=0x00;
        DMA1CTL &= ~DMAIFG;
        DMA0CTL |= DMAEN; // enable DMA channel 0
        DMA1CTL |= DMAEN; // enable DMA channel 1
        DMA2CTL |= DMAEN; // enable DMA channel 2
        OP2 = 0x00; // start MACS to get first DMA trigger
    }
}

// Configures DMA channels 0, 1 and 2 for the first result and starts the
// first MACS sequence.
//   inputVectDimenson - transfer count programmed into DMA0SZ and DMA1SZ
//   pInput            - source address for channel 1 (destination &OP2)
//   pWeight           - source address for channel 0 (destination &MACS)
//   pOutput           - destination address for channel 2 (source &RESLO)
void initDMAsforMPY8X8(uint16_t inputVectDimenson, int8_t *pInput, int8_t *pWeight, int16_t *pOutput) {
    // DMA channel 0: from pWeight (increment) to &MACS, single transfer per trigger, byte transfer
    // triggered by DMA_TRIGGERSOURCE_29 (MPY ready signal), transfers inputVectDimenson times in total
    DMACTL0 = DMA0TSEL_29; // MPY ready signal triggered
    DMA0CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE; // Single transfer (DMADT_0), inc src (DMASRCINCR_3), interrupt disabled
    // unchanged dst (DMADSTINCR_0), byte to byte, rising edge trigger (default)
    DMA0SZ = inputVectDimenson; // transfer size
    __data16_write_addr((unsigned short) &DMA0SA, (unsigned long) pWeight); // Source address
    __data16_write_addr((unsigned short) &DMA0DA,(unsigned long) &MACS); // Destination address

    // DMA channel 1: from pInput (increment) to &OP2, single transfer per trigger
    // triggered by DMA_TRIGGERSOURCE_29 (MPY ready signal), transfers inputVectDimenson times in total
    DMACTL0 |= DMA1TSEL_29; // MPY ready signal triggered for channel 1
    DMA1CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE; // Single transfer (DMADT_0), inc src (DMASRCINCR_3), interrupt disabled
    // unchanged dst (DMADSTINCR_0), byte to byte, rising edge trigger (default)
    DMA1SZ = inputVectDimenson; // transfer size
    __data16_write_addr((unsigned short) &DMA1SA, (unsigned long) pInput); // Source address
    __data16_write_addr((unsigned short) &DMA1DA,(unsigned long) &OP2); // Destination address

    // DMA channel 2: from &RESLO (increment) to pOutput (increment), block transfer
    // triggered by DMA_TRIGGERSOURCE_30 (DMA1IFG signal)
    // NOTE(review): the original comments here disagreed with the code — they
    // mentioned "2 transfers ... size of 2" while DMA2SZ is set to 1, called
    // trigger 30 "MPY ready", and named DMADSTINCR_0 while the code uses
    // DMADSTINCR_3. Confirm the intended configuration against the datasheet.
    DMACTL1 |= DMA2TSEL_30; // trigger select for channel 2
    DMA2CTL = DMADT_1+DMASRCINCR_3+DMADSTINCR_3; // Block transfer (DMADT_1), inc src (DMASRCINCR_3), interrupt disabled here (DMAIE is set below)
    // inc dst (DMADSTINCR_3), word to word, rising edge trigger (default)
    DMA2SZ = 1; // transfer size
    __data16_write_addr((unsigned short) &DMA2SA, (unsigned long) &RESLO); // Source address
    __data16_write_addr((unsigned short) &DMA2DA,(unsigned long) pOutput); // Destination address

    // initiate MPY by setting output and OP1 registers to be 0x0000
    RESLO=0;
    RESHI=0;
    MACS_L=0x00;
    // NOTE(review): the channel 2 interrupt is enabled here, but no DMA ISR and
    // no GIE enable are visible in this snippet — confirm this is intended.
    DMA2CTL |= DMAIE;
    DMA0CTL |= DMAEN; // enable DMA channel 0
    DMA1CTL |= DMAEN; // enable DMA channel 1
    DMA2CTL |= DMAEN; // enable DMA channel 2
    OP2 = 0x00; // start MACS to get first DMA trigger
}
Hi Vignesh,
It seems like you have a timing issue. I modified your code and this seems to work. I was able to do this with just 2 DMA channels. Let me know if this helps.
#include "driverlib.h"

// Input vector (operand 1) passed to MACS_8X8 as pHidden.
int8_t a[128] = {3,8,-1,7,9,12,3,10,13,11,4,10,2,12,10,2,-3,7,11,13,4,7,8,7,9,6,9,7,2,7,6,-2,9,0,13,3,5,11,-6,12,5,8,3,11,11,10,1,9,11,1,9,4,7,1,7,-2,7,11,14,11,3,9,5,8,9,9,-3,7,8,12,9,11,10,9,9,13,10,7,3,-5,11,12,11,7,7,4,2,9,2,6,12,9,7,9,9,7,8,4,9,6,7,6,7,11,9,9,3,12,0,11,1,9,8,10,8,2,9,9,1,5,3,13,5,8,-1,9,12,10};
// Weights (operand 2): one run of hid_dim consecutive elements per output.
int8_t beta[640]={-9,-4,-8,4,5,8,8,5,-9,-9,-9,-8,7,5,7,1,-4,2,1,4,8,8,8,-6,-8,8,-2,-2,8,3,-6,-9,-5,2,2,-3,-3,-1,7,3,7,7,-9,4,1,2,-1,-2,-3,-8,-8,-6,3,0,-9,2,3,0,-5,9,8,-6,-9,3,4,9,-8,9,-8,-9,2,-2,-8,-9,2,4,6,0,9,-2,-6,7,2,-9,-4,-6,-7,-3,1,-8,-1,1,5,-1,3,4,5,9,4,-1,-1,-8,-9,5,6,5,-3,7,9,-9,6,3,-6,-6,-2,6,-4,2,9,-7,5,9,0,-3,-2,-1,-5,6,8,6,-5,3,9,6,-4,8,-3,7,-5,1,4,-6,0,3,-3,-7,3,1,-5,-9,7,-6,4,8,-7,-1,-8,-9,2,8,6,4,-3,5,9,-9,-7,8,-9,7,2,7,-8,-9,-2,4,0,7,9,7,-2,8,5,9,6,0,-6,-3,-4,0,5,-6,8,9,6,-3,-9,1,-6,-2,9,1,6,6,2,-7,-3,-5,9,2,-5,-4,-4,6,-4,-2,-6,-8,8,1,-7,0,-2,7,7,-9,0,0,0,-8,8,1,-6,1,-1,-4,-6,-8,-6,2,-6,8,0,5,9,1,6,4,4,2,4,5,-9,8,-7,-7,-5,8,6,4,3,0,-4,-7,4,4,1,-5,7,6,-4,-5,-7,9,-9,-2,7,9,1,8,-1,8,0,-5,9,1,-4,7,-8,3,9,-2,-7,-1,9,6,-2,-8,-8,2,9,-7,-5,9,0,3,-4,-3,-8,-2,3,3,7,0,-4,-2,-1,9,-7,8,-3,0,1,7,0,-2,0,-8,4,-6,8,7,7,-5,-1,-3,-3,-2,5,9,-4,-4,6,9,5,4,-9,1,-4,0,-7,0,4,-3,0,2,-5,9,5,2,5,5,1,-6,0,-8,-2,7,-1,-9,-5,8,-6,4,4,-5,-1,4,7,5,0,0,3,6,6,8,-1,2,6,3,0,-5,5,8,-5,6,0,-4,2,2,5,-6,5,2,0,2,-9,-7,7,-8,2,-5,-6,7,-4,-8,-8,-2,-9,-2,5,-1,-2,4,1,3,3,8,2,0,3,-4,4,-6,2,4,-3,-7,3,5,-2,5,-8,3,-6,0,2,3,5,6,9,-8,4,-4,-9,2,-8,-3,1,-6,-9,-4,-3,-1,-6,-1,3,-7,-6,-7,-2,2,-5,-1,-4,9,-5,-3,-3,4,8,9,3,-1,4,-1,4,2,5,2,-8,-9,-5,7,4,3,-9,-8,-8,-4,0,0,6,1,4,-9,8,-5,4,-4,6,-7,-5,-3,0,-8,4,-5,-5,-9,3,2,-1,-1,0,-1,0,9,-2,-5,6,-9,4,0,-7,2,6,-8,6,8,-8,-3,1,3,-9,-5,1,-5,8,-1,5,-6,-4,8,0,-1,-2,-6,-9,-2,0,7,-2,3,9,5,-8,-5,5,-6,0,6,7,-9,-1,4,-1,2,7,1,-5,9,9,5,-9,5,-6,4,2,1,-6,-8,-3,-8,-2,5,-4,-4,-2,-4,8,-7,-3,7,0,-8,4,9,-2,9,2,7,-3,0,2,-1,-8,-7,-9,-1,-1,-6,-5,1,-7,-9,-9,-5,7,1,7,1,8,-6,1,-5,4,-4,-7,-2};
uint16_t hid_dim = 128;  // length of a[]; passed as hiddenDimenson
uint8_t outputDim = 5;   // number of results; passed as outputDimenson
int16_t g[5];            // result buffer passed as pOutput

// Prototypes: both helpers are defined after main(), so they must be declared
// before the first call.
void MACS_8X8(int8_t *pHidden, int8_t *pWeight, int16_t *pOutput, uint16_t hiddenDimenson, uint8_t outputDimenson);
void initDMAsforMPY8X8(uint16_t inputVectDimenson, int8_t *pInput, int8_t *pWeight, int16_t *pOutput);

// Entry point: stops the watchdog and runs one matrix-vector product.
int main(void) {
    WDTCTL = WDTPW | WDTHOLD; // Stop watchdog timer
    MACS_8X8(a,beta,g,hid_dim,outputDim); // operand 1, operand 2, result, no. of rows in operand 2, no. of elements in result
}

// Computes outputDimenson results with two DMA channels feeding the MPY32
// multiplier in MACS mode; the CPU copies each finished result out of RESLO.
//   pHidden         - input vector (source for DMA channel 1 -> OP2)
//   pWeight         - weights; advanced by hiddenDimenson after each output
//   pOutput         - receives one int16_t per output
//   hiddenDimenson  - number of elements accumulated per output
//   outputDimenson  - number of outputs to compute
void MACS_8X8(int8_t *pHidden, int8_t *pWeight, int16_t *pOutput, uint16_t hiddenDimenson, uint8_t outputDimenson) {
    uint8_t i; // loop counter (was used without a declaration)

    initDMAsforMPY8X8(hiddenDimenson, pHidden, pWeight, pOutput);

    for (i = 0; i < outputDimenson; i++) {
        // Point channel 0 at the weights for this output, then step to the next run.
        __data16_write_addr((unsigned short) &DMA0SA, (unsigned long) pWeight);
        pWeight += hiddenDimenson;

        // restart DMA: clear the result registers and OP1, clear the
        // channel 1 completion flag, then re-enable both channels
        RESHI=0;
        RESLO=0;
        MACS_L=0x00;
        DMA1CTL &= ~DMAIFG;
        DMA0CTL |= DMAEN; // enable DMA channel 0
        DMA1CTL |= DMAEN; // enable DMA channel 1
        OP2 = 0x00; // start MACS to get first DMA trigger

        // Wait until channel 1 has completed all its transfers before reading
        // the result; this wait is what the original post was missing.
        while(!(DMA1CTL & DMAIFG)); // could go into LPM0 and add DMA interrupt for ch 1 IFG.
        *pOutput++ = RESLO;
        __no_operation();
    }
}

// Configures DMA channels 0 and 1 to feed the multiplier. Nothing is enabled
// or started here; MACS_8X8 arms the channels for every output.
//   inputVectDimenson - transfer count programmed into DMA0SZ and DMA1SZ
//   pInput            - source address for channel 1 (destination &OP2)
//   pWeight           - initial source address for channel 0 (destination &MACS)
//   pOutput           - unused; kept so the signature matches the original
//                       three-channel version, where DMA channel 2 copied the
//                       result
void initDMAsforMPY8X8(uint16_t inputVectDimenson, int8_t *pInput, int8_t *pWeight, int16_t *pOutput) {
    (void)pOutput; // results are now copied by the CPU in MACS_8X8

    // DMA channel 0: from pWeight (increment) to &MACS, single transfer per trigger, byte transfer
    // triggered by DMA_TRIGGERSOURCE_29 (MPY ready signal), transfers inputVectDimenson times in total
    DMACTL0 = DMA0TSEL_29; // MPY ready signal triggered
    DMA0CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE; // Single transfer (DMADT_0), inc src (DMASRCINCR_3), interrupt disabled
    // unchanged dst (DMADSTINCR_0), byte to byte, rising edge trigger (default)
    DMA0SZ = inputVectDimenson; // transfer size
    __data16_write_addr((unsigned short) &DMA0SA, (unsigned long) pWeight); // Source address
    __data16_write_addr((unsigned short) &DMA0DA,(unsigned long) &MACS); // Destination address

    // DMA channel 1: from pInput (increment) to &OP2, single transfer per trigger
    // triggered by DMA_TRIGGERSOURCE_29 (MPY ready signal), transfers inputVectDimenson times in total
    DMACTL0 |= DMA1TSEL_29; // MPY ready signal triggered for channel 1
    DMA1CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE; // Single transfer (DMADT_0), inc src (DMASRCINCR_3), interrupt disabled
    // unchanged dst (DMADSTINCR_0), byte to byte, rising edge trigger (default)
    DMA1SZ = inputVectDimenson; // transfer size
    __data16_write_addr((unsigned short) &DMA1SA, (unsigned long) pInput); // Source address
    __data16_write_addr((unsigned short) &DMA1DA,(unsigned long) &OP2); // Destination address
}
Hi LaFollette,
Thanks for the code; it works well. I have a question about using only 2 DMA channels instead of 3. The instruction
*pOutput++ = RESLO;
will be executed by the processor? I don't want the CPU to be turned on while performing this calculation. Please clarify in this regard.
Thanks
Vignesh,
Yes, that code will be executed by the CPU, but only once the MAC result is ready. The RESLO register is updated every time a MAC operation completes, but we only care about the final MAC result. It's not necessary to use a third DMA channel for the result; in fact, configuring DMA to copy a single result is less efficient than using the CPU.
The following code will configure DMA to perform MAC, enter LPM0, wake up the CPU when the MAC is complete, copy the result and repeat until all results have been calculated. I also included a reference function that computes the same answer using C code.
#include <msp430.h>
#include <stdint.h>

// Truncates an address to its low 16 bits for use with __data16_write_addr.
#define DATA16_ADDR(addr) ((uint16_t)(((uintptr_t)addr) & 0xffff))
#define DIM_HIDDEN 128  // length of the input vector a[]
#define DIM_OUTPUT 5    // number of results

// Input vector, passed as srcA.
int8_t a[DIM_HIDDEN] = {3,8,-1,7,9,12,3,10,13,11,4,10,2,12,10,2,-3,7,11,13,4,7,8,7,9,6,9,7,2,7,6,-2,9,0,13,3,5,11,-6,12,5,8,3,11,11,10,1,9,11,1,9,4,7,1,7,-2,7,11,14,11,3,9,5,8,9,9,-3,7,8,12,9,11,10,9,9,13,10,7,3,-5,11,12,11,7,7,4,2,9,2,6,12,9,7,9,9,7,8,4,9,6,7,6,7,11,9,9,3,12,0,11,1,9,8,10,8,2,9,9,1,5,3,13,5,8,-1,9,12,10};
// Weights, passed as srcB: DIM_OUTPUT consecutive runs of DIM_HIDDEN elements.
int8_t beta[DIM_HIDDEN*DIM_OUTPUT]={-9,-4,-8,4,5,8,8,5,-9,-9,-9,-8,7,5,7,1,-4,2,1,4,8,8,8,-6,-8,8,-2,-2,8,3,-6,-9,-5,2,2,-3,-3,-1,7,3,7,7,-9,4,1,2,-1,-2,-3,-8,-8,-6,3,0,-9,2,3,0,-5,9,8,-6,-9,3,4,9,-8,9,-8,-9,2,-2,-8,-9,2,4,6,0,9,-2,-6,7,2,-9,-4,-6,-7,-3,1,-8,-1,1,5,-1,3,4,5,9,4,-1,-1,-8,-9,5,6,5,-3,7,9,-9,6,3,-6,-6,-2,6,-4,2,9,-7,5,9,0,-3,-2,-1,-5,6,8,6,-5,3,9,6,-4,8,-3,7,-5,1,4,-6,0,3,-3,-7,3,1,-5,-9,7,-6,4,8,-7,-1,-8,-9,2,8,6,4,-3,5,9,-9,-7,8,-9,7,2,7,-8,-9,-2,4,0,7,9,7,-2,8,5,9,6,0,-6,-3,-4,0,5,-6,8,9,6,-3,-9,1,-6,-2,9,1,6,6,2,-7,-3,-5,9,2,-5,-4,-4,6,-4,-2,-6,-8,8,1,-7,0,-2,7,7,-9,0,0,0,-8,8,1,-6,1,-1,-4,-6,-8,-6,2,-6,8,0,5,9,1,6,4,4,2,4,5,-9,8,-7,-7,-5,8,6,4,3,0,-4,-7,4,4,1,-5,7,6,-4,-5,-7,9,-9,-2,7,9,1,8,-1,8,0,-5,9,1,-4,7,-8,3,9,-2,-7,-1,9,6,-2,-8,-8,2,9,-7,-5,9,0,3,-4,-3,-8,-2,3,3,7,0,-4,-2,-1,9,-7,8,-3,0,1,7,0,-2,0,-8,4,-6,8,7,7,-5,-1,-3,-3,-2,5,9,-4,-4,6,9,5,4,-9,1,-4,0,-7,0,4,-3,0,2,-5,9,5,2,5,5,1,-6,0,-8,-2,7,-1,-9,-5,8,-6,4,4,-5,-1,4,7,5,0,0,3,6,6,8,-1,2,6,3,0,-5,5,8,-5,6,0,-4,2,2,5,-6,5,2,0,2,-9,-7,7,-8,2,-5,-6,7,-4,-8,-8,-2,-9,-2,5,-1,-2,4,1,3,3,8,2,0,3,-4,4,-6,2,4,-3,-7,3,5,-2,5,-8,3,-6,0,2,3,5,6,9,-8,4,-4,-9,2,-8,-3,1,-6,-9,-4,-3,-1,-6,-1,3,-7,-6,-7,-2,2,-5,-1,-4,9,-5,-3,-3,4,8,9,3,-1,4,-1,4,2,5,2,-8,-9,-5,7,4,3,-9,-8,-8,-4,0,0,6,1,4,-9,8,-5,4,-4,6,-7,-5,-3,0,-8,4,-5,-5,-9,3,2,-1,-1,0,-1,0,9,-2,-5,6,-9,4,0,-7,2,6,-8,6,8,-8,-3,1,3,-9,-5,1,-5,8,-1,5,-6,-4,8,0,-1,-2,-6,-9,-2,0,7,-2,3,9,5,-8,-5,5,-6,0,6,7,-9,-1,4,-1,2,7,1,-5,9,9,5,-9,5,-6,4,2,1,-6,-8,-3,-8,-2,5,-4,-4,-2,-4,8,-7,-3,7,0,-8,4,9,-2,9,2,7,-3,0,2,-1,-8,-7,-9,-1,-1,-6,-5,1,-7,-9,-9,-5,7,1,7,1,8,-6,1,-5,4,-4,-7,-2};
int16_t g[DIM_OUTPUT];      // results written by DMA_MACS
int16_t g_ref[DIM_OUTPUT];  // results written by REF_MACS, for comparison

extern void REF_MACS(int8_t *srcA, int8_t *srcB, int16_t *dst, uint16_t srcALen, uint8_t dstLen);
extern void DMA_MACS(int8_t *srcA, int8_t *srcB, int16_t *dst, uint16_t srcALen, uint8_t dstLen);

// Entry point: computes the product once with DMA + MPY32 and once in plain C.
int main(void) {
    // Stop watchdog timer
    WDTCTL = WDTPW | WDTHOLD;
    // Calculate result
    DMA_MACS(a, beta, g, DIM_HIDDEN, DIM_OUTPUT);
    // Calculate reference result
    REF_MACS(a, beta, g_ref, DIM_HIDDEN, DIM_OUTPUT);
    return 0;
}

// Reference implementation in plain C.
// For each i in [0, dstLen): dst[i] = sum over j in [0, srcALen) of
// srcA[j] * srcB[i*srcALen + j].
//   srcA    - input vector of srcALen elements
//   srcB    - dstLen consecutive runs of srcALen elements
//   dst     - receives dstLen 16-bit sums
//   srcALen - number of elements per dot product
//   dstLen  - number of dot products
void REF_MACS(int8_t *srcA, int8_t *srcB, int16_t *dst, uint16_t srcALen, uint8_t dstLen) {
    uint16_t i;
    uint16_t j;
    for (i = 0; i < dstLen; i++) {
        dst[i] = 0;
        for (j = 0; j < srcALen; j++) {
            dst[i] += (int16_t)srcA[j] * (int16_t)srcB[i*srcALen + j];
        }
    }
    return;
}

// Same computation as REF_MACS, but DMA channels 0 and 1 feed the hardware
// multiplier while the CPU sleeps in LPM0; the CPU only copies RESLO into dst
// after each dot product. Parameters have the same meaning as in REF_MACS.
// Relies on DmaIsr to leave LPM0 when channel 0 raises its interrupt.
void DMA_MACS(int8_t *srcA, int8_t *srcB, int16_t *dst, uint16_t srcALen, uint8_t dstLen) {
    uint16_t i;

    // Configure DMA channel 0 to transfer srcA to MACS
    // (DMAIE set: this channel's interrupt is what wakes the CPU)
    DMACTL0 = DMA0TSEL_29;
    DMA0CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE+DMAIE;
    DMA0SZ = srcALen;
    __data16_write_addr(DATA16_ADDR(&DMA0SA), (uintptr_t)srcA);
    __data16_write_addr(DATA16_ADDR(&DMA0DA),(uintptr_t)&MACS);

    // Configure DMA channel 1 to transfer srcB to OP2
    // (its source address is set per output inside the loop)
    DMACTL0 |= DMA1TSEL_29;
    DMA1CTL = DMADT_0+DMASRCINCR_3+DMADSTBYTE+DMASRCBYTE;
    DMA1SZ = srcALen;
    __data16_write_addr(DATA16_ADDR(&DMA1DA),(uintptr_t)&OP2);

    for (i = 0; i < dstLen; i++) {
        // Setup DMA to copy srcB and increment pointer
        __data16_write_addr(DATA16_ADDR(&DMA1SA), (uintptr_t)srcB);
        srcB += srcALen;

        // Enable DMA and start DMA trigger with zero multiply
        // Using MPYS resets result registers to zero
        DMA1CTL &= ~DMAIFG;
        DMA0CTL |= DMAEN;
        DMA1CTL |= DMAEN;
        MPYS = 0;
        OP2 = 0;

        // Enter LPM0 while waiting for result to complete
        // NOTE(review): GIE is set here and DmaIsr clears only LPM0_bits on
        // exit, so GIE stays set on later iterations. If the channel 0
        // interrupt were serviced before this instruction executes (e.g. for
        // a very short srcALen), the CPU would presumably enter LPM0 with
        // nothing left to wake it — confirm against the device timing.
        __bis_SR_register(GIE+LPM0_bits);

        // Save result to dst
        *dst++ = RESLO;
    }
    return;
}

// DMA interrupt service routine
// Wakes the CPU from LPM0 when DMAIV reports the channel 0 interrupt; other
// DMAIV values are ignored.
#pragma vector = DMA_VECTOR
__interrupt void DmaIsr(void) {
    switch(__even_in_range(DMAIV, DMAIV_DMA0IFG)) {
        case DMAIV_NONE: break;
        case DMAIV_DMA0IFG:
            // Exit from LPM0
            __bic_SR_register_on_exit(LPM0_bits);
            break;
        default: break;
    }
}
Regards,
Brent
**Attention** This is a public forum