This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

Codec Engine low performance on DM8168

Hello,
I used to write image processing algorithms using C6Accel on DM8168, but since C6Accel is deprecated, I started to work with Codec Engine (EZSDK 5.04.00.11). I modified the image_copy app to make it work with a buffer size of 921600 bytes. Here is my new apps/image_copy/app.c file:

/* 
 * Copyright (c) 2012, Texas Instruments Incorporated
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * *  Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * *  Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * *  Neither the name of Texas Instruments Incorporated nor the names of
 *    its contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
/*
 *  ======== app.c ========
 */
#include <xdc/std.h>
#include <xdc/runtime/Assert.h>
#include <xdc/runtime/Diags.h>
#include <xdc/runtime/Log.h>

#include <ti/sdo/ce/Engine.h>
#include <ti/sdo/ce/osal/Memory.h>
#include <ti/sdo/ce/image/imgdec.h>
#include <ti/sdo/ce/image/imgenc.h>

#include <string.h>  /* for memset */

#include <stdio.h>
#include <stdlib.h>

#include <sys/time.h>

/*
 * If an XDAIS algorithm _may_ use DMA, buffers provided to it need to be
 * aligned on a cache boundary.
 */

#ifdef CACHE_ENABLED

/*
 * If buffer alignment isn't set on the compiler's command line, set it here
 * to a default value.
 */
#ifndef BUFALIGN
#define BUFALIGN 128
#endif
#else /* !CACHE_ENABLED */

/* Not a cached system, no buffer alignment constraints */
#define BUFALIGN Memory_DEFAULTALIGNMENT

#endif /* CACHE_ENABLED */

/*
 * Frame geometry: one 1280x720 frame, one byte per sample (921600 bytes).
 * The expansion is parenthesized so NSAMPLES behaves as a single value in
 * any expression (e.g. `x / NSAMPLES`), not as `x / 1280 * 720`.
 */
#define NSAMPLES    (1280 * 720)  /* must be multiple of 128 for cache/DMA reasons */
#define IFRAMESIZE  (NSAMPLES * sizeof(Int8))  /* raw frame (input) */
#define EFRAMESIZE  (NSAMPLES * sizeof(Int8))  /* encoded frame */
#define OFRAMESIZE  (NSAMPLES * sizeof(Int8))  /* decoded frame (output) */

/*
 * Frame buffers shared between smain() (which allocates and frees them)
 * and encode_decode() (which hands them to the codecs).
 */
static XDAS_Int8 *inBuf;       /* source frame given to the encoder */
static XDAS_Int8 *encodedBuf;  /* encoder output, then decoder input */
static XDAS_Int8 *outBuf;      /* decoder output, written to the out file */

/* Codec names passed to IMGDEC_create() / IMGENC_create() */
static String decoderName  = "imgdec_copy";
static String encoderName  = "imgenc_copy";

/* Forward declaration; defined after smain() */
static Void encode_decode(IMGENC_Handle enc, IMGDEC_Handle dec, FILE *in,
    FILE *out);


/*
 *  ======== createInFileIfMissing ========
 *  Make sure the input file exists.
 *
 *  If inFileName cannot be opened for reading, a new file is generated
 *  containing the values 0 .. NSAMPLES-1, each written as a raw `int`
 *  (so the generated file is NSAMPLES * sizeof(int) bytes long).
 *
 *  If the file cannot be created, or a write fails, an error is printed
 *  and the function returns; the caller's own fopen() of the input file
 *  will then report the problem.
 */
static void createInFileIfMissing(char *inFileName)
{
    int i;
    FILE *f = fopen(inFileName, "rb");

    if (f == NULL) {
        printf("Input file '%s' not found, generating one.\n", inFileName);
        f = fopen(inFileName, "wb");
        if (f == NULL) {
            /* Without this check fwrite()/fclose() would be handed NULL */
            printf("App-> ERROR: can't create file %s\n", inFileName);
            return;
        }
        for (i = 0; i < NSAMPLES; i++) {
            if (fwrite(&i, sizeof(i), 1, f) != 1) {
                printf("App-> ERROR: can't write to file %s\n", inFileName);
                break;
            }
        }
    }
    fclose(f);
}

/*
 *  ======== smain ========
 *  Application entry point: allocate the three frame buffers, open the
 *  input/output files, open the engine, create the image encoder and
 *  decoder, run encode_decode(), then release everything.
 *
 *  progName   - program name, used in error messages
 *  procId     - stored in Engine_Attrs.procId before Engine_open()
 *  engineName - name of the engine to open
 *  inFile     - input file path (generated if it does not exist)
 *  outFile    - output file path
 *
 *  Always returns 0; failures are reported on stdout/stderr and lead
 *  straight to the common cleanup code.
 */
Int smain(String progName, String procId, String engineName,
          String inFile, String outFile)
{
    Engine_Handle ce = NULL;
    Engine_Attrs attrs;
    IMGDEC_Handle dec = NULL;
    IMGENC_Handle enc = NULL;
    FILE *in = NULL;
    FILE *out = NULL;
    Memory_AllocParams allocParams;

    createInFileIfMissing(inFile);

    Log_print4(Diags_USER1, "[+1] App-> Application started, procId %s "
               "engineName %s input-file %s output-file %s.",
               (IArg)procId, (IArg)engineName, (IArg)inFile, (IArg)outFile);

    /*
     * allocate buffers: contiguous heap, non-cached, BUFALIGN alignment.
     * allocParams is fully initialized before the first "goto end" because
     * the cleanup code passes it to Memory_free().
     */
    allocParams.type = Memory_CONTIGHEAP;
    allocParams.flags = Memory_NONCACHED;
    allocParams.align = BUFALIGN;
    allocParams.seg = 0;

    inBuf = (XDAS_Int8 *)Memory_alloc(IFRAMESIZE, &allocParams);
    encodedBuf = (XDAS_Int8 *)Memory_alloc(EFRAMESIZE, &allocParams);
    outBuf = (XDAS_Int8 *)Memory_alloc(OFRAMESIZE, &allocParams);

    if ((inBuf == NULL) || (encodedBuf == NULL) || (outBuf == NULL)) {
        /* report the failure instead of exiting silently */
        printf("App-> ERROR: can't allocate frame buffers\n");
        goto end;
    }

    /* open file streams for input and output */
    if ((in = fopen(inFile, "rb")) == NULL) {
        printf("App-> ERROR: can't read file %s\n", inFile);
        goto end;
    }
    if ((out = fopen(outFile, "wb")) == NULL) {
        printf("App-> ERROR: can't write to file %s\n", outFile);
        goto end;
    }

    /* Initialize attrs fields to default values, and set the procId */
    Engine_initAttrs(&attrs);
    attrs.procId = procId;

    /* reset, load, and start DSP Engine */
    if ((ce = Engine_open(engineName, &attrs, NULL)) == NULL) {
        fprintf(stderr, "%s: error: can't open engine %s\n",
            progName, engineName);
        goto end;
    }

    /* allocate and initialize image decoder on the engine */
    dec = IMGDEC_create(ce, decoderName, NULL);
    if (dec == NULL) {
        printf( "App-> ERROR: can't open codec %s\n", decoderName);
        goto end;
    }

    /* allocate and initialize image encoder on the engine */
    enc = IMGENC_create(ce, encoderName, NULL);
    if (enc == NULL) {
        fprintf(stderr, "%s: error: can't open codec %s\n",
            progName, encoderName);
        goto end;
    }

    /* use engine to encode, then decode the data */
    encode_decode(enc, dec, in, out);

end:
    /* teardown the codecs */
    if (enc) {
        IMGENC_delete(enc);
    }
    if (dec) {
        IMGDEC_delete(dec);
    }

    /* close the engine */
    if (ce) {
        Engine_close(ce);
    }

    /* close the files */
    if (in) {
        fclose(in);
    }
    if (out) {
        fclose(out);
    }

    /*
     * free buffers; the pointers are file-scope, so clear them to avoid
     * leaving dangling references behind once the memory is released.
     */
    if (inBuf) {
        Memory_free(inBuf, IFRAMESIZE, &allocParams);
        inBuf = NULL;
    }
    if (encodedBuf) {
        Memory_free(encodedBuf, EFRAMESIZE, &allocParams);
        encodedBuf = NULL;
    }
    if (outBuf) {
        Memory_free(outBuf, OFRAMESIZE, &allocParams);
        outBuf = NULL;
    }

    Log_print0(Diags_USER1, "[+1] app done.");
    return (0);
}

/*
 *  ======== calc_time ========
 *  Return the elapsed time between two gettimeofday() samples, in
 *  milliseconds.
 *
 *  before - timestamp taken at the start of the interval
 *  after  - timestamp taken at the end of the interval
 *
 *  Both the seconds and microseconds fields are used, so intervals of
 *  one second or longer are reported correctly. A negative microsecond
 *  difference is compensated by the seconds difference.
 */
double calc_time(struct timeval *before, struct timeval *after)
{
    double secs  = (double)(after->tv_sec - before->tv_sec);
    double usecs = (double)(after->tv_usec - before->tv_usec);

    return secs * 1000.0 + usecs / 1000.0;
}

/*
 *  ======== encode_decode ========
 *  Run the encoder and then the decoder over the file-scope frame buffers
 *  for a fixed number of iterations, writing each decoded frame to 'out'.
 *
 *  enc - image encoder handle
 *  dec - image decoder handle
 *  in  - input stream; not referenced anywhere in this function
 *  out - output stream receiving OFRAMESIZE bytes per successful iteration
 *
 *  Both codecs are first queried with XDM_GETSTATUS and checked against the
 *  buffer counts/sizes used here; on any mismatch or control failure the
 *  function logs and returns without processing. Only the IMGENC_process()
 *  call is timed and printed.
 */
static Void encode_decode(IMGENC_Handle enc, IMGDEC_Handle dec, FILE *in,
    FILE *out)
{
	struct timeval tbegin, tend;    /* timestamps around IMGENC_process() */

    Int                         n;
    Int32                       status;

    IMGDEC_InArgs               decInArgs;
    IMGDEC_OutArgs              decOutArgs;
    IMGDEC_DynamicParams        decDynParams;
    IMGDEC_Status               decStatus;

    IMGENC_InArgs               encInArgs;
    IMGENC_OutArgs              encOutArgs;
    IMGENC_DynamicParams        encDynParams;
    IMGENC_Status               encStatus;

    /* encoder input descriptor */
    XDM_BufDesc                 inBufDesc;
    XDAS_Int8                  *src[XDM_MAX_IO_BUFFERS];
    XDAS_Int32                  inBufSizes[XDM_MAX_IO_BUFFERS];

    /* encoder output / decoder input descriptor */
    XDM_BufDesc                 encodedBufDesc;
    XDAS_Int8                  *encoded[XDM_MAX_IO_BUFFERS];
    XDAS_Int32                  encBufSizes[XDM_MAX_IO_BUFFERS];

    /* decoder output descriptor */
    XDM_BufDesc                 outBufDesc;
    XDAS_Int8                  *dst[XDM_MAX_IO_BUFFERS];
    XDAS_Int32                  outBufSizes[XDM_MAX_IO_BUFFERS];

    /* clear and initialize the buffer descriptors */
    memset(src,     0, sizeof(src[0])     * XDM_MAX_IO_BUFFERS);
    memset(encoded, 0, sizeof(encoded[0]) * XDM_MAX_IO_BUFFERS);
    memset(dst,     0, sizeof(dst[0])     * XDM_MAX_IO_BUFFERS);

    /* each descriptor carries exactly one buffer, in slot 0 */
    src[0]     = inBuf;
    encoded[0] = encodedBuf;
    dst[0]     = outBuf;

    inBufDesc.numBufs = encodedBufDesc.numBufs = outBufDesc.numBufs = 1;

    inBufDesc.bufSizes      = inBufSizes;
    encodedBufDesc.bufSizes = encBufSizes;
    outBufDesc.bufSizes     = outBufSizes;

    /* every buffer is advertised as NSAMPLES bytes */
    inBufSizes[0] = encBufSizes[0] = outBufSizes[0] = NSAMPLES;

    inBufDesc.bufs      = src;
    encodedBufDesc.bufs = encoded;
    outBufDesc.bufs     = dst;

    /* initialize all "sized" fields */
    encInArgs.size    = sizeof(encInArgs);
    decInArgs.size    = sizeof(decInArgs);
    encOutArgs.size   = sizeof(encOutArgs);
    decOutArgs.size   = sizeof(decOutArgs);
    encDynParams.size = sizeof(encDynParams);
    decDynParams.size = sizeof(decDynParams);
    encStatus.size    = sizeof(encStatus);
    decStatus.size    = sizeof(decStatus);

    /*
     * Query the encoder and decoder.
     * This app expects the encoder to provide 1 buf in and get 1 buf out,
     * and the buf sizes of the in and out buffer must be able to handle
     * NSAMPLES bytes of data.
     */
    status = IMGENC_control(enc, XDM_GETSTATUS, &encDynParams, &encStatus);
    if (status != IMGENC_EOK) {
        /* failure, report error and exit */
        Log_print1(Diags_USER7, "[+7] encode control status = 0x%x",
                (IArg)status);
        return;
    }

    /* Validate this encoder codec will meet our buffer requirements */
    if ((inBufDesc.numBufs < encStatus.bufInfo.minNumInBufs) ||
        (IFRAMESIZE < encStatus.bufInfo.minInBufSize[0]) ||
        (encodedBufDesc.numBufs < encStatus.bufInfo.minNumOutBufs) ||
        (EFRAMESIZE < encStatus.bufInfo.minOutBufSize[0])) {

        /* failure, report error and exit */
        Log_print0(Diags_USER7, "[+7] Error:  encoder codec feature conflict");
        return;
    }

    status = IMGDEC_control(dec, XDM_GETSTATUS, &decDynParams, &decStatus);
    if (status != IMGDEC_EOK) {
        /* failure, report error and exit */
        Log_print1(Diags_USER7, "[+7] decode control status = 0x%x",
                (IArg)status);
        return;
    }

    /* Validate this decoder codec will meet our buffer requirements */
    if ((encodedBufDesc.numBufs < decStatus.bufInfo.minNumInBufs) ||
        (EFRAMESIZE < decStatus.bufInfo.minInBufSize[0]) ||
        (outBufDesc.numBufs < decStatus.bufInfo.minNumOutBufs) ||
        (OFRAMESIZE < decStatus.bufInfo.minOutBufSize[0])) {

        /* failure, report error and exit */
        Log_print0(Diags_USER7,
                "[+7] App-> ERROR: decoder does not meet buffer requirements.");
        return;
    }

    /*
     * Encode, decode, and write to out, for a fixed 10 iterations.
     * Note: nothing is read from 'in' inside this loop; each iteration
     * processes whatever inBuf currently holds.
     */
    for (n = 0; n < 10; n++) {

        /* Deal with cache issues, if necessary */
#ifdef CACHE_ENABLED
#if defined(xdc_target__isaCompatible_64P) || \
    defined(xdc_target__isaCompatible_64T)
        /*
         *  fread() on this processor is implemented using CCS's stdio, which
         *  is known to write into the cache, not physical memory.  To meet
         *  XDAIS DMA Rule 7, we must writeback the cache into physical
         *  memory.  Also, per DMA Rule 7, we must invalidate the buffer's
         *  cache before providing it to any xDAIS algorithm.
         */
        Memory_cacheWbInv(inBuf, IFRAMESIZE);
#else
#error Unvalidated config - add appropriate fread-related cache maintenance
#endif

        /* Per DMA Rule 7, our output buffer cache lines must be cleaned */
        Memory_cacheInv(encodedBuf, EFRAMESIZE);
#endif

        Log_print1(Diags_USER1, "[+1] App-> Processing frame %d...", (IArg)n);

        /* time only the encoder's process call */
	    gettimeofday(&tbegin, NULL);
        /* encode the frame */
        status = IMGENC_process(enc, &inBufDesc, &encodedBufDesc, &encInArgs,
            &encOutArgs);
	    gettimeofday(&tend, NULL);
		printf("Processing time: %.3f ms\n", calc_time(&tbegin, &tend));

        Log_print2(Diags_USER2,
                "[+2] App-> Encoder frame %d process returned - 0x%x)",
                (IArg)n, (IArg)status);

#ifdef CACHE_ENABLED
        /* Writeback this outBuf from the previous call.  Also, as encodedBuf
         * is an inBuf to the next process call, we must invalidate it also, to
         * clean buffer lines.
         */
        Memory_cacheWbInv(encodedBuf, EFRAMESIZE);

        /* Per DMA Rule 7, our output buffer cache lines must be cleaned */
        Memory_cacheInv(outBuf, OFRAMESIZE);
#endif

        /* the encoder status is checked only after the cache maintenance */
        if (status != IMGENC_EOK) {
            Log_print3(Diags_USER7,
                    "[+7] App-> Encoder frame %d processing FAILED, status = 0x%x, "
                    "extendedError = 0x%x",
                    (IArg)n, (IArg)status, (IArg)(encOutArgs.extendedError));
            break;
        }

        /* decode the frame; the decoder consumes what the encoder produced */
        decInArgs.numBytes = encOutArgs.bytesGenerated;
        status = IMGDEC_process(dec, &encodedBufDesc, &outBufDesc, &decInArgs,
           &decOutArgs);

        Log_print2(Diags_USER2,
                "[+2] App-> Decoder frame %d process returned - 0x%x)",
                (IArg)n, (IArg)status);

        if (status != IMGDEC_EOK) {
            Log_print3(Diags_USER7,
                    "[+7] App-> Decoder frame %d processing FAILED, status = 0x%x, "
                    "extendedError = 0x%x",
                    (IArg)n, (IArg)status, (IArg)(decOutArgs.extendedError));
            break;
        }

#ifdef CACHE_ENABLED
        /* Writeback the outBuf. */
        Memory_cacheWb(outBuf, OFRAMESIZE);
#endif
        /* write to file (return value of fwrite is not checked) */
        fwrite(dst[0], OFRAMESIZE, 1, out);
    }

    /* n is the number of iterations completed before the loop ended */
    Log_print1(Diags_USER1, "[+1] %d frames encoded/decoded", (IArg)n);
}
/*
 *  @(#) ti.sdo.ce.examples.apps.image_copy; 1, 0, 0,1; 2-24-2012 19:27:54; /db/atree/library/trees/ce/ce-t06/src/ xlibrary

 */

loadmodules.sh:
modprobe cmemk phys_start=0x96C00000 phys_end=0x98000000
modprobe syslink

So the resulting time for copying from one buffer to another is 28 ms. When I use C6Accel, it takes 7 ms to copy the data. Why does Codec Engine processing work several times slower than C6Accel? I tried the same Codec Engine project on a Beagleboard-xM with DVSDK 4.01.00.09, and there was no decrease in performance.

Regards,

Sergey

  • Log with CE_DEBUG=2:

    [t=0x0001fa7e] [tid=0x400ca000] xdc.runtime.Main: [+1] App-> Processing frame 0...

    [t=0x0001fab1] [tid=0x400ca000] ti.sdo.ce.image.IMGENC: [+E] IMGENC_process> Enter (handle=0x107de0, inBufs=0xbef419a8, outBufs=0xbef4191c, inArgs=0xbef41a78, outArgs=0xbef41a68)

    [t=0x0001faee] [tid=0x400ca000] ti.sdo.ce.VISA: [+4] VISA_getMaxMsgSize(0x107de0): returning 0x1000

    [t=0x0001fb11] [tid=0x400ca000] ti.sdo.ce.VISA: [+5] VISA_allocMsg> Allocating message for messageId=0x00020005

    [t=0x0001fb3f] [tid=0x400ca000] ti.sdo.ce.VISA: [+E5] VISA_call(visa=0x107de0, msg=0x42398180): messageId=0x00020005, command=0x0

    [DSP] [t=+026,398 us] [tid=0x8b086ec8] ti.sdo.ce.node: [+5] NODE> 0x8b086d40 call(algHandle=0x8b086e08, msg=0x8d002180); messageId=0x00020005

    [DSP] [t=+000,114 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+E] Memory_cacheInv> Enter(addr=0xa05cb000, sizeInBytes=921600)

    [DSP] [t=+000,257 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+X] Memory_cacheInv> return

    [DSP] [t=+000,062 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+E] Memory_cacheInv> Enter(addr=0xa04ea000, sizeInBytes=921600)

    [DSP] [t=+000,251 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+X] Memory_cacheInv> return

    [DSP] [t=+000,065 us] [tid=0x8b086ec8] ti.sdo.ce.image.IMGENC: [+E] IMGENC_process> Enter (handle=0x8b086e08, inBufs=0x8b0895e0, outBufs=0x8b0895ec, inArgs=0x8d0022b8, outArgs=0x8d0022bc)

    [DSP] [t=+000,111 us] [tid=0x8b086ec8] ti.sdo.ce.VISA: [+5] VISA_enter(visa=0x8b086e08): algHandle = 0x8b086e40

    [DSP] [t=+000,066 us] [tid=0x8b086ec8] ti.sdo.ce.alg.Algorithm: [+E] Algorithm_activate> Enter(alg=0x8b086e40)

    [DSP] [t=+000,064 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+E] DSKT2_activateAlg> Enter (scratchId=1, alg=0x8c8007b8)

    [DSP] [t=+000,077 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+2] DSKT2_activateAlg> Last active algorithm 0x8c8007b0, current algorithm to be activated 0x8c8007b8

    [DSP] [t=+000,089 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+4] DSKT2_activateAlg> Real deactivation of algorithm 0x8c8007b0

    [DSP] [t=+000,078 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+X] DSKT2_activateAlg> Exit

    [DSP] [t=+000,053 us] [tid=0x8b086ec8] ti.sdo.ce.alg.Algorithm: [+X] Algorithm_activate> Exit

    [DSP] [t=+000,058 us] [tid=0x8b086ec8] ti.sdo.ce.examples.codecs.imgenc_copy: [+E] IMGENCCOPY_TI_process(0x8c8007b8, 0x8b0895e0, 0x8b0895ec, 0x8d0022b8, 0x8d0022bc)

    [DSP] [t=+028,130 us] [tid=0x8b086ec8] ti.sdo.ce.examples.codecs.imgenc_copy: [+2] IMGENCCOPY_TI_process> Processed 921600 bytes.

    [DSP] [t=+000,078 us] [tid=0x8b086ec8] ti.sdo.ce.VISA: [+5] VISA_exit(visa=0x8b086e08): algHandle = 0x8b086e40

    [DSP] [t=+000,067 us] [tid=0x8b086ec8] ti.sdo.ce.alg.Algorithm: [+E] Algorithm_deactivate> Enter(alg=0x8b086e40)

    [DSP] [t=+000,068 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+E] DSKT2_deactivateAlg> Enter (scratchId=1, algHandle=0x8c8007b8)

    [DSP] [t=+000,073 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+4] DSKT2_deactivateAlg> Lazy deactivate of algorithm 0x8c8007b8

    [DSP] [t=+000,081 us] [tid=0x8b086ec8] ti.sdo.fc.dskt2: [+X] DSKT2_deactivateAlg> Exit

    [DSP] [t=+000,057 us] [tid=0x8b086ec8] ti.sdo.ce.alg.Algorithm: [+X] Algorithm_deactivate> Exit

    [DSP] [t=+000,058 us] [tid=0x8b086ec8] ti.sdo.ce.image.IMGENC: [+X] IMGENC_process> Exit (handle=0x8b086e08, retVal=0x0)

    [DSP] [t=+000,073 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+E] Memory_cacheWb> Enter(addr=0xa04ea000, sizeInBytes=921600)

    [DSP] [t=+000,250 us] [tid=0x8b086ec8] ti.sdo.ce.osal.Memory: [+X] Memory_cacheWb> return

    [DSP] [t=+000,057 us] [tid=0x8b086ec8] ti.sdo.ce.node: [+5] NODE> returned from call(algHandle=0x8b086e08, msg=0x8d002180); messageId=0x00020005

    [t=0x000277b2] [tid=0x400ca000] ti.sdo.ce.Engine: [+X] Engine_fwriteTrace> returning count [2995]

    [t=0x000277db] [tid=0x400ca000] ti.sdo.ce.VISA: [+X5] VISA_call Completed: messageId=0x00020005, command=0x0, return(status=0)

    [t=0x00027806] [tid=0x400ca000] ti.sdo.ce.VISA: [+5] VISA_freeMsg(0x107de0, 0x42398180): Freeing message with messageId=0x00020005

    [t=0x00027830] [tid=0x400ca000] ti.sdo.ce.image.IMGENC: [+X] IMGENC_process> Exit (handle=0x107de0, retVal=0x0)

    Encode Processing: 32.165 ms

    [t=0x000278cc] [tid=0x400ca000] xdc.runtime.Main: [+2] App-> Encoder frame 0 process returned - 0x0)