The TI E2E™ design support forums will undergo maintenance from July 11 to July 13. If you need design support during this time, open a new support request with our customer support center.

This thread has been locked.

If you have a related question, please click the "Ask a related question" button in the top right corner. The newly created question will be automatically linked to this question.

AM62A7-Q1: 3840x2160: Unable to run VPAC with VISS and LDC at 30 fps

Part Number: AM62A7-Q1

Tool/software:

Hi,

When I run the following script to stream video from an AR0823 to ethernet, with VPAC using VISS and LDC, I am only able to get 22 fps, and the tiperfoverlay shows VISS and LDC using 48-49% each.

#!/bin/bash

# Destination of the RTP stream (overridable with --mc / --port).
PORT=5004
MULTICASTADDR=224.1.1.1

# Encoder defaults (overridable with -b / -l / -p).
VIDEO_BITRATE=6000000
HEVC_LEVEL=1
HEVC_PROFILE=main

# extra-controls string handed to v4l2h265enc, assembled one control at a time.
ENC_EXTRA_CONTROLS=enc
ENC_EXTRA_CONTROLS+=,prepend_sps_and_pps_to_idr=1
ENC_EXTRA_CONTROLS+=,video_gop_size=5
ENC_EXTRA_CONTROLS+=,frame_level_rate_control_enable=1
ENC_EXTRA_CONTROLS+=,video_bitrate_mode=0
ENC_EXTRA_CONTROLS+=,vbv_buffer_size=3000
ENC_EXTRA_CONTROLS+=,video_bitrate=$VIDEO_BITRATE

# Parse the command line and update the stream/encoder settings accordingly.
# Globals written: HEVC_PROFILE, HEVC_LEVEL, VIDEO_BITRATE, ENC_EXTRA_CONTROLS,
#                  MULTICASTADDR, PORT
# Arguments:       the script's command-line arguments ("$@")
# Exits 0 after printing the help text, 1 on an invalid command line.
# Note: parsing runs inside a function, so the script's own positional
# parameters are left untouched (nothing after this point reads them).
parse_args() {
	local parsed

	# Declaration and assignment are separate so getopt's exit status is kept.
	# "$0" is double-quoted so getopt reports errors under the real script
	# name instead of the literal text '$0'. The --input/--output long options
	# had no handler (they ended in "Internal error!") and are no longer
	# declared, so getopt itself rejects them with a proper message.
	if ! parsed=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,port:' -n "$0" -- "$@"); then
		echo 'Terminating...' >&2
		exit 1
	fi

	# Install getopt's normalised argument list as the positional parameters.
	eval set -- "$parsed"

	while true; do
		case "$1" in
			'-h'|'--help')
				echo "$0 - stream camera video as H.265 over RTP/UDP multicast"
				echo "Parameters:"
				echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
				echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
				echo "  -b 0..700000000 (Video Bitrate)"
				echo "  --mc <multicast ip address>"
				echo "  --port <port number> (default 5004)"
				echo "  -h This help"
				exit 0
			;;
			'-p'|'--profile')
				# Unknown profile names silently fall back to "main".
				case "$2" in
					'main'|'Main'|'0') HEVC_PROFILE="main" ;;
					'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
					'main-10'|'2') HEVC_PROFILE="main-10" ;;
					*) HEVC_PROFILE="main" ;;
				esac
				echo "HEVC Profile: '$HEVC_PROFILE'"
				shift 2
				continue
			;;
			'-l'|'--level')
				# Unknown levels silently fall back to "1".
				case "$2" in
					'1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
					*) HEVC_LEVEL="1" ;;
				esac
				echo "HEVC Level: '$HEVC_LEVEL'"
				shift 2
				continue
			;;
			'-b'|'--bitrate')
				echo "Video Bitrate: '$2'"
				VIDEO_BITRATE=$2
				# ENC_EXTRA_CONTROLS was expanded before option parsing and still
				# carries the default bitrate, so patch its video_bitrate= entry.
				# Everything from ",video_bitrate=" to the end is replaced, i.e.
				# video_bitrate is expected to be the last control; the string is
				# left unchanged when it has no such entry.
				ENC_EXTRA_CONTROLS="${ENC_EXTRA_CONTROLS/,video_bitrate=*/,video_bitrate=${VIDEO_BITRATE}}"
				shift 2
				continue
			;;
			'--mc')
				# getopt emits long options with their leading dashes.
				MULTICASTADDR=$2
				shift 2
				continue
			;;
			'--port')
				PORT=$2
				shift 2
				continue
			;;
			'--')
				shift
				break
			;;
			*)
				echo 'Internal error!' >&2
				exit 1
			;;
		esac
	done
}

parse_args "$@"

# Sensor format selection. The two commented-out lines are kept from the
# original script; only the AR0823 line is active.
#media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
#media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'

# v4l2src -> tiovxisp -> tiovxldc -> NV12 caps -> queue -> tiperfoverlay ->
# v4l2h265enc -> rtph265pay -> udpsink.
# Variable expansions are quoted so that a value containing whitespace or glob
# characters reaches gst-launch-1.0 as a single argument.
gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
	! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
	! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
		dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
		sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
	! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
	! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
	! queue max-size-buffers=1 leaky=0 \
	! tiperfoverlay title="Camera 1" \
	! v4l2h265enc extra-controls="${ENC_EXTRA_CONTROLS}" \
	! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
	! rtph265pay config-interval=1 pt=96 \
	! udpsink host="${MULTICASTADDR}" auto-multicast=true port="${PORT}"

When I remove the LDC, tiperfoverlay shows a CPU usage for the VISS of 68%.

#!/bin/bash

# Destination of the RTP stream (overridable with --mc / --port).
PORT=5004
MULTICASTADDR=224.1.1.1

# Encoder defaults (overridable with -b / -l / -p).
VIDEO_BITRATE=6000000
HEVC_LEVEL=1
HEVC_PROFILE=main

# extra-controls string handed to v4l2h265enc, assembled one control at a time.
ENC_EXTRA_CONTROLS=enc
ENC_EXTRA_CONTROLS+=,prepend_sps_and_pps_to_idr=1
ENC_EXTRA_CONTROLS+=,video_gop_size=5
ENC_EXTRA_CONTROLS+=,frame_level_rate_control_enable=1
ENC_EXTRA_CONTROLS+=,video_bitrate_mode=0
ENC_EXTRA_CONTROLS+=,vbv_buffer_size=3000
ENC_EXTRA_CONTROLS+=,video_bitrate=$VIDEO_BITRATE

# Parse the command line and update the stream/encoder settings accordingly.
# Globals written: HEVC_PROFILE, HEVC_LEVEL, VIDEO_BITRATE, ENC_EXTRA_CONTROLS,
#                  MULTICASTADDR, PORT
# Arguments:       the script's command-line arguments ("$@")
# Exits 0 after printing the help text, 1 on an invalid command line.
# Note: parsing runs inside a function, so the script's own positional
# parameters are left untouched (nothing after this point reads them).
parse_args() {
	local parsed

	# Declaration and assignment are separate so getopt's exit status is kept.
	# "$0" is double-quoted so getopt reports errors under the real script
	# name instead of the literal text '$0'. The --input/--output long options
	# had no handler (they ended in "Internal error!") and are no longer
	# declared, so getopt itself rejects them with a proper message.
	if ! parsed=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,port:' -n "$0" -- "$@"); then
		echo 'Terminating...' >&2
		exit 1
	fi

	# Install getopt's normalised argument list as the positional parameters.
	eval set -- "$parsed"

	while true; do
		case "$1" in
			'-h'|'--help')
				echo "$0 - stream camera video as H.265 over RTP/UDP multicast"
				echo "Parameters:"
				echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
				echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
				echo "  -b 0..700000000 (Video Bitrate)"
				echo "  --mc <multicast ip address>"
				echo "  --port <port number> (default 5004)"
				echo "  -h This help"
				exit 0
			;;
			'-p'|'--profile')
				# Unknown profile names silently fall back to "main".
				case "$2" in
					'main'|'Main'|'0') HEVC_PROFILE="main" ;;
					'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
					'main-10'|'2') HEVC_PROFILE="main-10" ;;
					*) HEVC_PROFILE="main" ;;
				esac
				echo "HEVC Profile: '$HEVC_PROFILE'"
				shift 2
				continue
			;;
			'-l'|'--level')
				# Unknown levels silently fall back to "1".
				case "$2" in
					'1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
					*) HEVC_LEVEL="1" ;;
				esac
				echo "HEVC Level: '$HEVC_LEVEL'"
				shift 2
				continue
			;;
			'-b'|'--bitrate')
				echo "Video Bitrate: '$2'"
				VIDEO_BITRATE=$2
				# ENC_EXTRA_CONTROLS was expanded before option parsing and still
				# carries the default bitrate, so patch its video_bitrate= entry.
				# Everything from ",video_bitrate=" to the end is replaced, i.e.
				# video_bitrate is expected to be the last control; the string is
				# left unchanged when it has no such entry.
				ENC_EXTRA_CONTROLS="${ENC_EXTRA_CONTROLS/,video_bitrate=*/,video_bitrate=${VIDEO_BITRATE}}"
				shift 2
				continue
			;;
			'--mc')
				# getopt emits long options with their leading dashes.
				MULTICASTADDR=$2
				shift 2
				continue
			;;
			'--port')
				PORT=$2
				shift 2
				continue
			;;
			'--')
				shift
				break
			;;
			*)
				echo 'Internal error!' >&2
				exit 1
			;;
		esac
	done
}

parse_args "$@"

# Sensor format selection. The two commented-out lines are kept from the
# original script; only the AR0823 line is active.
#media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
#media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'

# Same pipeline as the first script but without the tiovxldc element:
# v4l2src -> tiovxisp -> NV12 caps -> queue -> tiperfoverlay ->
# v4l2h265enc -> rtph265pay -> udpsink.
# Variable expansions are quoted so that a value containing whitespace or glob
# characters reaches gst-launch-1.0 as a single argument.
gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
	! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
	! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
		dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
		sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
	! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
	! queue max-size-buffers=1 leaky=0 \
	! tiperfoverlay title="Camera 1" \
	! v4l2h265enc extra-controls="${ENC_EXTRA_CONTROLS}" \
	! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
	! rtph265pay config-interval=1 pt=96 \
	! udpsink host="${MULTICASTADDR}" auto-multicast=true port="${PORT}"

Is 3840x2160 @ 30 fps supported for the VPAC? If yes, how can I improve the frame rate of the first pipeline?

Regards,

Bas Vermeulen

  • Hello Bas,

    Is 3840x2160 @ 30 fps supported for the VPAC? If yes, how can I improve the frame rate of the first pipeline?

    Yes, AM62A VPAC can support up to 315MPixel per second. Your sensor is 3840x2160 @ 30fps = 249MPixel/s. It can be supported by AM62A VPAC.

    If yes, how can I improve the frame rate of the first pipeline?

    You can try to increase the buffer pool size, for example:

          gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
    	! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
    	! tiovxisp sink_0::pool-size=4 sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
    		dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
    		sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
    	! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin sink_0::pool-size=4 src::pool-size=4 \
    

    Regards,

    Jianzhong

  • Hi Jianzhong,

    Increasing the buffer-pool doesn't increase the frames per second. It's still at 22 fps instead of 30 fps.

    Are there any other things I can try? Simply streaming with v4l2-ctl -d 3 --stream-mmap gives me 30.02 fps, so the sensor/MIPI is able to sustain the required frame rate (and as said before, removing the LDC gets us back to an acceptable 29 fps according to the tiperfoverlay).

    Regards,

    Bas Vermeulen

  • Adding a queue element before tiovxldc should help:

          gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
    	! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
    	! tiovxisp sink_0::pool-size=4 sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
    		dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
    		sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
    	! queue ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin sink_0::pool-size=4 src::pool-size=4 \

  • Hi Jianzhong,

    Adding a queue element gets me to 25 fps (with or without the pool-size of 4).

    Removing the tiperfoverlay element gets me to 26 fps, but still not getting to ~30.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    Can you try to add a queue before v4l2h265enc and rtph265pay?

    # Variant with a queue in front of tiovxldc, v4l2h265enc and rtph265pay.
    # Variable expansions are quoted so each value stays a single argument.
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
    	! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
    	! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
    		dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
    		sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
    	! queue ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
    	! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
    	! queue max-size-buffers=1 leaky=0 \
    	! tiperfoverlay title="Camera 1" \
    	! queue ! v4l2h265enc extra-controls="${ENC_EXTRA_CONTROLS}" \
    	! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
    	! queue ! rtph265pay config-interval=1 pt=96 \
    	! udpsink host="${MULTICASTADDR}" auto-multicast=true port="${PORT}"

  • Adding a queue there drops the framerate to 23 fps.

  • Can you stream the camera data to a display instead of sending to the network, or use a fakesink after tiovxldc? That can help us find where the bottleneck is.

  • The following pipeline gives me 30 fps:

    # Isolation test: capture -> tiovxisp -> queue -> tiovxldc -> queue, with the
    # output discarded by fakesink (no caps filter, overlay, encoder or network).
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
                    sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
            ! queue \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
            ! queue max-size-buffers=1 leaky=0 \
            ! fakesink

    If I add a convert line and tiperfoverlay, that drops to 21:

    # Same isolation test with an NV12 caps filter and tiperfoverlay inserted
    # between the last queue and fakesink.
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
                    sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
            ! queue \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
            ! queue max-size-buffers=1 leaky=0 \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! tiperfoverlay title="Camera 1" \
            ! fakesink

    Removing the tiperfoverlay from there gets me back to 30 fps.

    If I add the v4l2h265enc line, the fps drops to 26 again. I can add the rtp payloader and the fps stays 26.

  • If I add a convert line and tiperfoverlay, that drops to 21:

    Can you try to add a queue before tiperfoverlay? For example:

            ! queue max-size-buffers=1 leaky=0 \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue ! tiperfoverlay title="Camera 1" \
    

  • We arrived at the following GStreamer pipeline, which gets full performance (30 fps):

    #!/bin/bash
    
    # Destination of the RTP stream (overridable with --mc / --port).
    PORT=5004
    MULTICASTADDR=224.1.1.1
    # Encoder defaults (overridable with -b / -l / -p).
    VIDEO_BITRATE=6000000
    HEVC_LEVEL=1
    HEVC_PROFILE=main
    # extra-controls string handed to v4l2h265enc, assembled one control at a time.
    ENC_EXTRA_CONTROLS=enc
    ENC_EXTRA_CONTROLS+=,prepend_sps_and_pps_to_idr=1
    ENC_EXTRA_CONTROLS+=,video_gop_size=5
    ENC_EXTRA_CONTROLS+=,frame_level_rate_control_enable=1
    ENC_EXTRA_CONTROLS+=,video_bitrate_mode=0
    ENC_EXTRA_CONTROLS+=,vbv_buffer_size=3000
    ENC_EXTRA_CONTROLS+=,video_bitrate=$VIDEO_BITRATE
    
    # Parse the command line and update the stream/encoder settings accordingly.
    # Globals written: HEVC_PROFILE, HEVC_LEVEL, VIDEO_BITRATE, ENC_EXTRA_CONTROLS,
    #                  MULTICASTADDR, PORT
    # Arguments:       the script's command-line arguments ("$@")
    # Exits 0 after printing the help text, 1 on an invalid command line.
    # Note: parsing runs inside a function, so the script's own positional
    # parameters are left untouched (nothing after this point reads them).
    parse_args() {
            local parsed
            # Declaration and assignment are separate so getopt's exit status is kept.
            # "$0" is double-quoted so getopt reports errors under the real script
            # name instead of the literal text '$0'. The --input/--output long
            # options had no handler (they ended in "Internal error!") and are no
            # longer declared, so getopt itself rejects them with a proper message.
            if ! parsed=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,port:' -n "$0" -- "$@"); then
                    echo 'Terminating...' >&2
                    exit 1
            fi
            # Install getopt's normalised argument list as the positional parameters.
            eval set -- "$parsed"
            while true; do
                    case "$1" in
                            '-h'|'--help')
                                    echo "$0 - stream camera video as H.265 over RTP/UDP multicast"
                                    echo "Parameters:"
                                    echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
                                    echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
                                    echo "  -b 0..700000000 (Video Bitrate)"
                                    echo "  --mc <multicast ip address>"
                                    echo "  --port <port number> (default 5004)"
                                    echo "  -h This help"
                                    exit 0
                            ;;
                            '-p'|'--profile')
                                    # Unknown profile names silently fall back to "main".
                                    case "$2" in
                                            'main'|'Main'|'0') HEVC_PROFILE="main" ;;
                                            'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
                                            'main-10'|'2') HEVC_PROFILE="main-10" ;;
                                            *) HEVC_PROFILE="main" ;;
                                    esac
                                    echo "HEVC Profile: '$HEVC_PROFILE'"
                                    shift 2
                                    continue
                            ;;
                            '-l'|'--level')
                                    # Unknown levels silently fall back to "1".
                                    case "$2" in
                                            '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
                                            *) HEVC_LEVEL="1" ;;
                                    esac
                                    echo "HEVC Level: '$HEVC_LEVEL'"
                                    shift 2
                                    continue
                            ;;
                            '-b'|'--bitrate')
                                    echo "Video Bitrate: '$2'"
                                    VIDEO_BITRATE=$2
                                    # ENC_EXTRA_CONTROLS was expanded before option parsing and
                                    # still carries the default bitrate, so patch its
                                    # video_bitrate= entry. Everything from ",video_bitrate=" to
                                    # the end is replaced, i.e. video_bitrate is expected to be
                                    # the last control; no-op when there is no such entry.
                                    ENC_EXTRA_CONTROLS="${ENC_EXTRA_CONTROLS/,video_bitrate=*/,video_bitrate=${VIDEO_BITRATE}}"
                                    shift 2
                                    continue
                            ;;
                            '--mc')
                                    # getopt emits long options with their leading dashes.
                                    MULTICASTADDR=$2
                                    shift 2
                                    continue
                            ;;
                            '--port')
                                    PORT=$2
                                    shift 2
                                    continue
                            ;;
                            '--')
                                    shift
                                    break
                            ;;
                            *)
                                    echo 'Internal error!' >&2
                                    exit 1
                            ;;
                    esac
            done
    }
    parse_args "$@"
    
    # Sensor format selection. The two commented-out lines are kept from the
    # original script; only the AR0823 line is active.
    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'
    # v4l2src -> tiovxisp -> queue -> tiovxldc -> NV12 caps -> queue ->
    # v4l2h265enc (output-io-mode=dmabuf-import) -> rtph265pay -> udpsink.
    # Variable expansions are quoted so that a value containing whitespace or
    # glob characters reaches gst-launch-1.0 as a single argument.
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
                    sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
                    sink_0::pool-size=4 \
            ! queue max-size-buffers=1 leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
                    sink_0::pool-size=4 src::pool-size=4 \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue max-size-buffers=1 leaky=0 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 \
            ! udpsink host="${MULTICASTADDR}" auto-multicast=true port="${PORT}"
    

    The main modifications are adding a queue between tiovxisp and tiovxldc, moving the other queue down below the capsfilter line, and adding output-io-mode=dmabuf-import to the v4l2h265enc line.

    With that, I get the following statistics:

    perf_stats output:

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  27.81 % ( HWI =   1.50 %, SWI =   1.25 % )
    CPU:  c7x_1: TOTAL LOAD =   0. 2 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  69.50 % ( 255 MP/s )
    HWA:   LDC : LOAD =  72.49 % ( 256 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   2905 MB/s
    DDR: WRITE BW: AVG =   1775 MB/s
    DDR: TOTAL BW: AVG =   4680 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     72.94 degree Celsius
    thermal_zone1(CPU):     71.33 degree Celsius
    thermal_zone2(C7x):     72.74 degree Celsius

    parse_gst_tracers.py:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.51         33.31            30          17184      |
    |tiovxisp0                     25.16        33.31            30          17183      |
    |queue0                        0.91         33.31            30          17183      |
    |tiovxldc0                     25.92        33.30            30          17182      |
    |capsfilter1                   0.65         33.30            30          17182      |
    |queue1                        0.59         33.30            30          17182      |
    |v4l2h265enc0                  31.15        33.30            30          17181      |
    |capsfilter2                   0.65         33.30            30          17181      |
    |v4l2src0                      86.80        33.30            30          17181      |
    |rtph265pay0                   1.27         33.30            30          17181      |
    +-----------------------------------------------------------------------------------+

    Thank you very much for the help! This gets me to 30 frames per second, and a latency that's really nice.

    Regards,

    Bas Vermeulen

  • I need to follow up on this; I am trying to use a tee to have two streams running from the same video source at the same time. When I do this, my framerate drops to 25 (when using tee) or 15 (when using tiovxmultiscaler).

    The script I use to test:

    #!/bin/bash
    
    # Destinations of the two RTP streams; both use the same port.
    PORT=5004
    MULTICASTADDR1=224.1.1.1
    MULTICASTADDR2=224.1.1.2
    # Element used to split the stream into two branches.
    TEE=tee
    # Encoder defaults (overridable with -b / -l / -p).
    VIDEO_BITRATE=6000000
    HEVC_LEVEL=1
    HEVC_PROFILE=main
    # extra-controls string handed to v4l2h265enc, assembled one control at a time.
    ENC_EXTRA_CONTROLS=enc
    ENC_EXTRA_CONTROLS+=,prepend_sps_and_pps_to_idr=1
    ENC_EXTRA_CONTROLS+=,video_gop_size=5
    ENC_EXTRA_CONTROLS+=,frame_level_rate_control_enable=1
    ENC_EXTRA_CONTROLS+=,video_bitrate_mode=0
    ENC_EXTRA_CONTROLS+=,vbv_buffer_size=3000
    ENC_EXTRA_CONTROLS+=,video_bitrate=$VIDEO_BITRATE
    
    # Parse the command line and update the stream/encoder settings accordingly.
    # Globals written: HEVC_PROFILE, HEVC_LEVEL, VIDEO_BITRATE, ENC_EXTRA_CONTROLS,
    #                  MULTICASTADDR, MULTICASTADDR1, PORT
    # Arguments:       the script's command-line arguments ("$@")
    # Exits 0 after printing the help text, 1 on an invalid command line.
    # Note: parsing runs inside a function, so the script's own positional
    # parameters are left untouched (nothing after this point reads them).
    parse_args() {
            local parsed
            # Declaration and assignment are separate so getopt's exit status is kept.
            # "$0" is double-quoted so getopt reports errors under the real script
            # name instead of the literal text '$0'. The --input/--output long
            # options had no handler (they ended in "Internal error!") and are no
            # longer declared, so getopt itself rejects them with a proper message.
            if ! parsed=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,port:' -n "$0" -- "$@"); then
                    echo 'Terminating...' >&2
                    exit 1
            fi
            # Install getopt's normalised argument list as the positional parameters.
            eval set -- "$parsed"
            while true; do
                    case "$1" in
                            '-h'|'--help')
                                    echo "$0 - stream camera video as H.265 over RTP/UDP multicast"
                                    echo "Parameters:"
                                    echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
                                    echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
                                    echo "  -b 0..700000000 (Video Bitrate)"
                                    echo "  --mc <multicast ip address>"
                                    echo "  --port <port number> (default 5004)"
                                    echo "  -h This help"
                                    exit 0
                            ;;
                            '-p'|'--profile')
                                    # Unknown profile names silently fall back to "main".
                                    case "$2" in
                                            'main'|'Main'|'0') HEVC_PROFILE="main" ;;
                                            'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
                                            'main-10'|'2') HEVC_PROFILE="main-10" ;;
                                            *) HEVC_PROFILE="main" ;;
                                    esac
                                    echo "HEVC Profile: '$HEVC_PROFILE'"
                                    shift 2
                                    continue
                            ;;
                            '-l'|'--level')
                                    # Unknown levels silently fall back to "1".
                                    case "$2" in
                                            '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
                                            *) HEVC_LEVEL="1" ;;
                                    esac
                                    echo "HEVC Level: '$HEVC_LEVEL'"
                                    shift 2
                                    continue
                            ;;
                            '-b'|'--bitrate')
                                    echo "Video Bitrate: '$2'"
                                    VIDEO_BITRATE=$2
                                    # ENC_EXTRA_CONTROLS was expanded before option parsing and
                                    # still carries the default bitrate, so patch its
                                    # video_bitrate= entry. Everything from ",video_bitrate=" to
                                    # the end is replaced, i.e. video_bitrate is expected to be
                                    # the last control; no-op when there is no such entry.
                                    ENC_EXTRA_CONTROLS="${ENC_EXTRA_CONTROLS/,video_bitrate=*/,video_bitrate=${VIDEO_BITRATE}}"
                                    shift 2
                                    continue
                            ;;
                            '--mc')
                                    # getopt emits long options with their leading dashes.
                                    # This script's pipeline reads MULTICASTADDR1/2, not
                                    # MULTICASTADDR, so the option is also applied to the first
                                    # stream. NOTE(review): the second stream has no option;
                                    # confirm this mapping is the intended one.
                                    MULTICASTADDR=$2
                                    MULTICASTADDR1=$2
                                    shift 2
                                    continue
                            ;;
                            '--port')
                                    PORT=$2
                                    shift 2
                                    continue
                            ;;
                            '--')
                                    shift
                                    break
                            ;;
                            *)
                                    echo 'Internal error!' >&2
                                    exit 1
                            ;;
                    esac
            done
    }
    parse_args "$@"
    
    # Sensor format selection. The two commented-out lines are kept from the
    # original script; only the AR0823 line is active.
    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'
    # v4l2src -> tiovxisp -> queue -> tiovxldc -> NV12 caps -> ${TEE} (named
    # "multi"), then two identical branches: queue -> v4l2h265enc ->
    # rtph265pay -> udpsink, one per multicast address.
    # Variable expansions are quoted so that a value containing whitespace or
    # glob characters reaches gst-launch-1.0 as a single argument.
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=/opt/imaging/ar0823/linear/dcc_viss.bin \
                    sink_0::dcc-2a-file=/opt/imaging/ar0823/linear/dcc_2a.bin format-msb=11 \
            ! queue max-size-buffers=1 leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=/opt/imaging/ar0823/linear/dcc_ldc.bin \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! "${TEE}" name=multi \
            multi. \
            ! queue max-size-buffers=1 leaky=0 name=qstream1 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 \
            ! udpsink host="${MULTICASTADDR1}" auto-multicast=true port="${PORT}" \
            multi. \
            ! queue max-size-buffers=1 leaky=0 name=qstream2 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 \
            ! udpsink host="${MULTICASTADDR2}" auto-multicast=true port="${PORT}"

    If you set the TEE variable in the script to tiovxmultiscaler, it will use the multiscaler; when set to tee, it uses the tee element.

    My end goal is being able to dynamically change my pipeline from streaming to multiple fakesinks, to streaming to one or two network (UDP) sinks and a screen (all running at 30 fps).

    Removing the second branch of the tee will make the pipeline run at 30 fps.

    Any idea how to get this running at 30 fps with two or more branches?

    Regards,

    Bas Vermeulen

  • Hi Bas,

    If I understand correctly: when you add a tee element to the output of tiovxldc and encode two streams, you see 25 fps; when you use tiovxmultiscaler instead of the tee element, you see 15 fps.

    Also can you share the tracers that you are seeing when using the script with tee element and multiscaler to see what element is causing the fps to drop?

    Best Regards,

    Suren

  • Hi Suren,

    Correct. If I drop the second encoder (and replace with a fakesink), I get back to 30 fps. If I use two encoders, I get 25 (tee) and 15 (tiovxmultiscaler).

    Output of parse_gst_tracers.py for the script with TEE=tee:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.39         39.19            25          1033       |
    |tiovxisp0                     65.18        39.15            25          1031       |
    |queue0                        15.02        39.17            25          1031       |
    |tiovxldc0                     36.67        39.15            25          1030       |
    |capsfilter1                   0.55         39.15            25          1030       |
    |multi                         0.72         19.57            51          2060       |
    |qstream1                      0.53         39.15            25          1030       |
    |qstream2                      0.64         39.14            25          1030       |
    |v4l2h265enc1                  51.01        39.04            25          1028       |
    |capsfilter3                   0.65         39.04            25          1028       |
    |v4l2src0                      163.60       19.52            51          2057       |
    |rtph265pay1                   1.23         39.04            25          1028       |
    |v4l2h265enc0                  34.20        39.00            25          1029       |
    |capsfilter2                   0.64         39.00            25          1029       |
    |rtph265pay0                   1.23         39.00            25          1029       |
    +-----------------------------------------------------------------------------------+

    Output of parse_gst_tracers.py for the script with TEE=tiovxmultiscaler:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.52         64.00            15          862        |
    |tiovxisp0                     111.86       63.98            15          860        |
    |queue0                        39.77        64.03            15          860        |
    |tiovxldc0                     25.57        64.00            15          859        |
    |capsfilter1                   0.72         64.00            15          859        |
    |multi                         36.01        31.97            31          1718       |
    |qstream1                      0.80         63.94            15          859        |
    |qstream2                      0.71         63.94            15          859        |
    |v4l2h265enc0                  32.39        63.82            15          858        |
    |capsfilter2                   0.70         63.82            15          858        |
    |v4l2src0                      258.53       31.92            31          1716       |
    |rtph265pay0                   1.46         63.82            15          858        |
    |v4l2h265enc1                  50.01        63.82            15          858        |
    |capsfilter3                   0.69         63.82            15          858        |
    |rtph265pay1                   1.39         63.82            15          858        |
    +-----------------------------------------------------------------------------------+

    I always find it difficult to judge what element is causing the extra latency.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    Is it okay if you were to try the same experiment with 1080 resolution? I am suspecting two 4K stream encode would be an overkill on the system and causing frame drops.

    Best Regards,

    Suren

  • Hi Bas,

    Is it okay if you were to try the same experiment with 1080 resolution? I am suspecting two 4K stream encode would be an overkill on the system and causing frame drops.

    Best Regards,

    Suren

  • I have a pipeline running on the EVM (with the imx219 camera, and without LDC) where I have a 4K stream and a full HD stream running at the same time.

    #!/bin/bash
    #
    # Defaults and command-line handling for the dual-stream IMX219 test
    # script. The command line may override any of the values below before
    # the pipeline is launched.

    # Multicast destination of the 3840x2160 mosaic stream.
    MULTICASTADDR=224.1.1.1
    PORT=5000
    # Multicast destination of the 1920x1080 stream.
    MULTICASTADDR1=224.2.2.1
    PORT1=5000

    # Encoder settings accepted on the command line.
    # NOTE(review): the gst-launch pipeline further down hard-codes its
    # extra-controls and does not read these three values - confirm whether
    # they are meant to be wired in.
    HEVC_PROFILE="main"
    HEVC_LEVEL="1"
    VIDEO_BITRATE=0

    # Print the option summary on stdout.
    usage() {
        echo "$0 - H.265 compress a JPEG file into a one second stream"
        echo "Parameters:"
        echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
        echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
        echo "  -b 0..700000000 (Video Bitrate)"
        echo "  --mc <multicast ip address>"
        echo "  --port <port number> (default 5000)"
        echo "  --mc1 <multicast ip address>"
        echo "  --port1 <port number> (default 5000)"
        echo "  -h This help"
    }

    # parse_args ARGS...
    #   Normalises ARGS with getopt(1) and updates the globals defined above.
    #   Exits 0 after printing the help text and 1 when the command line
    #   cannot be parsed.
    parse_args() {
        local parsed

        # -n is the name getopt uses in its own error messages; it must be
        # double-quoted so that $0 is expanded. The long options "input" and
        # "output" were declared but never handled, so they are not offered.
        if ! parsed=$(getopt -o 'p:l:b:h' \
                --long 'profile:,level:,bitrate:,help,mc:,port:,mc1:,port1:' \
                -n "$0" -- "$@"); then
            echo 'Terminating...' >&2
            exit 1
        fi

        # getopt prints a shell-quoted argument list; eval loads it back into
        # the positional parameters with the quoting intact.
        eval set -- "$parsed"

        while true; do
            case "$1" in
                '-h'|'--help')
                    usage
                    exit 0
                    ;;
                '-p'|'--profile')
                    case "$2" in
                        'main'|'Main'|'0') HEVC_PROFILE="main" ;;
                        'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
                        'main-10'|'2') HEVC_PROFILE="main-10" ;;
                        # Anything unrecognised falls back to the main profile.
                        *) HEVC_PROFILE="main" ;;
                    esac
                    echo "HEVC Profile: '$HEVC_PROFILE'"
                    shift 2
                    ;;
                '-l'|'--level')
                    case "$2" in
                        '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
                        # Anything unrecognised falls back to level 1.
                        *) HEVC_LEVEL="1" ;;
                    esac
                    echo "HEVC Level: '$HEVC_LEVEL'"
                    shift 2
                    ;;
                '-b'|'--bitrate')
                    echo "Video Bitrate: '$2'"
                    VIDEO_BITRATE=$2
                    shift 2
                    ;;
                # getopt hands long options over with their leading dashes, so
                # the patterns must include them to ever match.
                '--mc')
                    MULTICASTADDR=$2
                    shift 2
                    ;;
                '--port')
                    PORT=$2
                    shift 2
                    ;;
                '--mc1')
                    MULTICASTADDR1=$2
                    shift 2
                    ;;
                '--port1')
                    PORT1=$2
                    shift 2
                    ;;
                '--')
                    shift
                    break
                    ;;
                *)
                    echo 'Internal error!' >&2
                    exit 1
                    ;;
            esac
        done
    }

    parse_args "$@"
    
    # Set the IMX219 sensor pad to 10-bit Bayer (SRGGB10) at 1920x1080.
    media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'

    # The pipeline description is collected in an array, one stage per line,
    # and passed to gst-launch-1.0 as separate arguments in the same order.
    # Variable expansions are left unquoted, so the shell still word-splits them.
    pipeline=(
        # Capture and ISP: Bayer in, NV12 out.
        v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true
        ! video/x-bayer, width=1920, height=1080, framerate=30/1, format=rggb10
        ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_SONY_IMX219_RPI"
        dcc-isp-file=/opt/imaging/imx219/linear/dcc_viss_10b.bin
        sink_0::dcc-2a-file=/opt/imaging/imx219/linear/dcc_2a_10b.bin format-msb=9
        ! video/x-raw, format=NV12, width=1920, height=1080, framerate=30/1

        # Multiscaler with five 1920x1080 outputs; src_0..src_3 feed the mosaic.
        ! tiovxmultiscaler name=multi target=0
        multi.src_0
        ! video/x-raw, format=NV12, width=1920, height=1080, framerate=30/1
        ! mosaic.sink_0
        multi.src_1
        ! video/x-raw, format=NV12,width=1920, height=1080, framerate=30/1
        ! mosaic.sink_1
        multi.src_2
        ! video/x-raw, format=NV12,width=1920, height=1080, framerate=30/1
        ! mosaic.sink_2
        multi.src_3
        ! video/x-raw, format=NV12,width=1920, height=1080, framerate=30/1
        ! mosaic.sink_3

        # src_4: the 1920x1080 stream, H.265 encoded and sent over RTP/UDP.
        multi.src_4
        ! video/x-raw, format=NV12,width=1920, height=1080, framerate=30/1
        ! queue max-size-buffers=1 leaky=0
        ! v4l2h265enc extra-controls="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5"
        ! rtph265pay config-interval=1 pt=96
        ! udpsink host=${MULTICASTADDR1} auto-multicast=true port=${PORT1}

        # Mosaic: four 1920x1080 tiles placed in a 3840x2160 frame, then
        # H.265 encoded and sent over RTP/UDP.
        tiovxmosaic name=mosaic target=2
        sink_0::startx="<0>" sink_0::starty="<0>"
        sink_1::startx="<1920>" sink_1::starty="<1080>"
        sink_2::startx="<0>" sink_2::starty="<1080>"
        sink_3::startx="<1920>" sink_3::starty="<0>"
        ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1
        ! queue max-size-buffers=1 leaky=0
        ! v4l2h265enc extra-controls="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5"
        ! rtph265pay config-interval=1 pt=96
        ! udpsink host=${MULTICASTADDR} auto-multicast=true port=${PORT}
    )

    gst-launch-1.0 -q "${pipeline[@]}"

    parse_gst_tracers.py output:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.27         33.33            30          6594       |
    |tiovxisp0                     8.55         33.32            30          6601       |
    |capsfilter1                   0.38         33.32            30          6605       |
    |multi                         11.87        6.66             150         33040      |
    |capsfilter2                   0.37         33.32            30          6612       |
    |capsfilter3                   0.23         33.32            30          6618       |
    |capsfilter4                   0.21         33.32            30          6618       |
    |capsfilter5                   0.20         33.32            30          6618       |
    |capsfilter6                   0.21         33.32            30          6618       |
    |queue0                        0.27         33.32            30          6618       |
    |v4l2h265enc0                  27.46        33.31            30          6617       |
    |v4l2src0                      79.19        16.66            60          13232      |
    |rtph265pay0                   0.53         33.31            30          6617       |
    |mosaic                        40.56        33.31            30          6616       |
    |capsfilter7                   0.35         33.31            30          6620       |
    |queue1                        0.25         33.31            30          6621       |
    |v4l2h265enc1                  44.34        33.30            30          6622       |
    |rtph265pay1                   0.73         33.30            30          6623       |
    +-----------------------------------------------------------------------------------+

    The only difference with what I am trying now is that this pipeline doesn't have the LDC, and I am using the tiovxmultiscaler + tiovxmosaic to generate a 4K stream.

    That's why I don't really understand why everything is so slow when using the larger camera.

    The encoding should be fine. I just don't understand why I can't translate this to our phyboard with the 4K camera.

    Regards,

    Bas

  • Hi Bas,

    Let me try and run some pipelines on my AM62A board early next week and get back to you with the observations on my end.

    Have a great weekend!

    Best Regards,

    Suren

  • Hi ,

    Were you able to run some pipelines, and what were your observations?

    Regards,

    Bas Vermeulen

  • Hi Bas,

    I tried the below pipeline with 1080p and with videotestsrc. 

    [2024-10-09 16:32:43.589] root@am62axx-evm:/opt/edgeai-gst-apps# GST_TRACERS="latency(flags=pipeline+element)" GST_DEBUG=GST_TRACER:7 GST_DEBUG_FILE="/run/latency_4k.txt" \
    [2024-10-09 16:32:46.483] > gst-launch-1.0 -v videotestsrc ! \
    [2024-10-09 16:32:46.535] > video/x-raw, width=1920, height=1080, framerate=60/1, format=NV12 ! \
    [2024-10-09 16:32:46.635] > tee name=tee_split0 \
    [2024-10-09 16:32:46.687] > tee_split0. ! queue ! v4l2h265enc ! rtph265pay ! udpsink host=127.0.0.1 port=5001 \
    [2024-10-09 16:32:46.845] > tee_split0. ! queue ! v4l2h265enc ! rtph265pay ! udpsink host=127.0.0.1 port=6001 
    [2024-10-09 16:32:48.405] Setting pipeline to PAUSED ...
    +-----------------------------------------------------------------------------------+
    [2024-10-09 17:08:49.619] |element                       latency      out-latancy      out-fps     frames     |
    [2024-10-09 17:08:49.635] +-----------------------------------------------------------------------------------+
    [2024-10-09 17:08:49.636] |capsfilter0                   0.21         22.12            45          302        |
    [2024-10-09 17:08:49.651] |tee_split0                    0.88         11.06            90          604        |
    [2024-10-09 17:08:49.651] |queue0                        19.00        22.12            45          302        |
    [2024-10-09 17:08:49.667] |queue1                        21.27        22.12            45          302        |
    [2024-10-09 17:08:49.667] |v4l2h265enc1                  39.21        22.04            45          301        |
    [2024-10-09 17:08:49.683] |videotestsrc0                 61.15        11.02            90          601        |
    [2024-10-09 17:08:49.683] |rtph265pay1                   0.46         22.01            45          300        |
    [2024-10-09 17:08:49.699] |v4l2h265enc0                  39.47        21.98            45          301        |
    [2024-10-09 17:08:49.699] |rtph265pay0                   0.44         21.97            45          301        |
    [2024-10-09 17:08:49.700] +-----------------------------------------------------------------------------------+
    

    I'm not sure why the tee element is receiving half of the frames. I will experiment more with this.

    Best Regards,

    Suren

  • Hi Bas,

    Continuing on running IMX219 with 4K resolution:

    Below pipeline:

    # Runs gst-launch-1.0 with the GStreamer latency tracer enabled
    # (pipeline + element flags) and the debug log written to
    # /run/latency_4k-enc-1.txt.
    # Pipeline: IMX219 capture at 3280x2464, 15 fps (rggb10) -> tiovxisp ->
    # tiovxmultiscaler down to 1920x1080 -> v4l2h264enc -> rtph264pay -> tee,
    # with both tee branches sent by udpsink to 127.0.0.1 (ports 5001 and 6001).
    # The stream is encoded once and the encoded output is duplicated.
    GST_TRACERS="latency(flags=pipeline+element)" GST_DEBUG=GST_TRACER:7 GST_DEBUG_FILE="/run/latency_4k-enc-1.txt" \
    gst-launch-1.0 v4l2src device=/dev/video-imx219-cam0 io-mode=dmabuf-import ! queue max-size-buffers=1 leaky=2 ! \
    video/x-bayer, width=3280, height=2464, framerate=15/1, format=rggb10 ! \
    tiovxisp sink_0::pool-size=4 sink_0::device=/dev/v4l-imx219-subdev0 sensor-name="SENSOR_SONY_IMX219_RPI" \
    dcc-isp-file=/opt/imaging/imx219/linear/dcc_viss_10b.bin \
    sink_0::dcc-2a-file=/opt/imaging/imx219/linear/dcc_2a_10b.bin format-msb=9 ! \
    video/x-raw, format=NV12, width=3280, height=2464, framerate=15/1 ! queue ! tiovxmultiscaler ! queue ! \
    video/x-raw, format=NV12, width=1920, height=1080, framerate=15/1 ! \
    v4l2h264enc ! rtph264pay ! tee name=t1 \
    t1. ! queue ! udpsink host=127.0.0.1 port=5001 \
    t1. ! queue ! udpsink host=127.0.0.1 port=6001

    Best Regards,

    Suren

  • Hi Suren,

    Unfortunately the problem isn't visible when using 4K @ 15 fps. Do you have a possibility to test with 4K @ 30 fps?

    Regards,

    Bas

  • Hi Bas,

    Unfortunately, I don't have a camera that can run 4K@30fps. 

    Best Regards,

    Suren

  • Hi ,

    Did you get any insights from your team after our discussion?

    Regards,

    Bas

  • Hi Bas,

    It's Diwali week and most of the team is on vacation. I will have an update next week, when they are back in the office.

    Apologies for the delay.

    Best Regards,

    Suren

  • Hi ,

    I hope you and your team had a nice Diwali week. Were you able to get any insights from your team after our discussion?

    Regards,

    Bas Vermeulen

  • Hi Bas,

    I briefly had a discussion and the expert thinks this could be due to networking.

    Can we try to dump the encoded streams into file instead of being streamed on network and do we see the same issue? 

    AFAIK, fakesink shows 30 fps. Is that correct?

    Best Regards,

    Suren

  • When using the fakesink I am not using the encoder. I can test with the fakesink after the RTP Payloader.

  • I've replaced udpsink on both paths with fakesink, and then I get 30 fps.

    Any idea what the best way is to tune the performance of the udpsink?

  • Hi Bas,

    We might have to do trial and error on the properties of udpsink. 

    Can you try setting sync=false on udpsink? Also try different values for its buffer-size parameter.

    Also we can try playing with max-size-buffers, min-threshold-bytes of the queue element.

    Let me know how it goes.

    Best Regards,

    Suren

  • Hi Suren, sorry for the delay.

    I'm trying to recreate the two full paths with fakesink @ 30 fps, and seem to be failing there. The moment I reintroduce the encoder, the fps drops to 25, so it's possible that that is the limiting factor. I've experimented with adding a tiovxmultiscaler on the second stream, and this drops the latency for the encoder element down to below 33 ms, which should be fine. The framerate still drops to 25 though.

    I currently have scripts for the following scenarios:

    1. Pipeline with tee to two streams, one with udpsink and one with fakesink
      Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> udpsink to multicast address
                                                     |-> queue -> fakesink
      This pipeline gets 30 fps or close to it.
    2. Pipeline with tee to two streams, both with udpsink
      Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> udpsink to multicast address 1
                                                    |-> queue -> tiovxmultiscaler to 1920x1080 -> encoder -> rtp payloader -> udpsink to multicast address 2
      This pipeline gets 25-26 fps
    3. Pipeline with tee to two streams, with encoder and rtp payloader, to fakesinks
      Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> fakesink
                                                    |-> queue -> tiovxmultiscaler to 1920x1080 -> encoder -> rtp payloader -> fakesink
      This pipeline gets 25-26 fps
    4. Pipeline with tee to two streams, one with udpsink and one with fakesink
      Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> udpsink to multicast address
                                                     |-> queue -> tiovxmultiscaler to 1920x1080 -> fakesink
      This pipeline gets 26 fps or close to it.
    5. Pipeline with tee to two streams, both with udpsink
      Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> udpsink to multicast address 1
                                                    |-> queue -> encoder -> rtp payloader -> udpsink to multicast address 2
      This pipeline gets 25 fps

    I'm unsure what causes the drop in framerate. What annoys me is that I'm sure I tried things correctly before, but apparently that was wrong. The moment I use a serious element behind the second streams queue, my framerate drops to around 25 fps.

    Using a single multiscaler to lower the resolution of the second stream helps a little bit.

    I've tried increasing the max-size-buffers on the queue, but that didn't help (from 1 to 4). I don't want to increase those if possible, as that will add to the latency of my system.

    Regards,

    Bas Vermeulen

  • Hi Bas

    Sensor -> ISP -> LDC -> tee -> queue -> encoder -> rtp payloader -> udpsink to multicast address 1
                                                  |-> queue -> tiovxmultiscaler to 1920x1080 -> encoder -> rtp payloader -> udpsink to multicast address 2

    In the above scenario, you are streaming a 4K encoded stream  and the other one FHD correct? 

    What if you use the output of LDC-> Multiscaler to FHD-> Encode -> tee -> queue -> udpstream1

                                                                                                                         -> queue -> udpstream2 

    Does this work with no drops in FPS? I am suspecting 4K + FHD both @30fps would be a stretch.. I will try to run the pipeline with 5MP resolution and FHD next week and update you with the results.

    Best Regards,

    Suren

  • That gets me 24 frames per second.

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.41         41.05            24          749        |
    |tiovxisp0                     68.63        40.98            24          747        |
    |queue0                        16.84        41.00            24          746        |
    |tiovxldc0                     39.73        40.98            24          745        |
    |capsfilter1                   0.54         40.98            24          745        |
    |queue1                        17.07        41.00            24          745        |
    |tiovxmultiscaler0             39.37        40.98            24          744        |
    |capsfilter2                   0.68         40.98            24          744        |
    |queue2                        0.53         40.98            24          744        |
    |multi                         1.07         20.49            48          1488       |
    |qstream1                      0.68         40.98            24          744        |
    |qstream2                      0.72         40.98            24          744        |
    |v4l2h265enc0                  19.65        40.87            24          744        |
    |capsfilter3                   0.58         40.87            24          744        |
    |v4l2src0                      210.66       20.45            48          1487       |
    |rtph265pay0                   1.08         40.87            24          744        |
    |v4l2h265enc1                  27.26        40.87            24          743        |
    |capsfilter4                   0.52         40.87            24          743        |
    |rtph265pay1                   1.05         40.87            24          743        |
    +-----------------------------------------------------------------------------------+

    The script I used:

    #!/bin/bash
    #
    # Defaults and command-line handling for the AR0823 dual-stream test
    # script. The command line may override the values below before the
    # pipeline is launched.

    # Tuning files handed to tiovxisp (ISP and 2A) and to tiovxldc.
    DCC_ISP_FILE=/opt/imaging/ar0823/linear/dcc_viss.bin
    DCC_2A_FILE=/opt/imaging/ar0823/linear/dcc_2a.bin
    LDC_DCC_FILE=/opt/imaging/ar0823/linear/dcc_ldc.bin

    # One multicast group per output stream; both udpsinks use PORT.
    MULTICASTADDR1=224.1.1.1
    MULTICASTADDR2=224.1.1.2
    PORT=5004

    # Element that splits the stream; taken from the environment when TEE is
    # already set to a non-empty value, otherwise the stock "tee".
    : "${TEE:=tee}"

    HEVC_PROFILE="main"
    HEVC_LEVEL="1"
    VIDEO_BITRATE=6000000

    # Print the option summary on stdout.
    usage() {
        echo "$0 - H.265 compress a JPEG file into a one second stream"
        echo "Parameters:"
        echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
        echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
        echo "  -b 0..700000000 (Video Bitrate)"
        echo "  --mc <multicast ip address>"
        echo "  --port <port number> (default 5004)"
        echo "  -h This help"
    }

    # parse_args ARGS...
    #   Normalises ARGS with getopt(1), updates the globals defined above and
    #   then derives ENC_EXTRA_CONTROLS from the final VIDEO_BITRATE.
    #   Exits 0 after printing the help text and 1 when the command line
    #   cannot be parsed.
    parse_args() {
        local parsed

        # -n is the name getopt uses in its own error messages; it must be
        # double-quoted so that $0 is expanded. The long options "input" and
        # "output" were declared but never handled, so they are not offered.
        if ! parsed=$(getopt -o 'p:l:b:h' \
                --long 'profile:,level:,bitrate:,help,mc:,port:' \
                -n "$0" -- "$@"); then
            echo 'Terminating...' >&2
            exit 1
        fi

        # getopt prints a shell-quoted argument list; eval loads it back into
        # the positional parameters with the quoting intact.
        eval set -- "$parsed"

        while true; do
            case "$1" in
                '-h'|'--help')
                    usage
                    exit 0
                    ;;
                '-p'|'--profile')
                    case "$2" in
                        'main'|'Main'|'0') HEVC_PROFILE="main" ;;
                        'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
                        'main-10'|'2') HEVC_PROFILE="main-10" ;;
                        # Anything unrecognised falls back to the main profile.
                        *) HEVC_PROFILE="main" ;;
                    esac
                    echo "HEVC Profile: '$HEVC_PROFILE'"
                    shift 2
                    ;;
                '-l'|'--level')
                    case "$2" in
                        '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
                        # Anything unrecognised falls back to level 1.
                        *) HEVC_LEVEL="1" ;;
                    esac
                    echo "HEVC Level: '$HEVC_LEVEL'"
                    shift 2
                    ;;
                '-b'|'--bitrate')
                    echo "Video Bitrate: '$2'"
                    VIDEO_BITRATE=$2
                    shift 2
                    ;;
                # getopt hands long options over with their leading dashes, so
                # the patterns must include them to ever match.
                '--mc')
                    # NOTE(review): this stores into MULTICASTADDR, but the
                    # pipeline sends to MULTICASTADDR1/MULTICASTADDR2, so the
                    # option does not change a stream destination yet - confirm
                    # which stream it is meant to control.
                    MULTICASTADDR=$2
                    shift 2
                    ;;
                '--port')
                    PORT=$2
                    shift 2
                    ;;
                '--')
                    shift
                    break
                    ;;
                *)
                    echo 'Internal error!' >&2
                    exit 1
                    ;;
            esac
        done

        # Built only after parsing so that -b/--bitrate reaches the encoder.
        ENC_EXTRA_CONTROLS="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5,frame_level_rate_control_enable=1,video_bitrate_mode=0,vbv_buffer_size=3000,video_bitrate=${VIDEO_BITRATE}"
    }

    parse_args "$@"
    
    # Sensor format setup; the two commented lines are the same call for
    # other sensors (imx219, ar0521).
    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'

    # The pipeline description is collected in an array, one stage per line,
    # and passed to gst-launch-1.0 as separate arguments in the same order.
    # Variable expansions are left unquoted, so the shell still word-splits them.
    pipeline=(
        # Capture 3840x2160 Bayer and run it through the ISP.
        v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true
        ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12
        ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823"
        dcc-isp-file=${DCC_ISP_FILE}
        sink_0::dcc-2a-file=${DCC_2A_FILE} format-msb=11
        ! queue max-size-buffers=1 leaky=0

        # Lens distortion correction at full resolution.
        ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=${LDC_DCC_FILE}
        ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1
        ! queue max-size-buffers=1 leaky=0

        # Scale down to 1920x1080 before splitting.
        ! tiovxmultiscaler
        ! video/x-raw, format=NV12, width=1920, height=1080, framerate=30/1
        ! queue max-size-buffers=1 leaky=0
        ! ${TEE} name=multi

        # Branch 1: H.265 encode and send to the first multicast group.
        multi.src_0
        ! queue max-size-buffers=1 leaky=0 name=qstream1
        ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS}
        ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}"
        ! rtph265pay config-interval=1 pt=96 mtu=1400
        ! udpsink host=${MULTICASTADDR1} auto-multicast=true port=${PORT}

        # Branch 2: same encode chain, sent to the second multicast group.
        multi.src_1
        ! queue max-size-buffers=1 leaky=0 name=qstream2
        ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS}
        ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}"
        ! rtph265pay config-interval=1 pt=96 mtu=1400
        ! udpsink host=${MULTICASTADDR2} auto-multicast=true port=${PORT}
    )

    gst-launch-1.0 -v -e "${pipeline[@]}"
    

    Regards,

    Bas Vermeulen

  • Hi Suren,

    Would it help if we send you one of our camera setups? That would be a phyboard + am62a SOM, a camera board and an assembly to hold everything together. Add ethernet, a micro usb cable for serial port access and a USB C power cable for power.

    The software is based on SDK 9.2.0, and would be included.

    Let me know if this would help you in your investigation of the issue.

    Regards,

    Bas Vermeulen

  • On another note, with the imx219 4K + FHD stream, I stream both streams at the same time with the udpsink. So the networking should not have a problem with the amount of data that is being sent.

    The same pipeline also shows that the encoder is able to encode 3840x2160 and 1920x1080 at the same time; that's part of the reason I wanted that tested, so that I could be sure there wasn't a bottleneck in that area.

    I'm just not sure where the bottleneck is currently.

  • Hi Bas,

    Sent you a private message. 

    Best Regards,

    Suren

  • Hi Suren,

    Any progress on this, and is there anything I can do to help your testing? This is a high priority at our end, and if possible we would like to resolve before the end of the year.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    Since it was Thanksgiving week, I had no chance to work on this. I just wanted to let you know that we have received the package at our dock.

    Jianzhong, my colleague will try and run the setup and will reach out to you soon.

    Best Regards,

    Suren

  • Hi and Jianzhong,

    Were you able to run the setup, and were you able to get some insights into the possible bottlenecks?

    It's starting to get more and more critical that I get this problem resolved and/or the restrictions clear.

    Regards,

    Bas Vermeulen

  • Hello Bas,

    Sorry for our late response. Suren is on a business trip.

    That's why I don't really understand why everything is so slow when using the larger camera.

    The encoding should be fine. I just don't understand why I can't translate this to our phyboard with the 4K camera.

    One possible reason is the DDR utilization. When you use the 4K camera, the DDR reads/writes by the CSI Rx receiver, ISP, and encoder all go up. Do you have the perf_stats tool (github.com/.../perf_stats) available in your custom Linux? You can use this tool to profile the DDR utilization.

    You can also do some hand calculation. For example, attached is a spreadsheet that calculates DDR usage by CSI2 Rx and ISP. You can add others like encoder, rtp, etc.

    DDR-analysis.xlsx

    Once you have the total DDR utilization estimate, you can see how much room there is below your DDR bandwidth. A rule of thumb is to have at least 25% margin.  

    Regards,

    Jianzhong

  • Hi Jianzhong Xu,

    I have sent Bryan a mail with a link to a compiled version of perf_stats for our board.

    I've also done some additional testing, compiled the mbw utility for the board, and ran it. 

    One instance caps one CPU at 100%, and gets 4 GB/s (2 GB/s read and 2 GB/s write).
    Four instances cap all CPUs at 100%, and get about 8 GB/s throughput (4 GB/s read and 4 GB/s write).

    So the LPDDR4 doesn't seem to be the bottleneck by itself, as it can reach 4 GB/s read and write for a total bandwidth of 8 GB/s (at least).

    When running my scripts, there's no CPU fully loaded (most average about 35-45%) with most time spent in kernel (htop shows the usage as red).

    The script with one udpsink connected to the tee

    #!/bin/bash
    #
    # Defaults and command-line handling for the AR0823 test script with one
    # udpsink branch and one fakesink branch. The command line may override
    # the values below before the pipeline is launched.

    # Tuning files handed to tiovxisp (ISP and 2A) and to tiovxldc.
    DCC_ISP_FILE=/opt/imaging/ar0823/linear/dcc_viss.bin
    DCC_2A_FILE=/opt/imaging/ar0823/linear/dcc_2a.bin
    LDC_DCC_FILE=/opt/imaging/ar0823/linear/dcc_ldc.bin

    # Multicast destinations. The pipeline in this script only sends to
    # MULTICASTADDR1; MULTICASTADDR2 is defined but its second branch ends in
    # a fakesink.
    MULTICASTADDR1=224.1.1.1
    MULTICASTADDR2=224.1.1.2
    PORT=5004

    # Element that splits the stream; taken from the environment when TEE is
    # already set to a non-empty value, otherwise the stock "tee".
    : "${TEE:=tee}"

    HEVC_PROFILE="main"
    HEVC_LEVEL="1"
    VIDEO_BITRATE=6000000

    # Print the option summary on stdout.
    usage() {
        echo "$0 - H.265 compress a JPEG file into a one second stream"
        echo "Parameters:"
        echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
        echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
        echo "  -b 0..700000000 (Video Bitrate)"
        echo "  --mc <multicast ip address>"
        echo "  --port <port number> (default 5004)"
        echo "  -h This help"
    }

    # parse_args ARGS...
    #   Normalises ARGS with getopt(1), updates the globals defined above and
    #   then derives ENC_EXTRA_CONTROLS from the final VIDEO_BITRATE.
    #   Exits 0 after printing the help text and 1 when the command line
    #   cannot be parsed.
    parse_args() {
        local parsed

        # -n is the name getopt uses in its own error messages; it must be
        # double-quoted so that $0 is expanded. The long options "input" and
        # "output" were declared but never handled, so they are not offered.
        if ! parsed=$(getopt -o 'p:l:b:h' \
                --long 'profile:,level:,bitrate:,help,mc:,port:' \
                -n "$0" -- "$@"); then
            echo 'Terminating...' >&2
            exit 1
        fi

        # getopt prints a shell-quoted argument list; eval loads it back into
        # the positional parameters with the quoting intact.
        eval set -- "$parsed"

        while true; do
            case "$1" in
                '-h'|'--help')
                    usage
                    exit 0
                    ;;
                '-p'|'--profile')
                    case "$2" in
                        'main'|'Main'|'0') HEVC_PROFILE="main" ;;
                        'main-still-picture'|'1') HEVC_PROFILE="main-still-picture" ;;
                        'main-10'|'2') HEVC_PROFILE="main-10" ;;
                        # Anything unrecognised falls back to the main profile.
                        *) HEVC_PROFILE="main" ;;
                    esac
                    echo "HEVC Profile: '$HEVC_PROFILE'"
                    shift 2
                    ;;
                '-l'|'--level')
                    case "$2" in
                        '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1') HEVC_LEVEL="$2" ;;
                        # Anything unrecognised falls back to level 1.
                        *) HEVC_LEVEL="1" ;;
                    esac
                    echo "HEVC Level: '$HEVC_LEVEL'"
                    shift 2
                    ;;
                '-b'|'--bitrate')
                    echo "Video Bitrate: '$2'"
                    VIDEO_BITRATE=$2
                    shift 2
                    ;;
                # getopt hands long options over with their leading dashes, so
                # the patterns must include them to ever match.
                '--mc')
                    # NOTE(review): this stores into MULTICASTADDR, but the
                    # pipeline sends to MULTICASTADDR1, so the option does not
                    # change the stream destination yet - confirm the intent.
                    MULTICASTADDR=$2
                    shift 2
                    ;;
                '--port')
                    PORT=$2
                    shift 2
                    ;;
                '--')
                    shift
                    break
                    ;;
                *)
                    echo 'Internal error!' >&2
                    exit 1
                    ;;
            esac
        done

        # Built only after parsing so that -b/--bitrate reaches the encoder.
        ENC_EXTRA_CONTROLS="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5,frame_level_rate_control_enable=1,video_bitrate_mode=0,vbv_buffer_size=3000,video_bitrate=${VIDEO_BITRATE}"
    }

    parse_args "$@"
    
    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=${DCC_ISP_FILE} \
                    sink_0::dcc-2a-file=${DCC_2A_FILE} format-msb=11 \
            ! queue max-size-buffers=1 leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=${LDC_DCC_FILE} \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue max-size-buffers=1 leaky=0 \
            ! ${TEE} name=multi \
            multi.src_0 \
            ! queue max-size-buffers=1 leaky=0 name=qstream1 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS} \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host=${MULTICASTADDR1} auto-multicast=true port=${PORT} \
            multi.src_1 \
            ! queue max-size-buffers=1 leaky=0 name=qstream2 \
            ! fakesink

    gets a memory bandwidth use of around 4.7 GB/s

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  28.60 % ( HWI =   1.71 %, SWI =   1.46 % )
    CPU:  c7x_1: TOTAL LOAD =   0. 2 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  68.80 % ( 253 MP/s )
    HWA:   LDC : LOAD =  72.77 % ( 255 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   2899 MB/s
    DDR: WRITE BW: AVG =   1801 MB/s
    DDR: TOTAL BW: AVG =   4700 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     69.51 degree Celsius
    thermal_zone1(CPU):     69.31 degree Celsius
    thermal_zone2(C7x):     70.93 degree Celsius

    and shows the expected 248+ MP/s throughput for the ISP and LDC as well.

    When I switch to two UDP sinks in the same pipeline

    --- record-4k-to-rtp-multicast.ar0822.tee+udpsink+fakesink.sh
    +++ record-4k-to-rtp-multicast.ar0822.tee+udpsinkx2.sh
    @@ -121,7 +121,10 @@
            ! udpsink host=${MULTICASTADDR1} auto-multicast=true port=${PORT} \
             multi.src_1 \
             ! queue max-size-buffers=1 leaky=0 name=qstream2 \
    -       ! fakesink
    +        ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS} \
    +        ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
    +       ! rtph265pay config-interval=1 pt=96 mtu=1400 \
    +       ! udpsink host=${MULTICASTADDR2} auto-multicast=true port=${PORT}
    

    The DDR bandwidth goes up a little (from about 4.7 GB/s to about 4.8 GB/s), but the ISP and LDC throughput drops to ~218 MP/s

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  33.49 % ( HWI =   1.98 %, SWI =   1.48 % )
    CPU:  c7x_1: TOTAL LOAD =   0. 1 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  59.19 % ( 217 MP/s )
    HWA:   LDC : LOAD =  61.29 % ( 215 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   3058 MB/s
    DDR: WRITE BW: AVG =   1710 MB/s
    DDR: TOTAL BW: AVG =   4768 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     71.33 degree Celsius
    thermal_zone1(CPU):     70.93 degree Celsius
    thermal_zone2(C7x):     72.54 degree Celsius

    htop shows similar CPU usage, ranging from 20% to 45% on all cores. None of the cores is at 100%, so the bottleneck doesn't seem to be the CPU.

    What else could be the limiting factor here? The DDR is able to sustain a throughput of at least 8 GB/s, while my scripts only use 4.7 GB/s.
    The CPU load is spread evenly across the cores, and the process doesn't seem to be CPU bound.

    I'm just unsure how to find out what the bottleneck could be.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    Why are you setting max-size-buffers=1?
    Can you please remove this and try again?

    Regards
    Rahul T R

  • Hi Rahul,

    I'm using max-size-buffers=1 for the queue to try and keep the total pipeline latency low.

    I've tried removing them all, and get the same framerate (25-26).

    The modified script:

    # Capture 3840x2160 GRBG12 Bayer frames at 30 fps from /dev/video3, run them
    # through tiovxisp and tiovxldc (NV12 output), then split the result with
    # ${TEE} into two branches. Each branch is H.265-encoded by its own
    # v4l2h265enc, RTP-payloaded and sent with udpsink: branch 0 to
    # ${MULTICASTADDR1}, branch 1 to ${MULTICASTADDR2}, both on ${PORT}.
    # In this variant the queues carry no max-size-buffers limit; only leaky=0
    # is set on them.
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file=${DCC_ISP_FILE} \
                    sink_0::dcc-2a-file=${DCC_2A_FILE} format-msb=11 \
            ! queue leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file=${LDC_DCC_FILE} \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue leaky=0 \
            ! ${TEE} name=multi \
            multi.src_0 \
            ! queue leaky=0 name=qstream1 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS} \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host=${MULTICASTADDR1} auto-multicast=true port=${PORT} \
            multi.src_1 \
            ! queue leaky=0 name=qstream2 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls=${ENC_EXTRA_CONTROLS} \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host=${MULTICASTADDR2} auto-multicast=true port=${PORT}

    The output of parse_gst_tracers.py:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.38         39.05            25          3745       |
    |tiovxisp0                     66.01        39.04            25          3743       |
    |queue0                        14.96        39.05            25          3743       |
    |tiovxldc0                     37.82        39.04            25          3742       |
    |capsfilter1                   0.50         39.04            25          3742       |
    |queue1                        0.40         39.04            25          3742       |
    |multi                         0.73         19.52            51          7484       |
    |qstream1                      0.47         39.04            25          3742       |
    |qstream2                      0.47         39.04            25          3742       |
    |v4l2h265enc1                  50.87        39.01            25          3741       |
    |capsfilter3                   0.60         39.01            25          3741       |
    |v4l2src0                      165.33       19.50            51          7482       |
    |rtph265pay1                   1.10         39.01            25          3741       |
    |v4l2h265enc0                  33.89        39.00            25          3741       |
    |capsfilter2                   0.57         39.00            25          3741       |
    |rtph265pay0                   1.08         39.00            25          3741       |
    +-----------------------------------------------------------------------------------+

    The output of perf_stats:

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  31.72 % ( HWI =   1.77 %, SWI =   1.26 % )
    CPU:  c7x_1: TOTAL LOAD =   0.14 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  59.49 % ( 219 MP/s )
    HWA:   LDC : LOAD =  60.40 % ( 215 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   2968 MB/s
    DDR: WRITE BW: AVG =   1618 MB/s
    DDR: TOTAL BW: AVG =   4586 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     65.20 degree Celsius
    thermal_zone1(CPU):     64.58 degree Celsius
    thermal_zone2(C7x):     66.65 degree Celsius

    Regards,

    Bas Vermeulen

  • Hi ,

    I'm back from the Christmas holiday.

    Any further ideas on this? I've tried to measure the data throughput on the LPDDR4, and it should be able to handle a lot more than I am using at the moment. I've seen LPDDR4 throughput of around 8.2 GB/s in total:

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD = 100. 0 % ( HWI =   0.48 %, SWI =   0.24 % )
    CPU:  c7x_1: TOTAL LOAD =   0.14 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   4064 MB/s
    DDR: WRITE BW: AVG =   4030 MB/s
    DDR: TOTAL BW: AVG =   8094 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     67.68 degree Celsius
    thermal_zone1(CPU):     67.88 degree Celsius
    thermal_zone2(C7x):     68.70 degree Celsius

    I'm unsure what other bottlenecks are present in the system.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    When I switch to two UDP sinks in the same pipeline
    The DDR bandwidth goes up a little (from about 4.7 GB/s to about 4.8 GB/s), but the ISP and LDC throughput drops to ~218 MP/s

    When you switched to two UDP sinks, you also added a second "v4l2h265enc" element. I'm wondering if this slowed down the pipeline. It looks like the two "v4l2h265enc" elements are doing the same thing. Can you try to move the "tee" to after v4l2h265enc or even after rtph265pay?

    Regards,

    Jianzhong

  • Hi Jianzhong,

    In the actual application I would like to be able to scale down the second stream to a different resolution. It's the same now, but we'll switch to something different once this problem is resolved.

    Regards,

    Bas Vermeulen

  • Ok. For now, can you try the following to see where the problem comes from:

    1. move the "tee" to after v4l2h265enc or rtph265pay

    2. scale down the second stream to a lower resolution

  • Hi Jianzhong Xu,

    I've modified the pipeline to scale down the second stream to a lower resolution (1920x1080) with the following script:

    #!/bin/bash
    #
    # Stream the AR0823 camera (3840x2160 @ 30 fps) through the ISP (tiovxisp)
    # and LDC (tiovxldc) and send the result out as two H.265 RTP multicast
    # streams:
    #   stream 1: full resolution (3840x2160)            -> MULTICASTADDR1
    #   stream 2: scaled to 1920x1080 (tiovxmultiscaler) -> MULTICASTADDR2
    #
    # Environment:
    #   TEE   element used to split the stream (defaults to "tee")

    DCC_ISP_FILE=/opt/imaging/ar0823/linear/dcc_viss.bin
    DCC_2A_FILE=/opt/imaging/ar0823/linear/dcc_2a.bin
    LDC_DCC_FILE=/opt/imaging/ar0823/linear/dcc_ldc.bin

    MULTICASTADDR1=224.1.1.1
    MULTICASTADDR2=224.1.1.2
    PORT=5004

    # Fall back to the stock tee when TEE is unset or empty.
    TEE=${TEE:-tee}
    HEVC_PROFILE="main"
    HEVC_LEVEL="1"
    VIDEO_BITRATE=6000000

    # Print the command line help on stdout.
    usage() {
            echo "$0 - stream the AR0823 camera as two H.265 RTP multicast streams"
            echo "Parameters:"
            echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
            echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
            echo "  -b 0..700000000 (Video Bitrate)"
            echo "  --mc <multicast ip address of stream 1> (default 224.1.1.1)"
            echo "  --mc2 <multicast ip address of stream 2> (default 224.1.1.2)"
            echo "  --port <port number> (default 5004)"
            echo "  -h This help"
    }

    # "$0" must be double-quoted so getopt reports errors under the script's
    # real name. Only options that are handled below are declared here.
    if ! TEMP=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,mc2:,port:' -n "$0" -- "$@"); then
            echo 'Terminating...' >&2
            exit 1
    fi

    eval set -- "$TEMP"
    unset TEMP

    while true; do
            case "$1" in
                    '-h'|'--help')
                            usage
                            exit 0
                    ;;
                    '-p'|'--profile')
                            # Unknown profiles silently fall back to "main".
                            case "$2" in
                                    'main'|'Main'|'0')
                                            HEVC_PROFILE="main"
                                            ;;
                                    'main-still-picture'|'1')
                                            HEVC_PROFILE="main-still-picture"
                                            ;;
                                    'main-10'|'2')
                                            HEVC_PROFILE="main-10"
                                            ;;
                                    *)
                                            HEVC_PROFILE="main"
                                            ;;
                            esac
                            echo "HEVC Profile: '$HEVC_PROFILE'"
                            shift 2
                            continue
                    ;;
                    '-l'|'--level')
                            # Unknown levels silently fall back to "1".
                            case "$2" in
                                    '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1')
                                            HEVC_LEVEL="$2"
                                            ;;
                                    *)
                                            HEVC_LEVEL="1"
                                            ;;
                            esac
                            echo "HEVC Level: '$HEVC_LEVEL'"
                            shift 2
                            continue
                    ;;
                    '-b'|'--bitrate')
                            # The value is pasted into the encoder control
                            # string, so accept plain decimal numbers only.
                            case "$2" in
                                    ''|*[!0-9]*)
                                            echo "Invalid video bitrate: '$2'" >&2
                                            exit 1
                                            ;;
                            esac
                            echo "Video Bitrate: '$2'"
                            VIDEO_BITRATE=$2
                            shift 2
                            continue
                    ;;
                    # getopt hands long options over with their leading "--".
                    '--mc')
                            MULTICASTADDR1=$2
                            shift 2
                            continue
                    ;;
                    '--mc2')
                            MULTICASTADDR2=$2
                            shift 2
                            continue
                    ;;
                    '--port')
                            PORT=$2
                            shift 2
                            continue
                    ;;
                    '--')
                            shift
                            break
                    ;;
                    *)
                            echo 'Internal error!' >&2
                            exit 1
                    ;;
            esac
    done

    # Assemble the encoder controls only after option parsing, otherwise
    # -b/--bitrate would have no effect on the pipeline.
    ENC_EXTRA_CONTROLS="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5,frame_level_rate_control_enable=1,video_bitrate_mode=0,vbv_buffer_size=3000,video_bitrate=${VIDEO_BITRATE}"

    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'
    # ${TEE} is left unquoted on purpose so that it may also carry element
    # properties (e.g. TEE="tee allow-not-linked=true").
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file="${DCC_ISP_FILE}" \
                    sink_0::dcc-2a-file="${DCC_2A_FILE}" format-msb=11 \
            ! queue max-size-buffers=1 leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file="${LDC_DCC_FILE}" \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue max-size-buffers=1 leaky=0 \
            ! ${TEE} name=multi \
            multi.src_0 \
            ! queue max-size-buffers=1 leaky=0 name=qstream1 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host="${MULTICASTADDR1}" auto-multicast=true port="${PORT}" \
            multi.src_1 \
            ! queue max-size-buffers=1 leaky=0 name=qstream2 \
            ! tiovxmultiscaler \
            ! video/x-raw, format=NV12, width=1920, height=1080, framerate=30/1 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host="${MULTICASTADDR2}" auto-multicast=true port="${PORT}"
    


    The framerate stays at 26:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.39         38.20            26          3653       |
    |tiovxisp0                     61.96        38.18            26          3651       |
    |queue0                        13.80        38.18            26          3651       |
    |tiovxldc0                     36.62        38.18            26          3650       |
    |capsfilter1                   0.57         38.18            26          3650       |
    |queue1                        0.60         38.18            26          3650       |
    |multi                         0.91         19.09            52          7300       |
    |qstream1                      0.69         38.18            26          3650       |
    |qstream2                      11.63        38.18            26          3650       |
    |tiovxmultiscaler0             35.25        38.18            26          3649       |
    |capsfilter3                   0.62         38.18            26          3649       |
    |v4l2h265enc0                  30.28        38.15            26          3649       |
    |capsfilter2                   0.69         38.15            26          3649       |
    |v4l2src0                      169.08       19.08            52          7297       |
    |rtph265pay0                   1.43         38.14            26          3649       |
    |v4l2h265enc1                  26.15        38.16            26          3648       |
    |capsfilter4                   0.58         38.16            26          3648       |
    |rtph265pay1                   1.12         38.16            26          3648       |
    +-----------------------------------------------------------------------------------+

    The perf_stats gives me these statistics:

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  37.43 % ( HWI =   2. 1 %, SWI =   1.75 % )
    CPU:  c7x_1: TOTAL LOAD =   0.15 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  61. 4 % ( 224 MP/s )
    HWA:   LDC : LOAD =  63.20 % ( 222 MP/s )
    HWA:   MSC0: LOAD =  89. 4 % ( 330 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   2984 MB/s
    DDR: WRITE BW: AVG =   1631 MB/s
    DDR: TOTAL BW: AVG =   4615 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     61.22 degree Celsius
    thermal_zone1(CPU):     60.37 degree Celsius
    thermal_zone2(C7x):     62.06 degree Celsius

    I've also modified the script to put the tee after the v4l2h265enc encoder:

    #!/bin/bash
    #
    # Stream the AR0823 camera (3840x2160 @ 30 fps) through the ISP (tiovxisp)
    # and LDC (tiovxldc), encode it ONCE with v4l2h265enc and split the encoded
    # stream with a tee into two RTP multicast streams (MULTICASTADDR1 and
    # MULTICASTADDR2). Both streams therefore carry the same H.265 bitstream.
    #
    # Environment:
    #   TEE   element used to split the stream (defaults to "tee")

    DCC_ISP_FILE=/opt/imaging/ar0823/linear/dcc_viss.bin
    DCC_2A_FILE=/opt/imaging/ar0823/linear/dcc_2a.bin
    LDC_DCC_FILE=/opt/imaging/ar0823/linear/dcc_ldc.bin

    MULTICASTADDR1=224.1.1.1
    MULTICASTADDR2=224.1.1.2
    PORT=5004

    # Fall back to the stock tee when TEE is unset or empty.
    TEE=${TEE:-tee}
    HEVC_PROFILE="main"
    HEVC_LEVEL="1"
    VIDEO_BITRATE=6000000

    # Print the command line help on stdout.
    usage() {
            echo "$0 - stream the AR0823 camera as two H.265 RTP multicast streams"
            echo "Parameters:"
            echo "  -p main|main-still-picture|main-10 (HEVC Profile)"
            echo "  -l 1|2|2.1|3|3.1|4|4.1|5|5.1 (HEVC Level)"
            echo "  -b 0..700000000 (Video Bitrate)"
            echo "  --mc <multicast ip address of stream 1> (default 224.1.1.1)"
            echo "  --mc2 <multicast ip address of stream 2> (default 224.1.1.2)"
            echo "  --port <port number> (default 5004)"
            echo "  -h This help"
    }

    # "$0" must be double-quoted so getopt reports errors under the script's
    # real name. Only options that are handled below are declared here.
    if ! TEMP=$(getopt -o 'p:l:b:h' --long 'profile:,level:,bitrate:,help,mc:,mc2:,port:' -n "$0" -- "$@"); then
            echo 'Terminating...' >&2
            exit 1
    fi

    eval set -- "$TEMP"
    unset TEMP

    while true; do
            case "$1" in
                    '-h'|'--help')
                            usage
                            exit 0
                    ;;
                    '-p'|'--profile')
                            # Unknown profiles silently fall back to "main".
                            case "$2" in
                                    'main'|'Main'|'0')
                                            HEVC_PROFILE="main"
                                            ;;
                                    'main-still-picture'|'1')
                                            HEVC_PROFILE="main-still-picture"
                                            ;;
                                    'main-10'|'2')
                                            HEVC_PROFILE="main-10"
                                            ;;
                                    *)
                                            HEVC_PROFILE="main"
                                            ;;
                            esac
                            echo "HEVC Profile: '$HEVC_PROFILE'"
                            shift 2
                            continue
                    ;;
                    '-l'|'--level')
                            # Unknown levels silently fall back to "1".
                            case "$2" in
                                    '1'|'2'|'2.1'|'3'|'3.1'|'4'|'4.1'|'5'|'5.1')
                                            HEVC_LEVEL="$2"
                                            ;;
                                    *)
                                            HEVC_LEVEL="1"
                                            ;;
                            esac
                            echo "HEVC Level: '$HEVC_LEVEL'"
                            shift 2
                            continue
                    ;;
                    '-b'|'--bitrate')
                            # The value is pasted into the encoder control
                            # string, so accept plain decimal numbers only.
                            case "$2" in
                                    ''|*[!0-9]*)
                                            echo "Invalid video bitrate: '$2'" >&2
                                            exit 1
                                            ;;
                            esac
                            echo "Video Bitrate: '$2'"
                            VIDEO_BITRATE=$2
                            shift 2
                            continue
                    ;;
                    # getopt hands long options over with their leading "--".
                    '--mc')
                            MULTICASTADDR1=$2
                            shift 2
                            continue
                    ;;
                    '--mc2')
                            MULTICASTADDR2=$2
                            shift 2
                            continue
                    ;;
                    '--port')
                            PORT=$2
                            shift 2
                            continue
                    ;;
                    '--')
                            shift
                            break
                    ;;
                    *)
                            echo 'Internal error!' >&2
                            exit 1
                    ;;
            esac
    done

    # Assemble the encoder controls only after option parsing, otherwise
    # -b/--bitrate would have no effect on the pipeline.
    ENC_EXTRA_CONTROLS="enc,prepend_sps_and_pps_to_idr=1,video_gop_size=5,frame_level_rate_control_enable=1,video_bitrate_mode=0,vbv_buffer_size=3000,video_bitrate=${VIDEO_BITRATE}"

    #media-ctl -V '"imx219 4-0010":0 [fmt:SRGGB10_1X10/1920x1080 field:none]'
    #media-ctl -V '"ar0521 1-0036":0 [fmt:SGRBG8_1X8/3840x2160 field:none]'
    media-ctl -V '"ar0823 1-0010":0 [fmt:SGRBG12_1X12/3840x2160 field:none]'
    # ${TEE} is left unquoted on purpose so that it may also carry element
    # properties (e.g. TEE="tee allow-not-linked=true").
    gst-launch-1.0 -v -e v4l2src device=/dev/video3 io-mode=dmabuf-import do-timestamp=true \
            ! video/x-bayer, width=3840, height=2160, framerate=30/1, format=grbg12 \
            ! tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_ONSEMI_AR0823" \
                    dcc-isp-file="${DCC_ISP_FILE}" \
                    sink_0::dcc-2a-file="${DCC_2A_FILE}" format-msb=11 \
            ! queue leaky=0 \
            ! tiovxldc sensor-name="SENSOR_ONSEMI_AR0823" dcc-file="${LDC_DCC_FILE}" \
            ! video/x-raw, format=NV12, width=3840, height=2160, framerate=30/1 \
            ! queue leaky=0 \
            ! v4l2h265enc output-io-mode=dmabuf-import extra-controls="${ENC_EXTRA_CONTROLS}" \
            ! "video/x-h265, profile=(string)${HEVC_PROFILE}, level=(string)${HEVC_LEVEL}" \
            ! ${TEE} name=multi \
            multi.src_0 \
            ! queue leaky=0 name=qstream1 \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host="${MULTICASTADDR1}" auto-multicast=true port="${PORT}" \
            multi.src_1 \
            ! queue leaky=0 name=qstream2 \
            ! rtph265pay config-interval=1 pt=96 mtu=1400 \
            ! udpsink host="${MULTICASTADDR2}" auto-multicast=true port="${PORT}"
    

    This gets me 30 fps:

    +-----------------------------------------------------------------------------------+
    |element                       latency      out-latancy      out-fps     frames     |
    +-----------------------------------------------------------------------------------+
    |capsfilter0                   0.46         33.35            29          5117       |
    |tiovxisp0                     25.12        33.33            30          5118       |
    |queue0                        0.89         33.33            29          5121       |
    |tiovxldc0                     25.77        33.33            30          5127       |
    |capsfilter1                   0.67         33.33            30          5130       |
    |queue1                        0.67         33.33            30          5134       |
    |v4l2h265enc0                  31.23        33.32            30          5138       |
    |capsfilter2                   0.68         33.32            30          5140       |
    |multi                         0.87         16.66            60          10284      |
    |qstream1                      0.65         33.32            30          5147       |
    |qstream2                      0.94         33.32            30          5151       |
    |v4l2src0                      88.51        16.66            60          10307      |
    |rtph265pay0                   1.44         33.32            30          5156       |
    |rtph265pay1                   1.29         33.32            30          5158       |
    +-----------------------------------------------------------------------------------+

    And the following statistics from perf_stats:

    Summary of CPU load,
    ====================
    
    CPU: mpu1_0: TOTAL LOAD =  34.24 % ( HWI =   1.98 %, SWI =   1.73 % )
    CPU:  c7x_1: TOTAL LOAD =   0.14 % ( HWI =   0. 0 %, SWI =   0. 0 % )
    
    HWA performance statistics,
    ===========================
    
    HWA:   VISS: LOAD =  69. 6 % ( 254 MP/s )
    HWA:   LDC : LOAD =  71.82 % ( 255 MP/s )
    
    DDR performance statistics,
    ===========================
    
    DDR: READ  BW: AVG =   2907 MB/s
    DDR: WRITE BW: AVG =   1757 MB/s
    DDR: TOTAL BW: AVG =   4664 MB/s
    
    SoC temperature statistics
    ==========================
    
    thermal_zone0(DDR):     62.27 degree Celsius
    thermal_zone1(CPU):     61.43 degree Celsius
    thermal_zone2(C7x):     62.90 degree Celsius

    Unfortunately I need to modify the streams before encoding, and can't use something like this for my application.

    Any further ideas? I'd like to understand what the bottleneck is.

    Regards,

    Bas Vermeulen

  • Hi Bas,

    I think the bottleneck is running two 8 MP @ 30 fps encodes at the same time. I'll let a colleague comment on the encoder capability.

    Regards,

    Jianzhong

  • Hi Bas,

    The MSC (tiovxmultiscaler0) may be the bottleneck for the pipeline throughput. Please try the following to enable MSC to process Y data and UV data in parallel:

    1. update Gstreamer plugin: https://github.com/TexasInstruments/edgeai-gst-plugins/pull/368/files

    2. update edgeai-tiovx-modules by replacing /opt/edgeai-tiovx-modules/src/tiovx_multi_scaler_module.c and /opt/edgeai-tiovx-modules/include/tiovx_multi_scaler_module.h by the attached files.

    tiovx_multi_scaler_module.c
    /*
     *
     * Copyright (c) 2021 Texas Instruments Incorporated
     *
     * All rights reserved not granted herein.
     *
     * Limited License.
     *
     * Texas Instruments Incorporated grants a world-wide, royalty-free, non-exclusive
     * license under copyrights and patents it now or hereafter owns or controls to make,
     * have made, use, import, offer to sell and sell ("Utilize") this software subject to the
     * terms herein.  With respect to the foregoing patent license, such license is granted
     * solely to the extent that any such patent is necessary to Utilize the software alone.
     * The patent license shall not apply to any combinations which include this software,
     * other than combinations with devices manufactured by or for TI ("TI Devices").
     * No hardware patent is licensed hereunder.
     *
     * Redistributions must preserve existing copyright notices and reproduce this license
     * (including the above copyright notice and the disclaimer and (if applicable) source
     * code license limitations below) in the documentation and/or other materials provided
     * with the distribution
     *
     * Redistribution and use in binary form, without modification, are permitted provided
     * that the following conditions are met:
     *
     * *       No reverse engineering, decompilation, or disassembly of this software is
     * permitted with respect to any software provided in binary form.
     *
     * *       any redistribution and use are licensed by TI for use only with TI Devices.
     *
     * *       Nothing shall obligate TI to provide you with source code for the software
     * licensed and provided to you in object code.
     *
     * If software source code is provided to you, modification and redistribution of the
     * source code are permitted provided that the following conditions are met:
     *
     * *       any redistribution and use of the source code, including any resulting derivative
     * works, are licensed by TI for use only with TI Devices.
     *
     * *       any redistribution and use of any object code compiled from the source code
     * and any resulting derivative works, are licensed by TI for use only with TI Devices.
     *
     * Neither the name of Texas Instruments Incorporated nor the names of its suppliers
     *
     * may be used to endorse or promote products derived from this software without
     * specific prior written permission.
     *
     * DISCLAIMER.
     *
     * THIS SOFTWARE IS PROVIDED BY TI AND TI'S LICENSORS "AS IS" AND ANY EXPRESS
     * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     * IN NO EVENT SHALL TI AND TI'S LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
     * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     * OF THE POSSIBILITY OF SUCH DAMAGE.
     *
     */
    #include "tiovx_multi_scaler_module.h"
    
    /*
     * Create obj->coeff_obj and load it with the MSC filter coefficient table.
     *
     * The table is generated on the stack by tiovx_multi_scaler_module_set_coeff()
     * from obj->interpolation_method and then copied into the new user data object.
     *
     * Returns the vxGetStatus() error if the object could not be created,
     * otherwise the status of vxCopyUserDataObject().
     */
    static vx_status tiovx_multi_scaler_module_configure_scaler_coeffs(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        tivx_vpac_msc_coefficients_t coeff_table;
        vx_status result;

        /* Build the table first so it is ready to copy once the object exists. */
        tiovx_multi_scaler_module_set_coeff(&coeff_table, obj->interpolation_method);

        obj->coeff_obj = vxCreateUserDataObject(context,
                                                "tivx_vpac_msc_coefficients_t",
                                                sizeof(tivx_vpac_msc_coefficients_t),
                                                NULL);
        result = vxGetStatus((vx_reference)obj->coeff_obj);

        if((vx_status)VX_SUCCESS != result)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create scaler coeffs object! \n");
            return result;
        }

        vxSetReferenceName((vx_reference)obj->coeff_obj, "multi_scaler_node_coeff_obj");

        /* Populate the freshly created object from the host-side table. */
        result = vxCopyUserDataObject(obj->coeff_obj,
                                      0,
                                      sizeof(tivx_vpac_msc_coefficients_t),
                                      &coeff_table,
                                      VX_WRITE_ONLY,
                                      VX_MEMORY_TYPE_HOST);

        return result;
    }
    
    /*
     * Create one crop-parameter user data object per output and fill it from
     * obj->crop_params[out].
     *
     * Every output in [0, obj->num_outputs) is attempted even if an earlier one
     * fails, so obj->crop_obj[] is always written for all of them. The return
     * value is the FIRST failure encountered (or VX_SUCCESS); previously a later
     * successful output silently overwrote an earlier error.
     */
    static vx_status tiovx_multi_scaler_module_configure_crop_params(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
        vx_int32 out;

        for (out = 0; out < obj->num_outputs; out++)
        {
            vx_status out_status;

            obj->crop_obj[out] = vxCreateUserDataObject(context,
                    "tivx_vpac_msc_crop_params_t",
                    sizeof(tivx_vpac_msc_crop_params_t),
                    NULL);

            out_status = vxGetStatus((vx_reference)obj->crop_obj[out]);

            if((vx_status)VX_SUCCESS == out_status)
            {
                out_status = vxCopyUserDataObject(obj->crop_obj[out], 0,
                        sizeof(tivx_vpac_msc_crop_params_t),
                        obj->crop_params + out,
                        VX_WRITE_ONLY,
                        VX_MEMORY_TYPE_HOST);
            }

            if((vx_status)VX_SUCCESS != out_status)
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Creating user data object for crop params failed!, %d\n", out);

                /* Remember only the first failure so it cannot be masked later. */
                if((vx_status)VX_SUCCESS == status)
                {
                    status = out_status;
                }
            }
        }

        return status;
    }
    
    /*
     * Create obj->input_prm_obj holding the MSC input parameters.
     *
     * The parameters start from tivx_vpac_msc_input_params_init() defaults. When
     * not building for J721E and the module has at most two outputs,
     * is_enable_simul_processing is switched on.
     *
     * Returns the creation status of the user data object (the original code
     * never checked it and always reported VX_SUCCESS).
     */
    static vx_status tiovx_multi_scaler_module_configure_input_params(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
        tivx_vpac_msc_input_params_t input_prm;

        tivx_vpac_msc_input_params_init(&input_prm);

    #if !defined(J721E) //This feature is supported only on VPAC3 and VPAC3L
        if (obj->num_outputs <= 2) {
            input_prm.is_enable_simul_processing = 1;
        }
    #endif
        obj->input_prm_obj = vxCreateUserDataObject(context,
                                               "tivx_vpac_msc_input_params_t",
                                               sizeof(tivx_vpac_msc_input_params_t),
                                               &input_prm);

        /* Creation errors are reported through the returned reference. */
        status = vxGetStatus((vx_reference)obj->input_prm_obj);

        if((vx_status)VX_SUCCESS != status)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create input params object! \n");
        }

        return status;
    }
    
    /*
     * Allocate the input side of the scaler: one object array of
     * obj->num_channels images per buffer-queue slot, plus a cached handle to
     * element 0 of each array.
     *
     * Returns VX_FAILURE when obj->input.bufq_depth exceeds
     * TIOVX_MODULES_MAX_BUFQ_DEPTH, otherwise the status of the last OpenVX
     * creation call that was made.
     */
    static vx_status tiovx_multi_scaler_module_create_scaler_input(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        vx_status result = VX_SUCCESS;
        vx_image exemplar;
        vx_int32 slot;

        if(obj->input.bufq_depth > TIOVX_MODULES_MAX_BUFQ_DEPTH)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Input buffer queue depth %d greater than max supported %d!\n", obj->input.bufq_depth, TIOVX_MODULES_MAX_BUFQ_DEPTH);
            return VX_FAILURE;
        }

        /* Clear every slot, including the ones beyond bufq_depth. */
        slot = 0;
        while(slot < TIOVX_MODULES_MAX_BUFQ_DEPTH)
        {
            obj->input.image_handle[slot] = NULL;
            obj->input.arr[slot] = NULL;
            slot++;
        }

        /* The exemplar only describes the image geometry/format for the arrays. */
        exemplar = vxCreateImage(context, obj->input.width, obj->input.height, obj->color_format);
        result = vxGetStatus((vx_reference)exemplar);

        if((vx_status)VX_SUCCESS != result)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create input image template! \n");
            return result;
        }

        for(slot = 0; slot < obj->input.bufq_depth; slot++)
        {
            obj->input.arr[slot] = vxCreateObjectArray(context, (vx_reference)exemplar, obj->num_channels);
            result = vxGetStatus((vx_reference)obj->input.arr[slot]);

            if(VX_SUCCESS != result)
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create input array! \n");
                break;
            }

            obj->input.image_handle[slot] = (vx_image)vxGetObjectArrayItem((vx_object_array)obj->input.arr[slot], 0);
        }

        /* The exemplar is released whether or not the loop completed. */
        vxReleaseImage(&exemplar);

        return result;
    }
    
    static vx_status tiovx_multi_scaler_module_create_scaler_outputs(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
        vx_int32 out, buf;
    
        if(obj->num_outputs > TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Number of outputs %d greater than max supported %d!\n", obj->num_outputs, TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS);
            return VX_FAILURE;
        }
    
        for(out = 0; out < obj->num_outputs; out++)
        {
            if(obj->output[out].bufq_depth > TIOVX_MODULES_MAX_BUFQ_DEPTH)
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Output buffer queue depth %d greater than max supported %d!\n", obj->output[out].bufq_depth, TIOVX_MODULES_MAX_BUFQ_DEPTH);
                return VX_FAILURE;
            }
        }
    
        for(out = 0; out < TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS; out++)
        {
            for(buf = 0; buf < TIOVX_MODULES_MAX_BUFQ_DEPTH; buf++)
            {
                obj->output[out].arr[buf]  = NULL;
                obj->output[out].image_handle[buf]  = NULL;
            }
        }
    
        for(out = 0; out < obj->num_outputs; out++)
        {
            vx_image out_img;
    
            out_img = vxCreateImage(context, obj->output[out].width, obj->output[out].height, obj->color_format);
            status = vxGetStatus((vx_reference)out_img);
    
            if(status == VX_SUCCESS)
            {
                for(buf = 0; buf < obj->output[out].bufq_depth; buf++)
                {
                    obj->output[out].arr[buf]  = vxCreateObjectArray(context, (vx_reference)out_img, obj->num_channels);
    
                    status = vxGetStatus((vx_reference)obj->output[out].arr[buf]);
                    if(status != VX_SUCCESS)
                    {
                        TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create output array! \n");
                        break;
                    }
                    else
                    {
                        vx_char name[VX_MAX_REFERENCE_NAME];
    
                        snprintf(name, VX_MAX_REFERENCE_NAME, "scaler_node_output_arr%d_buf%d", out, buf);
    
                        vxSetReferenceName((vx_reference)obj->output[out].arr[buf], name);
                    }
    
                    obj->output[out].image_handle[buf] = (vx_image)vxGetObjectArrayItem((vx_object_array)obj->output[out].arr[buf], 0);
                }
                vxReleaseImage(&out_img);
            }
            else
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create output image template! \n");
                break;
            }
        }
    
        if(obj->en_multi_scalar_output == 1)
        {
            char file_path[TIVX_FILEIO_FILE_PATH_LENGTH];
    
            strcpy(file_path, obj->output_file_path);
            obj->file_path   = vxCreateArray(context, VX_TYPE_UINT8, TIVX_FILEIO_FILE_PATH_LENGTH);
            status = vxGetStatus((vx_reference)obj->file_path);
            if(status == VX_SUCCESS)
            {
                vxSetReferenceName((vx_reference)obj->file_path, "scaler_write_node_file_path");
    
                vxAddArrayItems(obj->file_path, TIVX_FILEIO_FILE_PATH_LENGTH, &file_path[0], 1);
            }
            else
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create file path object for fileio!\n");
            }
    
            for(out = 0; out < obj->num_outputs; out++)
            {
                char file_prefix[TIVX_FILEIO_FILE_PREFIX_LENGTH];
    
                sprintf(file_prefix, "scaler_output_%d", out);
                obj->file_prefix[out] = vxCreateArray(context, VX_TYPE_UINT8, TIVX_FILEIO_FILE_PREFIX_LENGTH);
                status = vxGetStatus((vx_reference)obj->file_prefix[out]);
                if(status == VX_SUCCESS)
                {
                    vx_char name[VX_MAX_REFERENCE_NAME];
    
                    snprintf(name, VX_MAX_REFERENCE_NAME, "scaler_write_node_file_prefix_%d", out);
    
                    vxSetReferenceName((vx_reference)obj->file_prefix[out], name);
    
                    vxAddArrayItems(obj->file_prefix[out], TIVX_FILEIO_FILE_PREFIX_LENGTH, &file_prefix[0], 1);
                }
                else
                {
                    TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create file prefix object for output %d!\n", out);
                    break;
                }
    
                obj->write_cmd[out] = vxCreateUserDataObject(context, "tivxFileIOWriteCmd", sizeof(tivxFileIOWriteCmd), NULL);
                status = vxGetStatus((vx_reference)obj->write_cmd[out]);
                if(status != VX_SUCCESS)
                {
                    TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create write cmd object for output %d!\n", out);
                    break;
                }
                else
                {
                    vx_char name[VX_MAX_REFERENCE_NAME];
    
                    snprintf(name, VX_MAX_REFERENCE_NAME, "scaler_write_node_write_cmd_%d", out);
    
                    vxSetReferenceName((vx_reference)obj->write_cmd[out], name);
                }
            }
    
        }
        else
        {
            obj->file_path   = NULL;
            for(out = 0; out < TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS; out++)
            {
                obj->file_prefix[out] = NULL;
                obj->write_node[out]  = NULL;
                obj->write_cmd[out]   = NULL;
            }
        }
    
        return status;
    }
    
    /*
     * Initialise the multi-scaler module object.
     *
     * Runs the setup steps in a fixed order - coefficients, input buffers,
     * output buffers, crop parameters, input parameters - and returns as soon
     * as one of them reports anything other than VX_SUCCESS.
     */
    vx_status tiovx_multi_scaler_module_init(vx_context context, TIOVXMultiScalerModuleObj *obj)
    {
        vx_status result;

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Configuring scaler coeffs!\n");
        result = tiovx_multi_scaler_module_configure_scaler_coeffs(context, obj);
        if((vx_status)VX_SUCCESS != result)
        {
            return result;
        }

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Creating scaler input!\n");
        result = tiovx_multi_scaler_module_create_scaler_input(context, obj);
        if((vx_status)VX_SUCCESS != result)
        {
            return result;
        }

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Creating scaler output!\n");
        result = tiovx_multi_scaler_module_create_scaler_outputs(context, obj);
        if((vx_status)VX_SUCCESS != result)
        {
            return result;
        }

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Configuring crop params!\n");
        result = tiovx_multi_scaler_module_configure_crop_params(context, obj);
        if((vx_status)VX_SUCCESS != result)
        {
            return result;
        }

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Configuring input params!\n");
        return tiovx_multi_scaler_module_configure_input_params(context, obj);
    }
    
    /*
     * Release the OpenVX objects created by tiovx_multi_scaler_module_init().
     *
     * Image handles, object arrays and file-io objects are released only while
     * no earlier release has failed, so the first error is what gets returned.
     * The per-output crop objects are always released, but their status is
     * adopted only when no earlier error is pending; previously this release
     * overwrote `status` unconditionally and could hide a failure.
     *
     * obj->input_prm_obj is released here as well when it is non-NULL.
     * NOTE(review): no other release of input_prm_obj is visible in this part
     * of the file - confirm it is not also released elsewhere.
     */
    vx_status tiovx_multi_scaler_module_deinit(TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
        vx_status release_status;

        vx_int32 out, buf;

        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing coeffs!\n");
        status = vxReleaseUserDataObject(&obj->coeff_obj);

        for(buf = 0; buf < obj->input.bufq_depth; buf++)
        {
            if((vx_status)VX_SUCCESS == status)
            {
                TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing input image handle, bufq %d!\n", buf);
                status = vxReleaseImage(&obj->input.image_handle[buf]);
            }
            if((vx_status)VX_SUCCESS == status)
            {
                TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing input image arr, bufq %d!\n", buf);
                status = vxReleaseObjectArray(&obj->input.arr[buf]);
            }
        }

        for(out = 0; out < obj->num_outputs; out++)
        {
            for(buf = 0; buf < obj->output[out].bufq_depth; buf++)
            {
                if((vx_status)VX_SUCCESS == status)
                {
                    TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing output image handle, bufq = %d!\n", buf);
                    status = vxReleaseImage(&obj->output[out].image_handle[buf]);
                }
                if((vx_status)VX_SUCCESS == status)
                {
                    TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing output image arr, bufq %d!\n", buf);
                    status = vxReleaseObjectArray(&obj->output[out].arr[buf]);
                }
            }

            /* Always release the crop object, but never let its success hide
               an error recorded above. */
            release_status = vxReleaseUserDataObject(obj->crop_obj + out);
            if((vx_status)VX_SUCCESS == status)
            {
                status = release_status;
            }
        }

        if(obj->input_prm_obj != NULL)
        {
            TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing input params!\n");
            release_status = vxReleaseUserDataObject(&obj->input_prm_obj);
            if((vx_status)VX_SUCCESS == status)
            {
                status = release_status;
            }
        }

        if(obj->en_multi_scalar_output == 1)
        {
            if((vx_status)VX_SUCCESS == status)
            {
                TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing output path array!\n");
                status = vxReleaseArray(&obj->file_path);
            }

            for(out = 0; out < obj->num_outputs; out++)
            {
                if((vx_status)VX_SUCCESS == status)
                {
                    TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing output %d file prefix array!\n", out);
                    status = vxReleaseArray(&obj->file_prefix[out]);
                }
                if((vx_status)VX_SUCCESS == status)
                {
                    TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing output %d write command object!\n", out);
                    status = vxReleaseUserDataObject(&obj->write_cmd[out]);
                }
            }
        }

        return status;
    }
    
    /*
     * Release the graph nodes owned by the module: the scaler node and any
     * non-NULL file write nodes for outputs [0, obj->num_outputs).
     *
     * Write nodes are released only while every previous release succeeded;
     * the status of the last release performed is returned.
     */
    vx_status tiovx_multi_scaler_module_delete(TIOVXMultiScalerModuleObj *obj)
    {
        vx_status result = VX_SUCCESS;
        const vx_int32 total = obj->num_outputs;
        vx_int32 idx;

        if(NULL != obj->node)
        {
            TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing node!\n");
            result = vxReleaseNode(&obj->node);
        }

        for(idx = 0; idx < total; idx++)
        {
            /* Skip outputs without a write node, and everything after an error. */
            if((NULL == obj->write_node[idx]) || ((vx_status)VX_SUCCESS != result))
            {
                continue;
            }

            TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Releasing write node [%d]!\n", idx);
            result = vxReleaseNode(&obj->write_node[idx]);
        }

        return result;
    }
    
    /*
     * Create the MSC scale node in `graph`, replicate it, and optionally attach
     * one file write node per connected output.
     *
     * graph          graph the node is added to.
     * obj            module object; its output[] arrays supply the node outputs.
     * input_arr      object array whose element 0 is used as node input; when
     *                NULL, obj->input.arr[0] is used instead (if it exists).
     * target_string  target passed to vxSetNodeTarget().
     *
     * Node parameter 0 is the input and parameters 1..5 are outputs 1..5, so the
     * replicate flag at index k+1 must follow output k. The previous code tested
     * output1 twice and never output5, which shifted flags 2..5 by one output.
     *
     * Returns the node creation status, or the first write-node failure when
     * obj->en_multi_scalar_output == 1 (all connected outputs are still
     * attempted).
     */
    vx_status tiovx_multi_scaler_module_create(vx_graph graph, TIOVXMultiScalerModuleObj *obj, vx_object_array input_arr, const char* target_string)
    {
        /* tivxVpacMscScaleNode() takes exactly five output images. */
        enum { MSC_NODE_NUM_OUTPUTS = 5 };

        vx_status status = VX_SUCCESS;

        vx_image input = NULL;
        vx_image output[MSC_NODE_NUM_OUTPUTS] = { NULL, NULL, NULL, NULL, NULL };
        vx_int32 out;

        if(input_arr != NULL)
        {
            input = (vx_image)vxGetObjectArrayItem((vx_object_array)input_arr, 0);
        }
        else if(obj->input.arr[0] != NULL)
        {
            input = (vx_image)vxGetObjectArrayItem((vx_object_array)obj->input.arr[0], 0);
        }

        /* Outputs without an object array stay NULL and are passed as such to the node. */
        for(out = 0; out < MSC_NODE_NUM_OUTPUTS; out++)
        {
            if(obj->output[out].arr[0] != NULL)
            {
                output[out] = (vx_image)vxGetObjectArrayItem((vx_object_array)obj->output[out].arr[0], 0);
            }
        }

        obj->node = tivxVpacMscScaleNode(graph, input, output[0], output[1], output[2], output[3], output[4]);

        status = vxGetStatus((vx_reference)obj->node);

        if((vx_status)VX_SUCCESS == status)
        {
            vx_bool replicate[MSC_NODE_NUM_OUTPUTS + 1];

            vxSetNodeTarget(obj->node, VX_TARGET_STRING, target_string);
            vxSetReferenceName((vx_reference)obj->node, "scaler_node");

            /* Parameter 0 (input) is always replicated; each output only when connected. */
            replicate[0] = vx_true_e;
            for(out = 0; out < MSC_NODE_NUM_OUTPUTS; out++)
            {
                replicate[out + 1] = (output[out] != NULL) ? vx_true_e : vx_false_e;
            }

            vxReplicateNode(graph, obj->node, replicate, MSC_NODE_NUM_OUTPUTS + 1);

            if(obj->en_multi_scalar_output == 1)
            {
                for(out = 0; out < MSC_NODE_NUM_OUTPUTS; out++)
                {
                    vx_status write_status;

                    if(output[out] == NULL)
                    {
                        continue;
                    }

                    write_status = tiovx_multi_scaler_module_add_write_output_node(graph, obj, out);
                    if(write_status != VX_SUCCESS)
                    {
                        TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create write node for output%d!\n", out + 1);

                        /* Keep the first failure; a later success must not hide it. */
                        if(status == VX_SUCCESS)
                        {
                            status = write_status;
                        }
                    }
                }
            }
        }
        else
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create scaler node! \n");
        }

        /* Drop the local references obtained through vxGetObjectArrayItem(). */
        if(input != NULL)
            vxReleaseImage(&input);

        for(out = 0; out < MSC_NODE_NUM_OUTPUTS; out++)
        {
            if(output[out] != NULL)
                vxReleaseImage(&output[out]);
        }

        return status;
    }
    
    vx_status tiovx_multi_scaler_module_release_buffers(TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
    
        void      *virtAddr[TIOVX_MODULES_MAX_REF_HANDLES] = {NULL};
        vx_uint32   size[TIOVX_MODULES_MAX_REF_HANDLES];
        vx_uint32   numEntries;
        vx_int32 out, bufq, ch;
    
        /* Free input handles */
        for(bufq = 0; bufq < obj->input.bufq_depth; bufq++)
        {
            for(ch = 0; ch < obj->num_channels; ch++)
            {
                vx_reference ref = vxGetObjectArrayItem(obj->input.arr[bufq], ch);
                status = vxGetStatus((vx_reference)ref);
    
                if((vx_status)VX_SUCCESS == status)
                {
                    /* Export handles to get valid size information. */
                    status = tivxReferenceExportHandle(ref,
                                                       virtAddr,
                                                       size,
                                                       TIOVX_MODULES_MAX_REF_HANDLES,
                                                       &numEntries);
    
                    if((vx_status)VX_SUCCESS == status)
                    {
                        vx_int32 ctr;
                        /* Currently the vx_image buffers are alloated in one shot for multiple planes.
                            So if we are freeing this buffer then we need to get only the first plane
                            pointer address but add up the all the sizes to free the entire buffer */
                        vx_uint32 freeSize = 0;
                        for(ctr = 0; ctr < numEntries; ctr++)
                        {
                            freeSize += size[ctr];
                        }
    
                        if(virtAddr[0] != NULL)
                        {
                            TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Freeing input, bufq=%d, ch=%d, addr = 0x%016lX, size = %d \n", bufq, ch, (vx_uint64)virtAddr[0], freeSize);
                            tivxMemFree(virtAddr[0], freeSize, TIVX_MEM_EXTERNAL);
                        }
    
                        for(ctr = 0; ctr < numEntries; ctr++)
                        {
                            virtAddr[ctr] = NULL;
                        }
    
                        /* Assign NULL handles to the OpenVx objects as it will avoid
                            doing a tivxMemFree twice, once now and once during release */
                        status = tivxReferenceImportHandle(ref,
                                                        (const void **)virtAddr,
                                                        (const uint32_t *)size,
                                                        numEntries);
                    }
                    vxReleaseReference(&ref);
                }
            }
        }
    
        /* Free output handles */
        for(out = 0; out < obj->num_outputs; out++)
        {
            for(bufq = 0; bufq < obj->output[out].bufq_depth; bufq++)
            {
                for(ch = 0; ch < obj->num_channels; ch++)
                {
                    vx_reference ref = vxGetObjectArrayItem(obj->output[out].arr[bufq], ch);
                    status = vxGetStatus((vx_reference)ref);
    
                    if((vx_status)VX_SUCCESS == status)
                    {
                        /* Export handles to get valid size information. */
                        status = tivxReferenceExportHandle(ref,
                                                           virtAddr,
                                                           size,
                                                           TIOVX_MODULES_MAX_REF_HANDLES,
                                                           &numEntries);
    
                        if((vx_status)VX_SUCCESS == status)
                        {
                            vx_int32 ctr;
                            /* Currently the vx_image buffers are alloated in one shot for multiple planes.
                               So if we are freeing this buffer then we need to get only the first plane
                               pointer address but add up the all the sizes to free the entire buffer */
                            vx_uint32 freeSize = 0;
                            for(ctr = 0; ctr < numEntries; ctr++)
                            {
                                freeSize += size[ctr];
                            }
    
                            if(virtAddr[0] != NULL)
                            {
                                TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] Freeing output[%d], bufq=%d, ch=%d, addr = 0x%016lX, size = %d \n", out, bufq, ch, (vx_uint64)virtAddr[0], freeSize);
                                tivxMemFree(virtAddr[0], freeSize, TIVX_MEM_EXTERNAL);
                            }
    
                            for(ctr = 0; ctr < numEntries; ctr++)
                            {
                                virtAddr[ctr] = NULL;
                            }
    
                            /* Assign NULL handles to the OpenVx objects as it will avoid
                                doing a tivxMemFree twice, once now and once during release */
                            status = tivxReferenceImportHandle(ref,
                                                            (const void **)virtAddr,
                                                            (const uint32_t *)size,
                                                            numEntries);
                        }
                        vxReleaseReference(&ref);
                    }
                }
            }
        }
    
        if ((vx_status)VX_SUCCESS != status)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] tivxReferenceExportHandle() failed.\n");
        }
    
        return status;
    }
    
    /*
     * Add a file write node for output `out` to `graph`.
     *
     * The node is fed element 0 of obj->output[out].arr[0] together with
     * obj->file_path and obj->file_prefix[out], is targeted at
     * TIVX_TARGET_MPU_0, and is replicated on its image parameter only.
     *
     * Returns the creation status of the write node.
     */
    vx_status tiovx_multi_scaler_module_add_write_output_node(vx_graph graph, TIOVXMultiScalerModuleObj *obj, vx_int32 out)
    {
        vx_status result;
        vx_image first_img;

        /* Need to improve this section, currently one write node can take only one image. */
        first_img = (vx_image)vxGetObjectArrayItem(obj->output[out].arr[0], 0);
        obj->write_node[out] = tivxWriteImageNode(graph, first_img, obj->file_path, obj->file_prefix[out]);
        vxReleaseImage(&first_img);

        result = vxGetStatus((vx_reference)obj->write_node[out]);

        if((vx_status)VX_SUCCESS != result)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to create fileio write node for storing outputs! \n");
            return (result);
        }

        vxSetNodeTarget(obj->write_node[out], VX_TARGET_STRING, TIVX_TARGET_MPU_0);

        {
            /* Only the image parameter varies per replica; path and prefix are shared. */
            vx_bool per_param[3];

            per_param[0] = vx_true_e;
            per_param[1] = vx_false_e;
            per_param[2] = vx_false_e;

            vxReplicateNode(graph, obj->write_node[out], per_param, 3);
        }

        return (result);
    }
    
    /*
     * Send a file-write command to the write node of every output.
     *
     * start_frame, num_frames and num_skip are stored in a tivxFileIOWriteCmd,
     * copied into obj->write_cmd[out] and sent to obj->write_node[out] (all
     * replicas) with TIVX_FILEIO_CMD_SET_FILE_WRITE.
     *
     * Every output is attempted; the FIRST failure is returned. Previously a
     * later success overwrote an earlier error, and the "success" trace was
     * printed even when the send had failed.
     */
    vx_status tiovx_multi_scaler_module_send_write_output_cmd(TIOVXMultiScalerModuleObj *obj, vx_uint32 start_frame, vx_uint32 num_frames, vx_uint32 num_skip)
    {
        vx_status status = VX_SUCCESS;

        tivxFileIOWriteCmd write_cmd;
        vx_int32 out;

        write_cmd.start_frame = start_frame;
        write_cmd.num_frames = num_frames;
        write_cmd.num_skip = num_skip;

        for(out = 0; out < obj->num_outputs; out++)
        {
            vx_status out_status;

            out_status = vxCopyUserDataObject(obj->write_cmd[out], 0, sizeof(tivxFileIOWriteCmd),
                      &write_cmd, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);

            if((vx_status)VX_SUCCESS == out_status)
            {
                vx_reference refs[1];

                refs[0] = (vx_reference)obj->write_cmd[out];

                out_status = tivxNodeSendCommand(obj->write_node[out], TIVX_CONTROL_CMD_SEND_TO_ALL_REPLICATED_NODES,
                                        TIVX_FILEIO_CMD_SET_FILE_WRITE,
                                        refs, 1u);

                if(VX_SUCCESS != out_status)
                {
                    TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] write node send command failed!\n");
                }
                else
                {
                    TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] write node send command success!\n");
                }
            }
            else
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Unable to copy write cmd for output %d!\n", out);
            }

            /* Report the first failure rather than whatever the last output returned. */
            if(((vx_status)VX_SUCCESS == status) && ((vx_status)VX_SUCCESS != out_status))
            {
                status = out_status;
            }
        }

        return (status);
    }
    
    /*
     * Fill an MSC coefficient table.
     *
     * Both single-phase sets get the 5-tap pattern {0, 0, 256, 0, 0}.
     *
     * Each of the four multi-phase sets gets 32 phases of 5 taps laid out as
     * {0, 0, 256 - w, w, 0}:
     *   - VX_INTERPOLATION_BILINEAR: w = phase * 4 for sets 0 and 2, and
     *     w = (phase + 32) * 4 for sets 1 and 3.
     *   - any other value (nearest neighbour): w = 0 for sets 0 and 2, and
     *     w = 256 for sets 1 and 3.
     */
    void tiovx_multi_scaler_module_set_coeff(tivx_vpac_msc_coefficients_t *coeff, uint32_t interpolation)
    {
        uint32_t set;
        uint32_t phase;
        uint32_t pos;
        uint32_t weight;

        for(set = 0; set < 2; set++)
        {
            coeff->single_phase[set][0] = 0;
            coeff->single_phase[set][1] = 0;
            coeff->single_phase[set][2] = 256;
            coeff->single_phase[set][3] = 0;
            coeff->single_phase[set][4] = 0;
        }

        for(set = 0; set < 4; set++)
        {
            /* Odd-numbered sets (1 and 3) hold the second group of 32 phases. */
            const uint32_t second_group = set & 1u;

            pos = 0;
            for(phase = 0; phase < 32; phase++)
            {
                if (VX_INTERPOLATION_BILINEAR == interpolation)
                {
                    weight = (phase + (second_group * 32u)) << 2;
                }
                else /* STR_VX_INTERPOLATION_NEAREST_NEIGHBOR */
                {
                    weight = (second_group != 0u) ? 256u : 0u;
                }

                coeff->multi_phase[set][pos ++] = 0;
                coeff->multi_phase[set][pos ++] = 0;
                coeff->multi_phase[set][pos ++] = 256-weight;
                coeff->multi_phase[set][pos ++] = weight;
                coeff->multi_phase[set][pos ++] = 0;
            }
        }
    }
    
    /*
     * Send the module's filter-coefficient object to the MSC node using the
     * TIVX_VPAC_MSC_CMD_SET_COEFF node command.
     *
     * obj: module object supplying the node handle (obj->node) and the
     *      coefficient object reference (obj->coeff_obj).
     *
     * Returns the status reported by tivxNodeSendCommand(); an error message
     * is logged when that status is not VX_SUCCESS.
     */
    vx_status tiovx_multi_scaler_module_update_filter_coeffs(TIOVXMultiScalerModuleObj *obj)
    {
        vx_reference coeff_ref[1];
        vx_status    result;

        coeff_ref[0] = (vx_reference)obj->coeff_obj;

        result = tivxNodeSendCommand(obj->node, 0u,
                                     TIVX_VPAC_MSC_CMD_SET_COEFF,
                                     coeff_ref, 1u);

        /* Emitted after every send attempt, whether or not it succeeded. */
        TIOVX_MODULE_PRINTF("[MULTI-SCALER-MODULE] App Send MSC Command Done!\n");

        if((vx_status)VX_SUCCESS != result)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Node send command failed!\n");
        }

        return result;
    }
    
    /*
     * Reset the crop rectangle of every output to the full input frame:
     * start at (0, 0) and span the input's width and height.
     *
     * obj: module object; crop_params[0 .. num_outputs-1] are overwritten
     *      using obj->input.width and obj->input.height.
     */
    void tiovx_multi_scaler_module_crop_params_init(TIOVXMultiScalerModuleObj *obj)
    {
        vx_int32 idx = 0;

        while (idx < obj->num_outputs)
        {
            obj->crop_params[idx].crop_width   = obj->input.width;
            obj->crop_params[idx].crop_height  = obj->input.height;
            obj->crop_params[idx].crop_start_x = 0;
            obj->crop_params[idx].crop_start_y = 0;
            idx++;
        }
    }
    
    /*
     * Send the per-output crop objects to the MSC node using the
     * TIVX_VPAC_MSC_CMD_SET_CROP_PARAMS node command.
     *
     * obj: module object supplying the node handle, the output count
     *      (obj->num_outputs) and one crop object per output (obj->crop_obj[]).
     *
     * Returns VX_ERROR_INVALID_PARAMETERS when obj->num_outputs is larger than
     * TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS (no command is sent in that case);
     * otherwise returns the status reported by tivxNodeSendCommand(). A
     * failure is logged in both cases.
     */
    vx_status tiovx_multi_scaler_module_update_crop_params(TIOVXMultiScalerModuleObj *obj)
    {
        vx_status status = VX_SUCCESS;
        vx_reference refs[TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS];
        vx_int32 out;

        /* refs[] has a fixed capacity; reject counts that would write past it. */
        if (obj->num_outputs > TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS)
        {
            TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] num_outputs exceeds TIOVX_MULTI_SCALER_MODULE_MAX_OUTPUTS!\n");
            status = VX_ERROR_INVALID_PARAMETERS;
        }

        if((vx_status)VX_SUCCESS == status)
        {
            /* Gather one crop reference per output, in output order. */
            for (out = 0; out < obj->num_outputs; out++)
            {
                refs[out] = (vx_reference)obj->crop_obj[out];
            }

            status = tivxNodeSendCommand(obj->node, 0u,
                    TIVX_VPAC_MSC_CMD_SET_CROP_PARAMS,
                    refs, obj->num_outputs);

            if((vx_status)VX_SUCCESS != status)
            {
                TIOVX_MODULE_ERROR("[MULTI-SCALER-MODULE] Node send command TIVX_VPAC_MSC_CMD_SET_CROP_PARAMS, failed!\n");
            }
        }

        return status;
    }
    
    /*
     * Send the module's input-parameter object to the MSC node using the
     * TIVX_VPAC_MSC_CMD_SET_INPUT_PARAMS node command, then release that object.
     *
     * obj: module object supplying the node handle (obj->node) and the
     *      input-parameter object (obj->input_prm_obj).
     *
     * Returns the status reported by tivxNodeSendCommand(); an error message
     * is logged when that status is not VX_SUCCESS.
     *
     * NOTE(review): obj->input_prm_obj is released on every call, including
     * after a failed send - presumably callers invoke this once per object;
     * confirm against the call sites.
     */
    vx_status tiovx_multi_scaler_module_update_input_params(TIOVXMultiScalerModuleObj *obj)
    {
        vx_reference prm_ref[1];
        vx_status    result;

        prm_ref[0] = (vx_reference)(obj->input_prm_obj);

        result = tivxNodeSendCommand(obj->node, 0u,
                                     TIVX_VPAC_MSC_CMD_SET_INPUT_PARAMS,
                                     prm_ref, 1u);

        if((vx_status)VX_SUCCESS != result)
        {
            TIOVX_MODULE_ERROR(
                    "[MULTI-SCALER-MODULE] Node send command "
                    "TIVX_VPAC_MSC_CMD_SET_INPUT_PARAMS, failed!\n");
        }

        /* Release happens regardless of the command's outcome. */
        vxReleaseUserDataObject(&(obj->input_prm_obj));

        return result;
    }
    
    
    tiovx_multi_scaler_module.h

    3. Run the scripts /opt/edgeai-gst-apps/scripts/install_tiovx_modules.sh and /opt/edgeai-gst-apps/scripts/install_gst_plugins.sh.

    This should reduce the MSC processing time to ~25 ms, and camera capture + VISS + LDC + MSC should then run at 30 fps.

    Regards,

    Jianzhong

  • Hi Jianzhong,

    I'm trying to integrate these changes on top of SDK 9.2, but am running into some trouble. What versions of edgeai-gst-plugins and edgeai-tiovx-modules should I take as a base for this?

    Regards,

    Bas Vermeulen