TDA4VM: Unable to run real time inference of yolox_s_lite on TDA4VM

Part Number: TDA4VM

Tool/software:

I have trained the yolox_s_lite model on a custom dataset using EDGEAI-MODELMAKER.

Due to a local system shutdown, training stopped at the 158th epoch, so I converted the epoch-158 .pth checkpoint to .onnx format using EDGEAI-MMDETECTION (inside the EDGEAI-TENSORLAB repo).
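
For context, the export step boils down to a standard torch.onnx.export call. A minimal sketch (the stand-in model, file name, and input size here are placeholders; the real export is done by edgeai-mmdetection's tooling from the epoch-158 checkpoint):

"""

import torch
import torch.nn as nn

# Stand-in for the trained detector; in practice edgeai-mmdetection rebuilds
# the model and loads the epoch-158 .pth weights before exporting.
model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1)).eval()
dummy = torch.randn(1, 3, 640, 640)  # YOLOX-S lite input size

torch.onnx.export(model, dummy, "yolox_s_lite_158.onnx",
                  opset_version=12, input_names=["input"])

"""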

Then I compiled this .onnx model with the onnxrt_ep.py file present inside the EDGEAI-TIDL-TOOLS repo. During compilation I got an error saying the IR and opset versions were mismatched (the export was IR 10 / opset 17, while the tools expect IR 9 / opset 12), so I downgraded the model using the script below:

"""

m = onnx.load(src)

print("Before -> IR:", m.ir_version, "opsets:",
[(imp.domain or "ai.onnx", imp.version) for imp in m.opset_import])

# Force IR version to 9 (do NOT touch opset)
m.ir_version = 9

onnx.save(m, dst)

# Optional: checker may complain about strict IR/opset pairing; you can skip it
try:
from onnx import checker
checker.check_model(dst)
print("ONNX checker passed.")
except Exception as e:
print("Checker warning:", e)

print("Saved:", dst)

 """

After that, compilation using the TIDL tools was successful.

However, when I run inference with the converted .onnx model on the PC, it predicts bounding boxes correctly. When I deploy the model, its prototxt, and the compiled artifacts to the TDA4VM board, no bounding boxes are detected: the camera video runs, but without any detections.

What could be the reasons for this?

Could you please suggest a solution?
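
In case it helps with diagnosis: one isolation test is to run the identical preprocessed frame through a plain CPUExecutionProvider session and through the TIDL session and compare the raw outputs. If the CPU run produces detections and the TIDL run does not, the problem is likely in the compiled artifacts or calibration rather than in the post-processing. A sketch (paths as used on the board; frame_blob.npy is one frame saved from our preprocess() below):

"""

import numpy as np
import onnxruntime as ort

model_path = "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/model/yolox_s_lite_158_ir9_opset12.onnx"
artifacts = "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/artifacts"

blob = np.load("frame_blob.npy")  # one preprocessed frame, saved beforehand

cpu = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
tidl = ort.InferenceSession(
    model_path,
    providers=["TIDLExecutionProvider", "CPUExecutionProvider"],
    provider_options=[{"artifacts_folder": artifacts, "platform": "J7"}, {}],
)

name = cpu.get_inputs()[0].name
out_cpu = cpu.run(None, {name: blob})
out_tidl = tidl.run(None, {name: blob})
for a, b in zip(out_cpu, out_tidl):
    a, b = np.asarray(a, np.float32), np.asarray(b, np.float32)
    print(a.shape, b.shape,
          "max|diff|:", np.abs(a - b).max() if a.shape == b.shape else "n/a")

"""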

For reference, below is the script we are using for inference on the TDA4VM board:

import os
import time

import cv2
import numpy as np
import onnxruntime as ort

import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst

# --- Set TIDL environment variables ---
os.environ["TIDL_RT_PERFSTATS"] = "1"
os.environ["TIDL_RT_LOG_LEVEL"] = "2"

# Initialize GStreamer
Gst.init(None)

# --- GStreamer OUTPUT PIPELINE ---
gst_output_pipeline = (
    "appsrc name=src is-live=true block=true format=GST_FORMAT_TIME "
    "caps=video/x-raw,format=BGR,width=1280,height=720,framerate=30/1 ! "
    "videoconvert ! queue ! waylandsink sync=false"
)
pipeline = Gst.parse_launch(gst_output_pipeline)
appsrc = pipeline.get_by_name("src")
pipeline.set_state(Gst.State.PLAYING)

# --- GStreamer INPUT PIPELINE ---
camera_pipeline = (
    "v4l2src device=/dev/video2 ! "
    "image/jpeg,width=1280,height=720,framerate=30/1 ! jpegdec ! "
    "videoconvert ! video/x-raw,format=BGR ! appsink"
)
cap = cv2.VideoCapture(camera_pipeline, cv2.CAP_GSTREAMER)
if not cap.isOpened():
    print("ERROR: Could not open /dev/video2")
    exit(1)

 

# --- Load TIDL-compiled model ---
model_path = "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/model/yolox_s_lite_158_ir9_opset12.onnx"
session = ort.InferenceSession(
    model_path,
    providers=["TIDLExecutionProvider", "CPUExecutionProvider"],
    provider_options=[
        {"artifacts_folder": "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/artifacts",
         "platform": "J7"},
        {}
    ]
)

input_name = session.get_inputs()[0].name
output_names = [o.name for o in session.get_outputs()]

# --- Custom 7 classes ---
CLASSES = ["person", "bicycle", "car", "motorcycle", "bus", "truck", "rickshaw"]

 

# ---------- PREPROCESS ----------
def preprocess(image, W=640, H=640, layout="NCHW"):
    resized = cv2.resize(image, (W, H), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    if layout == "NCHW":
        blob = rgb.transpose(2, 0, 1)[None, ...]  # HWC -> NCHW
    else:
        blob = rgb[None, ...]
    # NOTE: no /255.0 normalization; values stay in the 0-255 range
    return np.ascontiguousarray(blob, dtype=np.float32)
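
# Debug aid: print the input/output layout and dtype the compiled model
# actually expects, to validate the NCHW and no-normalization assumptions.
for _inp in session.get_inputs():
    print("model input:", _inp.name, _inp.shape, _inp.type)
for _out in session.get_outputs():
    print("model output:", _out.name, _out.shape, _out.type)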

 

# ---------- NMS (NumPy implementation) ----------
def nms_numpy(boxes, scores, conf_threshold=0.3, nms_threshold=0.4):
    boxes = np.array(boxes)
    scores = np.array(scores)

    # Filter by confidence
    keep = scores >= conf_threshold
    boxes, scores = boxes[keep], scores[keep]
    indices = np.where(keep)[0]

    if len(boxes) == 0:
        return []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 0] + boxes[:, 2]
    y2 = boxes[:, 1] + boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = scores.argsort()[::-1]
    keep_indices = []

    while order.size > 0:
        i = order[0]
        keep_indices.append(indices[i])

        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        iou = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(iou <= nms_threshold)[0]
        order = order[inds + 1]

    return keep_indices
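
# Note: this hand-rolled loop could be swapped for OpenCV's built-in, which
# uses the same (x, y, w, h) box convention:
#   idxs = cv2.dnn.NMSBoxes(boxes, scores, conf_threshold, nms_threshold)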

 

# ---------- GENERIC DECODER ----------
def try_parse_outputs(outs):
    def looks_like_boxes(a):
        return a.ndim >= 2 and a.shape[-1] in (5, 6, 7) and np.issubdtype(a.dtype, np.floating)

    def looks_like_labels(a):
        return np.issubdtype(a.dtype, np.integer) and (a.ndim in (1, 2, 3))

    def squeeze_to_2d(a):
        a = np.array(a)
        while a.ndim > 2:
            a = a.reshape(-1, a.shape[-1])
        return a

    if len(outs) == 2:
        a, b = outs
        if looks_like_boxes(a) and looks_like_labels(b):
            dets, labs = squeeze_to_2d(a), squeeze_to_2d(b).reshape(-1)
        elif looks_like_boxes(b) and looks_like_labels(a):
            dets, labs = squeeze_to_2d(b), squeeze_to_2d(a).reshape(-1)
        else:
            raise RuntimeError("Cannot classify outputs.")

        if dets.shape[1] >= 5:
            boxes, scores = dets[:, :4], dets[:, 4]
            if dets.shape[1] >= 6:
                classes = dets[:, 5].astype(np.int32)
            else:
                classes = labs.astype(np.int32)
            return boxes, scores, classes

    elif len(outs) == 1:
        x = np.array(outs[0])
        while x.ndim > 2:
            x = x.reshape(-1, x.shape[-1])
        if x.shape[1] in (6, 7):
            boxes, scores, classes = x[:, :4], x[:, 4], x[:, 5].astype(np.int32)
            return boxes, scores, classes

    raise RuntimeError("Unexpected model outputs layout.")
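
# Debug suggestion for the board-vs-PC discrepancy: dump the raw output
# shapes/dtypes once on each platform and diff them, e.g.
#   print([(np.asarray(o).shape, np.asarray(o).dtype) for o in outs])
# inside the loop below, to see whether TIDL returns empty tensors or
# differently laid-out ones.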

 

print("White check mark Running object detection with TIDL acceleration... Press Ctrl+C to stop.")

 

try:
    while True:
        start_time = time.time()
        ret, frame = cap.read()
        if not ret:
            print("WARNING: Failed to read frame")
            continue

        # Inference
        outs = session.run(output_names, {input_name: preprocess(frame)})
        boxes_xyxy, scores, class_ids = try_parse_outputs(outs)

        # Filter + NMS
        CONF_THRESHOLD, NMS_THRESHOLD = 0.3, 0.4
        keep = scores >= CONF_THRESHOLD
        boxes_xyxy, scores, class_ids = boxes_xyxy[keep], scores[keep], class_ids[keep]

        boxes_xywh = boxes_xyxy.copy()
        boxes_xywh[:, 2] -= boxes_xyxy[:, 0]
        boxes_xywh[:, 3] -= boxes_xyxy[:, 1]

        boxes_list = boxes_xywh.astype(int).tolist()
        scores_list = scores.astype(float).tolist()
        idxs = nms_numpy(boxes_list, scores_list, CONF_THRESHOLD, NMS_THRESHOLD)

        # Draw detections
        sx, sy = frame.shape[1] / 640.0, frame.shape[0] / 640.0
        for i in idxs:
            x, y, w, h = boxes_list[i]
            x1, y1 = int(round(x * sx)), int(round(y * sy))
            x2, y2 = int(round((x + w) * sx)), int(round((y + h) * sy))
            cls = int(class_ids[i])
            name = CLASSES[cls] if 0 <= cls < len(CLASSES) else str(cls)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{name} {scores_list[i]:.2f}", (x1, max(0, y1 - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            if cls == 0:
                print("label:", name, f"{scores_list[i]:.2f}", "x1:", x1, "y1:", y1, "x2:", x2, "y2:", y2)

        # Send to GStreamer sink
        data = frame.tobytes()
        buf = Gst.Buffer.new_allocate(None, len(data), None)
        buf.fill(0, data)
        buf.duration = Gst.util_uint64_scale_int(1, Gst.SECOND, 30)
        timestamp = int(time.time() * Gst.SECOND)
        buf.pts = buf.dts = timestamp
        appsrc.emit("push-buffer", buf)

        # Frame rate limiting
        elapsed = time.time() - start_time
        delay = max(0, (1 / 30) - elapsed)
        time.sleep(delay)

except KeyboardInterrupt:
    print("Interrupted. Cleaning up...")

finally:
    cap.release()
    pipeline.set_state(Gst.State.NULL)