Tool/software:
I have trained the yolox_s_lite model on a custom dataset using EDGEAI-MODELMAKER.
Because of a local system shutdown, training stopped at the 158th epoch, so I converted the .pth checkpoint from epoch 158 to .onnx format using EDGEAI-MMDETECTION (inside the EDGEAI-TENSORLAB repo).
I then compiled this .onnx model with the onnxrt_ep.py script from the EDGEAI-TIDL-TOOLS repo. During compilation I got an error saying the IR and opset versions were mismatched (the model was IR 10 / opset 17), so I converted it to the required versions (IR 9 / opset 12) using the script below:
"""
m = onnx.load(src)
print("Before -> IR:", m.ir_version, "opsets:",
[(imp.domain or "ai.onnx", imp.version) for imp in m.opset_import])
# Force IR version to 9 (do NOT touch opset)
m.ir_version = 9
onnx.save(m, dst)
# Optional: checker may complain about strict IR/opset pairing; you can skip it
try:
from onnx import checker
checker.check_model(dst)
print("ONNX checker passed.")
except Exception as e:
print("Checker warning:", e)
print("Saved:", dst)
"""
The compilation using the TIDL tools then completed successfully.
However, when inferencing with the .onnx model on the PC, it predicts the bounding boxes correctly, but when I upload the model, its prototxt, and the artifacts to the TDA4VM board, it does not detect any bounding boxes; only the camera video runs, without detections.
What could be the reasons?
Could you please provide a solution?
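For debugging on our side, one simple check is whether the TIDLExecutionProvider is actually selected at runtime on the board. A minimal sketch (assuming the same `session` object created in the script below):
"""
# Sanity check: list the providers ONNX Runtime actually engaged.
# If only 'CPUExecutionProvider' appears, the TIDL artifacts were not
# loaded and inference falls back entirely to the CPU.
print("Active providers:", session.get_providers())
print("Model input shape:", session.get_inputs()[0].shape)
"""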
For reference, below is the script we are using for inference on the TDA4VM board:
import os
import cv2
import numpy as np
import onnxruntime as ort
import time
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst
# --- Set TIDL Environment Variables ---
os.environ["TIDL_RT_PERFSTATS"] = "1"
os.environ["TIDL_RT_LOG_LEVEL"] = "2"
# Initialize GStreamer
Gst.init(None)
# --- GStreamer OUTPUT PIPELINE ---
gst_output_pipeline = (
    "appsrc name=src is-live=true block=true format=GST_FORMAT_TIME "
    "caps=video/x-raw,format=BGR,width=1280,height=720,framerate=30/1 ! "
    "videoconvert ! queue ! waylandsink sync=false"
)
pipeline = Gst.parse_launch(gst_output_pipeline)
appsrc = pipeline.get_by_name("src")
pipeline.set_state(Gst.State.PLAYING)
# --- GStreamer INPUT PIPELINE ---
camera_pipeline = (
    "v4l2src device=/dev/video2 ! "
    "image/jpeg,width=1280,height=720,framerate=30/1 ! jpegdec ! "
    "videoconvert ! video/x-raw,format=BGR ! appsink"
)
cap = cv2.VideoCapture(camera_pipeline, cv2.CAP_GSTREAMER)
if not cap.isOpened():
    print("ERROR: Could not open /dev/video2")
    exit(1)
# --- Load TIDL-compiled model ----
model_path = "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/model/yolox_s_lite_158_ir9_opset12.onnx"
session = ort.InferenceSession(
    model_path,
    providers=["TIDLExecutionProvider", "CPUExecutionProvider"],
    provider_options=[
        {"artifacts_folder": "/opt/model_zoo/158_onnxrt_Dataset_Buit_Over_COCO_edgeai-mmdet_yolox_s_lite__model_onnx/artifacts",
         "platform": "J7"},
        {}
    ]
)
input_name = session.get_inputs()[0].name
output_names = [o.name for o in session.get_outputs()]
# --- Custom 7 classes ---
CLASSES = ["person", "bicycle", "car", "motorcycle", "bus", "truck", "rickshaw"]
# ---------- PREPROCESS ----------
def preprocess(image, W=640, H=640, layout="NCHW"):
    resized = cv2.resize(image, (W, H), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    if layout == "NCHW":
        blob = rgb.transpose(2, 0, 1)[None, ...]
    else:
        blob = rgb[None, ...]
    return np.ascontiguousarray(blob, dtype=np.float32)  # no /255.0 normalization
# ---------- NMS (NumPy implementation) ----------
def nms_numpy(boxes, scores, conf_threshold=0.3, nms_threshold=0.4):
    boxes = np.array(boxes)
    scores = np.array(scores)
    # Filter by confidence
    keep = scores >= conf_threshold
    boxes, scores = boxes[keep], scores[keep]
    indices = np.where(keep)[0]
    if len(boxes) == 0:
        return []
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 0] + boxes[:, 2]
    y2 = boxes[:, 1] + boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep_indices = []
    while order.size > 0:
        i = order[0]
        keep_indices.append(indices[i])
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(iou <= nms_threshold)[0]
        order = order[inds + 1]
    return keep_indices
# ---------- GENERIC DECODER ----------
def try_parse_outputs(outs):
    def looks_like_boxes(a):
        return a.ndim >= 2 and a.shape[-1] in (5, 6, 7) and np.issubdtype(a.dtype, np.floating)
    def looks_like_labels(a):
        return np.issubdtype(a.dtype, np.integer) and (a.ndim in (1, 2, 3))
    def squeeze_to_2d(a):
        a = np.array(a)
        while a.ndim > 2:
            a = a.reshape(-1, a.shape[-1])
        return a
    if len(outs) == 2:
        a, b = outs
        if looks_like_boxes(a) and looks_like_labels(b):
            dets, labs = squeeze_to_2d(a), squeeze_to_2d(b).reshape(-1)
        elif looks_like_boxes(b) and looks_like_labels(a):
            dets, labs = squeeze_to_2d(b), squeeze_to_2d(a).reshape(-1)
        else:
            raise RuntimeError("Cannot classify outputs.")
        if dets.shape[1] >= 5:
            boxes, scores = dets[:, :4], dets[:, 4]
            if dets.shape[1] >= 6:
                classes = dets[:, 5].astype(np.int32)
            else:
                classes = labs.astype(np.int32)
            return boxes, scores, classes
    elif len(outs) == 1:
        x = np.array(outs[0])
        while x.ndim > 2:
            x = x.reshape(-1, x.shape[-1])
        if x.shape[1] in (6, 7):
            boxes, scores, classes = x[:, :4], x[:, 4], x[:, 5].astype(np.int32)
            return boxes, scores, classes
    raise RuntimeError("Unexpected model outputs layout.")
print(" Running object detection with TIDL acceleration... Press Ctrl+C to stop.")
try:
while True:
start_time = time.time()
ret, frame = cap.read()
if not ret:
print("️ Failed to read frame")
continue
# Inference
outs = session.run(output_names, {input_name: preprocess(frame)})
boxes_xyxy, scores, class_ids = try_parse_outputs(outs)
# Filter + NMS
CONF_THRESHOLD, NMS_THRESHOLD = 0.3, 0.4
keep = scores >= CONF_THRESHOLD
boxes_xyxy, scores, class_ids = boxes_xyxy[keep], scores[keep], class_ids[keep]
boxes_xywh = boxes_xyxy.copy()
boxes_xywh[:, 2] -= boxes_xyxy[:, 0]
boxes_xywh[:, 3] -= boxes_xyxy[:, 1]
boxes_list = boxes_xywh.astype(int).tolist()
scores_list = scores.astype(float).tolist()
idxs = nms_numpy(boxes_list, scores_list, CONF_THRESHOLD, NMS_THRESHOLD)
# Draw detections
sx, sy = frame.shape[1] / 640.0, frame.shape[0] / 640.0
for i in idxs:
x, y, w, h = boxes_list[i]
x1, y1 = int(round(x * sx)), int(round(y * sy))
x2, y2 = int(round((x + w) * sx)), int(round((y + h) * sy))
cls = int(class_ids[i])
name = CLASSES[cls] if 0 <= cls < len(CLASSES) else str(cls)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, f"{name} {scores_list[i]:.2f}", (x1, max(0, y1 - 5)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
if cls == 0:
print("label:", name, f"{scores_list[i]:.2f}", "x1:", x1, "y1:", y1, "x2:", x2, "y2:", y2)
# Send to GStreamer sink
data = frame.tobytes()
buf = Gst.Buffer.new_allocate(None, len(data), None)
buf.fill(0, data)
buf.duration = Gst.util_uint64_scale_int(1, Gst.SECOND, 30)
timestamp = int(time.time() * Gst.SECOND)
buf.pts = buf.dts = timestamp
appsrc.emit("push-buffer", buf)
# Frame rate limiting
elapsed = time.time() - start_time
delay = max(0, (1 / 30) - elapsed)
time.sleep(delay)
except KeyboardInterrupt:
print(" Interrupted. Cleaning up...")
finally:
cap.release()
pipeline.set_state(Gst.State.NULL)