Tool/software:
Hello, I generated model artifacts for an ONNX model (a PatchCore implementation) that uses ResNet-50.
Whenever I run the inference script below on the board, trying to use TIDL acceleration, I get this error:
2024-09-05 03:56:32.346482125 [W:onnxruntime:, execution_frame.cc:835 VerifyOutputSizes] Expected shape from model of {1,1024,14,14} does not match actual shape of {1,1,1,1024,14,14} for output input.332
I don't know why this is happening, because as far as I understand these extra dimensions are added when the model artifacts are generated. I have a TFLite model with similar dimensions running for a different application, so I'm not sure what the issue is here.
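For reference, the mismatch is only extra leading singleton axes ({1,1,1,1024,14,14} vs. {1,1024,14,14}), so as a sanity check the outputs can be collapsed back on the host side. Below is a minimal sketch of such a workaround; the helper squeeze_to_model_shape is a name I made up for illustration, and it assumes the surplus axes are all size 1:

import numpy as np

def squeeze_to_model_shape(output, expected_rank=4):
    # Hypothetical helper: drop extra leading singleton axes,
    # e.g. (1, 1, 1, 1024, 14, 14) -> (1, 1024, 14, 14).
    arr = np.asarray(output)
    while arr.ndim > expected_rank and arr.shape[0] == 1:
        arr = arr[0]
    return arr

# Usage in the script below, right after run_onnx_inference(...):
# features = [squeeze_to_model_shape(f) for f in features]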
import os
import cv2
import numpy as np
import torch
from torchvision import transforms
import onnxruntime as ort
import faiss
from PIL import Image
from scipy.ndimage import gaussian_filter
import gi
import time
gi.require_version('Gst', '1.0')
from gi.repository import Gst
# Import necessary components from train.py
from train import embedding_concat, reshape_embedding, min_max_norm, cvt2heatmap, heatmap_on_image, get_args
# Define transforms (ensure these match those used in train.py)
data_transforms = transforms.Compose([
    transforms.Resize((224, 224), Image.LANCZOS),  # Reduced resolution
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
inv_normalize = transforms.Normalize(
    mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
    std=[1 / 0.229, 1 / 0.224, 1 / 0.225]
)
def gstreamer_pipeline():
    return (
        'v4l2src device=/dev/video3 io-mode=dmabuf-import ! '
        'video/x-bayer, width=640, height=480, framerate=15/1, format=rggb10 ! '
        'tiovxisp sink_0::device=/dev/v4l-subdev2 sensor-name="SENSOR_SONY_IMX219_RPI" '
        'dcc-isp-file=/opt/imaging/imx219/linear/dcc_viss_10b_640x480.bin sink_0::dcc-2a-file=/opt/imaging/imx219/linear/dcc_2a_10b_640x480.bin '
        '! video/x-raw, format=NV12, width=640, height=480, framerate=15/1 ! '
        'videoconvert ! video/x-raw, format=BGR ! appsink'
    )
def heatmap_on_image(heatmap, image, alpha=0.5, colormap=cv2.COLORMAP_JET):
    # Note: this local definition overrides the heatmap_on_image imported from train.py
    if heatmap.shape != image.shape:
        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
    heatmap = cv2.applyColorMap(np.uint8(heatmap), colormap)
    overlay = cv2.addWeighted(heatmap, alpha, image, 1 - alpha, 0)
    return overlay
def main():
    # Initialize GStreamer
    Gst.init(None)
    # Timing model load
    start_time = time.time()
    # Path to the ONNX model file
    onnx_model_path = '/opt/edgeai-tidl-artifacts/cl-ort-patchcore/patchcore_model.onnx'
    options = {
        'artifacts_folder': '/opt/edgeai-tidl-artifacts/cl-ort-patchcore'
    }
    so = ort.SessionOptions()
    # Specify execution providers with TIDL configuration
    ep_list = ['TIDLExecutionProvider', 'CPUExecutionProvider']
    # Load the ONNX model with TIDL acceleration
    ort_session = ort.InferenceSession(onnx_model_path, providers=ep_list, provider_options=[options, {}], sess_options=so)
    model_load_time = time.time() - start_time
    print(f"Model loading time: {model_load_time:.4f} seconds")
    # Get input and output details
    input_name = ort_session.get_inputs()[0].name
    output_names = [output.name for output in ort_session.get_outputs()]
    # Get arguments
    args = get_args()
    # Update the dataset path to your actual path on the board
    args.dataset_path = '/opt/edgeai-gst-apps/PatchCore_anomaly_detection'
    args.category = 'bottle'  # Ensure this is set to the correct category
    # Load the FAISS index
    start_time = time.time()
    index_path = os.path.join(args.dataset_path, 'embeddings', args.category, 'index.faiss')
    index = faiss.read_index(index_path)
    if torch.cuda.is_available():
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)
    faiss_load_time = time.time() - start_time
    print(f"FAISS index loading time: {faiss_load_time:.4f} seconds")
    # Function to run inference on ONNX model
    def run_onnx_inference(ort_session, input_data):
        start_time = time.time()
        outputs = ort_session.run(output_names, {input_name: input_data})
        inference_time = time.time() - start_time
        print(f"Model inference time: {inference_time:.4f} seconds")
        return outputs
    # Initialize video capture with GStreamer pipeline
    cap = cv2.VideoCapture(gstreamer_pipeline(), cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print("Error: Unable to open video source.")
        return
    frame_count = 0
    total_processing_time = 0
    while cap.isOpened():
        frame_start = time.time()  # Per-frame timer; kept separate so later timing blocks don't clobber it
        ret, frame = cap.read()
        if not ret:
            break
        frame_read_time = time.time() - frame_start
        print(f"Frame read time: {frame_read_time:.4f} seconds")
        # Preprocess frame
        start_time = time.time()
        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        input_tensor = data_transforms(pil_img).unsqueeze(0).numpy().astype(np.float32)
        preprocessing_time = time.time() - start_time
        print(f"Frame preprocessing time: {preprocessing_time:.4f} seconds")
        # Run ONNX inference (run_onnx_inference prints its own timing)
        features = run_onnx_inference(ort_session, input_tensor)
        # Convert features to tensors
        start_time = time.time()
        features = [torch.tensor(f) for f in features]
        # Extract embeddings and perform the same steps as in the test_step
        avg_pool = torch.nn.AvgPool2d(3, 1, 1)
        embeddings = [avg_pool(feature) for feature in features]
        embedding_ = embedding_concat(embeddings[0], embeddings[1])
        embedding_test = np.array(reshape_embedding(np.array(embedding_)))
        feature_extraction_time = time.time() - start_time
        print(f"Feature extraction time: {feature_extraction_time:.4f} seconds")
        # Search the FAISS index
        start_time = time.time()
        score_patches, _ = index.search(embedding_test, k=args.n_neighbors)
        faiss_search_time = time.time() - start_time
        print(f"FAISS search time: {faiss_search_time:.4f} seconds")
        # Postprocess anomaly map
        start_time = time.time()
        anomaly_map = score_patches[:, 0].reshape((28, 28))
        N_b = score_patches[np.argmax(score_patches[:, 0])]
        w = (1 - (np.max(np.exp(N_b)) / np.sum(np.exp(N_b))))
        score = w * max(score_patches[:, 0])  # Image-level score
        anomaly_map_resized = cv2.resize(anomaly_map, (224, 224))
        anomaly_map_resized_blur = gaussian_filter(anomaly_map_resized, sigma=2)  # Reduced sigma for faster processing
        anomaly_map_norm = min_max_norm(anomaly_map_resized_blur)
        anomaly_map_norm_hm = cvt2heatmap(anomaly_map_norm * 255)
        anomaly_map_norm_hm_resized = cv2.resize(anomaly_map_norm_hm, (frame.shape[1], frame.shape[0]))
        heatmap_overlay_time = time.time() - start_time
        print(f"Heatmap overlay time: {heatmap_overlay_time:.4f} seconds")
        hm_on_img = heatmap_on_image(anomaly_map_norm_hm_resized, frame, alpha=0.3)  # More transparent overlay
        # Display result
        start_time = time.time()
        cv2.imshow('Anomaly Detection', hm_on_img)
        display_time = time.time() - start_time
        print(f"Display time: {display_time:.4f} seconds")
        # Total time for this frame, measured from the per-frame timer
        frame_processing_time = time.time() - frame_start
        total_processing_time += frame_processing_time
        frame_count += 1
        print(f"Frame processing time: {frame_processing_time:.4f} seconds")
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    average_processing_time = total_processing_time / frame_count if frame_count else 0
    print(f"Average frame processing time: {average_processing_time:.4f} seconds")

if __name__ == '__main__':
    main()