Channel: Raspberry Pi Forums

General discussion • Raspberry Pi 4 B Object Detection YOLO11 converted to Tflite not working

Hi, I'm a beginner at machine learning. I trained a YOLO11 object detection model and it works fine on the Raspberry Pi, but it is very slow. Digging around the internet, I found that you can convert YOLO models to TFLite to significantly boost performance. I converted my model and tried the resulting .tflite file in VS Code, where it works fine, but running it on the Raspberry Pi gives me an error.
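The conversion itself was done with the Ultralytics export API, roughly like this (from memory, so the exact arguments may differ slightly):

Code:

from ultralytics import YOLO

# Load the trained weights and export to TFLite.
# This produces best_saved_model/best_float32.tflite among other files,
# which is the file I'm loading below.
model = YOLO("best.pt")
model.export(format="tflite")

For context, this is the code I have: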

Code:

import argparse
import sys
import time

import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from utils import visualize
from picamera2 import Picamera2

# Global variables to calculate FPS
COUNTER, FPS = 0, 0
START_TIME = time.time()

picam2 = Picamera2()
picam2.preview_configuration.main.size = (640, 480)
picam2.preview_configuration.main.format = "RGB888"
picam2.preview_configuration.align()
picam2.configure("preview")
picam2.start()


def run(model: str, max_results: int, score_threshold: float,
        camera_id: int, width: int, height: int) -> None:
  """Continuously run inference on images acquired from the camera.

  Args:
    model: Name of the TFLite object detection model.
    max_results: Max number of detection results.
    score_threshold: The score threshold of detection results.
    camera_id: The camera id to be passed to OpenCV.
    width: The width of the frame captured from the camera.
    height: The height of the frame captured from the camera.
  """

  # Visualization parameters
  row_size = 50  # pixels
  left_margin = 24  # pixels
  text_color = (0, 0, 0)  # black
  font_size = 1
  font_thickness = 1
  fps_avg_frame_count = 10

  detection_frame = None
  detection_result_list = []

  def save_result(result: vision.ObjectDetectorResult,
                  unused_output_image: mp.Image, timestamp_ms: int):
    global FPS, COUNTER, START_TIME

    # Calculate the FPS
    if COUNTER % fps_avg_frame_count == 0:
      FPS = fps_avg_frame_count / (time.time() - START_TIME)
      START_TIME = time.time()

    detection_result_list.append(result)
    COUNTER += 1

  # Initialize the object detection model
  base_options = python.BaseOptions(model_asset_path=model)
  options = vision.ObjectDetectorOptions(
      base_options=base_options,
      running_mode=vision.RunningMode.LIVE_STREAM,
      max_results=max_results,
      score_threshold=score_threshold,
      result_callback=save_result)
  detector = vision.ObjectDetector.create_from_options(options)

  # Continuously capture images from the camera and run inference
  while True:
    im = picam2.capture_array()
#    success, image = cap.read()
    image = cv2.resize(im, (640, 480))
    image = cv2.flip(image, -1)

    # Convert the image from BGR to RGB as required by the TFLite model.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

    # Run object detection using the model.
    detector.detect_async(mp_image, time.time_ns() // 1_000_000)

    # Show the FPS
    fps_text = 'FPS = {:.1f}'.format(FPS)
    text_location = (left_margin, row_size)
    current_frame = image
    cv2.putText(current_frame, fps_text, text_location, cv2.FONT_HERSHEY_DUPLEX,
                font_size, text_color, font_thickness, cv2.LINE_AA)

    if detection_result_list:
        # print(detection_result_list)
        current_frame = visualize(current_frame, detection_result_list[0])
        detection_frame = current_frame
        detection_result_list.clear()

    if detection_frame is not None:
        cv2.imshow('object_detection', detection_frame)

    # Stop the program if the ESC key is pressed.
    if cv2.waitKey(1) == 27:
      break

  detector.close()
  picam2.stop()  # was cap.release(), but cap is never defined; frames come from picamera2
  cv2.destroyAllWindows()


def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--model',
      help='Path of the object detection model.',
      required=False,
#      default='efficientdet_lite0.tflite')
      default='best_float32.tflite')
  parser.add_argument(
      '--maxResults',
      help='Max number of detection results.',
      required=False,
      default=5)
  parser.add_argument(
      '--scoreThreshold',
      help='The score threshold of detection results.',
      required=False,
      type=float,
      default=0.25)
  # Finding the camera ID can be very reliant on platform-dependent methods.
  # One common approach is to use the fact that camera IDs are usually indexed
  # sequentially by the OS, starting from 0. Here, we use OpenCV and create a
  # VideoCapture object for each potential ID with 'cap = cv2.VideoCapture(i)'.
  # If 'cap' is None or not 'cap.isOpened()', the camera ID is not available.
  parser.add_argument(
      '--cameraId', help='Id of camera.', required=False, type=int, default=0)
  parser.add_argument(
      '--frameWidth',
      help='Width of frame to capture from camera.',
      required=False,
      type=int,
      default=640)
  parser.add_argument(
      '--frameHeight',
      help='Height of frame to capture from camera.',
      required=False,
      type=int,
      default=480)
  args = parser.parse_args()

  run(args.model, int(args.maxResults),
      args.scoreThreshold, int(args.cameraId), args.frameWidth,
      args.frameHeight)


if __name__ == '__main__':
  main()
When running the code, I get this error:

Traceback (most recent call last):
  File "/home/delta/TFlite/detect1.py", line 154, in <module>
    main()
  File "/home/delta/TFlite/detect1.py", line 149, in main
    run(args.model, int(args.maxResults),
  File "/home/delta/TFlite/detect1.py", line 67, in run
    detector = vision.ObjectDetector.create_from_options(options)
  File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/object_detector.py", line 238, in create_from_options
    return cls(
  File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/core/base_vision_task_api.py", line 70, in __init__
    self._runner = _TaskRunner.create(graph_config, packet_callback)
RuntimeError: Input tensor has type float32: it requires specifying NormalizationOptions metadata to preprocess input images.
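From what I've found so far, the error means MediaPipe expects the .tflite file to carry NormalizationOptions metadata describing how to scale input pixels, and the Ultralytics export doesn't attach any. I haven't gotten it working yet, but the direction I found uses the tflite-support metadata writer, something like this (untested sketch; the labels.txt file and the 0-to-1 input scaling are my assumptions, since the float32 export expects inputs scaled to [0, 1]):

Code:

from tflite_support.metadata_writers import object_detector
from tflite_support.metadata_writers import writer_utils

MODEL_PATH = "best_float32.tflite"        # model exported from YOLO11
EXPORT_PATH = "best_float32_meta.tflite"  # same model with metadata attached
LABEL_PATH = "labels.txt"                 # one class name per line (assumption)

# NormalizationOptions describe the (pixel - mean) / std preprocessing.
# mean=0, std=255 maps 0..255 camera pixels into the 0..1 range.
writer = object_detector.MetadataWriter.create_for_inference(
    writer_utils.load_file(MODEL_PATH),
    [0.0],    # input_norm_mean
    [255.0],  # input_norm_std
    [LABEL_PATH])
writer_utils.save_file(writer.populate(), EXPORT_PATH)

That said, I've also read that MediaPipe's ObjectDetector expects a specific set of output tensors (boxes, classes, scores, count), so I'm not sure whether metadata alone is enough to make a YOLO export work, which is part of why I'm asking here.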

Can anyone help me resolve this?

Statistics: Posted by Fineapple — Sat Feb 08, 2025 10:16 am


