Hi, I'm a beginner in machine learning. I trained a YOLO11 object detection model, and it works fine on the Raspberry Pi but is very slow. Searching the internet, I found that you can convert YOLO models to TFLite to significantly boost performance. I converted my model and tried the resulting .tflite file in VS Code, where it works fine, but running it on the Raspberry Pi gives me an error. For context, the code I am using is included below. When I run it, I get this error:
Traceback (most recent call last):
File "/home/delta/TFlite/detect1.py", line 154, in <module>
main()
File "/home/delta/TFlite/detect1.py", line 149, in main
run(args.model, int(args.maxResults),
File "/home/delta/TFlite/detect1.py", line 67, in run
detector = vision.ObjectDetector.create_from_options(options)
File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/object_detector.py", line 238, in create_from_options
return cls(
File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/core/base_vision_task_api.py", line 70, in __init__
self._runner = _TaskRunner.create(graph_config, packet_callback)
RuntimeError: Input tensor has type float32: it requires specifying NormalizationOptions metadata to preprocess input images.
Can anyone help me resolve this?
Code:
import argparse
import sys
import time

import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from utils import visualize
from picamera2 import Picamera2

# Global variables to calculate FPS
COUNTER, FPS = 0, 0
START_TIME = time.time()

# The camera is configured and started once, at import time, and shared by run().
picam2 = Picamera2()
picam2.preview_configuration.main.size = (640, 480)
picam2.preview_configuration.main.format = "RGB888"
picam2.preview_configuration.align()
picam2.configure("preview")
picam2.start()


def run(model: str, max_results: int, score_threshold: float,
        camera_id: int, width: int, height: int) -> None:
    """Continuously run inference on images acquired from the camera.

    Frames are read from the module-level ``picam2`` instance, resized to
    ``(width, height)``, flipped on both axes and passed to a MediaPipe
    object detector running in live-stream mode. The annotated frame is
    shown in an OpenCV window until the ESC key is pressed.

    Args:
      model: Name of the TFLite object detection model.
      max_results: Max number of detection results.
      score_threshold: The score threshold of detection results.
      camera_id: The camera id. Unused here: frames come from ``picam2``,
        not from an OpenCV ``VideoCapture``. Kept for interface compatibility.
      width: The width the captured frame is resized to.
      height: The height the captured frame is resized to.

    Raises:
      RuntimeError: Propagated from MediaPipe when the detector cannot be
        created, e.g. for a float32-input model that lacks
        NormalizationOptions metadata.
    """

    # Visualization parameters
    row_size = 50  # pixels
    left_margin = 24  # pixels
    text_color = (0, 0, 0)  # black
    font_size = 1
    font_thickness = 1
    fps_avg_frame_count = 10

    detection_frame = None
    detection_result_list = []

    def save_result(result: vision.ObjectDetectorResult,
                    unused_output_image: mp.Image, timestamp_ms: int):
        """Store the latest detection result and refresh the FPS estimate."""
        global FPS, COUNTER, START_TIME

        # Calculate the FPS once every `fps_avg_frame_count` results.
        if COUNTER % fps_avg_frame_count == 0:
            FPS = fps_avg_frame_count / (time.time() - START_TIME)
            START_TIME = time.time()

        detection_result_list.append(result)
        COUNTER += 1

    # Initialize the object detection model.
    # NOTE: MediaPipe rejects float32-input models that carry no
    # NormalizationOptions metadata with a RuntimeError at the
    # create_from_options() call below.
    base_options = python.BaseOptions(model_asset_path=model)
    options = vision.ObjectDetectorOptions(
        base_options=base_options,
        running_mode=vision.RunningMode.LIVE_STREAM,
        max_results=max_results,
        score_threshold=score_threshold,
        result_callback=save_result)
    detector = vision.ObjectDetector.create_from_options(options)

    try:
        # Continuously capture images from the camera and run inference.
        while True:
            frame = picam2.capture_array()
            image = cv2.resize(frame, (width, height))
            image = cv2.flip(image, -1)

            # Convert the image from BGR to RGB as required by the TFLite model.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

            # Run object detection using the model (results arrive via callback).
            detector.detect_async(mp_image, time.time_ns() // 1_000_000)

            # Show the FPS
            fps_text = f'FPS = {FPS:.1f}'
            text_location = (left_margin, row_size)
            current_frame = image
            cv2.putText(current_frame, fps_text, text_location,
                        cv2.FONT_HERSHEY_DUPLEX, font_size, text_color,
                        font_thickness, cv2.LINE_AA)

            if detection_result_list:
                current_frame = visualize(current_frame, detection_result_list[0])
                detection_frame = current_frame
                detection_result_list.clear()

            if detection_frame is not None:
                cv2.imshow('object_detection', detection_frame)

            # Stop the program if the ESC key is pressed.
            if cv2.waitKey(1) == 27:
                break
    finally:
        # Release resources even if the loop raised. The original called
        # `cap.release()` on an undefined name; the camera in use is picam2.
        detector.close()
        picam2.stop()
        cv2.destroyAllWindows()


def main():
    """Parse command-line options and start the detection loop."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model',
        help='Path of the object detection model.',
        required=False,
        default='best_float32.tflite')
    parser.add_argument(
        '--maxResults',
        help='Max number of detection results.',
        required=False,
        type=int,
        default=5)
    parser.add_argument(
        '--scoreThreshold',
        help='The score threshold of detection results.',
        required=False,
        type=float,
        default=0.25)
    # Finding the camera ID can be very reliant on platform-dependent methods.
    # One common approach is to use the fact that camera IDs are usually
    # indexed sequentially by the OS, starting from 0.
    # Here, we use OpenCV and create a VideoCapture object for each potential
    # ID with 'cap = cv2.VideoCapture(i)'.
    # If 'cap' is None or not 'cap.isOpened()', it indicates the camera ID is
    # not available.
    parser.add_argument(
        '--cameraId', help='Id of camera.', required=False, type=int, default=0)
    parser.add_argument(
        '--frameWidth',
        help='Width of frame to capture from camera.',
        required=False,
        type=int,
        default=640)
    parser.add_argument(
        '--frameHeight',
        help='Height of frame to capture from camera.',
        required=False,
        type=int,
        default=480)
    args = parser.parse_args()

    run(args.model, int(args.maxResults),
        args.scoreThreshold, int(args.cameraId), args.frameWidth,
        args.frameHeight)


if __name__ == '__main__':
    main()
Traceback (most recent call last):
File "/home/delta/TFlite/detect1.py", line 154, in <module>
main()
File "/home/delta/TFlite/detect1.py", line 149, in main
run(args.model, int(args.maxResults),
File "/home/delta/TFlite/detect1.py", line 67, in run
detector = vision.ObjectDetector.create_from_options(options)
File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/object_detector.py", line 238, in create_from_options
return cls(
File "/home/delta/.local/lib/python3.11/site-packages/mediapipe/tasks/python/vision/core/base_vision_task_api.py", line 70, in __init__
self._runner = _TaskRunner.create(graph_config, packet_callback)
RuntimeError: Input tensor has type float32: it requires specifying NormalizationOptions metadata to preprocess input images.
Can anyone help me resolve this?
Statistics: Posted by Fineapple — Sat Feb 08, 2025 10:16 am