Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Standard_Model.pt
Binary file not shown.
184 changes: 184 additions & 0 deletions yolo_detect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import os
# Configure OpenCV's FFMPEG backend for RTSP capture: force TCP transport and
# zero max delay. Set via the environment before cv2 is imported below so the
# backend reads it at load time — NOTE(review): only relevant when the RTSP
# source option is used.
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;tcp|max_delay;0"
import sys
import argparse
import glob
import time

import cv2
import numpy as np
from ultralytics import YOLO

# --- User configuration ----------------------------------------------------
# Trained YOLO weights to load.
model_path = "Standard_Model.pt"

# Image source: "usbN" selects USB camera index N; an RTSP URL streams video.
img_source = "usb0"            # Use this for USB CAM
# img_source = "rtsp://"       # Use this for RTSP

min_thresh = 0.50              # minimum confidence required to draw a detection
user_res = "1260x720"          # display resolution as "WIDTHxHEIGHT" ("" = native)
record = False                 # write annotated frames to a video file

source_type = "usb"            # Use this for USB CAM
# source_type = "video"        # Use this for RTSP

# Verify the model weights file exists before attempting to load it.
if not os.path.exists(model_path):
    print('ERROR: Model path is invalid or model was not found. Make sure the model filename was entered correctly.')
    sys.exit(1)  # non-zero exit status: this is an error, not a normal exit

# Load the model into memory and get its label map (class index -> name).
model = YOLO(model_path, task='detect')
labels = model.names

# The camera index is encoded in the source string ("usb0" -> 0). Only parse
# it for USB sources; with the RTSP configuration, int("p://...") would raise.
usb_idx = int(img_source[3:]) if source_type == 'usb' else None

# Parse user-specified display resolution ("WIDTHxHEIGHT").
resize = False
if user_res:
    resize = True
    resW, resH = map(int, user_res.split('x'))

# Check if recording is valid and set up recording
if record:
    # Recording needs a known frame size: without user_res, resW/resH are
    # undefined and the VideoWriter construction below would crash.
    if not user_res:
        print('ERROR: Please specify a resolution (user_res) to record video at.')
        sys.exit(1)
    record_name = 'demo1.avi'   # output file name
    record_fps = 30             # fixed nominal framerate (not measured)
    recorder = cv2.VideoWriter(record_name, cv2.VideoWriter_fourcc(*'MJPG'), record_fps, (resW, resH))

# Load or initialize image source
if source_type == 'video' or source_type == 'usb':

    # A video source opens by path/URL, a USB camera by integer index.
    cap_arg = img_source if source_type == 'video' else usb_idx
    cap = cv2.VideoCapture(cap_arg)

    # Request the desired capture resolution using named property constants
    # (the original used the magic ids 3 and 4). The request is advisory —
    # the driver may ignore it, so frames are also resized per-frame below.
    if user_res:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, resW)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, resH)

# Set bounding box colors (using the Tableau 10 color scheme)
bbox_colors = [(164,120,87), (68,148,228), (93,97,209), (178,182,133), (88,159,106),
               (96,202,231), (159,124,168), (169,162,241), (98,118,150), (172,176,184)]

# Initialize control and status variables
avg_frame_rate = 0          # rolling-average FPS drawn on the output frame
frame_rate_buffer = []      # recent per-frame FPS samples
fps_avg_len = 200           # number of samples kept in the rolling average
img_count = 0               # index into the image list for image/folder sources

# Begin inference loop
while True:

t_start = time.perf_counter()

# Load frame from image source
if source_type == 'image' or source_type == 'folder': # If source is image or image folder, load the image using its filename
if img_count >= len(imgs_list):
print('All images have been processed. Exiting program.')
sys.exit(0)
img_filename = imgs_list[img_count]
frame = cv2.imread(img_filename)
img_count = img_count + 1

elif source_type == 'video': # If source is a video, load next frame from video file
ret, frame = cap.read()
if not ret:
print('Reached end of the video file. Exiting program.')
break

elif source_type == 'usb': # If source is a USB camera, grab frame from camera
ret, frame = cap.read()
if (frame is None) or (not ret):
print('Unable to read frames from the camera. This indicates the camera is disconnected or not working. Exiting program.')
break

elif source_type == 'picamera': # If source is a Picamera, grab frames using picamera interface
frame = cap.capture_array()
if (frame is None):
print('Unable to read frames from the Picamera. This indicates the camera is disconnected or not working. Exiting program.')
break

# Resize frame to desired display resolution
if resize == True:
frame = cv2.resize(frame,(resW,resH))

# Run inference on frame
results = model(frame, verbose=False)

# Extract results
detections = results[0].boxes

# Initialize variable for basic object counting example
object_count = 0

# Go through each detection and get bbox coords, confidence, and class
for i in range(len(detections)):

# Get bounding box coordinates
# Ultralytics returns results in Tensor format, which have to be converted to a regular Python array
xyxy_tensor = detections[i].xyxy.cpu() # Detections in Tensor format in CPU memory
xyxy = xyxy_tensor.numpy().squeeze() # Convert tensors to Numpy array
xmin, ymin, xmax, ymax = xyxy.astype(int) # Extract individual coordinates and convert to int

# Get bounding box class ID and name
classidx = int(detections[i].cls.item())
classname = labels[classidx]

# Get bounding box confidence
conf = detections[i].conf.item()

# Draw box if confidence threshold is high enough
if conf > min_thresh:

color = bbox_colors[classidx % 10]
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), color, 2)

label = f'{classname}: {int(conf*100)}%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), color, cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) # Draw label text

# Basic example: count the number of objects in the image
object_count = object_count + 1

# Calculate and draw framerate (if using video, USB, or Picamera source)
if source_type == 'video' or source_type == 'usb':
cv2.putText(frame, f'FPS: {avg_frame_rate:0.2f}', (10,20), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,255,255), 2) # Draw framerate

# Display detection results
cv2.putText(frame, f'Number of objects: {object_count}', (10,40), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,255,255), 2) # Draw total number of detected objects
cv2.imshow('YOLO detection results',frame) # Display image
if record: recorder.write(frame)

# If inferencing on individual images, wait for user keypress before moving to next image. Otherwise, wait 5ms before moving to next frame.
if source_type == 'video' or source_type == 'usb':
key = cv2.waitKey(5)

if key == ord('q') or key == ord('Q'): # Press 'q' to quit
break
elif key == ord('s') or key == ord('S'): # Press 's' to pause inference
cv2.waitKey()
elif key == ord('p') or key == ord('P'): # Press 'p' to save a picture of results on this frame
cv2.imwrite('capture.png',frame)

# Calculate FPS for this frame
t_stop = time.perf_counter()
frame_rate_calc = float(1/(t_stop - t_start))

# Append FPS result to frame_rate_buffer (for finding average FPS over multiple frames)
if len(frame_rate_buffer) >= fps_avg_len:
temp = frame_rate_buffer.pop(0)
frame_rate_buffer.append(frame_rate_calc)
else:
frame_rate_buffer.append(frame_rate_calc)

# Calculate average FPS for past frames
avg_frame_rate = np.mean(frame_rate_buffer)

# --- Shutdown: report the measured framerate and release all resources ------
print(f'Average pipeline FPS: {avg_frame_rate:.2f}')
if source_type in ('video', 'usb'):
    cap.release()
if record:
    recorder.release()
cv2.destroyAllWindows()