Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Standard_Model.pt
Binary file not shown.
184 changes: 184 additions & 0 deletions yolo_detect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import os
# Configure OpenCV's FFMPEG backend for RTSP capture: force TCP transport and
# zero max delay. Set via the environment before cv2 is imported below so the
# backend reads it at load time — NOTE(review): only relevant when the RTSP
# source option is used.
os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;tcp|max_delay;0"
import sys
import argparse
import glob
import time

import cv2
import numpy as np
from ultralytics import YOLO

# --- User configuration ----------------------------------------------------
# Trained YOLO weights to load.
model_path = "Standard_Model.pt"

# Image source: "usbN" selects USB camera index N; an RTSP URL streams video.
img_source = "usb0"            # Use this for USB CAM
# img_source = "rtsp://"       # Use this for RTSP

min_thresh = 0.50              # minimum confidence required to draw a detection
user_res = "1260x720"          # display resolution as "WIDTHxHEIGHT" ("" = native)
record = False                 # write annotated frames to a video file

source_type = "usb"            # Use this for USB CAM
# source_type = "video"        # Use this for RTSP

# Verify the model weights file exists before attempting to load it.
if not os.path.exists(model_path):
    print('ERROR: Model path is invalid or model was not found. Make sure the model filename was entered correctly.')
    sys.exit(1)  # non-zero exit status: this is an error, not a normal exit

# Load the model into memory and get its label map (class index -> name).
model = YOLO(model_path, task='detect')
labels = model.names

# The camera index is encoded in the source string ("usb0" -> 0). Only parse
# it for USB sources; with the RTSP configuration, int("p://...") would raise.
usb_idx = int(img_source[3:]) if source_type == 'usb' else None

# Parse user-specified display resolution ("WIDTHxHEIGHT").
resize = False
if user_res:
    resize = True
    resW, resH = map(int, user_res.split('x'))

# Check if recording is valid and set up recording
if record:
    # Recording needs a known frame size: without user_res, resW/resH are
    # undefined and the VideoWriter construction below would crash.
    if not user_res:
        print('ERROR: Please specify a resolution (user_res) to record video at.')
        sys.exit(1)
    record_name = 'demo1.avi'   # output file name
    record_fps = 30             # fixed nominal framerate (not measured)
    recorder = cv2.VideoWriter(record_name, cv2.VideoWriter_fourcc(*'MJPG'), record_fps, (resW, resH))

# Load or initialize image source
if source_type == 'video' or source_type == 'usb':

    # A video source opens by path/URL, a USB camera by integer index.
    cap_arg = img_source if source_type == 'video' else usb_idx
    cap = cv2.VideoCapture(cap_arg)

    # Request the desired capture resolution using named property constants
    # (the original used the magic ids 3 and 4). The request is advisory —
    # the driver may ignore it, so frames are also resized per-frame below.
    if user_res:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, resW)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, resH)

# Set bounding box colors (using the Tableau 10 color scheme)
bbox_colors = [(164,120,87), (68,148,228), (93,97,209), (178,182,133), (88,159,106),
               (96,202,231), (159,124,168), (169,162,241), (98,118,150), (172,176,184)]

# Initialize control and status variables
avg_frame_rate = 0          # rolling-average FPS drawn on the output frame
frame_rate_buffer = []      # recent per-frame FPS samples
fps_avg_len = 200           # number of samples kept in the rolling average
img_count = 0               # index into the image list for image/folder sources

# Begin inference loop
while True:

t_start = time.perf_counter()

# Load frame from image source
if source_type == 'image' or source_type == 'folder': # If source is image or image folder, load the image using its filename
if img_count >= len(imgs_list):
print('All images have been processed. Exiting program.')
sys.exit(0)
img_filename = imgs_list[img_count]
frame = cv2.imread(img_filename)
img_count = img_count + 1

elif source_type == 'video': # If source is a video, load next frame from video file
ret, frame = cap.read()
if not ret:
print('Reached end of the video file. Exiting program.')
break

elif source_type == 'usb': # If source is a USB camera, grab frame from camera
ret, frame = cap.read()
if (frame is None) or (not ret):
print('Unable to read frames from the camera. This indicates the camera is disconnected or not working. Exiting program.')
break

elif source_type == 'picamera': # If source is a Picamera, grab frames using picamera interface
frame = cap.capture_array()
if (frame is None):
print('Unable to read frames from the Picamera. This indicates the camera is disconnected or not working. Exiting program.')
break

# Resize frame to desired display resolution
if resize == True:
frame = cv2.resize(frame,(resW,resH))

# Run inference on frame
results = model(frame, verbose=False)

# Extract results
detections = results[0].boxes

# Initialize variable for basic object counting example
object_count = 0

# Go through each detection and get bbox coords, confidence, and class
for i in range(len(detections)):

# Get bounding box coordinates
# Ultralytics returns results in Tensor format, which have to be converted to a regular Python array
xyxy_tensor = detections[i].xyxy.cpu() # Detections in Tensor format in CPU memory
xyxy = xyxy_tensor.numpy().squeeze() # Convert tensors to Numpy array
xmin, ymin, xmax, ymax = xyxy.astype(int) # Extract individual coordinates and convert to int

# Get bounding box class ID and name
classidx = int(detections[i].cls.item())
classname = labels[classidx]

# Get bounding box confidence
conf = detections[i].conf.item()

# Draw box if confidence threshold is high enough
if conf > min_thresh:

color = bbox_colors[classidx % 10]
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), color, 2)

label = f'{classname}: {int(conf*100)}%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), color, cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) # Draw label text

# Basic example: count the number of objects in the image
object_count = object_count + 1

# Calculate and draw framerate (if using video, USB, or Picamera source)
if source_type == 'video' or source_type == 'usb':
cv2.putText(frame, f'FPS: {avg_frame_rate:0.2f}', (10,20), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,255,255), 2) # Draw framerate

# Display detection results
cv2.putText(frame, f'Number of objects: {object_count}', (10,40), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,255,255), 2) # Draw total number of detected objects
cv2.imshow('YOLO detection results',frame) # Display image
if record: recorder.write(frame)

# If inferencing on individual images, wait for user keypress before moving to next image. Otherwise, wait 5ms before moving to next frame.
if source_type == 'video' or source_type == 'usb':
key = cv2.waitKey(5)

if key == ord('q') or key == ord('Q'): # Press 'q' to quit
break
elif key == ord('s') or key == ord('S'): # Press 's' to pause inference
cv2.waitKey()
elif key == ord('p') or key == ord('P'): # Press 'p' to save a picture of results on this frame
cv2.imwrite('capture.png',frame)

# Calculate FPS for this frame
t_stop = time.perf_counter()
frame_rate_calc = float(1/(t_stop - t_start))

# Append FPS result to frame_rate_buffer (for finding average FPS over multiple frames)
if len(frame_rate_buffer) >= fps_avg_len:
temp = frame_rate_buffer.pop(0)
frame_rate_buffer.append(frame_rate_calc)
else:
frame_rate_buffer.append(frame_rate_calc)

# Calculate average FPS for past frames
avg_frame_rate = np.mean(frame_rate_buffer)

# --- Shutdown: report the measured framerate and release all resources ------
print(f'Average pipeline FPS: {avg_frame_rate:.2f}')
if source_type in ('video', 'usb'):
    cap.release()
if record:
    recorder.release()
cv2.destroyAllWindows()