I needed more space for my business, so I moved to my big workshop space where our music studio was.
I installed YOLO (v8) and used ChatGPT to generate a test image containing many objects.
Installing Yolo:
See https://docs.ultralytics.com/quickstart/#install-ultralytics
Using the Python script below, I get a text file listing the detections and a copy of the image with bounding boxes drawn on it.
import random

import cv2
from ultralytics import YOLO

# Run YOLOv8 on a single image, write every detection to a text file and
# save a copy of the image with labelled bounding boxes drawn on it.

# Load YOLOv8 model
model = YOLO('yolov8n.pt')

input_image_path = 'input.jpg'
image = cv2.imread(input_image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise only fail later inside the drawing calls.
if image is None:
    raise FileNotFoundError(f"Could not read input image: {input_image_path}")


def get_random_color():
    """Return a random colour as a list of three ints in the range 0-255."""
    return [random.randint(0, 255) for _ in range(3)]


# One fixed colour per class index so all boxes of a class look the same.
class_colors = {i: get_random_color() for i in range(len(model.names))}

results = model(input_image_path)

output_txt_path = 'output.txt'
font_scale = 1.0  # Larger font size
font_thickness = 2  # Thicker font

with open(output_txt_path, 'w') as f:
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])
            confidence = box.conf[0].item()
            bbox = box.xyxy[0].cpu().numpy()
            class_name = model.names[cls]

            # Write text file
            f.write(f"Class: {class_name}, Confidence: {confidence:.2f}, BBox: {bbox}\n")

            # Box corners as integer pixel coordinates (x1, y1) - (x2, y2).
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))

            color = class_colors[cls]
            cv2.rectangle(image, top_left, bottom_right, color, 3)  # Thicker rectangle

            # Label is drawn 10 px above the top-left corner of the box.
            label = f'{class_name} {confidence:.2f}'
            cv2.putText(image, label, (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, font_thickness)

output_image_path = 'output_with_boxes.jpg'
cv2.imwrite(output_image_path, image)

print(f"Detected objects saved to {output_txt_path}")
print(f"Output image with boxes saved to {output_image_path}")
Text file
Class: car, Confidence: 0.91
Class: car, Confidence: 0.90
Class: giraffe, Confidence: 0.90
Class: car, Confidence: 0.87
Class: bicycle, Confidence: 0.85
Class: person, Confidence: 0.77
Class: person, Confidence: 0.68
Class: bus, Confidence: 0.66
Class: sheep, Confidence: 0.64
Class: zebra, Confidence: 0.62
Class: umbrella, Confidence: 0.60
Class: bicycle, Confidence: 0.56
Class: umbrella, Confidence: 0.54
Class: airplane, Confidence: 0.52
Class: person, Confidence: 0.51
Class: person, Confidence: 0.48
Class: bicycle, Confidence: 0.44
Class: person, Confidence: 0.43
Class: stop sign, Confidence: 0.40
Class: umbrella, Confidence: 0.39
Class: motorcycle, Confidence: 0.39
Class: bicycle, Confidence: 0.38
Class: person, Confidence: 0.37
Class: person, Confidence: 0.35
Class: teddy bear, Confidence: 0.29
Class: truck, Confidence: 0.27
Class: airplane, Confidence: 0.26
Class: bus, Confidence: 0.25
Class: person, Confidence: 0.25
Code for real-time detection using a webcam.
import math

import cv2
from ultralytics import YOLO

# Real-time object detection: grab frames from the default webcam, run
# YOLOv8 on each frame, draw the detections and show the result in a window.
# Press 'q' in the window to quit.

# start webcam
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# model
model = YOLO("yolo-Weights/yolov8n.pt")

# object classes
# NOTE(review): a few entries here ("motorbike", "aeroplane", ...) are spelled
# differently from the labels seen in the still-image output ("motorcycle",
# "airplane"); confirm whether model.names should be used instead.
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Text-drawing settings are the same for every detection, so set them once.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

try:
    while True:
        success, img = cap.read()
        # cap.read() reports failure via the flag (img is then unusable),
        # e.g. when no camera is attached or the stream ended.
        if not success:
            print("Failed to read a frame from the webcam")
            break

        results = model(img, stream=True)

        # coordinates
        for r in results:
            for box in r.boxes:
                # bounding box
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # convert to int values

                # put box in cam
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

                # confidence, rounded up to two decimals
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->", confidence)

                # class name
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # object details: label anchored at the box's top-left corner
                org = [x1, y1]
                cv2.putText(img, classNames[cls], org, font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()
Note: the generated picture is not perfect (see the zebra, for example), and this affects the AI's detection output.