Busy building my new workspace but meanwhile I am playing with machine learning

Last Updated or created 2024-09-25

I needed more space for my business, so I moved to my big workshop space where our music studio was.

I’ve installed YOLO (v8) and used ChatGPT to generate an image containing many objects.

Installing Yolo:
See https://docs.ultralytics.com/quickstart/#install-ultralytics

Generated image

Using the Python script below, I get a text file listing the detections and an image with bounding boxes drawn on it.

import cv2
import random
from ultralytics import YOLO
# Detect objects in a single image with YOLOv8, write one line per detection
# to a text file and save a copy of the image with labelled bounding boxes.

# Load YOLOv8 model (nano weights)
model = YOLO('yolov8n.pt')

input_image_path = 'input.jpg'
output_txt_path = 'output.txt'
output_image_path = 'output_with_boxes.jpg'

image = cv2.imread(input_image_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than later inside cv2.rectangle.
if image is None:
    raise FileNotFoundError(f"Could not read input image: {input_image_path}")


def get_random_color():
    """Return a random colour as a list of three ints in the range 0..255."""
    return [random.randint(0, 255) for _ in range(3)]


# One fixed colour per class index so every box of a class looks the same.
class_colors = {i: get_random_color() for i in range(len(model.names))}

# Drawing style for the boxes and their labels.
box_thickness = 3  # Thicker rectangle
font_scale = 1.0  # Larger font size
font_thickness = 2  # Thicker font

results = model(input_image_path)

with open(output_txt_path, 'w') as f:
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])
            confidence = box.conf[0].item()
            # Corner coordinates (x1, y1, x2, y2) as a NumPy array.
            bbox = box.xyxy[0].cpu().numpy()
            class_name = model.names[cls]

            # Write text file
            f.write(f"Class: {class_name}, Confidence: {confidence:.2f}, BBox: {bbox}\n")

            color = class_colors[cls]
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))
            cv2.rectangle(image, top_left, bottom_right, color, box_thickness)

            # Label is drawn 10 px above the top-left corner of the box.
            label = f'{class_name} {confidence:.2f}'
            cv2.putText(image,
                        label,
                        (top_left[0], top_left[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, color, font_thickness)

# cv2.imwrite reports failure through its return value, not an exception.
if not cv2.imwrite(output_image_path, image):
    raise OSError(f"Could not write output image: {output_image_path}")

print(f"Detected objects saved to {output_txt_path}")
print(f"Output image with boxes saved to {output_image_path}")

Text file (the BBox values the script also writes are left out here)

Class: car, Confidence: 0.91
Class: car, Confidence: 0.90
Class: giraffe, Confidence: 0.90
Class: car, Confidence: 0.87
Class: bicycle, Confidence: 0.85
Class: person, Confidence: 0.77
Class: person, Confidence: 0.68
Class: bus, Confidence: 0.66
Class: sheep, Confidence: 0.64
Class: zebra, Confidence: 0.62
Class: umbrella, Confidence: 0.60
Class: bicycle, Confidence: 0.56
Class: umbrella, Confidence: 0.54
Class: airplane, Confidence: 0.52
Class: person, Confidence: 0.51
Class: person, Confidence: 0.48
Class: bicycle, Confidence: 0.44
Class: person, Confidence: 0.43
Class: stop sign, Confidence: 0.40
Class: umbrella, Confidence: 0.39
Class: motorcycle, Confidence: 0.39
Class: bicycle, Confidence: 0.38
Class: person, Confidence: 0.37
Class: person, Confidence: 0.35
Class: teddy bear, Confidence: 0.29
Class: truck, Confidence: 0.27
Class: airplane, Confidence: 0.26
Class: bus, Confidence: 0.25
Class: person, Confidence: 0.25

Code for real-time detection using a webcam.

from ultralytics import YOLO
import cv2
import math 
# Real-time object detection: grab frames from the default webcam, run
# YOLOv8 on each frame, draw the detections and show them until 'q' is pressed.

# start webcam (device 0) and request a 640x480 frame size
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # property id 3
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # property id 4
# model
model = YOLO("yolo-Weights/yolov8n.pt")
# object classes, indexed by the class id reported for each box
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Drawing style; constant for every frame, so defined once outside the loop.
box_color = (255, 0, 255)
box_thickness = 3
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

try:
    while True:
        success, img = cap.read()
        # A failed grab (camera missing, unplugged, or busy) leaves no usable
        # frame; stop instead of passing it on to the model and imshow.
        if not success:
            print("Could not read a frame from the webcam")
            break

        results = model(img, stream=True)
        # coordinates
        for r in results:
            for box in r.boxes:
                # bounding box corners, converted to int pixel values
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                # put box in cam
                cv2.rectangle(img, (x1, y1), (x2, y2), box_color, box_thickness)
                # confidence, rounded up to two decimals
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)
                # class name
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])
                # object details: label anchored at the box's top-left corner
                cv2.putText(img, classNames[cls], (x1, y1), font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if detection raised.
    cap.release()
    cv2.destroyAllWindows()

Note: the generated picture is not perfect (see the zebra), and this affects the detection results.

Spread the love

Leave a Reply

Your email address will not be published. Required fields are marked *