commit 98103806ef64feaf9feba7c27ada5526f1a8f955 Author: Joydeep Pandey Date: Mon Feb 9 12:50:10 2026 +0530 ini diff --git a/GUIDE_TRAIN_TEST_YOLO.md b/GUIDE_TRAIN_TEST_YOLO.md new file mode 100644 index 0000000..dfedae1 --- /dev/null +++ b/GUIDE_TRAIN_TEST_YOLO.md @@ -0,0 +1,112 @@ +# YOLOv8 Training & Testing Guide + +This guide details how to prepare your dataset, train the YOLOv8 model for Potholes and Road Signs, and test the trained model. + +## 1. Dataset Preparation + +YOLOv8 requires data in a specific format. + +### A. Data Structure + +Organize your dataset folder like this: + +``` +datasets/ + road_signs_potholes/ + train/ + images/ + img1.jpg + ... + labels/ + img1.txt + ... + val/ + images/ + ... + labels/ + ... + data.yaml +``` + +### B. Label Format + +Each `.txt` file in `labels/` corresponds to an image. +Format: `class_id center_x center_y width height` (normalized 0-1). + +Example `img1.txt`: + +``` +0 0.5 0.5 0.2 0.3 +1 0.1 0.1 0.05 0.1 +``` + +**Class IDs Mapping (Example):** + +- `0`: Traffic Sign +- `1`: Pothole +- `2`: Manhole + +### C. Creating `data.yaml` + +Create a `data.yaml` file inside your dataset folder (or anywhere accessible). + +```yaml +path: ../datasets/road_signs_potholes # dataset root dir +train: train/images # train images (relative to 'path') +val: val/images # val images (relative to 'path') + +# Classes +names: + 0: Traffic Sign + 1: Pothole + 2: Manhole +``` + +## 2. Training the Model + +We have provided a script `backend/models/train_yolo.py`. + +**Command:** +Open your terminal in `d:\Time-Pass-Projects\pothole-roadsign detection`. + +```bash +# Activate your environment if needed +python backend/models/train_yolo.py +``` + +_Note: You will need to edit `backend/models/train_yolo.py` slightly to point to your actual `data.yaml` path if you haven't already, or pass it as an argument if you modify the script to accept args._ + +**Training Process:** + +1. The script downloads `yolov8n.pt` (nano) as a starting point. 
+2. It runs for 50 epochs (adjustable). +3. **Result:** Weights are saved in `runs/detect/train/weights/best.pt`. + +## 3. Testing the Model + +Once trained, you should test it visually. + +### A. Locate your specific model + +Find `runs/detect/trainX/weights/best.pt`. +Copy this file to `backend/models/best.pt` (or update paths in scripts). + +### B. Run the Test Script + +We have created `backend/test_model.py` for quick verification. + +```bash +python backend/test_model.py +``` + +_Make sure to update the `video_path` or `image_path` in `test_model.py` to point to a real file._ + +## 4. Integration + +After you are satisfied with `best.pt`: + +1. Move `best.pt` to `backend/models/`. +2. Updates `backend/pipelines/video_processor.py` line 10: + ```python + self.yolo = YOLOManager("backend/models/best.pt") + ``` diff --git a/backend/.gitignore b/backend/.gitignore new file mode 100644 index 0000000..f9606a3 --- /dev/null +++ b/backend/.gitignore @@ -0,0 +1 @@ +/venv diff --git a/backend/README.md b/backend/README.md new file mode 100644 index 0000000..c3ea84e --- /dev/null +++ b/backend/README.md @@ -0,0 +1,43 @@ +# Pothole & Road Sign Detection Backend + +This project implements a Two-Stage Detection Pipeline using YOLOv8 (for detection & tracking) and CLIP (for zero-shot classification). + +## Setup + +1. **Install Dependencies**: + ```bash + pip install -r backend/requirements.txt + ``` +2. **Training YOLO (Crucial Step)**: + You MUST train a YOLO model on your custom dataset (Pothole, Manhole, Traffic Signs) for this to work effectively. + - Prepare your `dataset.yaml`. + - Run the training script: + ```python + from backend.models.train_yolo import train_yolo + train_yolo("path/to/dataset.yaml", epochs=50) + ``` + + - This will generate `best.pt`. Move this file to `backend/models/best.pt`. + +## Running the API + +Start the FastAPI server: + +```bash +cd backend +python main.py +``` + +The server will start at `http://0.0.0.0:8000`. 
+
+## API Usage
+
+**Endpoint**: `POST /detect/video`
+
+- **Body**: `multipart/form-data`, key `file` (Video file).
+- **Response**: JSON summary of unique objects detected.
+
+## Configuration
+
+- Modify `backend/pipelines/video_processor.py` to change the `yolo_model_path` to your trained model path (e.g., `backend/models/best.pt`).
+- You can also adjust the CLIP candidate labels in `VideoProcessor.__init__`.
diff --git a/backend/main.py b/backend/main.py
new file mode 100644
index 0000000..08e2030
--- /dev/null
+++ b/backend/main.py
@@ -0,0 +1,59 @@
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import JSONResponse
+import shutil
+import os
+import uuid
+from backend.pipelines.video_processor import VideoProcessor
+
+app = FastAPI(title="Pothole & Road Sign Detection API")
+
+# Initialize Processor (Loading models takes time, do it on startup)
+# In production, use lifespan events or dependency injection
+print("Initializing Video Processor...")
+try:
+    processor = VideoProcessor()
+except Exception as e:
+    print(f"Warning: Could not initialize processor (Check model paths). Error: {e}")
+    processor = None
+
+UPLOAD_DIR = "uploads"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+@app.get("/")
+def health_check():
+    # Reports whether the VideoProcessor (YOLO + CLIP) initialized successfully.
+    return {"status": "running", "models_loaded": processor is not None}
+
+@app.post("/detect/video")
+async def detect_video(file: UploadFile = File(...)):
+    if processor is None:
+        raise HTTPException(status_code=503, detail="Models not loaded.")
+
+    # Save uploaded file under a unique name to avoid collisions
+    file_id = str(uuid.uuid4())
+    file_location = os.path.join(UPLOAD_DIR, f"{file_id}_{file.filename}")
+
+    with open(file_location, "wb") as buffer:
+        shutil.copyfileobj(file.file, buffer)
+
+    try:
+        # Run processing
+        results = processor.process_video(file_location)
+
+        # Cleanup
+        os.remove(file_location)
+
+        return {
+            "video_id": file_id,
+            "processed": True,
+            "unique_objects": results
+        }
+    except Exception as e:
+        # Cleanup on error
+        if os.path.exists(file_location):
+            os.remove(file_location)
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/backend/models/clip_manager.py b/backend/models/clip_manager.py
new file mode 100644
index 0000000..42f648c
--- /dev/null
+++ b/backend/models/clip_manager.py
@@ -0,0 +1,62 @@
+from transformers import CLIPProcessor, CLIPModel
+import torch
+from PIL import Image
+
+class CLIPManager:
+    def __init__(self, model_id: str = "openai/clip-vit-base-patch32"):
+        """
+        Initializes the CLIP model and processor.
+
+        Args:
+            model_id (str): Hugging Face model ID.
+        """
+        print(f"Loading CLIP model: {model_id}...")
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.model = CLIPModel.from_pretrained(model_id).to(self.device)
+        self.processor = CLIPProcessor.from_pretrained(model_id)
+        print(f"CLIP loaded on {self.device}.")
+
+    def classify_image(self, image: Image.Image, candidate_labels: list[str]):
+        """
+        Classifies an image against a list of text labels.
+
+        Args:
+            image (PIL.Image): The cropped image to classify.
+            candidate_labels (list[str]): List of strings to compare against.
+
+        Returns:
+            dict: {label: score} sorted by confidence.
+        """
+        if not candidate_labels:
+            return {}
+
+        inputs = self.processor(text=candidate_labels, images=image, return_tensors="pt", padding=True).to(self.device)
+
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+
+        logits_per_image = outputs.logits_per_image  # image-text similarity scores
+        probs = logits_per_image.softmax(dim=1)  # softmax over labels -> probabilities
+
+        # Convert to dictionary
+        scores = probs.cpu().numpy()[0]
+        result = {label: float(score) for label, score in zip(candidate_labels, scores)}
+
+        # Sort by score descending
+        sorted_result = dict(sorted(result.items(), key=lambda item: item[1], reverse=True))
+        return sorted_result
+
+    def get_best_match(self, image: Image.Image, candidate_labels: list[str], threshold: float = 0.5):
+        """
+        Returns the single best match if it exceeds the threshold.
+
+        Returns ("Uncertain", score) when the top score is below the
+        threshold, and (None, 0.0) when no labels were supplied.
+        """
+        results = self.classify_image(image, candidate_labels)
+        if not results:
+            return None, 0.0
+
+        # classify_image returns a dict sorted descending, so the first
+        # entry is the best match.
+        best_label, best_score = next(iter(results.items()))
+
+        if best_score >= threshold:
+            return best_label, best_score
+        return "Uncertain", best_score
diff --git a/backend/models/train_yolo.py b/backend/models/train_yolo.py
new file mode 100644
index 0000000..8be0cdb
--- /dev/null
+++ b/backend/models/train_yolo.py
@@ -0,0 +1,39 @@
+from ultralytics import YOLO
+
+def train_yolo(data_yaml_path: str, model_size: str = "yolov8n.pt", epochs: int = 50):
+    """
+    Trains a YOLOv8 model on a custom dataset.
+
+    Args:
+        data_yaml_path (str): Path to the dataset.yaml file.
+        model_size (str): Pre-trained model to start from (e.g., yolov8n.pt, yolov8s.pt).
+        epochs (int): Number of training epochs.
+    """
+    print(f"Loading {model_size}...")
+    model = YOLO(model_size)
+
+    print(f"Starting training for {epochs} epochs using {data_yaml_path}...")
+    model.train(data=data_yaml_path, epochs=epochs, imgsz=640)
+
+    print("Training complete. Validating...")
+    metrics = model.val()
+    print(f"Validation metrics: {metrics}")
+
+    print("Exporting model...")
+    path = model.export(format="onnx")
+    print(f"Model exported to {path}")
+
+if __name__ == "__main__":
+    # Example usage:
+    # Ensure you have a data.yaml file configured for your dataset
+    # train_yolo("path/to/data.yaml")
+
+    # Use relative path from where user is running (backend folder)
+    # They are running from 'backend', so dataset is at '../datasets/...'
+    dataset_path = "../datasets/road_signs_potholes/data.yaml"
+
+    # Or absolute path if needed:
+    # dataset_path = "d:/Time-Pass-Projects/pothole-roadsign detection/datasets/road_signs_potholes/data.yaml"
+
+    print(f"Using dataset: {dataset_path}")
+    train_yolo(dataset_path, epochs=100)  # Increased epochs for better results on small data
diff --git a/backend/models/yolo_manager.py b/backend/models/yolo_manager.py
new file mode 100644
index 0000000..f75d402
--- /dev/null
+++ b/backend/models/yolo_manager.py
@@ -0,0 +1,35 @@
+from ultralytics import YOLO
+import cv2
+import numpy as np
+
+class YOLOManager:
+    def __init__(self, model_path: str = "yolov8n.pt"):
+        """
+        Initializes the YOLO model for inference.
+
+        Args:
+            model_path (str): Path to the trained YOLO model weights (.pt file).
+        """
+        print(f"Loading YOLO model from {model_path}...")
+        self.model = YOLO(model_path)
+
+    def track(self, frame, conf: float = 0.25, iou: float = 0.5):
+        """
+        Runs YOLO tracking on a single frame.
+
+        Args:
+            frame: Numpy array (image).
+            conf (float): Confidence threshold.
+            iou (float): IoU threshold.
+
+        Returns:
+            Results object from Ultralytics.
+ """ + # persist=True is crucial for tracking to work across frames + results = self.model.track(frame, persist=True, conf=conf, iou=iou, tracker="bytetrack.yaml", verbose=False) + return results[0] + + def detect(self, frame): + """Standard detection without tracking.""" + results = self.model.predict(frame, verbose=False) + return results[0] diff --git a/backend/pipelines/video_processor.py b/backend/pipelines/video_processor.py new file mode 100644 index 0000000..9b56b07 --- /dev/null +++ b/backend/pipelines/video_processor.py @@ -0,0 +1,165 @@ +import cv2 +import time +from collections import defaultdict +from backend.models.yolo_manager import YOLOManager +from backend.models.clip_manager import CLIPManager +from backend.utils.image_utils import is_blurry, crop_image, convert_cv2_to_pil + +class VideoProcessor: + def __init__(self, yolo_model_path="yolov8n.pt", clip_model_id="openai/clip-vit-base-patch32"): + self.yolo = YOLOManager(yolo_model_path) + self.clip = CLIPManager(clip_model_id) + + # Buffer to store the best shot for each track ID + # Format: {track_id: {'crop': np.array, 'area': float, 'frame_idx': int, 'bbox': list}} + self.active_tracks = {} + + # Store final results + self.final_results = [] + + # CLIP Candidates + self.pothole_labels = ["pothole", "shadow", "patch work", "manhole", "road crack"] + self.sign_labels = ["stop sign", "yield sign", "speed limit 30", "speed limit 40", "speed limit 50", "speed limit 60", "pedestrian crossing", "no u-turn", "traffic light", "keep right"] + + # Frame counter + self.frame_count = 0 + + def process_video(self, video_path: str): + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print(f"Error opening video: {video_path}") + return [] + + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + print(f"Processing video {video_path} ({width}x{height})...") + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + self.frame_count 
+= 1 + + # 1. Run YOLO Tracking + results = self.yolo.track(frame) + + if results.boxes is None or results.boxes.id is None: + continue + + boxes = results.boxes.xyxy.cpu().numpy() + track_ids = results.boxes.id.cpu().numpy() + class_ids = results.boxes.cls.cpu().numpy() # 0, 1, 2 depending on training + + current_frame_ids = set() + + for box, track_id, cls in zip(boxes, track_ids, class_ids): + track_id = int(track_id) + current_frame_ids.add(track_id) + + x1, y1, x2, y2 = box + w_box = x2 - x1 + h_box = y2 - y1 + area = w_box * h_box + + # Check if this is the "best shot" so far + if track_id not in self.active_tracks: + self.active_tracks[track_id] = { + 'crop': crop_image(frame, box), + 'area': area, + 'frame_idx': self.frame_count, + 'class_id': int(cls), + 'bbox': box, + 'processed': False + } + else: + # Update if bigger area and not processed yet + if area > self.active_tracks[track_id]['area'] and not self.active_tracks[track_id]['processed']: + self.active_tracks[track_id].update({ + 'crop': crop_image(frame, box), + 'area': area, + 'frame_idx': self.frame_count, + 'bbox': box + }) + + # Trigger Classification if object is near the edge (leaving frame) + # Margin of 50 pixels + if x1 < 50 or y1 < 50 or x2 > width - 50 or y2 > height - 50: + self._classify_and_store(track_id) + + # Cleanup tracks that are no longer present (Exited frame) + # Identify tracks in self.active_tracks that are NOT in current_frame_ids + # We need to be careful not to classify already processed/removed tracks + # But here we execute the "Trigger A" + + # Simple approach: Check all active tracks. If a track was seen recently but not now, assume it left? + # Better approach for simplicity: We do classification on 'processed' flag or when finalizing. + # Real ByteTrack keeps tracks 'lost' for some frames. + # Here we will iterate existing keys and if not in current_frame, classify. 
+ + # Cleanup tracks that are no longer present (Exited frame) + # We iterate over a copy of keys to avoid RuntimeError + for tid in list(self.active_tracks.keys()): + if tid not in current_frame_ids: + # It's gone from view (or mostly gone) + self._classify_and_store(tid) + # Remove from active tracks to save memory + if self.active_tracks[tid].get('processed'): + del self.active_tracks[tid] + + cap.release() + + # Process any remaining tracks + for tid in list(self.active_tracks.keys()): + self._classify_and_store(tid) + + print("Processing complete.") + return self.final_results + + def _classify_and_store(self, track_id): + track_data = self.active_tracks.get(track_id) + if not track_data or track_data.get('processed'): + return + + crop = track_data['crop'] + + # Blur check - if too blurry, maybe skip or mark low confidence? + # For now, we process anyway but could log it. + # if is_blurry(crop): ... + + # Prepare for CLIP + pil_image = convert_cv2_to_pil(crop) + + # Deciding which labels to use based on YOLO class + # Assuming YOLO classes: 0=Sign, 1=Pothole/Manhole (Just an example schema) + # You'd need to map this to your specific training. + # For this logic, let's try both or fallback. + + # Strategy: Classify against ALL relevant labels to be safe? + # Or split if we trust YOLO class. + # Let's trust YOLO class if available. + # For this template, I will simply check against both lists and take the max confidence one. + + candidates = self.sign_labels + self.pothole_labels + best_label, score = self.clip.get_best_match(pil_image, candidates, threshold=0.5) + + obj_type = "Traffic Sign" if best_label in self.sign_labels else "Road Damage" + + result = { + "id": track_id, + "type": obj_type, + "subtype": best_label, + "confidence": float(score), + "frame_idx": track_data['frame_idx'], + # In a real app, you might save the crop to disk and return a URL + # "crop_path": save_to_disk... 
+ } + + self.final_results.append(result) + self.active_tracks[track_id]['processed'] = True + +if __name__ == "__main__": + # Test run + processor = VideoProcessor() + # processor.process_video("test_video.mp4") diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..39dde9d --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,11 @@ +ultralytics +transformers +torch +fastapi +uvicorn +opencv-python-headless +pillow +numpy +ftfy +regex +tqdm diff --git a/backend/runs/detect/train/args.yaml b/backend/runs/detect/train/args.yaml new file mode 100644 index 0000000..36c1d92 --- /dev/null +++ b/backend/runs/detect/train/args.yaml @@ -0,0 +1,109 @@ +task: detect +mode: train +model: yolov8n.pt +data: ../datasets/road_signs_potholes/data.yaml +epochs: 100 +time: null +patience: 100 +batch: 16 +imgsz: 640 +save: true +save_period: -1 +cache: false +device: cpu +workers: 8 +project: null +name: train +exist_ok: false +pretrained: true +optimizer: auto +verbose: true +seed: 0 +deterministic: true +single_cls: false +rect: false +cos_lr: false +close_mosaic: 10 +resume: false +amp: true +fraction: 1.0 +profile: false +freeze: null +multi_scale: 0.0 +compile: false +overlap_mask: true +mask_ratio: 4 +dropout: 0.0 +val: true +split: val +save_json: false +conf: null +iou: 0.7 +max_det: 300 +half: false +dnn: false +plots: true +end2end: null +source: null +vid_stride: 1 +stream_buffer: false +visualize: false +augment: false +agnostic_nms: false +classes: null +retina_masks: false +embed: null +show: false +save_frames: false +save_txt: false +save_conf: false +save_crop: false +show_labels: true +show_conf: true +show_boxes: true +line_width: null +format: torchscript +keras: false +optimize: false +int8: false +dynamic: false +simplify: true +opset: null +workspace: null +nms: false +lr0: 0.01 +lrf: 0.01 +momentum: 0.937 +weight_decay: 0.0005 +warmup_epochs: 3.0 +warmup_momentum: 0.8 +warmup_bias_lr: 0.1 +box: 7.5 +cls: 0.5 
+dfl: 1.5
+pose: 12.0
+kobj: 1.0
+rle: 1.0
+angle: 1.0
+nbs: 64
+hsv_h: 0.015
+hsv_s: 0.7
+hsv_v: 0.4
+degrees: 0.0
+translate: 0.1
+scale: 0.5
+shear: 0.0
+perspective: 0.0
+flipud: 0.0
+fliplr: 0.5
+bgr: 0.0
+mosaic: 1.0
+mixup: 0.0
+cutmix: 0.0
+copy_paste: 0.0
+copy_paste_mode: flip
+auto_augment: randaugment
+erasing: 0.4
+cfg: null
+tracker: botsort.yaml
+save_dir: D:\Time-Pass-Projects\pothole-roadsign detection\backend\runs\detect\train
diff --git a/backend/test_model.py b/backend/test_model.py
new file mode 100644
index 0000000..6b15351
--- /dev/null
+++ b/backend/test_model.py
@@ -0,0 +1,71 @@
+from backend.models.yolo_manager import YOLOManager
+import cv2
+import os
+
+def test_model(model_path="backend/models/best.pt", source="test_video.mp4"):
+    """
+    Tests the YOLO model on a video or image.
+
+    Args:
+        model_path: Path to trained weights; falls back to yolov8n.pt if missing.
+        source: Path to an image/video file, or an integer webcam index.
+    """
+    if not os.path.exists(model_path):
+        print(f"Model not found at {model_path}. Using standard yolov8n.pt for demo.")
+        model_path = "yolov8n.pt"
+
+    yolo = YOLOManager(model_path)
+
+    # Check if source is image or video.
+    # str() guards against integer webcam indices (e.g. 0): os.path.splitext
+    # raises TypeError on an int, and a webcam must go down the video path.
+    ext = os.path.splitext(str(source))[1].lower()
+    if ext in ['.jpg', '.jpeg', '.png', '.bmp']:
+        frame = cv2.imread(source)
+        if frame is None:
+            print(f"Could not read image: {source}")
+            return
+
+        results = yolo.detect(frame)
+        res_plotted = results.plot()
+        cv2.imshow("YOLO Detection", res_plotted)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
+    else:
+        # Video (or webcam index)
+        cap = cv2.VideoCapture(source)
+        if not cap.isOpened():
+            print(f"Could not open video: {source}")
+            return
+
+        print("Press 'q' to exit.")
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            # Use 'track' or 'detect'
+            results = yolo.track(frame)
+
+            # Plot results on frame
+            annotated_frame = results.plot()
+
+            cv2.imshow("YOLO Tracking", annotated_frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+
+        cap.release()
+        cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+    # CHANGE THIS to your test file
+    TEST_FILE = "d:/path/to/your/test/video_or_image.jpg"
+
+    if not os.path.exists(TEST_FILE):
+        if TEST_FILE == "0":
+            # Webcam
+            test_model(source=0)
+        else:
+            print(f"File {TEST_FILE} not found.")
+            TEST_FILE = input("Enter path to image/video (or 0 for webcam): ").strip('"')
+            if TEST_FILE == "0":
+                test_model(source=0)
+            else:
+                test_model(source=TEST_FILE)
+    else:
+        test_model(source=TEST_FILE)
diff --git a/backend/utils/image_utils.py b/backend/utils/image_utils.py
new file mode 100644
index 0000000..a9666d9
--- /dev/null
+++ b/backend/utils/image_utils.py
@@ -0,0 +1,52 @@
+import cv2
+import numpy as np
+from PIL import Image
+
+def is_blurry(image: np.ndarray, threshold: float = 100.0) -> bool:
+    """
+    Checks if an image is blurry using the Laplacian variance method.
+
+    Args:
+        image (np.ndarray): The image to check (BGR format).
+        threshold (float): The variance threshold below which the image is considered blurry.
+
+    Returns:
+        bool: True if blurry, False otherwise.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    variance = cv2.Laplacian(gray, cv2.CV_64F).var()
+    return variance < threshold
+
+def crop_image(frame: np.ndarray, bbox: list[float], padding_percent: float = 0.1) -> np.ndarray:
+    """
+    Crops an image based on a bounding box with optional padding.
+
+    Args:
+        frame (np.ndarray): The full image frame.
+        bbox (list): [x1, y1, x2, y2]
+        padding_percent (float): Percentage of padding to add around the box.
+
+    Returns:
+        np.ndarray: The cropped image.
+ """ + h, w, _ = frame.shape + x1, y1, x2, y2 = bbox + + width = x2 - x1 + height = y2 - y1 + + pad_w = width * padding_percent + pad_h = height * padding_percent + + # Apply padding ensuring we stay within frame boundaries + new_x1 = max(0, int(x1 - pad_w)) + new_y1 = max(0, int(y1 - pad_h)) + new_x2 = min(w, int(x2 + pad_w)) + new_y2 = min(h, int(y2 + pad_h)) + + return frame[new_y1:new_y2, new_x1:new_x2] + +def convert_cv2_to_pil(cv2_image: np.ndarray) -> Image.Image: + """Conventional cv2 BGR to PIL RGB conversion.""" + color_converted = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB) + return Image.fromarray(color_converted) diff --git a/backend/utils/label_helper.py b/backend/utils/label_helper.py new file mode 100644 index 0000000..0c5b8f9 --- /dev/null +++ b/backend/utils/label_helper.py @@ -0,0 +1,66 @@ +import cv2 +import sys +import os + +def get_yolo_coordinates(image_path): + """ + Opens an image, lets the user draw a box, and prints the YOLO format coordinates. + """ + if not os.path.exists(image_path): + print(f"Error: File {image_path} not found.") + return + + # Load image + img = cv2.imread(image_path) + if img is None: + print("Error: Could not read image.") + return + + height, width, _ = img.shape + + print("---------------------------------------------------------") + print(f"Loaded {image_path} ({width}x{height})") + print("INSTRUCTIONS:") + print("1. Draw a box around the object using your mouse.") + print("2. Press ENTER or SPACE to confirm the box.") + print("3. 
Press 'c' to cancel.") + print("---------------------------------------------------------") + + # Select ROI + # fromCenter=False, showCrosshair=True + r = cv2.selectROI("Draw Box (Press Enter to Confirm)", img, fromCenter=False, showCrosshair=True) + cv2.destroyAllWindows() + + # r is (x, y, w, h) in pixels + x_pixel, y_pixel, w_pixel, h_pixel = r + + if w_pixel == 0 or h_pixel == 0: + print("No box selected.") + return + + # Convert to YOLO format (Normalized) + # center_x, center_y, width, height + + center_x = (x_pixel + (w_pixel / 2)) / width + center_y = (y_pixel + (h_pixel / 2)) / height + norm_w = w_pixel / width + norm_h = h_pixel / height + + # Limit precision to 6 decimal places + print(f"\nSUCCESS! Here is your YOLO label line:") + print(f"---------------------------------------------------------") + print(f" {center_x:.6f} {center_y:.6f} {norm_w:.6f} {norm_h:.6f}") + print(f"---------------------------------------------------------") + print("Replace with:") + print("0 -> if it is a Traffic Sign") + print("1 -> if it is a Pothole") + print("2 -> if it is a Manhole") + print(f"---------------------------------------------------------") + +if __name__ == "__main__": + if len(sys.argv) > 1: + path = sys.argv[1] + else: + path = input("Enter the path to your image: ").strip('"') + + get_yolo_coordinates(path) diff --git a/backend/yolov8n.pt b/backend/yolov8n.pt new file mode 100644 index 0000000..0db4ca4 Binary files /dev/null and b/backend/yolov8n.pt differ diff --git a/datasets/road_signs_potholes/README.md b/datasets/road_signs_potholes/README.md new file mode 100644 index 0000000..348bba9 --- /dev/null +++ b/datasets/road_signs_potholes/README.md @@ -0,0 +1,31 @@ +# Dataset Structure Guide + +This folder contains the structure required for YOLOv8 training. + +## What goes where? + +1. 
**Images**: + - Put your training images (80% of data) in: `train/images/` + - Put your validation images (20% of data) in: `val/images/` + - Supported formats: `.jpg`, `.png`, `.bmp`. + +2. **Labels**: + - For every image `image1.jpg`, you need a text file `image1.txt` in the corresponding `labels/` folder. + - Example: + - `train/images/road_01.jpg` + - `train/labels/road_01.txt` + +3. **data.yaml**: + - This file configures the dataset paths and class names. + - It is the entry point for the training script. + +## Label Format + +YOLO expects a `.txt` file with one line per object: +` ` + +- **class_id**: Integer (0, 1, 2...) from `data.yaml`. +- **coordinates**: Normalized between 0 and 1. + +Example: +`0 0.5 0.5 0.2 0.4` -> Class 0, centered in the middle, 20% width, 40% height. diff --git a/datasets/road_signs_potholes/data.yaml b/datasets/road_signs_potholes/data.yaml new file mode 100644 index 0000000..ffba634 --- /dev/null +++ b/datasets/road_signs_potholes/data.yaml @@ -0,0 +1,16 @@ +# Train/Val/Test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +path: D:/Time-Pass-Projects/pothole-roadsign detection/datasets/road_signs_potholes # dataset root dir +train: train/images +val: val/images + +# Classes +names: + 0: Traffic Sign + 1: Pothole + 2: Manhole + +# Key: +# data.yaml is the "Map" for YOLO. +# 1. It tells YOLO where to find the images for training and validation. +# 2. It tells YOLO how many classes there are and what their names are (0, 1, 2...). +# 3. YOLO reads this file first to verify everything exists. 
diff --git a/datasets/road_signs_potholes/train/images/40.png b/datasets/road_signs_potholes/train/images/40.png new file mode 100644 index 0000000..aa62575 Binary files /dev/null and b/datasets/road_signs_potholes/train/images/40.png differ diff --git a/datasets/road_signs_potholes/train/images/60.png b/datasets/road_signs_potholes/train/images/60.png new file mode 100644 index 0000000..5c07c96 Binary files /dev/null and b/datasets/road_signs_potholes/train/images/60.png differ diff --git a/datasets/road_signs_potholes/train/images/manhole.png b/datasets/road_signs_potholes/train/images/manhole.png new file mode 100644 index 0000000..1af9131 Binary files /dev/null and b/datasets/road_signs_potholes/train/images/manhole.png differ diff --git a/datasets/road_signs_potholes/train/images/narrow_bridge_ahead.png b/datasets/road_signs_potholes/train/images/narrow_bridge_ahead.png new file mode 100644 index 0000000..5fa162a Binary files /dev/null and b/datasets/road_signs_potholes/train/images/narrow_bridge_ahead.png differ diff --git a/datasets/road_signs_potholes/train/images/pothole1.png b/datasets/road_signs_potholes/train/images/pothole1.png new file mode 100644 index 0000000..ef641f5 Binary files /dev/null and b/datasets/road_signs_potholes/train/images/pothole1.png differ diff --git a/datasets/road_signs_potholes/train/images/pothole2.png b/datasets/road_signs_potholes/train/images/pothole2.png new file mode 100644 index 0000000..d1fd823 Binary files /dev/null and b/datasets/road_signs_potholes/train/images/pothole2.png differ diff --git a/datasets/road_signs_potholes/train/labels/40.txt b/datasets/road_signs_potholes/train/labels/40.txt new file mode 100644 index 0000000..5071501 --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/40.txt @@ -0,0 +1 @@ +0 0.481481 0.488739 0.757202 0.815315 \ No newline at end of file diff --git a/datasets/road_signs_potholes/train/labels/60.txt b/datasets/road_signs_potholes/train/labels/60.txt new file mode 100644 
index 0000000..3e429eb --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/60.txt @@ -0,0 +1 @@ +0 0.492045 0.492081 0.979545 0.979638 \ No newline at end of file diff --git a/datasets/road_signs_potholes/train/labels/manhole.txt b/datasets/road_signs_potholes/train/labels/manhole.txt new file mode 100644 index 0000000..564872a --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/manhole.txt @@ -0,0 +1 @@ +2 0.485294 0.511561 0.858289 0.734104 \ No newline at end of file diff --git a/datasets/road_signs_potholes/train/labels/narrow_bridge_ahead.txt b/datasets/road_signs_potholes/train/labels/narrow_bridge_ahead.txt new file mode 100644 index 0000000..62333f4 --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/narrow_bridge_ahead.txt @@ -0,0 +1 @@ +0 0.470684 0.442231 0.700326 0.749004 \ No newline at end of file diff --git a/datasets/road_signs_potholes/train/labels/pothole1.txt b/datasets/road_signs_potholes/train/labels/pothole1.txt new file mode 100644 index 0000000..23490f7 --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/pothole1.txt @@ -0,0 +1 @@ +1 0.511981 0.741117 0.321086 0.172589 \ No newline at end of file diff --git a/datasets/road_signs_potholes/train/labels/pothole2.txt b/datasets/road_signs_potholes/train/labels/pothole2.txt new file mode 100644 index 0000000..8d766ac --- /dev/null +++ b/datasets/road_signs_potholes/train/labels/pothole2.txt @@ -0,0 +1 @@ +1 0.679221 0.808594 0.454545 0.296875 \ No newline at end of file diff --git a/datasets/road_signs_potholes/val/images/road_003.jpg b/datasets/road_signs_potholes/val/images/road_003.jpg new file mode 100644 index 0000000..e69de29 diff --git a/datasets/road_signs_potholes/val/labels/road_003.txt b/datasets/road_signs_potholes/val/labels/road_003.txt new file mode 100644 index 0000000..a98593d --- /dev/null +++ b/datasets/road_signs_potholes/val/labels/road_003.txt @@ -0,0 +1 @@ +0 0.4 0.4 0.15 0.15