import os
import sys
from pathlib import Path

import cv2
import numpy as np
import torch

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.general import check_img_size, non_max_suppression, scale_boxes
from utils.segment.general import process_mask, process_mask_native
from utils.torch_utils import select_device, smart_inference_mode

from config import Config as setting

def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image to new_shape while preserving aspect ratio (YOLOv5-style letterbox)."""
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old); only scale down unless scaleup is allowed
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # width, height padding
    if auto:  # minimum rectangle: pad only up to the nearest stride multiple
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch to fill new_shape exactly, no padding
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # split padding between both sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return img, ratio, (dw, dh)
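
# Worked example (a sketch, assuming a 1080x1920 BGR frame): letterboxing to
# 640 with the default stride of 32 and auto=True scales the frame to 360x640,
# then pads the height only up to the nearest stride multiple, yielding a
# 384x640 result rather than a full 640x640 square:
#   im, ratio, (dw, dh) = letterbox(frame, new_shape=640, stride=32, auto=True)
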
def process_image(img0, img_size=640, stride=32, auto=True):
    """
    Processes an image by resizing and padding it to the required size.

    Args:
        img0 (np.ndarray): Original BGR image (HWC).
        img_size (int or tuple, optional): Desired inference size. Defaults to 640.
        stride (int, optional): Model stride that the padding must respect. Defaults to 32.
        auto (bool, optional): If True, pads only up to the nearest stride multiple. Defaults to True.

    Returns:
        np.ndarray: The processed image (uint8, CHW, RGB) ready for model input.
    """
    # Resize and pad the image
    im, _, _ = letterbox(img0, new_shape=img_size, stride=stride, auto=auto)

    # Convert image from HWC to CHW format and from BGR to RGB
    im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
    im = np.ascontiguousarray(im)

    return im

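# Usage sketch (assumes a local "frame.jpg"): the returned array is uint8 and
# channel-first; predict() below turns it into a torch tensor and scales it to
# [0, 1] before inference:
#   chw = process_image(cv2.imread("frame.jpg"), img_size=640, stride=32)
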

class TOSEG:
    def __init__(self, weights=Path(setting.toseg_weights), device='', dnn=False, data=None, half=False,
                 imgsz=(640, 640)):
        self.device = select_device(device)
        self.model = DetectMultiBackend(weights, device=self.device, dnn=dnn, data=data, fp16=half)
        self.stride, self.names, self.pt = self.model.stride, self.model.names, self.model.pt
        self.imgsz = check_img_size(imgsz, s=self.stride)
        self.model.warmup(imgsz=(1, 3, *self.imgsz))  # warm up with one dummy batch

    # Alternative visualize: return the annotated result image (requires
    # `from utils.plots import Annotator, colors`, which this module does not import).
    # def visualize(self, image, results, line_thickness=3, hide_labels=False, hide_conf=False):
    #     annotator = Annotator(image, line_width=line_thickness)
    #
    #     # Get the original image size
    #     h, w = image.shape[:2]
    #
    #     # Prepare the im_gpu argument
    #     im_gpu = torch.as_tensor(image, dtype=torch.float16, device=self.device).permute(2, 0, 1).flip(
    #         0).contiguous() / 255
    #
    #     for r in results:
    #         box = r['xyxy']
    #         leaf = r['leaf']
    #         label = None if hide_labels else (r['label'] if hide_conf else f"{r['label']} {r['conf']:.2f}")
    #         annotator.box_label(box, label, color=colors(r['cls'], True))
    #
    #         # Make sure leaf is in the right format and resize it
    #         if isinstance(leaf, np.ndarray):
    #             leaf = torch.from_numpy(leaf).to(self.device)
    #         elif isinstance(leaf, list):
    #             leaf = torch.tensor(leaf, device=self.device)
    #
    #         # If leaf is 2D, add a batch dimension
    #         if leaf.ndim == 2:
    #             leaf = leaf.unsqueeze(0)
    #
    #         # Resize the mask to match the original image
    #         leaf = torch.nn.functional.interpolate(leaf.unsqueeze(1).float(), size=(h, w), mode='bilinear',
    #                                                align_corners=False).squeeze(1)
    #
    #         annotator.masks(leaf, colors=[colors(r['cls'], True)], im_gpu=im_gpu)
    #
    #     return annotator.result()

    # Return a binary mask image (white masks on a black background)
    def visualize(self, image, results, line_thickness=3, hide_labels=False, hide_conf=False):
        # Start from an all-black background; masked regions are painted white below
        background = np.zeros_like(image)

        h, w = image.shape[:2]  # image size

        for r in results:
            mask = r['leaf']
            if isinstance(mask, np.ndarray):
                mask = torch.from_numpy(mask).to(self.device)
            elif isinstance(mask, list):
                mask = torch.tensor(mask, device=self.device)

            if mask.ndim == 2:
                mask = mask.unsqueeze(0)

            # Resize the mask to match the original image
            mask = torch.nn.functional.interpolate(mask.unsqueeze(1).float(), size=(h, w), mode='bilinear',
                                                   align_corners=False).squeeze(1)

            # Paint the masked region white; drop the batch dim so the boolean
            # mask is (h, w) and can be assigned per channel
            binary_mask = mask[0].cpu().numpy() > 0.5
            for i in range(3):  # each colour channel
                background[:, :, i] = np.where(binary_mask, 255, background[:, :, i])

        return background

    @smart_inference_mode()
    def predict(self, source, conf_thres=0.25, iou_thres=0.45, max_det=1000, classes=None,
                agnostic_nms=False, augment=False, retina_masks=False):
        """Run segmentation inference on a single BGR image; returns (results, original image)."""
        im0s = source  # original image (np.ndarray, BGR)
        im = process_image(im0s, img_size=self.imgsz, stride=self.stride, auto=self.pt)
        im = torch.from_numpy(im).to(self.model.device)
        im = im.half() if self.model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim

        pred, proto = self.model(im, augment=augment)[:2]
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)

        results = []
        for i, det in enumerate(pred):  # per image
            if len(det):
                if retina_masks:
                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0s.shape).round()
                    masks = process_mask_native(proto[i], det[:, 6:], det[:, :4], im0s.shape[:2])
                else:
                    masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)
                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0s.shape).round()

                # Iterate in detection order so masks[j] stays aligned with det[j]
                for j, (*xyxy, conf, cls) in enumerate(det[:, :6]):
                    c = int(cls)
                    label = f'{self.names[c]} {conf:.2f}'
                    mask = masks[j]
                    results.append({
                        'xyxy': xyxy,
                        'conf': conf,
                        'cls': c,
                        'label': label,
                        'leaf': mask
                    })

        return results, im0s

    def toseg(self, img):
        """Return a single-channel mask: 255 on segmented regions, 0 elsewhere."""
        results, image = self.predict(img)
        mask_img = self.visualize(image, results)
        mask = cv2.cvtColor(mask_img, cv2.COLOR_BGR2GRAY)
        return mask
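

# Minimal usage sketch: "test.jpg" and "mask.png" are placeholder paths, and
# the weights default to Config.toseg_weights as in TOSEG.__init__.
if __name__ == "__main__":
    segmenter = TOSEG()
    img = cv2.imread("test.jpg")  # BGR image, placeholder path
    if img is None:
        raise FileNotFoundError("test.jpg not found")
    mask = segmenter.toseg(img)
    cv2.imwrite("mask.png", mask)  # white = segmented regions, black = background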