yolov5数据增强代码解读

yolov5中用到的数据增强方法：
self.mosaic 启用马赛克增强
self.mosaic_border = [-img_size // 2, -img_size // 2] 马赛克边界偏移（取图片尺寸的一半，用于确定马赛克拼接中心点的随机取值范围）
albumentations.Blur(p=0.1), 用一个随机尺寸的核来模糊图片
albumentations.MedianBlur(p=0.1), 使用中值滤波
albumentations.ToGray(p=0.01)], 转化成灰度图
HSV color-space HSV空间的数据增强，包括（H色调，S饱和度，V明度）
img = np.flipud(img) 上下翻转
img = np.fliplr(img) 水平翻转
mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1))) Mixup图像混叠增广
在utils/datasets.py里

class LoadImagesAndLabels(Dataset):  # for training/testing
    """Dataset excerpt showing where YOLOv5 applies its data augmentations.

    Only the augmentation-related part of ``__init__`` is shown; the rest of
    the initialisation is elided in this excerpt. ``__getitem__`` relies on
    attributes that are not assigned in the visible code (``indices``, ``n``,
    ``labels``, ``batch``, ``batch_shapes``, ``img_files``) -- presumably they
    are set in the elided part of ``__init__``.
    """

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        self.img_size = img_size
        self.augment = augment  # whether data augmentation is applied
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect  # image_weights forces rect off
        self.mosaic = self.augment and not self.rect  # mosaic is enabled only when augmenting without rect
        self.mosaic_border = [-img_size // 2, -img_size // 2]  # mosaic border offsets (minus half the image size)
        self.stride = stride
        self.path = path
        # Wrapper around the optional third-party `albumentations` package
        # (see the Albumentations class); it is not part of PyTorch.
        self.albumentations = Albumentations() if augment else None
        # ... (remaining initialisation omitted in this excerpt)

    def __getitem__(self, index):
        """Load one sample and apply the enabled augmentations.

        Returns a tuple ``(image tensor, labels_out, image file path, shapes)``.
        ``labels_out`` is an ``(nl, 6)`` tensor whose columns 1: hold the
        labels; column 0 is left at zero here. ``shapes`` is ``None`` for
        mosaic samples.
        """
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp augmentation: blend with a second, randomly chosen mosaic
            if random.random() < hyp['mixup']:
                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            if self.augment:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

        nl = len(labels)  # number of labels
        if nl:
            labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3)

        if self.augment:
            # Albumentations (Blur / MedianBlur / ToGray via the Albumentations wrapper class)
            img, labels = self.albumentations(img, labels)
            # The pipeline rebuilds the label array from the boxes it returns,
            # so refresh the count before it is used for flips and labels_out.
            nl = len(labels)

            # HSV color-space
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nl:
                    labels[:, 2] = 1 - labels[:, 2]  # mirror the normalized y centre

            # Flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nl:
                    labels[:, 1] = 1 - labels[:, 1]  # mirror the normalized x centre

            # Cutouts
            # labels = cutout(img, labels, p=0.5)

        labels_out = torch.zeros((nl, 6))
        if nl:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

utils中的augmentations.py

class Albumentations:
    """YOLOv5 Albumentations class (optional, only used if the package is installed).

    Wraps an ``albumentations`` pipeline made of ``Blur``, ``MedianBlur`` and
    ``ToGray``. If the package cannot be imported, ``self.transform`` stays
    ``None`` and calling the instance returns its inputs unchanged.
    """

    def __init__(self):
        self.transform = None
        try:
            import albumentations as A  # optional dependency, must be installed separately
            check_version(A.__version__, '1.0.3')  # version requirement

            # Only three augmentations are used here: Blur, MedianBlur and ToGray.
            self.transform = A.Compose([
                A.Blur(p=0.1),
                A.MedianBlur(p=0.1),
                A.ToGray(p=0.01)],
                bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            # Any other failure is logged and the pipeline is left disabled.
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline to ``im`` and ``labels`` with probability ``p``.

        ``labels`` is indexed as ``labels[:, 0]`` (class) and ``labels[:, 1:]``
        (box), the box format being 'yolo' as declared in ``BboxParams``.
        Returns ``(im, labels)``; when the pipeline runs, ``labels`` is rebuilt
        as a new array from the classes and boxes the pipeline returns.
        """
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels

# Example of a richer albumentations pipeline.
train_transform = albumentations.Compose([
    albumentations.Resize(RESIZE_SIZE, RESIZE_SIZE),
    albumentations.OneOf([
        albumentations.RandomGamma(gamma_limit=(60, 120), p=0.9),
        albumentations.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        albumentations.CLAHE(clip_limit=4.0, tile_grid_size=(4, 4), p=0.9),
    ]),
    albumentations.OneOf([
        albumentations.Blur(blur_limit=4, p=1),
        albumentations.MotionBlur(blur_limit=4, p=1),
        # NOTE(review): some albumentations releases accept only odd blur
        # limits for MedianBlur -- confirm blur_limit=4 against the installed version.
        albumentations.MedianBlur(blur_limit=4, p=1)
    ], p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=20,
                                    interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_CONSTANT, p=1),
    albumentations.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0)
])


def mixup(im, labels, im2, labels2):
    """Apply MixUp augmentation (https://arxiv.org/pdf/1710.09412.pdf).

    Blends the two images with a ratio drawn from Beta(32, 32), casts the
    result to uint8, and concatenates the two label arrays along axis 0.
    Returns ``(im, labels)``.
    """
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels


# Notes on the transforms used in train_transform:
# - Resize stretches the image to change its size.
# - RandomGamma applies a gamma transform.
# - RandomBrightnessContrast randomly changes the contrast and brightness of the image.
# - CLAHE is contrast-limited adaptive histogram equalization.
# - Blur blurs the image with a randomly sized kernel no larger than blur_limit.
# - MotionBlur is the same idea with a random kernel, but applies a motion-blur
#   algorithm to the image; put simply, motion blur is the streaky blur caused
#   by camera movement.
# - MedianBlur applies a median filter.
# - HorizontalFlip flips the image horizontally.
# - ShiftScaleRotate combines translation, scaling and rotation in one transform.
# - Normalize performs image normalization.

想要修改yolov5中离线增强数据的方法参考：https://cloud.tencent.com/developer/article/1660972
yolov5中的矩形训练

文章图片

可以看到，yolov5默认会把图片填充为正方形再传入网络进行训练，这样会引入很多冗余信息，让网络产生很多无意义的候选框。矩形训练就是为了减少这些冗余信息，减少网络产生的无意义的框的数量，从而加快网络训练速度。yolov5网络的总步长为32，所以其实只要图片边长能被32整除就可以了，不一定需要把正方形图片传入网络；矩形训练就是将图片填充到边长为32的倍数的最小尺寸，从而减小冗余信息。

文章图片
值得一提的是，除了矩形训练，还有矩形推理，也就是在做检测的时候也这样填充，从而加快推理速度，减少推理时间。
【yolov5数据增强代码解读】参考：https://blog.csdn.net/Q1u1NG/article/details/107362572
Mixup图像混叠增广参考：https://blog.csdn.net/zandaoguang/article/details/108525787