0 前言


那么什么是数据增强呢?数据增强(Data Augmentation)是指基于有限的数据生成更多等价(同样有效)的数据,以丰富训练数据的分布,使通过训练集得到的模型泛化能力更强。

数据增强可以分为两类:离线增强和在线增强。离线增强:直接对数据集进行处理,数据的数目会变成增强因子乘以原数据集的数目,这种方法常用于数据集很小的时候。在线增强:在获得一个 batch 的数据之后,再对这个 batch 的数据进行增强,如旋转、平移、翻转等相应的变换。由于有些数据集不能接受线性级别的增长,这种方法常用于大的数据集。很多机器学习框架已经支持这种数据增强方式,并且可以使用 GPU 优化计算。




1 数据增强的实现


以下代码中的标注框均是四点标注的四边形,而非水平矩形框。四个顶点按照左上、右上、右下、左下的顺序(即顺时针方向)排列,共八个坐标值:$x_0,\ y_0,\ x_1,\ y_1,\ x_2,\ y_2,\ x_3,\ y_3$

1.1 贴背景


def add_background_randomly(image, background, box_list=None):
    """Paste a randomly resized copy of ``image`` at a random spot on ``background``.

    The image is resized so that its longest side is a random value between
    roughly two-thirds and four-fifths of the background's shortest side,
    then pasted at a random position where it fits entirely.

    Args:
        image: Foreground image array (``image.shape[:2]`` is height, width).
        background: Background image array. It is NOT modified; the paste is
            done on a copy so the same background can be reused.
        box_list: Optional list of ``(cls_type, rect)`` labels, where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the corners in
            the order left-top, right-top, right-bottom, left-bottom.
            The input rects are NOT modified.

    Returns:
        ``[composited_image, new_box_list]`` where ``new_box_list`` contains
        ``(cls_type, rect)`` tuples with integer coordinates expressed in the
        composited image and clamped to ``[0, width]`` / ``[0, height]``.
    """
    box_list = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target size of the image's longest side: between 2/3 and 4/5 of the
    # background's shortest side.
    short_side = min(bg_height, bg_width)
    min_size = short_side // 3 * 2
    max_size = short_side // 5 * 4
    # Integer division can make max_size < min_size for tiny backgrounds
    # (e.g. short_side == 9), which would make randint raise.
    new_size = random.randint(min_size, max(min_size, max_size))
    resize_multiple = round(new_size / max(img_height, img_width), 4)

    # cv2.resize rejects a zero-sized target, so keep each side >= 1 pixel.
    target_width = max(1, int(img_width * resize_multiple))
    target_height = max(1, int(img_height * resize_multiple))
    image = cv2.resize(image, (target_width, target_height))
    img_height, img_width = image.shape[:2]

    # Random top-left paste position such that the image fits completely.
    height_pos = random.randint(0, (bg_height - img_height))
    width_pos = random.randint(0, (bg_width - img_width))

    # Work on a copy so the caller's background array is left untouched.
    canvas = background.copy()
    canvas[height_pos:(height_pos + img_height), width_pos:(width_pos + img_width)] = image
    canvas_height, canvas_width = canvas.shape[:2]

    # Map every label corner: scale, shift by the paste offset, then clamp.
    new_box_list = []
    for cls_type, rect in box_list:
        new_rect = list(rect)  # copy: do not rewrite the caller's label
        for i in range(0, len(new_rect) - 1, 2):
            x = int(new_rect[i] * resize_multiple) + width_pos
            y = int(new_rect[i + 1] * resize_multiple) + height_pos
            new_rect[i] = max(min(x, canvas_width), 0)
            new_rect[i + 1] = max(min(y, canvas_height), 0)
        new_box_list.append((cls_type, new_rect))

    image_with_boxes = [canvas, new_box_list]
    return image_with_boxes


1.2 随机旋转


def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` about its centre and transform its labels accordingly.

    The output canvas is enlarged so the whole rotated image fits; uncovered
    areas are filled with ``color``.

    Args:
        image: Image array (``image.shape[:2]`` is height, width).
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Inputs are not modified.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``. Per the OpenCV documentation a
            positive value rotates counter-clockwise (origin at top-left).
        color: Border fill value passed to ``cv2.warpAffine`` as
            ``borderValue``; default black.
        img_scale: Isotropic scale factor passed to
            ``cv2.getRotationMatrix2D``. It is applied to the label
            coordinates as well, so boxes stay aligned with the image.

    Returns:
        ``[rotated_image, new_box_list]`` where each box has integer
        coordinates clamped to ``[0, width_new]`` / ``[0, height_new]``.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)
    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # |scale * cos| and |scale * sin| taken from the matrix.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Size of the bounding canvas of the rotated (and scaled) image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the original centre lands on the new canvas centre.
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Transform the label corners with the same centre, angle and scale that
    # the affine matrix above encodes.
    angle_rad = angle / 180 * math.pi
    box_rot_list = cal_rotate_box(
        label_box_list,
        angle_rad,
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        scale=img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clamped = []
        for i in range(0, len(box_rot) - 1, 2):
            clamped.append(max(min(int(box_rot[i]), width_new), 0))
            clamped.append(max(min(int(box_rot[i + 1]), height_new), 0))
        box_new_list.append((cls_type, clamped))

    image_with_boxes = [image_new, box_new_list]
    return image_with_boxes


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every corner of every labelled box.

    Args:
        box_list: List of ``(cls_type, box)`` with ``box`` a flat
            ``[x0, y0, x1, y1, ...]`` sequence.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Isotropic scale applied around the centre (default 1.0).

    Returns:
        New list of ``(cls_type, box_new)`` with float coordinates; the input
        is left untouched.
    """
    box_list_new = []
    for (cls_type, box) in box_list:
        box_new = []
        for index in range(len(box) // 2):
            box_new.extend(
                cal_rotate_coordinate(
                    box[index * 2], box[index * 2 + 1], angle, ori_center, new_center, scale
                )
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate (and optionally scale) one point about ``ori_center``.

    The y axis is flipped before and after the rotation, i.e. the rotation is
    computed in a y-up frame, so a positive ``angle`` moves a point
    counter-clockwise as seen on the image.

    Args:
        x_ori, y_ori: Point in source-image pixel coordinates.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Isotropic scale applied around the centre (default 1.0).

    Returns:
        ``(x_new, y_new)`` as floats in output-image pixel coordinates.
    """
    # Offset from the centre, with y flipped to point upwards.
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)


1.3 随机色调变换


def hue_change(image):
    """Randomly apply colour jitter to ``image`` and return the result.

    Four independent draws from ``np.random.rand()`` decide, in this order,
    whether to apply ``transforms.ColorJitter`` with:
    brightness=0.5 (probability 0.8), contrast=0.2, saturation=0.2 and
    hue=0.2 (probability 0.2 each).

    ``transforms`` is expected to be ``torchvision.transforms``; ``image``
    must be whatever ``ColorJitter`` accepts (presumably a PIL image or a
    tensor - confirm against the caller).
    """
    # (probability of applying, ColorJitter keyword arguments), in order.
    jitter_plan = (
        (0.8, {"brightness": 0.5}),
        (0.2, {"contrast": 0.2}),
        (0.2, {"saturation": 0.2}),
        (0.2, {"hue": 0.2}),
    )
    for probability, jitter_kwargs in jitter_plan:
        if np.random.rand() < probability:
            image = transforms.ColorJitter(**jitter_kwargs)(image)
    return image


1.4 随机透视变换


def perspective_tranform(image, perspective_rate=0.5, label_box_list=None):
    """Apply a random perspective warp to ``image`` and its labels.

    The four image corners are mapped to random destination points: each
    destination corner is drawn from a band of width ``delta_width`` /
    height ``delta_height`` at the matching side of a
    ``max_width x max_height`` canvas. The warped image is then cropped to
    the bounding range of those destination corners.

    Args:
        image: Image array (``image.shape[:2]`` is height, width).
        perspective_rate: Strength of the distortion; the canvas is
            ``(1 + rate)`` times the image size and the corner bands are
            derived from ``(1 - rate)``. Values in ``[0, 1)`` are the
            sensible range.
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Inputs are not modified.

    Returns:
        ``[warped_cropped_image, new_box_list]`` where each box has integer
        coordinates relative to the cropped image, clamped to its bounds.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    # Source corners: left-top, right-top, right-bottom, left-bottom.
    points_src = np.float32(
        [[0, 0], [img_width - 1, 0], [img_width - 1, img_height - 1], [0, img_height - 1]]
    )

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width (1 - rate). Using (1 + rate) here made
    # delta_height always 0, which disabled all vertical jitter.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Random destination corners (same draw order as the corner order).
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height))

    # Crop to the bounding range of the destination corners.
    left, top = min(x0, x3), min(y0, y1)
    right, bottom = max(x1, x2), max(y2, y3)
    image_new = image_res[top:bottom, left:right]
    new_height, new_width = image_new.shape[:2]

    box_new_list = []
    for cls_type, box in label_box_list:
        box_new = list(box)  # copy: do not rewrite the caller's label
        for i in range(0, len(box_new) - 1, 2):
            x, y = box_new[i], box_new[i + 1]
            # Homogeneous projection of (x, y) through M.
            w = M[2][0] * x + M[2][1] * y + M[2][2]
            px = (M[0][0] * x + M[0][1] * y + M[0][2]) / w
            py = (M[1][0] * x + M[1][1] * y + M[1][2]) / w
            # Shift into the cropped frame, then clamp to its bounds.
            box_new[i] = max(min(int(px) - left, new_width), 0)
            box_new[i + 1] = max(min(int(py) - top, new_height), 0)
        box_new_list.append((cls_type, box_new))

    image_with_boxes = [image_new, box_new_list]
    return image_with_boxes

1.5 完整代码

import math
import os
import random
import shutil

import cv2
import numpy as np
import torchvision.transforms as transforms
from PIL import Image, ImageOps
from tqdm import tqdm


def add_background_randomly(image, background, box_list=None):
    """Paste a randomly resized copy of ``image`` at a random spot on ``background``.

    The image is resized so that its longest side is a random value between
    roughly two-thirds and four-fifths of the background's shortest side,
    then pasted at a random position where it fits entirely.

    Args:
        image: Foreground image array (``image.shape[:2]`` is height, width).
        background: Background image array. It is NOT modified; the paste is
            done on a copy so the same background can be reused.
        box_list: Optional list of ``(cls_type, rect)`` labels, where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the corners in
            the order left-top, right-top, right-bottom, left-bottom.
            The input rects are NOT modified.

    Returns:
        ``[composited_image, new_box_list]`` where ``new_box_list`` contains
        ``(cls_type, rect)`` tuples with integer coordinates expressed in the
        composited image and clamped to ``[0, width]`` / ``[0, height]``.
    """
    box_list = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target size of the image's longest side: between 2/3 and 4/5 of the
    # background's shortest side.
    short_side = min(bg_height, bg_width)
    min_size = short_side // 3 * 2
    max_size = short_side // 5 * 4
    # Integer division can make max_size < min_size for tiny backgrounds
    # (e.g. short_side == 9), which would make randint raise.
    new_size = random.randint(min_size, max(min_size, max_size))
    resize_multiple = round(new_size / max(img_height, img_width), 4)

    # cv2.resize rejects a zero-sized target, so keep each side >= 1 pixel.
    target_width = max(1, int(img_width * resize_multiple))
    target_height = max(1, int(img_height * resize_multiple))
    image = cv2.resize(image, (target_width, target_height))
    img_height, img_width = image.shape[:2]

    # Random top-left paste position such that the image fits completely.
    height_pos = random.randint(0, (bg_height - img_height))
    width_pos = random.randint(0, (bg_width - img_width))

    # Work on a copy so the caller's background array is left untouched.
    canvas = background.copy()
    canvas[height_pos:(height_pos + img_height), width_pos:(width_pos + img_width)] = image
    canvas_height, canvas_width = canvas.shape[:2]

    # Map every label corner: scale, shift by the paste offset, then clamp.
    new_box_list = []
    for cls_type, rect in box_list:
        new_rect = list(rect)  # copy: do not rewrite the caller's label
        for i in range(0, len(new_rect) - 1, 2):
            x = int(new_rect[i] * resize_multiple) + width_pos
            y = int(new_rect[i + 1] * resize_multiple) + height_pos
            new_rect[i] = max(min(x, canvas_width), 0)
            new_rect[i + 1] = max(min(y, canvas_height), 0)
        new_box_list.append((cls_type, new_rect))

    image_with_boxes = [canvas, new_box_list]
    return image_with_boxes


def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` about its centre and transform its labels accordingly.

    The output canvas is enlarged so the whole rotated image fits; uncovered
    areas are filled with ``color``.

    Args:
        image: Image array (``image.shape[:2]`` is height, width).
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Inputs are not modified.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``. Per the OpenCV documentation a
            positive value rotates counter-clockwise (origin at top-left).
        color: Border fill value passed to ``cv2.warpAffine`` as
            ``borderValue``; default black.
        img_scale: Isotropic scale factor passed to
            ``cv2.getRotationMatrix2D``. It is applied to the label
            coordinates as well, so boxes stay aligned with the image.

    Returns:
        ``[rotated_image, new_box_list]`` where each box has integer
        coordinates clamped to ``[0, width_new]`` / ``[0, height_new]``.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)
    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # |scale * cos| and |scale * sin| taken from the matrix.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Size of the bounding canvas of the rotated (and scaled) image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the original centre lands on the new canvas centre.
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Transform the label corners with the same centre, angle and scale that
    # the affine matrix above encodes.
    angle_rad = angle / 180 * math.pi
    box_rot_list = cal_rotate_box(
        label_box_list,
        angle_rad,
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        scale=img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clamped = []
        for i in range(0, len(box_rot) - 1, 2):
            clamped.append(max(min(int(box_rot[i]), width_new), 0))
            clamped.append(max(min(int(box_rot[i + 1]), height_new), 0))
        box_new_list.append((cls_type, clamped))

    image_with_boxes = [image_new, box_new_list]
    return image_with_boxes


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every corner of every labelled box.

    Args:
        box_list: List of ``(cls_type, box)`` with ``box`` a flat
            ``[x0, y0, x1, y1, ...]`` sequence.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Isotropic scale applied around the centre (default 1.0).

    Returns:
        New list of ``(cls_type, box_new)`` with float coordinates; the input
        is left untouched.
    """
    box_list_new = []
    for (cls_type, box) in box_list:
        box_new = []
        for index in range(len(box) // 2):
            box_new.extend(
                cal_rotate_coordinate(
                    box[index * 2], box[index * 2 + 1], angle, ori_center, new_center, scale
                )
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate (and optionally scale) one point about ``ori_center``.

    The y axis is flipped before and after the rotation, i.e. the rotation is
    computed in a y-up frame, so a positive ``angle`` moves a point
    counter-clockwise as seen on the image.

    Args:
        x_ori, y_ori: Point in source-image pixel coordinates.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Isotropic scale applied around the centre (default 1.0).

    Returns:
        ``(x_new, y_new)`` as floats in output-image pixel coordinates.
    """
    # Offset from the centre, with y flipped to point upwards.
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)


def hue_change(image):
    """Randomly apply colour jitter to ``image`` and return the result.

    Four independent draws from ``np.random.rand()`` decide, in this order,
    whether to apply ``transforms.ColorJitter`` with:
    brightness=0.5 (probability 0.8), contrast=0.2, saturation=0.2 and
    hue=0.2 (probability 0.2 each).

    ``image`` must be whatever ``ColorJitter`` accepts (presumably a PIL
    image or a tensor - confirm against the caller).
    """
    # (probability of applying, ColorJitter keyword arguments), in order.
    jitter_plan = (
        (0.8, {"brightness": 0.5}),
        (0.2, {"contrast": 0.2}),
        (0.2, {"saturation": 0.2}),
        (0.2, {"hue": 0.2}),
    )
    for probability, jitter_kwargs in jitter_plan:
        if np.random.rand() < probability:
            image = transforms.ColorJitter(**jitter_kwargs)(image)
    return image


def perspective_tranform(image, perspective_rate=0.5, label_box_list=None):
    """Apply a random perspective warp to ``image`` and its labels.

    The four image corners are mapped to random destination points: each
    destination corner is drawn from a band of width ``delta_width`` /
    height ``delta_height`` at the matching side of a
    ``max_width x max_height`` canvas. The warped image is then cropped to
    the bounding range of those destination corners.

    Args:
        image: Image array (``image.shape[:2]`` is height, width).
        perspective_rate: Strength of the distortion; the canvas is
            ``(1 + rate)`` times the image size and the corner bands are
            derived from ``(1 - rate)``. Values in ``[0, 1)`` are the
            sensible range.
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Inputs are not modified.

    Returns:
        ``[warped_cropped_image, new_box_list]`` where each box has integer
        coordinates relative to the cropped image, clamped to its bounds.
    """
    label_box_list = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    # Source corners: left-top, right-top, right-bottom, left-bottom.
    points_src = np.float32(
        [[0, 0], [img_width - 1, 0], [img_width - 1, img_height - 1], [0, img_height - 1]]
    )

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width (1 - rate). Using (1 + rate) here made
    # delta_height always 0, which disabled all vertical jitter.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Random destination corners (same draw order as the corner order).
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height))

    # Crop to the bounding range of the destination corners.
    left, top = min(x0, x3), min(y0, y1)
    right, bottom = max(x1, x2), max(y2, y3)
    image_new = image_res[top:bottom, left:right]
    new_height, new_width = image_new.shape[:2]

    box_new_list = []
    for cls_type, box in label_box_list:
        box_new = list(box)  # copy: do not rewrite the caller's label
        for i in range(0, len(box_new) - 1, 2):
            x, y = box_new[i], box_new[i + 1]
            # Homogeneous projection of (x, y) through M.
            w = M[2][0] * x + M[2][1] * y + M[2][2]
            px = (M[0][0] * x + M[0][1] * y + M[0][2]) / w
            py = (M[1][0] * x + M[1][1] * y + M[1][2]) / w
            # Shift into the cropped frame, then clamp to its bounds.
            box_new[i] = max(min(int(px) - left, new_width), 0)
            box_new[i + 1] = max(min(int(py) - top, new_height), 0)
        box_new_list.append((cls_type, box_new))

    image_with_boxes = [image_new, box_new_list]
    return image_with_boxes


if __name__ == "__main__":
    # Visual check: draw one label quadrilateral on an augmented image and
    # save it to disk.
    # NOTE(review): `test_path`, `file_name`, `rect` and `image_res` are not
    # defined anywhere in this file. They must be provided (e.g. by loading an
    # image, running one of the augmentations above and taking a box from its
    # result) before this snippet can run.
    img_test_path = os.path.join(test_path, file_name)
    points = np.array([[rect[0],rect[1]], [rect[2],rect[3]], [rect[4],rect[5]], [rect[6],rect[7]]], np.int32)
    # cv2.polylines draws onto image_res itself and also returns it.
    image_rect = cv2.polylines(image_res, pts=[points], isClosed=True, color=(0,0,255), thickness=3)
    cv2.imwrite(img_test_path, image_res)

2 总结





