For the earlier part (training TensorFlow on your own dataset, including the file modifications and so on), see
https://blog.csdn.net/int93/article/details/79064428
Project structure
my_train
— dataset
— models
— object_detection
— create_pascal_tf_record.py
— eval.py
— export_inference_graph.py
— train.py
— img_augmentation.py
— pascal_label_map.pbtxt
— ssd_mobilenet_v2_coco.config
Create the TF Record files

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=val \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/val.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=train \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/train.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt
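The label map referenced by --label_map_path uses the Object Detection API's pbtxt format: one item per class, with ids starting at 1 (0 is reserved for the background). A minimal example with a hypothetical class name:

item {
  id: 1
  name: 'my_object'
}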
Training
nohup python3 train.py --logtostderr &
Export the .pb model file

python export_inference_graph.py \
    --input_type image_tensor \
    --pipeline_config_path ssd_mobilenet_v2_coco.config \
    --trained_checkpoint_prefix train/model.ckpt-200000 \
    --output_directory train/
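The export step writes frozen_inference_graph.pb into --output_directory. Below is a minimal TF1-style inference sketch, assuming the standard tensor names the Object Detection API exports for --input_type image_tensor; the test image path is hypothetical.

import cv2
import numpy as np
import tensorflow as tf

# load the frozen graph produced by export_inference_graph.py
graph_def = tf.GraphDef()
with tf.gfile.GFile('train/frozen_inference_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')

with tf.Session(graph=graph) as sess:
    # the model expects a uint8 RGB batch of shape [1, H, W, 3]
    img = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
    boxes, scores, classes, num = sess.run(
        ['detection_boxes:0', 'detection_scores:0',
         'detection_classes:0', 'num_detections:0'],
        feed_dict={'image_tensor:0': np.expand_dims(img, 0)})
    print(boxes[0, :3], scores[0, :3], classes[0, :3])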
The principle behind rotating an image is explained at
https://www.oschina.net/translate/opencv-rotation?print
There are two approaches (a small sketch comparing them follows after both are described).
Approach 1
- First rotate the image about its own center, which gives the rotation matrix.
- Then translate to the center of the new image size (the one in which nothing gets cut off).
- (Rp + t) + m, so m = ((new_w - old_w)/2, (new_h - old_h)/2)
Code:
https://www.cnblogs.com/darkknightzh/p/5070576.html
Approach 2
- Take the center of the new image size as the rotation axis, which gives the rotation matrix.
- Then compute the translation.
- R(p + m) + t = Rp + Rm + t, so the offset to apply is Rm; m is the same as in Approach 1, but it has to be multiplied by R.
Code:
https://www.oschina.net/translate/opencv-rotation?print
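A minimal sketch (hypothetical image size and angle) showing that the two constructions yield exactly the same affine matrix:

import cv2
import numpy as np

w, h, angle = 640, 480, 30                     # hypothetical image size and rotation angle
r = np.deg2rad(angle)
nw = abs(np.sin(r)) * h + abs(np.cos(r)) * w   # enlarged canvas width
nh = abs(np.cos(r)) * h + abs(np.sin(r)) * w   # enlarged canvas height
m = np.array([(nw - w) / 2, (nh - h) / 2])     # shift from the old center to the new center

# Approach 1: rotate about the old center, then add m to the translation column
m1 = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
m1[:, 2] += m

# Approach 2: rotate about the new center, then add R @ m to the translation column
m2 = cv2.getRotationMatrix2D((nw / 2, nh / 2), angle, 1.0)
m2[:, 2] += m2[:, :2] @ m

print(np.allclose(m1, m2))  # True: both produce the same warpAffine matrix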
Note
Because this rotation changes the image size, do not forget to update the image size stored in the xml file as well: the bbox regression uses it, so it affects the loss.
A detailed explanation is at
https://blog.csdn.net/weixin_42280271/article/details/82052614
The full rotation-augmentation script (img_augmentation.py) is below.
#!/usr/bin/env python
import cv2
import math
import numpy as np
import os
import xml.etree.ElementTree as ET
import random
class ImgAugemention():
    def __init__(self):
        self.angle = 90

    # rotate the image and enlarge the canvas so nothing is cropped
    def rotate_image(self, src, angle, scale=1.):
        w = src.shape[1]
        h = src.shape[0]
        # convert angle into rad
        rangle = np.deg2rad(angle)  # angle in radians
        # calculate new image width and height
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix (rotation about the new center)
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # map
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        # calculate the width and height of the rotated image
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # rot_mat is the final rotation matrix
        # transform the midpoints of the four edges of the original bbox
        point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
        point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
        point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
        point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
        # stack the four points and take their axis-aligned bounding rect
        concat = np.vstack((point1, point2, point3, point4))
        concat = concat.astype(np.int32)
        # print(concat)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        return rx, ry, rw, rh

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path, angle_list, ratio):
        # random.seed(1)
        imgs_list = os.listdir(imgs_path)
        random.shuffle(imgs_list)
        # only augment a random subset of the images
        imgs_list_aug = imgs_list[:int(len(imgs_list)*ratio)]
        print(len(imgs_list_aug))
        id = 0
        id_angle = 0
        # assign the rotation angles round-robin
        for img_name in imgs_list_aug:
            angle = angle_list[id_angle]
            if id_angle == len(angle_list)-1:
                id_angle = 0
            else:
                id_angle += 1
            # split filename and suffix
            n, s = os.path.splitext(img_name)
            # only process '.jpg' images
            if s == ".jpg":
                img_path = os.path.join(imgs_path, img_name)
                img = cv2.imread(img_path)
                rotated_img = self.rotate_image(img, angle)
                # print("log: [%sd] %s is processed." % (angle, img))
                xml_url = n + '.xml'
                xml_path = os.path.join(xmls_path, xml_url)
                # skip images that have no matching xml annotation
                if not os.path.exists(xml_path):
                    continue
                print('{} / {}'.format(id, len(imgs_list_aug)))
                id += 1
                # write the rotated image
                cv2.imwrite(img_save_path + n + "_" + str(angle) + "d.jpg", rotated_img)
                tree = ET.parse(xml_path)
                root = tree.getroot()
                for box in root.iter('bndbox'):
                    xmin = float(box.find('xmin').text)
                    ymin = float(box.find('ymin').text)
                    xmax = float(box.find('xmax').text)
                    ymax = float(box.find('ymax').text)
                    x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
                    # update the bbox coordinates
                    box.find('xmin').text = str(x)
                    box.find('ymin').text = str(y)
                    box.find('xmax').text = str(x + w)
                    box.find('ymax').text = str(y + h)
                    box.set('updated', 'yes')
                    # debug visualization: draw the rotated boxes and dump them
                    if 1:
                        cv2.rectangle(rotated_img, (int(x), int(y)), (int(x+w), int(y+h)), (0, 255, 0), 2)
                        cv2.imwrite('./tmp_rotate/' + img_name, rotated_img)
                # the rotation changes the image size, so update <size> as well
                root.find('size').find('width').text = str(rotated_img.shape[1])
                root.find('size').find('height').text = str(rotated_img.shape[0])
                root.find('size').set('updated', 'yes')
                # write the new xml
                tree.write(xml_save_path + n + "_" + str(angle) + "d.xml")
                # print("[%s] %s is processed." % (angle, img_name))


if __name__ == '__main__':
    img_aug = ImgAugemention()
    imgs_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/images/JPEGImages/'
    xmls_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/Annotations/'
    img_save_path = './rotate/'
    xml_save_path = './xml_rot/'
    ratio = 0.3
    os.system('rm -r ' + img_save_path)
    os.system('rm -r ' + xml_save_path)
    os.makedirs(img_save_path)
    os.makedirs(xml_save_path)
    # the debug images drawn in process_img are written here
    os.makedirs('./tmp_rotate/', exist_ok=True)
    angle_list = [60, 90, 120, 150, 210, 240, 300]
    img_aug.process_img(imgs_path, xmls_path, img_save_path, xml_save_path, angle_list, ratio)