Tensorflow object detection api 使用和数据增强旋转

  • Post author:
  • Post category:其他


前面的部分参考TensorFlow 训练自己的数据集(包括修改文件等等)


https://blog.csdn.net/int93/article/details/79064428



项目结构

my_train

— dataset

— models

— object_detection

— create_pascal_tf_record.py



— eval.py


— export_inference_graph.py



— train.py


— img_augmentation.py

— pascal_label_map.pbtxt

— ssd_mobilenet_v2_coco.config


创建tf record文件

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=val \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/val.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=train \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/train.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt


训练

nohup python3 train.py --logtostderr &


输出.pb模型文件

python export_inference_graph.py \
    --input_type image_tensor \
    --pipeline_config_path ssd_mobilenet_v2_coco.config \
    --trained_checkpoint_prefix train/model.ckpt-200000 \
    --output_directory train/

图片旋转原理见

https://www.oschina.net/translate/opencv-rotation?print


有两种思路



思路1


思路2



注意



因为旋转会改变图片的尺寸,所以千万别忘记同步修改 xml 文件里记录的图片大小(size 节点中的宽和高)。bbox regression 需要用到图片尺寸,尺寸不正确会影响 loss。

具体解释见

https://blog.csdn.net/weixin_42280271/article/details/82052614

#!/usr/bin/env python

import cv2
import math
import numpy as np
import os
import xml.etree.ElementTree as ET
import random

class ImgAugemention():
    """Rotate images and their Pascal VOC style XML annotations.

    ``rotate_image`` rotates a picture onto a canvas large enough to hold
    the whole rotated content, ``rotate_xml`` maps one bounding box through
    the same transform, and ``process_img`` applies both to a random sample
    of a directory and writes the rotated images and annotations to disk.
    """

    def __init__(self):
        # Default angle in degrees; no method of this class reads it.
        self.angle = 90

    @staticmethod
    def _rotation_matrix(w, h, angle, scale=1.):
        """Build the affine matrix shared by image and box rotation.

        Args:
            w: Width of the source image in pixels.
            h: Height of the source image in pixels.
            angle: Rotation angle in degrees, as taken by
                ``cv2.getRotationMatrix2D``.
            scale: Isotropic scale factor.

        Returns:
            Tuple ``(rot_mat, nw, nh)``: the 2x3 affine matrix and the
            (float) width and height of the enlarged output canvas.
        """
        rangle = np.deg2rad(angle)  # angle in radians
        # Size of the canvas that holds the whole rotated image.
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # Rotate about the centre of the new canvas.
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # Move from the old centre to the new centre, combined with the
        # rotation; it only affects the translation column of the matrix.
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        return rot_mat, nw, nh

    @staticmethod
    def _rotate_box(rot_mat, xmin, ymin, xmax, ymax):
        """Map a box through ``rot_mat`` and return ``(x, y, w, h)``.

        The midpoints of the four box edges (not the corners) are
        transformed, truncated to int32, and enclosed with
        ``cv2.boundingRect``.
        """
        cx = (xmin+xmax)/2
        cy = (ymin+ymax)/2
        edge_midpoints = (
            [cx, ymin, 1],   # top edge
            [xmax, cy, 1],   # right edge
            [cx, ymax, 1],   # bottom edge
            [xmin, cy, 1],   # left edge
        )
        moved = np.vstack(
            [np.dot(rot_mat, np.array(point)) for point in edge_midpoints])
        return cv2.boundingRect(moved.astype(np.int32))

    # rotate_img
    def rotate_image(self, src, angle, scale=1.):
        """Return ``src`` rotated by ``angle`` degrees without cropping.

        Args:
            src: Image array; ``shape[0]`` is read as height and
                ``shape[1]`` as width.
            angle: Rotation angle in degrees.
            scale: Isotropic scale factor.

        Returns:
            The warped image on a canvas of ``ceil(nw) x ceil(nh)`` pixels,
            interpolated with ``cv2.INTER_LANCZOS4``.
        """
        rot_mat, nw, nh = self._rotation_matrix(
            src.shape[1], src.shape[0], angle, scale)
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        """Rotate one bounding box with the transform of ``rotate_image``.

        Args:
            src: The *un-rotated* image the box belongs to (only its shape
                is read).
            xmin, ymin, xmax, ymax: Box corners in source-image pixels.
            angle: Rotation angle in degrees.
            scale: Isotropic scale factor.

        Returns:
            ``(rx, ry, rw, rh)``: top-left corner, width and height of the
            box in the rotated image, as returned by ``cv2.boundingRect``.
        """
        rot_mat, _, _ = self._rotation_matrix(
            src.shape[1], src.shape[0], angle, scale)
        return self._rotate_box(rot_mat, xmin, ymin, xmax, ymax)

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path,
                    angle_list, ratio, debug_dir='./tmp_rotate/'):
        """Rotate a random sample of annotated ``.jpg`` images.

        A shuffled ``ratio`` fraction of the entries of ``imgs_path`` is
        selected and angles from ``angle_list`` are assigned round-robin
        over that selection. For every selected ``.jpg`` that has a
        matching ``<stem>.xml`` in ``xmls_path``, the rotated image is
        written to ``img_save_path`` and the annotation, with updated
        ``bndbox`` coordinates and image size, to ``xml_save_path``. Both
        files are named ``<stem>_<angle>d``.

        Args:
            imgs_path: Directory containing the source images.
            xmls_path: Directory containing the source annotations.
            img_save_path: Existing directory for the rotated images.
            xml_save_path: Existing directory for the rotated annotations.
            angle_list: Non-empty sequence of angles in degrees.
            ratio: Fraction (0..1) of directory entries to sample.
            debug_dir: Directory receiving a copy of each rotated image
                with the new boxes drawn in green; created on demand.
                Pass ``None`` (or an empty string) to disable.

        Raises:
            IndexError: If images were selected but ``angle_list`` is empty.
        """
        # random.seed(1)
        imgs_list = os.listdir(imgs_path)
        random.shuffle(imgs_list)
        imgs_list_aug = imgs_list[:int(len(imgs_list)*ratio)]
        print(len(imgs_list_aug))
        if imgs_list_aug and len(angle_list) == 0:
            raise IndexError('angle_list must contain at least one angle')

        processed = 0
        for index, img_name in enumerate(imgs_list_aug):
            # Angles advance for every sampled entry, including the ones
            # skipped below, so the assignment is a plain round-robin.
            angle = angle_list[index % len(angle_list)]

            # Only '.jpg' files are processed (restriction kept from the
            # original script).
            stem, suffix = os.path.splitext(img_name)
            if suffix != ".jpg":
                continue

            # Look for the annotation *before* decoding the image, and
            # derive its name from the same stem as the output files so
            # names containing extra dots resolve correctly.
            xml_path = os.path.join(xmls_path, stem + '.xml')
            if not os.path.exists(xml_path):
                continue

            img = cv2.imread(os.path.join(imgs_path, img_name))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print('skip unreadable image: {}'.format(img_name))
                continue
            rotated_img = self.rotate_image(img, angle)

            print('{} / {}'.format(processed, len(imgs_list_aug)))
            processed += 1

            out_stem = stem + "_" + str(angle) + "d"
            # Write the rotated image.
            cv2.imwrite(os.path.join(img_save_path, out_stem + ".jpg"),
                        rotated_img)

            # The transform is the same for every box of this image, so
            # build it once instead of once per box.
            rot_mat, _, _ = self._rotation_matrix(
                img.shape[1], img.shape[0], angle)
            # Boxes are drawn on a copy so rotated_img stays clean.
            debug_img = rotated_img.copy() if debug_dir else None
            boxes_drawn = False

            tree = ET.parse(xml_path)
            root = tree.getroot()
            for box in root.iter('bndbox'):
                xmin = float(box.find('xmin').text)
                ymin = float(box.find('ymin').text)
                xmax = float(box.find('xmax').text)
                ymax = float(box.find('ymax').text)
                x, y, w, h = self._rotate_box(rot_mat, xmin, ymin, xmax, ymax)
                # change the coord
                box.find('xmin').text = str(x)
                box.find('ymin').text = str(y)
                box.find('xmax').text = str(x + w)
                box.find('ymax').text = str(y + h)
                box.set('updated', 'yes')

                if debug_img is not None:
                    cv2.rectangle(debug_img, (int(x), int(y)),
                                  (int(x+w), int(y+h)), (0, 255, 0), 2)
                    boxes_drawn = True

            if boxes_drawn:
                # Write the preview once per image, and make sure the
                # target directory exists so the write is not lost.
                if not os.path.isdir(debug_dir):
                    os.makedirs(debug_dir)
                cv2.imwrite(os.path.join(debug_dir, img_name), debug_img)

            # Rotation changes the canvas size, so the recorded image size
            # must follow. Prefer the named tags; fall back to the first
            # two children of <size> as the original code did.
            size = root.find('size')
            if size is not None:
                width_el = size.find('width')
                height_el = size.find('height')
                if width_el is None or height_el is None:
                    width_el, height_el = size[0], size[1]
                width_el.text = str(rotated_img.shape[1])
                height_el.text = str(rotated_img.shape[0])
                size.set('updated', 'yes')

            # write into new xml
            tree.write(os.path.join(xml_save_path, out_stem + ".xml"))

if __name__ == '__main__':
    # Script entry point: rotate 30% of the annotated VOC images by angles
    # taken round-robin from angle_list and store the results in fresh
    # output directories under the current working directory.
    import shutil

    img_aug = ImgAugemention()
    imgs_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/images/JPEGImages/'
    xmls_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/Annotations/'
    img_save_path = './rotate/'
    xml_save_path = './xml_rot/'
    ratio = 0.3

    # Start from empty output directories. shutil.rmtree replaces the
    # shell call `rm -r`: it is portable, builds no shell command string,
    # and ignore_errors=True keeps a missing directory (first run) silent.
    for out_dir in (img_save_path, xml_save_path):
        shutil.rmtree(out_dir, ignore_errors=True)
        os.makedirs(out_dir)

    angle_list = [60, 90, 120, 150, 210, 240, 300]
    img_aug.process_img(imgs_path, xmls_path, img_save_path, xml_save_path, angle_list, ratio)




版权声明:本文为qq_20095389原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。