For the earlier part (training TensorFlow on your own dataset, including the file modifications and so on), see
https://blog.csdn.net/int93/article/details/79064428
Project structure
my_train
— dataset
— models
— object_detection
— create_pascal_tf_record.py
— eval.py
— export_inference_graph.py
— train.py
— img_augmentation.py
— pascal_label_map.pbtxt
— ssd_mobilenet_v2_coco.config
Create the TF Record files

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=val \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/val.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt

python3 create_pascal_tf_record.py \
    --data_dir=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset \
    --year=my_VOC \
    --set=train \
    --output_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/train.record \
    --label_map_path=/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/pascal_label_map.pbtxt
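The label map referenced by --label_map_path uses the Object Detection API's pbtxt format: one item per class, with ids starting at 1 (0 is reserved for the background). A minimal example with a hypothetical class name:

item {
  id: 1
  name: 'my_object'
}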
Training
nohup python3 train.py --logtostderr &
Export the .pb model file

python export_inference_graph.py \
    --input_type image_tensor \
    --pipeline_config_path ssd_mobilenet_v2_coco.config \
    --trained_checkpoint_prefix train/model.ckpt-200000 \
    --output_directory train/
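The export step writes frozen_inference_graph.pb into --output_directory. Below is a minimal TF1-style inference sketch, assuming the standard tensor names the Object Detection API exports for --input_type image_tensor; the test image path is hypothetical.

import cv2
import numpy as np
import tensorflow as tf

# load the frozen graph produced by export_inference_graph.py
graph_def = tf.GraphDef()
with tf.gfile.GFile('train/frozen_inference_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')

with tf.Session(graph=graph) as sess:
    # the model expects a uint8 RGB batch of shape [1, H, W, 3]
    img = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
    boxes, scores, classes, num = sess.run(
        ['detection_boxes:0', 'detection_scores:0',
         'detection_classes:0', 'num_detections:0'],
        feed_dict={'image_tensor:0': np.expand_dims(img, 0)})
    print(boxes[0, :3], scores[0, :3], classes[0, :3])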
The principle behind rotating an image is explained at
https://www.oschina.net/translate/opencv-rotation?print
There are two approaches (a small sketch comparing them follows after both are described).
Approach 1
- First rotate the image about its own center, which gives the rotation matrix.
- Then translate to the center of the new image size (the one in which nothing gets cut off).
- (Rp + t) + m, so m = ((new_w - old_w)/2, (new_h - old_h)/2)
Code:
https://www.cnblogs.com/darkknightzh/p/5070576.html
Approach 2
- Take the center of the new image size as the rotation axis, which gives the rotation matrix.
- Then compute the translation.
- R(p + m) + t = Rp + Rm + t, so the offset to apply is Rm; m is the same as in Approach 1, but it has to be multiplied by R.
Code:
https://www.oschina.net/translate/opencv-rotation?print
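A minimal sketch (hypothetical image size and angle) showing that the two constructions yield exactly the same affine matrix:

import cv2
import numpy as np

w, h, angle = 640, 480, 30                     # hypothetical image size and rotation angle
r = np.deg2rad(angle)
nw = abs(np.sin(r)) * h + abs(np.cos(r)) * w   # enlarged canvas width
nh = abs(np.cos(r)) * h + abs(np.sin(r)) * w   # enlarged canvas height
m = np.array([(nw - w) / 2, (nh - h) / 2])     # shift from the old center to the new center

# Approach 1: rotate about the old center, then add m to the translation column
m1 = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
m1[:, 2] += m

# Approach 2: rotate about the new center, then add R @ m to the translation column
m2 = cv2.getRotationMatrix2D((nw / 2, nh / 2), angle, 1.0)
m2[:, 2] += m2[:, :2] @ m

print(np.allclose(m1, m2))  # True: both produce the same warpAffine matrix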
Note
Because this rotation changes the image size, do not forget to update the image size stored in the xml file as well: the bbox regression uses it, so it affects the loss.
A detailed explanation is at
https://blog.csdn.net/weixin_42280271/article/details/82052614
The full rotation-augmentation script (img_augmentation.py) is below.
#!/usr/bin/env python
import cv2
import math
import numpy as np
import os
import xml.etree.ElementTree as ET
import random
class ImgAugemention():
    def __init__(self):
        self.angle = 90

    # rotate the image and enlarge the canvas so nothing is cropped
    def rotate_image(self, src, angle, scale=1.):
        w = src.shape[1]
        h = src.shape[0]
        # convert angle into rad
        rangle = np.deg2rad(angle)  # angle in radians
        # calculate new image width and height
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix (rotation about the new center)
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # map
        return cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)

    def rotate_xml(self, src, xmin, ymin, xmax, ymax, angle, scale=1.):
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        # calculate the width and height of the rotated image
        nw = (abs(np.sin(rangle)*h) + abs(np.cos(rangle)*w))*scale
        nh = (abs(np.cos(rangle)*h) + abs(np.sin(rangle)*w))*scale
        # ask OpenCV for the rotation matrix
        rot_mat = cv2.getRotationMatrix2D((nw*0.5, nh*0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(rot_mat, np.array([(nw-w)*0.5, (nh-h)*0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # rot_mat is the final rotation matrix
        # transform the midpoints of the four edges of the original bbox
        point1 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymin, 1]))
        point2 = np.dot(rot_mat, np.array([xmax, (ymin+ymax)/2, 1]))
        point3 = np.dot(rot_mat, np.array([(xmin+xmax)/2, ymax, 1]))
        point4 = np.dot(rot_mat, np.array([xmin, (ymin+ymax)/2, 1]))
        # stack the four points and take their axis-aligned bounding rect
        concat = np.vstack((point1, point2, point3, point4))
        concat = concat.astype(np.int32)
        # print(concat)
        rx, ry, rw, rh = cv2.boundingRect(concat)
        return rx, ry, rw, rh

    def process_img(self, imgs_path, xmls_path, img_save_path, xml_save_path, angle_list, ratio):
        # random.seed(1)
        imgs_list = os.listdir(imgs_path)
        random.shuffle(imgs_list)
        # only augment a random subset of the images
        imgs_list_aug = imgs_list[:int(len(imgs_list)*ratio)]
        print(len(imgs_list_aug))
        id = 0
        id_angle = 0
        # assign the rotation angles round-robin
        for img_name in imgs_list_aug:
            angle = angle_list[id_angle]
            if id_angle == len(angle_list)-1:
                id_angle = 0
            else:
                id_angle += 1
            # split filename and suffix
            n, s = os.path.splitext(img_name)
            # only process '.jpg' images
            if s == ".jpg":
                img_path = os.path.join(imgs_path, img_name)
                img = cv2.imread(img_path)
                rotated_img = self.rotate_image(img, angle)
                # print("log: [%sd] %s is processed." % (angle, img))
                xml_url = n + '.xml'
                xml_path = os.path.join(xmls_path, xml_url)
                # skip images that have no matching xml annotation
                if not os.path.exists(xml_path):
                    continue
                print('{} / {}'.format(id, len(imgs_list_aug)))
                id += 1
                # write the rotated image
                cv2.imwrite(img_save_path + n + "_" + str(angle) + "d.jpg", rotated_img)
                tree = ET.parse(xml_path)
                root = tree.getroot()
                for box in root.iter('bndbox'):
                    xmin = float(box.find('xmin').text)
                    ymin = float(box.find('ymin').text)
                    xmax = float(box.find('xmax').text)
                    ymax = float(box.find('ymax').text)
                    x, y, w, h = self.rotate_xml(img, xmin, ymin, xmax, ymax, angle)
                    # update the bbox coordinates
                    box.find('xmin').text = str(x)
                    box.find('ymin').text = str(y)
                    box.find('xmax').text = str(x + w)
                    box.find('ymax').text = str(y + h)
                    box.set('updated', 'yes')
                    # debug visualization: draw the rotated boxes and dump them
                    if 1:
                        cv2.rectangle(rotated_img, (int(x), int(y)), (int(x+w), int(y+h)), (0, 255, 0), 2)
                        cv2.imwrite('./tmp_rotate/' + img_name, rotated_img)
                # the rotation changes the image size, so update <size> as well
                root.find('size').find('width').text = str(rotated_img.shape[1])
                root.find('size').find('height').text = str(rotated_img.shape[0])
                root.find('size').set('updated', 'yes')
                # write the new xml
                tree.write(xml_save_path + n + "_" + str(angle) + "d.xml")
                # print("[%s] %s is processed." % (angle, img_name))


if __name__ == '__main__':
    img_aug = ImgAugemention()
    imgs_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/images/JPEGImages/'
    xmls_path = '/Users/anida.qin/Desktop/Projects/Work_02/obj_train/my_train/dataset/my_VOC/Annotations/'
    img_save_path = './rotate/'
    xml_save_path = './xml_rot/'
    ratio = 0.3
    os.system('rm -r ' + img_save_path)
    os.system('rm -r ' + xml_save_path)
    os.makedirs(img_save_path)
    os.makedirs(xml_save_path)
    # the debug images drawn in process_img are written here
    os.makedirs('./tmp_rotate/', exist_ok=True)
    angle_list = [60, 90, 120, 150, 210, 240, 300]
    img_aug.process_img(imgs_path, xmls_path, img_save_path, xml_save_path, angle_list, ratio)