因为模型需要VOC训练集,而数据集只有图片和已制作好的xml文件,那么只能自己进行VOC数据集的再加工,好,开工!
文章目录:
- 构架VOC数据集文件夹
- 利用程序生成Main下的四个txt文件
- 更改xml中的原来文件属性

构架VOC数据集文件夹
文件夹目录如下所示:
---VOC2007
------Annotations
---------n个xml文件
------ImageSets
---------Action
---------Layout
---------Segmentation
---------Main
------------trainval.txt
------------train.txt
------------test.txt
------------val.txt
------JPEGImages
---------n个img文件
利用程序生成Main下的四个txt文件
generate_train_val_test_txt.py
此处仅展示部分代码,已上传完整代码在资源中下载。
import os
import random
import xml.dom.minidom
VOC_CLASSES = ['green', 'red']
def _read_object_names(xml_path):
    """Return the set of class names annotated in one VOC xml file.

    Only ``<name>`` elements that are direct children of an ``<object>``
    element are considered, so unrelated ``<name>`` tags elsewhere in the
    document (for example under ``<owner>``) are not mistaken for classes.
    """
    root = xml.dom.minidom.parse(xml_path).documentElement
    names = set()
    for obj in root.getElementsByTagName('object'):
        for child in obj.childNodes:
            if child.nodeName != 'name':
                continue
            text = ''.join(
                node.data for node in child.childNodes
                if node.nodeType == node.TEXT_NODE
            )
            if text.strip():
                names.add(text.strip())
    return names


def _write_split_files(save_path, prefix, lines, trainval, train):
    """Write ``lines[i]`` to the <prefix>trainval/train/val/test.txt files.

    Index ``i`` goes to trainval (and then to train or val) when it is in
    ``trainval``; otherwise it goes to test.
    """
    def target(split):
        return os.path.join(save_path, prefix + split + '.txt')

    with open(target('trainval'), 'w') as f_trainval, \
            open(target('test'), 'w') as f_test, \
            open(target('train'), 'w') as f_train, \
            open(target('val'), 'w') as f_val:
        for i, line in enumerate(lines):
            if i in trainval:
                f_trainval.write(line)
                if i in train:
                    f_train.write(line)
                else:
                    f_val.write(line)
            else:
                f_test.write(line)


def generate_train_val_test_txt(
        xml_file_path="H:/smartcar/VOCdevkit/VOC2007/Annotations",
        save_path="H:/smartcar/VOCdevkit/VOC2007/ImageSets/Main",
        trainval_percent=0.9,
        train_percent=0.9,
        classes=None,
        seed=None):
    """Generate the VOC ``ImageSets/Main`` split files from an annotation folder.

    Writes ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt`` (one
    image id per line) and, for every class, ``<class>_trainval.txt`` etc.
    where each line is ``"<image id> 1"`` if the image contains at least one
    object of that class and ``"<image id> -1"`` otherwise.

    Args:
        xml_file_path: Directory holding the annotation ``.xml`` files.
        save_path: Output directory; created if it does not exist.
        trainval_percent: Fraction of all images put into trainval
            (the remainder becomes test).
        train_percent: Fraction of trainval put into train
            (the remainder becomes val).
        classes: Iterable of class names; defaults to the module-level
            ``VOC_CLASSES``.
        seed: Optional seed for a reproducible split. When ``None`` the
            global ``random`` module state is used.
    """
    if classes is None:
        classes = VOC_CLASSES

    # Only real annotation files take part; sorting makes a seeded split
    # independent of the directory listing order.
    xml_files = sorted(
        name for name in os.listdir(xml_file_path)
        if name.lower().endswith('.xml')
    )
    stems = [os.path.splitext(name)[0] for name in xml_files]

    num = len(xml_files)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    rng = random if seed is None else random.Random(seed)
    trainval_indices = rng.sample(range(num), tv)
    # Sets give O(1) membership tests in the write loops below.
    train = set(rng.sample(trainval_indices, tr))
    trainval = set(trainval_indices)
    print("train and val size", tv)
    print("train size", tr)

    os.makedirs(save_path, exist_ok=True)

    # test.txt / train.txt / val.txt / trainval.txt
    _write_split_files(
        save_path, '', [stem + "\n" for stem in stems], trainval, train)

    # Parse every annotation once and reuse the result for all classes.
    objects_per_file = [
        _read_object_names(os.path.join(xml_file_path, name))
        for name in xml_files
    ]

    # (class_name)_test.txt / _train.txt / _val.txt / _trainval.txt
    for class_name in classes:
        lines = [
            stem + ' ' + str(1 if class_name in names else -1) + "\n"
            for stem, names in zip(stems, objects_per_file)
        ]
        _write_split_files(
            save_path, str(class_name) + '_', lines, trainval, train)
更改xml中的原来文件属性
更改文件中的folder属性。
此处仅展示部分代码,已上传完整代码在资源中下载。
# coding=utf-8
import os
import os.path
import xml.dom.minidom
path = "H:\\smartcar\\VOCdevkit\\VOC2007\\Annotations"


def update_folder_tags(annotations_dir, folder_name='VOC2007'):
    """Set the text of every ``<folder>`` element in each annotation xml.

    Every regular ``.xml`` file directly inside ``annotations_dir`` is parsed,
    all of its ``<folder>`` elements get ``folder_name`` as their text, and
    the file is rewritten in place as UTF-8.

    Args:
        annotations_dir: Directory holding the annotation ``.xml`` files.
        folder_name: New text for the ``<folder>`` elements.
    """
    for xml_file in os.listdir(annotations_dir):
        xml_path = os.path.join(annotations_dir, xml_file)
        # Test the full path: a bare file name would be resolved against the
        # current working directory rather than annotations_dir.
        if not os.path.isfile(xml_path):
            continue
        if not xml_file.lower().endswith('.xml'):
            continue

        dom = xml.dom.minidom.parse(xml_path)
        root = dom.documentElement
        for node in root.getElementsByTagName('folder'):
            old_text = ''.join(
                child.data for child in node.childNodes
                if child.nodeType == child.TEXT_NODE
            )
            print(old_text)
            # Replace the children instead of assigning firstChild.data so
            # that an empty <folder/> element is handled as well.
            while node.firstChild is not None:
                node.removeChild(node.firstChild)
            node.appendChild(dom.createTextNode(folder_name))
            print(node.firstChild.data)

        # Write UTF-8 and declare it in the XML header so that non-ASCII
        # content survives regardless of the platform's default encoding.
        with open(xml_path, 'w', encoding='utf-8') as fh:
            dom.writexml(fh, encoding='utf-8')
        print('已写入')


if __name__ == "__main__":
    update_folder_tags(path)
Irving.Gao
原创文章 7获赞 3访问量 354
关注
私信
展开阅读全文