WIDER FACE data processing
- I. Convert the raw data format to VOC format
- II. Convert VOC format to COCO format
  - Step 1: convert to COCO format
  - Step 2: extract train and val from the shuffled XML annotation files
- III. Convert VOC format to YOLO format
  - Step 1: extract the file names from the annotation folder
  - Step 2: convert the XML files to YOLO format
- IV. Check that the boxes are correct during conversion
  - 1. Check the VOC conversion (file name: xml_draw.py)
  - 2. Check the COCO conversion (file name: json_draw.py)
I. Convert the raw data format to VOC format
- First create three folders: Annotations, ImageSets/Main and JPEGImages (a short sketch for creating them follows these notes).
- Run the code below; make sure the paths are correct and contain no Chinese characters.
Note that the script has to be run twice, once for the train split and once for the val split. File name: face2voc.py
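If the folders do not exist yet, the short sketch below creates them; it assumes the same rootdir as face2voc.py and is not part of the original script.
# Minimal sketch (assumption: rootdir matches the one used in face2voc.py below).
import os

rootdir = "/home/xx/faceDetection"
for sub in ("Annotations", "ImageSets/Main", "JPEGImages"):
    os.makedirs(os.path.join(rootdir, sub), exist_ok=True)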
# coding:utf-8
import os, cv2, sys, shutil, numpy
from xml.dom.minidom import Document

# This script converts the WIDER FACE annotations into VOC-format data.
def writexml(filename, saveimg, bboxes, xmlpath):
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)
    folder = doc.createElement('folder')
    folder_name = doc.createTextNode('widerface')
    folder.appendChild(folder_name)
    annotation.appendChild(folder)
    filenamenode = doc.createElement('filename')
    filename_name = doc.createTextNode(filename)
    filenamenode.appendChild(filename_name)
    annotation.appendChild(filenamenode)
    source = doc.createElement('source')
    annotation.appendChild(source)
    database = doc.createElement('database')
    database.appendChild(doc.createTextNode('wider face Database'))
    source.appendChild(database)
    annotation_s = doc.createElement('annotation')
    annotation_s.appendChild(doc.createTextNode('PASCAL VOC2007'))
    source.appendChild(annotation_s)
    image = doc.createElement('image')
    image.appendChild(doc.createTextNode('flickr'))
    source.appendChild(image)
    flickrid = doc.createElement('flickrid')
    flickrid.appendChild(doc.createTextNode('-1'))
    source.appendChild(flickrid)
    owner = doc.createElement('owner')
    annotation.appendChild(owner)
    flickrid_o = doc.createElement('flickrid')
    flickrid_o.appendChild(doc.createTextNode('muke'))
    owner.appendChild(flickrid_o)
    name_o = doc.createElement('name')
    name_o.appendChild(doc.createTextNode('muke'))
    owner.appendChild(name_o)
    size = doc.createElement('size')
    annotation.appendChild(size)
    width = doc.createElement('width')
    width.appendChild(doc.createTextNode(str(saveimg.shape[1])))
    height = doc.createElement('height')
    height.appendChild(doc.createTextNode(str(saveimg.shape[0])))
    depth = doc.createElement('depth')
    depth.appendChild(doc.createTextNode(str(saveimg.shape[2])))
    size.appendChild(width)
    size.appendChild(height)
    size.appendChild(depth)
    segmented = doc.createElement('segmented')
    segmented.appendChild(doc.createTextNode('0'))
    annotation.appendChild(segmented)
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        objects = doc.createElement('object')
        annotation.appendChild(objects)
        object_name = doc.createElement('name')
        object_name.appendChild(doc.createTextNode('face'))
        objects.appendChild(object_name)
        pose = doc.createElement('pose')
        pose.appendChild(doc.createTextNode('Unspecified'))
        objects.appendChild(pose)
        truncated = doc.createElement('truncated')
        truncated.appendChild(doc.createTextNode('0'))
        objects.appendChild(truncated)
        difficult = doc.createElement('difficult')
        difficult.appendChild(doc.createTextNode('0'))
        objects.appendChild(difficult)
        bndbox = doc.createElement('bndbox')
        objects.appendChild(bndbox)
        xmin = doc.createElement('xmin')
        xmin.appendChild(doc.createTextNode(str(bbox[0])))
        bndbox.appendChild(xmin)
        ymin = doc.createElement('ymin')
        ymin.appendChild(doc.createTextNode(str(bbox[1])))
        bndbox.appendChild(ymin)
        xmax = doc.createElement('xmax')
        xmax.appendChild(doc.createTextNode(str(bbox[0] + bbox[2])))
        bndbox.appendChild(xmax)
        ymax = doc.createElement('ymax')
        ymax.appendChild(doc.createTextNode(str(bbox[1] + bbox[3])))
        bndbox.appendChild(ymax)
    f = open(xmlpath, "w")
    f.write(doc.toprettyxml(indent=''))
    f.close()


rootdir = "/home/xx/faceDetection"
gtfile = "/home/xx/faceDetection/wider_face_split/wider_face_val_bbx_gt.txt"
im_folder = "/home/xx/faceDetection/WIDER_val/images"
fwrite = open("/home/xx/faceDetection/ImageSets/Main/val.txt", "w")

# Layout of wider_face_train_bbx_gt.txt:
#   line 1: image path
#   line 2: number of faces, n
#   next n lines: one face annotation per line
# Example:
# 0--Parade/0_Parade_marchingband_1_117.jpg
# 9
# 69 359 50 36 1 0 0 0 0 1
# 227 382 56 43 1 0 1 0 0 1
# 296 305 44 26 1 0 0 0 0 1
# 353 280 40 36 2 0 0 0 2 1
# 885 377 63 41 1 0 0 0 0 1
# 819 391 34 43 2 0 0 0 1 0
# 727 342 37 31 2 0 0 0 0 1
# 598 246 33 29 2 0 0 0 0 1
# 740 308 45 33 1 0 0 0 2 1
with open(gtfile, "r") as gt:
    while True:
        gt_con = gt.readline()[:-1]
        if gt_con is None or gt_con == "":
            break
        im_path = im_folder + "/" + gt_con
        print(im_path)
        im_data = cv2.imread(im_path)
        if im_data is None:
            continue
        # Visualization (optional)
        # cv2.imshow(im_path, im_data)
        # cv2.waitKey(0)
        numbox = int(gt.readline())
        # Read each face annotation line
        bboxes = []
        if numbox == 0:  # handle images with zero faces
            gt.readline()
        else:
            for i in range(numbox):
                line = gt.readline()
                infos = line.split(" ")  # split on spaces
                # x y w h ...
                bbox = (int(infos[0]), int(infos[1]), int(infos[2]), int(infos[3]))
                # Draw the face box (optional)
                # cv2.rectangle(im_data, (int(infos[0]), int(infos[1])),
                #               (int(infos[0]) + int(infos[2]), int(infos[1]) + int(infos[3])),
                #               color=(0, 0, 255), thickness=1)
                bboxes.append(bbox)  # collect every face of this image
            # cv2.imshow(im_path, im_data)
            # cv2.waitKey(0)
        filename = gt_con.replace("/", "_")  # use the relative path as the image name, replacing '/' with '_'
        fwrite.write(filename.split(".")[0] + "\n")
        cv2.imwrite("{}/JPEGImages/{}".format(rootdir, filename), im_data)
        xmlpath = "{}/Annotations/{}.xml".format(rootdir, filename.split(".")[0])
        writexml(filename, im_data, bboxes, xmlpath)
fwrite.close()
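For the second (train) run, only the input paths at the top of the script change; the variant below is a sketch that assumes the WIDER_train folder mirrors the WIDER_val layout.
# Second run (train split): swap the val paths for the train ones.
gtfile = "/home/xx/faceDetection/wider_face_split/wider_face_train_bbx_gt.txt"
im_folder = "/home/xx/faceDetection/WIDER_train/images"
fwrite = open("/home/xx/faceDetection/ImageSets/Main/train.txt", "w")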
II. Convert VOC format to COCO format
Create three folders: annotations, images and xml_annotations. The first holds the generated JSON files, the second the images, and the third the XML files after they have been split into train and val.
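For orientation, the JSON written in the next step follows the usual COCO layout with "images", "annotations" and "categories" lists; a minimal record sketched from the script below looks like this (the image size and file name are illustrative only):
# Illustrative only: the structure that I_voc2coco.py produces.
coco_example = {
    "images": [{"file_name": "0--Parade_0_Parade_marchingband_1_117.jpg",
                "height": 768, "width": 1024,
                "id": "0--Parade_0_Parade_marchingband_1_117"}],
    "type": "instances",
    "annotations": [{"area": 50 * 36, "iscrowd": 0,
                     "image_id": "0--Parade_0_Parade_marchingband_1_117",
                     "bbox": [68, 358, 50, 36],  # [xmin, ymin, width, height]
                     "category_id": 0, "id": 1, "ignore": 0,
                     "segmentation": []}],
    "categories": [{"supercategory": "none", "id": 0, "name": "face"}],
}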
Step 1: convert to COCO format. File name: I_voc2coco.py
#### customized for crack detection dataset
#### usage: python3 voc2coco.py xml_dir ./data/xml --json_file ./val.json
import sys
import os
import json
import xml.etree.ElementTree as ET
import glob

START_BOUNDING_BOX_ID = 1
PRE_DEFINE_CATEGORIES = {"face": 0}


def get(root, name):
    vars = root.findall(name)
    return vars


def get_and_check(root, name, length):
    vars = root.findall(name)
    if len(vars) == 0:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, len(vars))
        )
    if length == 1:
        vars = vars[0]
    return vars


def get_filename_as_int(filename):
    try:
        # print(filename, filename[6:])
        filename = filename.replace("\\", "/")
        filename = os.path.splitext(os.path.basename(filename))[0]
        if filename[:5] == "India":
            return int("2" + filename[6:])
        elif filename[:5] == "Japan":
            return int("3" + filename[6:])
        else:
            return int("1" + filename[6:])  # return int(filename[6:])
    except:
        raise ValueError("Filename %s is supposed to be an integer." % (filename))


def get_categories(xml_files):
    """Generate category name to id mapping from a list of xml files.

    Arguments:
        xml_files {list} -- A list of xml file paths.

    Returns:
        dict -- category name to id mapping.
    """
    acceptable_classes = ["car", "truck", "bus"]
    classes_names = []
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall("object"):
            classes_names.append(member[0].text)
    classes_names = list(set(classes_names))
    # for item in classes_names:
    #     if item not in acceptable_classes:
    #         classes_names.remove(item)
    #         print("removed {}".format(item))
    # classes_names.sort()
    # print("clsnames: {}".format(classes_names))
    return {name: i for i, name in enumerate(classes_names)}


def convert(xml_files, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        categories = PRE_DEFINE_CATEGORIES
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        path = get(root, "path")
        if len(path) == 1:
            filename = os.path.basename(path[0].text)
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))
        ## The filename must be a number
        # import pdb; pdb.set_trace()
        # image_id = get_filename_as_int(filename)
        image_id = filename[:-4]
        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)
        image = {
            "file_name": filename,
            "height": height,
            "width": width,
            "id": filename[:-4],
        }
        json_dict["images"].append(image)
        ## Currently we do not support segmentation.
        # segmented = get_and_check(root, 'segmented', 1).text
        # assert segmented == '0'
        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                # Skip categories that are not predefined (the stock voc2coco
                # script would instead register them with a new id):
                # new_id = len(categories)
                # categories[category] = new_id
                continue
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
            ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1
            xmax = int(get_and_check(bndbox, "xmax", 1).text)
            ymax = int(get_and_check(bndbox, "ymax", 1).text)
            assert xmax > xmin
            assert ymax > ymin
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": [],
            }
            json_dict["annotations"].append(ann)
            bnd_id = bnd_id + 1
    for cate, cid in categories.items():
        cat = {"supercategory": "none", "id": cid, "name": cate}
        json_dict["categories"].append(cat)
    os.makedirs(os.path.dirname(json_file), exist_ok=True)
    json_fp = open(json_file, "w")
    json_str = json.dumps(json_dict, indent=4)
    json_fp.write(json_str)
    json_fp.close()


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert Pascal VOC annotation to COCO format."
    )
    # parser.add_argument("xml_dir", help="Directory path to xml files.", type=str)
    # parser.add_argument("json_file", help="Output COCO format json file.", type=str)
    # args = parser.parse_args()
    xml_path = '/home/xx/faceDetection/xml_annotations/train'  # directory that holds the xml files
    json_file = '/home/xx/faceDetection/annotations/train.json'  # the json file to generate
    xml_files = glob.glob(os.path.join(xml_path, "*.xml"))
    # If you want to do a train/test split, pass a subset of xml files to the convert function.
    print("Number of xml files: {}".format(len(xml_files)))
    convert(xml_files, json_file)
    print("Success: {}".format(json_file))
Step 2: extract train and val from the originally shuffled XML annotation files. File name: II_voc2coco.py
# coding:utf-8
import os
import shutil
from tqdm import tqdm

# Pick the training and validation sets according to train.txt and val.txt under ImageSets/Main.
SPLIT_PATH = "/home/xx/faceDetection/data/wider_voc/ImageSets/Main"
IMGS_PATH = "/home/xx/faceDetection/data/wider_voc/JPEGImages"
TXTS_PATH = "/home/xx/faceDetection/data/wider_voc/Annotations"
TO_IMGS_PATH = '/home/xx/faceDetection/data/wider_coco/images'
TO_TXTS_PATH = '/home/xx/faceDetection/data/wider_coco/xml_annotations'

data_split = ['train.txt', 'val.txt']
to_split = ['train', 'val']

train_file = '/home/xx/faceDetection/data/wider_yolo/images_train.txt'
val_file = '/home/xx/faceDetection/data/wider_yolo/images_val.txt'
train_file_txt = ''
val_file_txt = ''

for index, split in enumerate(data_split):
    split_path = os.path.join(SPLIT_PATH, split)
    # import pdb; pdb.set_trace()
    to_imgs_path = os.path.join(TO_IMGS_PATH, to_split[index])
    if not os.path.exists(to_imgs_path):
        os.makedirs(to_imgs_path)
    to_txts_path = os.path.join(TO_TXTS_PATH, to_split[index])
    if not os.path.exists(to_txts_path):
        os.makedirs(to_txts_path)
    f = open(split_path, 'r')
    count = 1
    for line in tqdm(f.readlines(), desc="{} is copying".format(to_split[index])):
        # Copy the image
        src_img_path = os.path.join(IMGS_PATH, line.strip() + '.jpg')
        # import pdb; pdb.set_trace()
        dst_img_path = os.path.join(to_imgs_path, line.strip() + '.jpg')
        if os.path.exists(src_img_path):
            shutil.copyfile(src_img_path, dst_img_path)
        else:
            print("error file: {}".format(src_img_path))
        if to_split[index] == 'train':
            train_file_txt = train_file_txt + dst_img_path + '\n'
        elif to_split[index] == 'val':
            val_file_txt = val_file_txt + dst_img_path + '\n'
        # Copy the xml annotation file
        src_txt_path = os.path.join(TXTS_PATH, line.strip() + '.xml')
        dst_txt_path = os.path.join(to_txts_path, line.strip() + '.xml')
        if os.path.exists(src_txt_path):
            shutil.copyfile(src_txt_path, dst_txt_path)
        else:
            print("error file: {}".format(src_txt_path))

with open(train_file, 'w') as out_train:
    out_train.write(train_file_txt)
with open(val_file, 'w') as out_val:
    out_val.write(val_file_txt)
III. Convert VOC format to YOLO format
First create the folders images and labels, which hold the images and the generated txt annotation files respectively, and create train and val subfolders inside each of them (a short sketch for creating this layout follows these notes).
Then create the files images_train.txt and images_val.txt; the code later in this section writes the image paths into them.
As before, both images and labels are split into train and val.
Note that each of the scripts below has to be run twice, once for train and once for val.
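A minimal sketch for creating this layout (the root path is assumed to match the wider_yolo paths used later; note that voc_label.py as written below targets labels_temp/ and images_temp/ subfolders, so adjust the names to your actual layout):
# Minimal sketch (assumption: the wider_yolo root matches the paths used later).
import os

yolo_root = "/data/ljj_data/faceDetection/data/wider_yolo"
for sub in ("images/train", "images/val", "labels/train", "labels/val"):
    os.makedirs(os.path.join(yolo_root, sub), exist_ok=True)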
Step 1: extract the file names from the annotation folder. File name: extrace.py
# P02: batch-read file names (without extension)
import os

file_path = "/data/xxx/faceDetection/data/wider_coco/xml_annotations/train/"
path_list = os.listdir(file_path)  # os.listdir() walks the folder and returns a list of file names
# print(path_list)
path_name = []


# Write the file name list (without extensions) into a txt file
def saveList(pathName):
    for file_name in pathName:
        with open("/data/xxx/faceDetection/data/wider_coco/name_vtrain.txt", "a") as f:
            f.write(file_name.split(".")[0] + "\n")


def dirList(path_list):
    for i in range(0, len(path_list)):
        path = os.path.join(file_path, path_list[i])
        if os.path.isdir(path):
            saveList(os.listdir(path))


dirList(path_list)
saveList(path_list)
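For reference only, the same name list can be produced in a few lines with glob; this is an optional alternative rather than part of the original pipeline, reusing the paths above.
# Optional alternative to extrace.py: collect the extension-free names with glob.
import glob
import os

xml_dir = "/data/xxx/faceDetection/data/wider_coco/xml_annotations/train/"
names = [os.path.splitext(os.path.basename(p))[0] for p in glob.glob(os.path.join(xml_dir, "*.xml"))]
with open("/data/xxx/faceDetection/data/wider_coco/name_vtrain.txt", "w") as f:
    f.write("\n".join(names) + "\n")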
Step 2: convert the XML files to YOLO format. File name: voc_label.py
# Convert the bounding-box xml annotations to YOLO txt files
import xml.etree.ElementTree as ET
import os

classes = ['face']  # class names; must match the names used in the xml annotations

train_file = '/data/ljj_data/faceDetection/data/wider_yolo/images_train.txt'
train_file_txt = ''

wd = os.getcwd()


def convert(size, box):
    # size = (width, height), box = (xmin, xmax, ymin, ymax)
    dw = 1. / size[0]
    dh = 1. / size[1]
    box = list(box)
    box[1] = min(box[1], size[0])  # clamp the box to the image boundaries
    box[3] = min(box[3], size[1])
    x = ((box[0] + box[1]) / 2.0) * dw
    y = ((box[2] + box[3]) / 2.0) * dh
    w = (box[1] - box[0]) * dw
    h = (box[3] - box[2]) * dh
    return (x, y, w, h)


def convert_annotation(image_id):
    in_file = open('/data/ljj_data/faceDetection/data/wider_coco/xml_annotations/train/%s.xml' % (image_id))  # path of the xml file to read
    out_file = open('/data/ljj_data/faceDetection/data/wider_yolo/labels_temp/train/%s.txt' % (image_id), 'w')  # path of the txt file to write
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:  # skip names that are not in the class list
            continue
        cls_id = classes.index(cls)
        # if cls_id == 0 or cls_id == 11:
        #     continue
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    out_file.close()


# Read the list of xml file names
image_ids_train = open('/data/ljj_data/faceDetection/data/wider_coco/name_train.txt').read().strip().split()
for image_id in image_ids_train:
    convert_annotation(image_id)

anns = os.listdir('/data/ljj_data/faceDetection/data/wider_coco/xml_annotations/train/')
for ann in anns:
    ans = ''
    outpath = '/data/ljj_data/faceDetection/data/wider_yolo/labels_temp/train/' + ann
    if ann[-3:] != 'xml':
        continue
    train_file_txt = train_file_txt + '/data/ljj_data/faceDetection/data/wider_yolo/images_temp/train/' + ann[:-3] + 'jpg\n'
# import pdb; pdb.set_trace()

with open(train_file, 'w') as outfile:
    outfile.write(train_file_txt)
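As a quick check of the normalization done by convert(), the worked example below maps one VOC box to a YOLO label line; the image size and box are illustrative values taken from the earlier annotation example.
# Illustrative check of the YOLO label format (class x_center y_center width height, all normalized).
size = (1024, 768)                    # (image width, image height), illustrative
box = (69.0, 119.0, 359.0, 395.0)     # (xmin, xmax, ymin, ymax), i.e. x=69, y=359, w=50, h=36
dw, dh = 1.0 / size[0], 1.0 / size[1]
x = (box[0] + box[1]) / 2.0 * dw      # normalized x-center
y = (box[2] + box[3]) / 2.0 * dh      # normalized y-center
w = (box[1] - box[0]) * dw            # normalized width
h = (box[3] - box[2]) * dh            # normalized height
print("0", x, y, w, h)                # "0" is the class id of 'face'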
IV. Check that the boxes are correct during conversion
1. Check the VOC conversion (file name: xml_draw.py)
import os
import os.path
import xml.etree.cElementTree as ET
import cv2


def draw(image_path, xml_path, root_saved_path):
    """
    Draw the annotated boxes on the images.
    """
    src_img_path = image_path
    src_ann_path = xml_path
    for file in os.listdir(src_ann_path):
        # print(file)
        file_name, suffix = os.path.splitext(file)
        # import pdb; pdb.set_trace()
        if suffix == '.xml':
            # print(file)
            xml_path = os.path.join(src_ann_path, file)
            image_path = os.path.join(src_img_path, file_name + '.jpg')
            img = cv2.imread(image_path)
            tree = ET.parse(xml_path)
            root = tree.getroot()
            # import pdb; pdb.set_trace()
            for obj in root.iter('object'):
                name = obj.find('name').text
                xml_box = obj.find('bndbox')
                x1 = int(xml_box.find('xmin').text)
                x2 = int(xml_box.find('xmax').text)
                y1 = int(xml_box.find('ymin').text)
                y2 = int(xml_box.find('ymax').text)
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), thickness=2)
                # Draw the label text in green
                # cv2.putText(img, name, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 255, 0), thickness=2)
            cv2.imwrite(os.path.join(root_saved_path, file_name + '.jpg'), img)


if __name__ == '__main__':
    image_path = "/home/xx/faceDetection/data/wider_coco/images/val"
    xml_path = "/home/xx/faceDetection/data/wider_coco/xml_annotations/val"
    root_saved_path = "/home/xx/faceDetection/data/xml_output"
    draw(image_path, xml_path, root_saved_path)
2. Check the COCO conversion (file name: json_draw.py)
import cv2
# import pandas as pd
import json
import os


# ground truth
def select(json_path, outpath, image_path):
    json_file = open(json_path)
    infos = json.load(json_file)
    images = infos["images"]
    annos = infos["annotations"]
    assert len(images) == len(images)
    # import pdb; pdb.set_trace()
    for i in range(len(images)):
        im_id = images[i]["id"]
        im_path = image_path + images[i]["file_name"]
        img = cv2.imread(im_path)
        for j in range(len(annos)):
            if annos[j]["image_id"] == im_id:
                x, y, w, h = annos[j]["bbox"]
                x, y, w, h = int(x), int(y), int(w), int(h)
                x2, y2 = x + w, y + h
                # object_name = annos[j][""]
                img = cv2.rectangle(img, (x, y), (x2, y2), (0, 255, 0), thickness=1)
        img_name = outpath + images[i]["file_name"]
        # import pdb; pdb.set_trace()
        cv2.imwrite(img_name, img)
        # continue
        # print(i)
    print("Done!")


# predict
# def select(json_path, outpath, image_path):
#     json_file = open(json_path)
#     infos = json.load(json_file)
#     for i in range(len(infos)):
#         im_id = infos[i]["image_id"]
#         im_path = image_path + str(infos[i]["image_id"]) + '.jpg'
#         # import pdb; pdb.set_trace()
#         img_name = outpath + str(infos[i]["image_id"]) + '.jpg'
#         score = str(infos[i]["score"])
#         if not os.path.exists(img_name):
#             img = cv2.imread(im_path)
#         else:
#             img = cv2.imread(img_name)
#         # if float(score) < 0.5:
#         #     continue
#         # else:
#         x, y, w, h = infos[i]["bbox"]
#         x, y, w, h = int(x), int(y), int(w), int(h)
#         x2, y2 = x + w, y + h
#         c_x, c_y = int((x + x2) / 2), int((y + y2) / 2)
#         cla = str(infos[i]["category_id"])
#         # import pdb; pdb.set_trace()
#         # img = cv2.rectangle(img, (x, y), (x2, y2), (0, 255, 255), thickness=2)
#         if float(score) <= 0.3:
#             cv2.circle(img, (c_x, c_y), 5, (0, 0, int(255 * float(score))), -1)  # red
#             continue
#         elif float(score) > 0.3 and float(score) <= 0.6:
#             cv2.circle(img, (c_x, c_y), 5, (int(255 * float(score)), 255, 0), -1)  # green
#         elif float(score) > 0.6:
#             cv2.circle(img, (c_x, c_y), 5, (0, int(255 * float(score)), 255), -1)  # yellow
#         # cv2.rectangle(img, (x, y), (x2, y2), (0, 0, 255), thickness=2)
#         # cv2.putText(img, score, (x, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
#         # cv2.putText(img, cla, (c_x, c_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
#         img_name = outpath + str(infos[i]["image_id"]) + '.jpg'
#         # import pdb; pdb.set_trace()
#         cv2.imwrite(img_name, img)
#     print("Done!")


if __name__ == "__main__":
    json_path = "/home/xx/faceDetection/data/wider_coco/annotations/val.json"
    out_path = "/home/xx/faceDetection/data/json_output/"
    image_path = "/home/xx/faceDetection/data/wider_coco/images/val/"
    select(json_path, out_path, image_path)