python|opencv-python 入门实战(传统方法Hog+svm实现目标检测)

前言 很多初学opencv的朋友,可能会茫无头绪,不知道怎么和python搭配,做一些简单的图像处理工作。在这里分享一个自己做过的小课题:公交上下车流量检测中的一部分——人头检测。
关于Hog特征和svm原理的相关讲解博客,前人已经做得相当完备了。我这里就不再讲述原理,直接就我之前编写的代码进行详解,帮助初学者快速了解opencv的常用函数,以及如何使用python编写并实现一个简单的目标检测程序。
【python|opencv-python 入门实战(传统方法Hog+svm实现目标检测)】大佬到这就可以结束了
在看代码之前,推荐点击:这是我针对我写的代码,总结的算法思路
程序实现 1 train

import cv2
import numpy as np
import random
import xml.dom.minidom
from non_maximum import non_max_suppression_fast as nms


def gamma_trans(img, gamma):
    """Apply gamma correction to an 8-bit image via a 256-entry lookup table.

    Args:
        img: 8-bit image (as accepted by ``cv2.LUT``).
        gamma: exponent applied to the intensities normalised to [0, 1].

    Returns:
        The gamma-corrected image produced by ``cv2.LUT``.
    """
    gamma_table = [np.power(x / 255.0, gamma) * 255.0 for x in range(256)]
    gamma_table = np.round(np.array(gamma_table)).astype(np.uint8)
    return cv2.LUT(img, gamma_table)


def load_images(dirname, size):
    """Read ``<dirname>1.jpg`` .. ``<dirname><size>.jpg`` with ``cv2.imread``.

    ``dirname`` is used as a plain string prefix, so it must end with a path
    separator. Images that cannot be read are kept as ``None`` placeholders so
    that list index ``i`` always corresponds to file number ``i + 1``.

    Returns:
        A list of ``size`` entries (BGR arrays or ``None``).
    """
    return [cv2.imread(dirname + str(i + 1) + '.jpg') for i in range(size)]


def extract_images(path, img_list, size, wsize=(80, 80)):
    """Crop one annotated window per image to build a sample list.

    For image ``i`` the annotation file ``<path><i+1>.xml`` is parsed and the
    first ``xmin``/``ymin`` pair is used as the top-left corner of a crop of
    ``wsize`` = (width, height). Crops that fall off the image border (and
    therefore do not have the full window size) are dropped, as are images
    that failed to load.

    Args:
        path: string prefix of the annotation files.
        img_list: images as returned by ``load_images``.
        size: number of image/annotation pairs to process.
        wsize: (width, height) of the crop window.

    Returns:
        A list of cropped regions, each exactly ``wsize`` in size.
    """
    extract_img = []
    for i in range(size):
        img = img_list[i]
        if img is None:
            # Unreadable image: nothing to crop from.
            continue
        doc = xml.dom.minidom.parse(path + str(i + 1) + '.xml')
        root = doc.documentElement
        xmin_nodes = root.getElementsByTagName("xmin")
        ymin_nodes = root.getElementsByTagName("ymin")
        # Coordinates may be stored as floats in the XML, hence int(float(...)).
        xmin = int(float(xmin_nodes[0].childNodes[0].nodeValue))
        ymin = int(float(ymin_nodes[0].childNodes[0].nodeValue))
        roi = img[ymin:ymin + wsize[1], xmin:xmin + wsize[0]]
        # Compare against wsize (not a hard-coded 80) so other window sizes work.
        if roi.shape[1] != wsize[0] or roi.shape[0] != wsize[1]:
            continue
        extract_img.append(roi)
    return extract_img


def extract_neg_img(dirname, extract_neg, wsize=(80, 80), num_images=15,
                    per_image=10):
    """Append randomly placed crops from background images as negatives.

    Reads ``<dirname>1.jpg`` .. ``<dirname><num_images>.jpg`` and takes
    ``per_image`` random ``wsize`` crops from each. The crop range is derived
    from the actual image shape, so the function is not tied to one frame
    size; for 288-row by 352-column images the sampled ranges are the same as
    with the previously hard-coded limits.

    Args:
        dirname: string prefix of the background images.
        extract_neg: list that receives the crops (modified in place).
        wsize: (width, height) of the crop window.
        num_images: how many numbered images to read.
        per_image: how many crops to take from each image.

    Returns:
        ``extract_neg`` with the new crops appended.
    """
    for i in range(num_images):
        img = cv2.imread(dirname + str(i + 1) + '.jpg')
        if img is None:
            continue
        max_row = img.shape[0] - wsize[1]
        max_col = img.shape[1] - wsize[0]
        if max_row < 1 or max_col < 1:
            # Image is too small to hold a full window.
            continue
        for _ in range(per_image):
            row = random.randint(1, max_row)
            col = random.randint(1, max_col)
            extract_neg.append(img[row:row + wsize[1], col:col + wsize[0]])
    return extract_neg


def computeHOGs(img_list, gradient_list, wsize=(80, 80)):
    """Compute a HOG descriptor for every image and append it to a list.

    Each image is converted to grayscale, histogram-equalised (to reduce the
    effect of very dark or very bright frames) and gamma-corrected with
    gamma 0.8 before the descriptor is computed.

    Args:
        img_list: BGR images, each of size ``wsize``.
        gradient_list: list that receives the descriptors (modified in place).
        wsize: HOG window size (width, height); block size (40, 40), block
            stride (8, 8), cell size (8, 8) and 9 bins are fixed.

    Returns:
        ``gradient_list`` with one descriptor appended per image.
    """
    hog = cv2.HOGDescriptor(tuple(wsize), (40, 40), (8, 8), (8, 8), 9)
    for img in img_list:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.equalizeHist(gray)
        gray = gamma_trans(gray, 0.8)
        gradient_list.append(hog.compute(gray))
    return gradient_list


def sliding_window(image, stepSize, windowSize):
    """Yield ``(x, y, window)`` for a window slid over the image.

    Windows that reach past the right or bottom border are truncated by the
    slicing, so callers must check the window shape themselves.
    """
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])


def resize(img, scaleFactor):
    """Shrink ``img`` by ``scaleFactor`` in both dimensions (INTER_AREA)."""
    return cv2.resize(
        img,
        (int(img.shape[1] * (1 / scaleFactor)),
         int(img.shape[0] * (1 / scaleFactor))),
        interpolation=cv2.INTER_AREA)


# ---- Build the training set --------------------------------------------
neg_list = []
pos_list = []
gradient_list = []
labels = []
path1 = 'E:\\data\\pos\\'
path2 = 'E:\\data\\neg\\'
path_pos = 'E:\\data\\post\\train-PascalVOC-export\\Annotations\\'
path_neg = 'E:\\data\\negtive\\negtive-PascalVOC-export\\Annotations\\'

pos_list = load_images(path1, 333)
pos_list = extract_images(path_pos, pos_list, 333, wsize=(80, 80))
neg_list = load_images(path2, 193)
neg_list = extract_images(path_neg, neg_list, 193, wsize=(80, 80))
neg_list = extract_neg_img(path2, neg_list, wsize=(80, 80))

computeHOGs(pos_list, gradient_list)
labels.extend([+1] * len(pos_list))
computeHOGs(neg_list, gradient_list)
labels.extend([-1] * len(neg_list))

# ---- Train the SVM -----------------------------------------------------
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setGamma(0.001)
svm.setC(30)
svm.setKernel(cv2.ml.SVM_RBF)
# Class labels must be 32-bit integers for OpenCV; the platform default
# integer dtype of np.array() is int64 on most systems and is rejected.
svm.train(np.array(gradient_list), cv2.ml.ROW_SAMPLE,
          np.array(labels, dtype=np.int32))
svm.save("svm.xml")

2 test
# ---- Detection on a single video frame ----------------------------------
svm2 = cv2.ml.SVM_load("svm.xml")  # load the previously saved model
videoCapture = cv2.VideoCapture('52.avi')
# Seek so that the next read() returns frame 4203.
videoCapture.set(cv2.CAP_PROP_POS_FRAMES, 4203)
success, img = videoCapture.read()
if not success:
    # Fail with a clear message instead of crashing later on a None frame.
    raise RuntimeError("could not read the requested frame from the video")

rectangles = []
counter = 0
scale = 1
w, h = 80, 80
font = cv2.FONT_HERSHEY_PLAIN
hog = cv2.HOGDescriptor((80, 80), (40, 40), (8, 8), (8, 8), 9)

# Slide an (80, 80) window over the frame in steps of 10 pixels.
for (x, y, roi) in sliding_window(img, 10, (80, 80)):
    if roi.shape[1] != w or roi.shape[0] != h:
        # Window was truncated at the image border.
        continue
    # Pre-processing must match what was done at training time.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = gamma_trans(gray, 0.8)
    test_gradient = hog.compute(gray)
    # Use the model loaded from disk (svm2): first the predicted class label,
    # then the raw decision value, where a more negative value means a more
    # confident detection.
    _, result = svm2.predict(np.array([test_gradient]))
    a, res = svm2.predict(np.array([test_gradient]),
                          flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)
    score = res[0][0]
    if result[0][0] == 1:
        if score < -1:
            print(score)
            rx, ry, rx2, ry2 = (int(x * scale), int(y * scale),
                                int((x + w) * scale), int((y + h) * scale))
            rectangles.append([rx, ry, rx2, ry2, score])
    # NOTE(review): the original listing lost its indentation; counting every
    # full-size window here (and printing the total once) is a reconstruction.
    counter += 1
print(counter)

windows = np.array(rectangles)
boxes = nms(windows, 0.5)
print(len(boxes))

# Draw the surviving detections together with their scores.
for (x, y, x2, y2, score) in boxes:
    cv2.rectangle(img, (int(x), int(y)), (int(x2), int(y2)), (0, 255, 0), 1)
    cv2.putText(img, "%f" % score, (int(x), int(y)), font, 1, (0, 255, 0))

cv2.imshow("img", img)
# Block until a key is pressed; otherwise the window closes immediately.
cv2.waitKey(0)

3 非最大值抑制
滑动窗口的步长不能太大,因此相邻的多个窗口可能同时检测到同一个人头。此时我们根据svm的预测分值,只保留其中置信度最高的窗口(本文中分值越负,置信度越高)作为检测结果。
import numpy as np

# Area of the fixed 80 x 80 detection window used to normalise overlaps.
WINDOW_AREA = 6400


def area(box):
    """Return the area of ``box`` given as ``(x1, y1, x2, y2, ...)``."""
    return (abs(box[2] - box[0])) * (abs(box[3] - box[1]))


def overlaps(a, b, thresh=0.5, box_area=WINDOW_AREA):
    """Tell whether two boxes overlap by at least ``thresh``.

    The intersection area is divided by ``box_area`` (the area of one
    detection window) and compared with ``thresh``.

    Args:
        a, b: boxes as ``(x1, y1, x2, y2, ...)``.
        thresh: minimum intersection / ``box_area`` ratio.
        box_area: normalising area; defaults to the 80 x 80 window.

    Returns:
        True if the boxes intersect and the ratio reaches ``thresh``.
    """
    width = min(a[2], b[2]) - max(a[0], b[0])
    height = min(a[3], b[3]) - max(a[1], b[1])
    # Disjoint boxes give a non-positive extent. Taking abs() of it would
    # report a bogus positive intersection, so reject them explicitly.
    if width <= 0 or height <= 0:
        return False
    return float(width * height) / box_area >= thresh


# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh=0.5):
    """Greedy non-maximum suppression on scored boxes.

    Boxes are visited from the lowest (most negative) score upwards; each
    visited box that has not been suppressed is kept and suppresses every
    later box that overlaps it by at least ``overlapThresh`` (by default,
    windows sharing 50% of their area count as the same head).

    Args:
        boxes: array of shape (n, 5) with rows ``(x1, y1, x2, y2, score)``.
        overlapThresh: overlap ratio passed to ``overlaps``.

    Returns:
        The surviving rows in their original order, or ``[]`` when ``boxes``
        is empty.
    """
    # len() rather than truthiness: ``boxes`` is normally a numpy array.
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes)
    order = np.argsort(boxes[:, 4])  # indices sorted by ascending score
    suppressed = np.zeros(len(boxes), dtype=bool)
    for pos, best in enumerate(order):
        if suppressed[best]:
            continue
        for other in order[pos + 1:]:
            if suppressed[other]:
                continue
            if overlaps(boxes[other], boxes[best], overlapThresh):
                suppressed[other] = True
    return boxes[~suppressed]

补充说明 完整的代码链接:https://github.com/hunting777/hog-svm.git
我自己做了人头检测的数据集,若需要:https://zhuanlan.zhihu.com/p/114431564

    推荐阅读