Implementing H.264/H.265 real-time video stream decoding and face tracking

What follows is shared from my own hands-on experience in real projects.
For real-time video stream playback in day-to-day projects, most people choose flv.js or video.js. Because neither of the two met our actual requirements, and neither can decode an H.265 video stream, we wrote our own stream player together with the backend C++ developers. Video decoding uses libffmpeg (message me if you cannot find the resources). The principle is to write C++ code that calls FFmpeg for decoding and compile it to WebAssembly (wasm). Because of the decoding workload, doing the decoding in the browser places certain demands on both the machine and the bandwidth.
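To give a sense of how the page knows the wasm decoder is ready, here is a minimal hedged sketch; the exported function names simply mirror the ones used later in this article, and everything else (script loading, logging) is illustrative rather than our exact setup.

// A minimal sketch (not the exact build used here) of waiting for the wasm decoder:
// define Module before loading the Emscripten-generated glue script so that the
// standard onRuntimeInitialized hook fires once the FFmpeg wasm runtime is ready.
var Module = {
  onRuntimeInitialized: function () {
    // from here on it is safe to call the exported decoder functions used later in
    // this article, e.g. Module._openDecoder / Module._decodeData
    console.log('ffmpeg wasm decoder ready');
  }
};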
We implemented playback in two ways, switchable through configuration: one plays a MediaSource stream through a video element, the other draws the frames with WebGL. MediaSource can play stream segments, so the player does not have to wait for the whole video to download before starting; stream segments can be fed to it continuously.
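The gist of the MediaSource path looks roughly like the following. This is a minimal sketch only: the MIME string, the onSegment hook and the element lookup are placeholders, not the project's actual wiring; in the real player the segments come from the WSReader described below.

// Minimal MediaSource sketch: append stream segments as they arrive instead of
// waiting for the whole file. The MIME string and the segment source are placeholders.
const video = document.querySelector('video');
const mime = 'video/mp4; codecs="avc1.42E01E"';   // placeholder codec string
if (window.MediaSource && MediaSource.isTypeSupported(mime)) {
  const mediaSource = new MediaSource();
  video.src = URL.createObjectURL(mediaSource);
  mediaSource.addEventListener('sourceopen', () => {
    const sourceBuffer = mediaSource.addSourceBuffer(mime);
    // onSegment stands in for the websocket callback that delivers each ArrayBuffer
    window.onSegment = (segment) => {
      if (!sourceBuffer.updating) {
        sourceBuffer.appendBuffer(segment);   // feed the player piece by piece
      }
    };
  });
}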
Define a WebSocket connection class that manages signaling and data exchange and dispatches events.

export default class WSReader extends Event {
  constructor(url) {
    super('WSReader');
    this.TAG = '[WSReader]';
    this.ws = new WebSocket(url);
    this.ws.binaryType = 'arraybuffer';
    this.ws.onopen = this._onWebSocketOpen.bind(this);
    this.ws.onerror = this._onWebSocketError.bind(this);
    this.ws.onmessage = this._onWebSocketMessage.bind(this);
    this.ws.onclose = this._onWebSocketClose.bind(this);
    this.wsMethods = {
      open: 'open',         // request MIME
      play: 'play',         // request stream push
      pause: 'pause',       // stop stream push
      close: 'close',       // close the ws connection
      slow: 'slow',         // request a lower push rate
      fast: 'fast',         // request a higher push rate
      complete: 'complete', // the whole video stream has been sent
    };
    this.seq = 1;
    this.sendRate = 1;      // stream push rate, used to adjust playback speed
    this.isFullPause = false;
  }
  // ... message handlers shown below
}

// handle websocket messages
_onWebSocketMessage(ev) {
  let data = ev.data;
  if (data instanceof ArrayBuffer) {
    this.dispatch(VideoEvents.VIDEO_EVENT, data);
  } else {
    this.wsMessageHandle(data);
  }
}

// dispatch events according to the ws message body
wsMessageHandle(data) {
  let mes = JSON.parse(data);
  switch (mes.method) {
    case 'open':
      debug.log(`get mime ${mes.mime}`);
      this.openHandle(mes);
      break;
    case 'play':
      debug.log('ws play signal');
      this.playHandle(mes);
      break;
    case 'pause':
      debug.log('ws pause signal');
      this.pauseHandle(mes);
      break;
    case 'close':
      debug.log('ws close signal');
      this.closeHandle();
      break;
    case 'slow':
      debug.log('ws slow signal');
      this.speedHandle(mes);
      break;
    case 'fast':
      debug.log('ws fast signal');
      this.speedHandle(mes);
      break;
    case 'complete':
      debug.log('ws complete signal');
      this.completeHandle();
      break;
    case 'mediaChange':
      debug.log('ws mediaChange signal');
      this.mediaChangeHandle(mes);
      break;
  }
}

// MIME received
_onWsGetMime(data) {
  if (data.ret == 0) {
    this.videoInfo.mime = data.mime;
    if (this.options.renderType === 'webgl') {
      // MIME received; decide between h264 and h265 from the MIME and init the decoder
      this.decoder.init(data.mime.indexOf('avc') !== -1 ? 0 : 1);
    }
    this.createBuffer();
    // MIME obtained successfully, next send the play command
    this.videoReader.play();
  } else {
    debug.log(this.TAG, 'get mime type failed');
    this.dispatch(HSPlayer.Events.SERVER_PLAY_ERR, { msg: 'get mime type failed' });
  }
}

Define the public-facing base class. It covers player initialization, basic configuration (playback-speed list, buffer size, playback type, and so on), player state monitoring and error dispatching, and handling of the incoming video stream data.
export default class HSPlayer extends Event {
  // event types exposed to the outside
  static get Events() {
    return {
      SERVER_PLAY_ERR: 'SERVER_PLAY_ERR',                 // play request failed
      SERVER_PAUSE_ERR: 'SERVER_PAUSE_ERR',               // pause request failed
      SERVER_SPEED_ERR: 'SERVER_SPEED_ERR',               // speed-change request failed
      SERVER_NET_ERR: 'SERVER_NET_ERR',                   // server connection error
      ABNORMAL_DISCONNECT: 'ABNORMAL_DISCONNECT',         // connection dropped due to network failure
      CHROME_CODEC_UNSUPPORT: 'CHROME_CODEC_UNSUPPORT',   // browser does not support this video format
      VIDEO_STREAM_INCORRECT: 'VIDEO_STREAM_INCORRECT',   // video stream has missing or corrupted data
      VIDEO_LIVE_STREAM_TOO_LOOG: 'VIDEO_LIVE_STREAM_TOO_LOOG', // live buffer too long, need to seek to the latest point
      VIDEO_PLAY_SUCESS: 'VIDEO_PLAY_SUCESS',             // notify the frontend that playback started
    };
  }

  static isSupported(mimeCode) {
    return (window.MediaSource && window.MediaSource.isTypeSupported(mimeCode));
  }

  constructor(options) {
    super('HSPlayer');
    this.TAG = '[HSPlayer]';
    let defaults = {
      node: '',                // video element
      cacheBufferTime: 60,     // max cached duration for playback, in seconds
      cacheBufferMinTime: 30,  // when the playback cache drops below this, fetch the stream again
      cleanOffset: 0.8,        // duration kept when clearing the buffer, in seconds
      debug: false,            // whether to print console logs
      delayPlay: 0,            // optional delayed start for live streams, in ms
      type: 'live',            // 'live' for live streams, 'playback' for recorded video
      wsUrl: null,             // websocket address; signaling and video share one address in this project
      flushTime: 3 * 1000,     // interval for flushing the buffer, used for live
      drawArInfo: false,       // whether to draw AR info
      renderType: null,        // 'webgl' means decode locally in the browser
    };
    // ...
  }
}
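A hypothetical usage sketch of the player class above: the option values are illustrative, and the on() listener assumes the project's Event base class exposes one, so adapt it to the real event API.

// Hypothetical usage of HSPlayer; wsUrl and the on() listener are assumptions.
const player = new HSPlayer({
  node: document.querySelector('#live-video'),
  wsUrl: 'wss://example.com/media',   // placeholder signaling/stream address
  type: 'live',
  renderType: 'webgl',                // decode in the browser and draw with WebGL
  debug: true,
});
// event names mirror the constants defined in the class above
player.on(HSPlayer.Events.VIDEO_PLAY_SUCESS, () => console.log('playing'));
player.on(HSPlayer.Events.SERVER_NET_ERR, (err) => console.error('server error', err));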

// _onWsVideoBuffer
/*
 * Frame header layout:
 * int8_t  version;
 * int16_t headLen;
 * int8_t  frameNum;
 * int8_t  type;
 * int8_t  codec;
 * int32_t beginTimeStampSec;
 * int32_t beginTimeStampMs;
 * int32_t EndTimeStampSec;
 * int32_t EndTimeStampMs;
 */
_onWsVideoBuffer(originData) {
  // check whether the packet carries a header
  let headMagic = new Uint8Array(originData.slice(0, 4));
  // read the header length
  let hAr, hLen = 0;
  if (
    headMagic[0] == 117 &&   // 'u'
    headMagic[1] == 109 &&   // 'm'
    headMagic[2] == 120 &&   // 'x'
    headMagic[3] == 115      // 's'
  ) {
    hAr = new Uint8Array(originData.slice(5, 8));
    hLen = (hAr[0] << 8) + hAr[1];
  }
  if (!this.firstFrameTime && originData) {
    let hBuffer = new Uint8Array(originData.slice(0, hLen));
    // the first 6 bytes are version, headLen, type, etc.
    // the last 8 bytes are the frame end time, unused for now
    let sec = (hBuffer[10] << 24) + (hBuffer[11] << 16) + (hBuffer[12] << 8) + hBuffer[13];
    let ms = (hBuffer[14] << 24) + (hBuffer[15] << 16) + (hBuffer[16] << 8) + hBuffer[17];
    this.firstFrameTime = sec * 1000 + ms;
  }
  let data = originData.slice(hLen);
  if (this.options.renderType === 'webgl') {
    // AR data
    let arLenBuf = new Uint8Array(originData.slice(27, 29));
    let arLen = (arLenBuf[0] << 8) + arLenBuf[1];
    if (arLen) {
      let arTarget = new Uint8Array(originData.slice(29, arLen + 29));
      let arJson = '';
      arTarget.forEach(x => {
        arJson += String.fromCharCode(x);
      });
      let targetObj = JSON.parse(arJson);
      if (targetObj.arInfo && targetObj.arInfo.objList) {
        this.decoder.feed(data, targetObj.arInfo.objList);
      } else {
        this.decoder.feed(data, null);
      }
    } else {
      this.decoder.feed(data, null);
    }
    return false;
  }
  // for playback, keep un-played buffers in pendingBufs; for live, drop them
  if (this.options.type != 'live') {
    while (this.pendingBufs.length > 0 && this.bufferController) {
      let buf = this.pendingBufs.shift();
      this.bufferController.feed(buf);
    }
    if (this.bufferController) {
      this.bufferController.feed(data);
    } else {
      this.pendingBufs.push(data);
    }
  } else if (this.bufferController) {
    this.bufferController.feed(data);
  }
}

Define the H.264/H.265 decoder class. In WebGL mode the frontend does the decoding and drawing while the backend only pushes the raw stream; in MediaSource mode the C++ side decodes the video first and then pushes it over. The Decoder class is therefore needed to manage the video stream queue and the AR queue, decode on a timer, and publish the decoded data.
// module-level handles shared across Decoder methods (assumed from how the original
// snippets use them without `this.`)
let canvas, arCanvas, glPlayer, videoCallback;

export default class Decoder {
  constructor(node) {
    this.queue = [];        // frame queue
    this.arIndex = -1;
    this.arQueue = {};      // AR info queue
    this.LOG_LEVEL_FFMPEG = 2;
    this.LOG_LEVEL_JS = 0;
    this.LOG_LEVEL_WASM = 1;
    this.node = node;
    // canvas for the video frames
    canvas = document.createElement('canvas');
    canvas.width = node.clientWidth;
    canvas.setAttribute(
      'style',
      'width: 100%; height: auto; position: absolute; left: 0; top: 0;'
    );
    // canvas for the AR info
    arCanvas = document.createElement('canvas');
    arCanvas.width = node.clientWidth;
    arCanvas.setAttribute(
      'style',
      'width: 100%; height: auto; position: absolute; left: 0; top: 0;'
    );
    node.parentNode.appendChild(canvas);
    node.parentNode.appendChild(arCanvas);
  }

  feed(buffer, ar) {
    this.arIndex++;
    this.queue.push(buffer);
    if (ar) {
      this.arQueue[this.arIndex] = ar;
    }
  }
}

init(decoderType) {
  videoCallback = Module.addFunction((addr_y, addr_u, addr_v, stride_y, stride_u, stride_v, width, height, pts) => {
    // console.log("[%d]In video callback, size = %d * %d, pts = %d", ++videoSize, width, height, pts)
    let size = width * height + (width / 2) * (height / 2) + (width / 2) * (height / 2);
    let data = new Uint8Array(size);
    let pos = 0;
    // copy the Y plane out of the wasm heap
    for (let i = 0; i < height; i++) {
      let src = addr_y + i * stride_y;
      let tmp = HEAPU8.subarray(src, src + width);
      tmp = new Uint8Array(tmp);
      data.set(tmp, pos);
      pos += tmp.length;
    }
    // copy the U plane
    for (let i = 0; i < height / 2; i++) {
      let src = addr_u + i * stride_u;
      let tmp = HEAPU8.subarray(src, src + width / 2);
      tmp = new Uint8Array(tmp);
      data.set(tmp, pos);
      pos += tmp.length;
    }
    // copy the V plane
    for (let i = 0; i < height / 2; i++) {
      let src = addr_v + i * stride_v;
      let tmp = HEAPU8.subarray(src, src + width / 2);
      tmp = new Uint8Array(tmp);
      data.set(tmp, pos);
      pos += tmp.length;
    }
    var obj = { data: data, width, height };
    this.displayVideoFrame(obj);
    this.displayVideoAr(pts, width, height);
  });

  var ret = Module._openDecoder(decoderType, videoCallback, this.LOG_LEVEL_WASM);
  if (ret == 0) {
    console.log("openDecoder success");
  } else {
    console.error("openDecoder failed with error", ret);
    return;
  }

  var pts = 0;
  // decode on a timer
  setInterval(() => {
    const data = this.queue.shift();
    if (data) {
      const typedArray = new Uint8Array(data);
      const size = typedArray.length;
      var cacheBuffer = Module._malloc(size);
      Module.HEAPU8.set(typedArray, cacheBuffer);
      Module._decodeData(cacheBuffer, size, pts++);
      if (cacheBuffer != null) {
        Module._free(cacheBuffer);
        cacheBuffer = null;
      }
      // if (size < CHUNK_SIZE) {
      //   console.log('Flush frame data');
      //   Module._flushDecoder();
      //   Module._closeDecoder();
      // }
    }
  }, 1);
}

displayVideoFrame(obj) {
  var data = new Uint8Array(obj.data);
  var width = obj.width;
  var height = obj.height;
  var yLength = width * height;
  var uvLength = (width / 2) * (height / 2);
  if (!glPlayer) {
    canvas.height = (canvas.width / width) * height;
    arCanvas.height = (canvas.width / width) * height;
    glPlayer = new WebGLPlayer(canvas, { preserveDrawingBuffer: false }, arCanvas);
  }
  glPlayer.renderFrame(data, width, height, yLength, uvLength);
}

displayVideoAr(pts, width, height) {
  if (!glPlayer) return;
  let target = this.arQueue[pts];
  if (target) {
    delete this.arQueue[pts];
    glPlayer.renderAR(target, width, height);
  }
}

Last is the WebGL rendering class, which draws the decoded YUV data and the AR data. A separate Texture class used by WebGL was also factored out; it is not listed here because it is just standard WebGL texture handling.
export default class WebGLPlayer {
  constructor(canvas, options, arCanvas) {
    this.canvas = canvas;
    this.gl = canvas.getContext("webgl") || canvas.getContext("experimental-webgl");
    this.ctx = arCanvas.getContext("2d");
    this.initGL(options);
  }

  initGL(options) {
    if (!this.gl) {
      console.log("[ER] WebGL not supported.");
      return;
    }
    var gl = this.gl;
    gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
    var program = gl.createProgram();
    var vertexShaderSource = [
      "attribute highp vec4 aVertexPosition;",
      "attribute vec2 aTextureCoord;",
      "varying highp vec2 vTextureCoord;",
      "void main(void) {",
      "  gl_Position = aVertexPosition;",
      "  vTextureCoord = aTextureCoord;",
      "}"
    ].join("\n");
    var vertexShader = gl.createShader(gl.VERTEX_SHADER);
    gl.shaderSource(vertexShader, vertexShaderSource);
    gl.compileShader(vertexShader);
    var fragmentShaderSource = [
      "precision highp float;",
      "varying lowp vec2 vTextureCoord;",
      "uniform sampler2D YTexture;",
      "uniform sampler2D UTexture;",
      "uniform sampler2D VTexture;",
      "const mat4 YUV2RGB = mat4",
      "(",
      "  1.1643828125, 0, 1.59602734375, -.87078515625,",
      "  1.1643828125, -.39176171875, -.81296875, .52959375,",
      "  1.1643828125, 2.017234375, 0, -1.081390625,",
      "  0, 0, 0, 1",
      ");",
      "void main(void) {",
      "  gl_FragColor = vec4(texture2D(YTexture, vTextureCoord).x, texture2D(UTexture, vTextureCoord).x, texture2D(VTexture, vTextureCoord).x, 1) * YUV2RGB;",
      "}"
    ].join("\n");
    var fragmentShader = gl.createShader(gl.FRAGMENT_SHADER);
    gl.shaderSource(fragmentShader, fragmentShaderSource);
    gl.compileShader(fragmentShader);
    gl.attachShader(program, vertexShader);
    gl.attachShader(program, fragmentShader);
    gl.linkProgram(program);
    gl.useProgram(program);
    if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
      console.log("[ER] Shader link failed.");
    }
    var vertexPositionAttribute = gl.getAttribLocation(program, "aVertexPosition");
    gl.enableVertexAttribArray(vertexPositionAttribute);
    var textureCoordAttribute = gl.getAttribLocation(program, "aTextureCoord");
    gl.enableVertexAttribArray(textureCoordAttribute);
    var verticesBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, verticesBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 1.0, 0.0, -1.0, 1.0, 0.0, 1.0, -1.0, 0.0, -1.0, -1.0, 0.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(vertexPositionAttribute, 3, gl.FLOAT, false, 0, 0);
    var texCoordBuffer = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, texCoordBuffer);
    gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0]), gl.STATIC_DRAW);
    gl.vertexAttribPointer(textureCoordAttribute, 2, gl.FLOAT, false, 0, 0);
    gl.y = new Texture(gl);
    gl.u = new Texture(gl);
    gl.v = new Texture(gl);
    gl.y.bind(0, program, "YTexture");
    gl.u.bind(1, program, "UTexture");
    gl.v.bind(2, program, "VTexture");
  }
}

renderFrame(videoFrame, width, height, uOffset, vOffset) {
  if (!this.gl) {
    console.log("[ER] Render frame failed due to WebGL not supported.");
    return;
  }
  var gl = this.gl;
  gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
  gl.clearColor(0.0, 0.0, 0.0, 0.0);
  gl.clear(gl.COLOR_BUFFER_BIT);
  // clear the AR canvas
  this.ctx.clearRect(0, 0, gl.canvas.width, gl.canvas.height);
  gl.y.fill(width, height, videoFrame.subarray(0, uOffset));
  gl.u.fill(width >> 1, height >> 1, videoFrame.subarray(uOffset, uOffset + vOffset));
  gl.v.fill(width >> 1, height >> 1, videoFrame.subarray(uOffset + vOffset, videoFrame.length));
  gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
}

renderAR(arr, width, height) {
  var gl = this.gl;
  arr.forEach(obj => {
    const x = (gl.canvas.width / width) * obj.objRect.left;
    const y = (gl.canvas.height / height) * obj.objRect.top;
    const w = (gl.canvas.width / width) * (obj.objRect.right - obj.objRect.left);
    const h = (gl.canvas.height / height) * (obj.objRect.bottom - obj.objRect.top);
    const c = this.ctx;
    // the original snippet is truncated here; drawing the box would look roughly
    // like the following (assumption, not the original code):
    // c.strokeStyle = 'red';
    // c.strokeRect(x, y, w, h);
  });
}
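Since the Texture class is not listed, here is a minimal sketch of what such standard WebGL texture handling usually looks like, matching only the bind()/fill() calls used above; this is my own illustration, not the project's actual class.

// Minimal sketch of a WebGL texture wrapper for single-channel (LUMINANCE) planes.
// Only bind() and fill() are assumed from the calls in WebGLPlayer above.
class Texture {
  constructor(gl) {
    this.gl = gl;
    this.texture = gl.createTexture();
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
  }
  // bind this texture to texture unit n and the named sampler uniform
  bind(n, program, name) {
    const gl = this.gl;
    gl.activeTexture(gl.TEXTURE0 + n);
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    gl.uniform1i(gl.getUniformLocation(program, name), n);
  }
  // upload one Y/U/V plane as a LUMINANCE texture
  fill(width, height, data) {
    const gl = this.gl;
    gl.bindTexture(gl.TEXTURE_2D, this.texture);
    gl.texImage2D(gl.TEXTURE_2D, 0, gl.LUMINANCE, width, height, 0, gl.LUMINANCE, gl.UNSIGNED_BYTE, data);
  }
}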

Details to note
  1. For playback, segments that have not finished playing need to be kept in the queue; for live streams, drop them and seek to the latest point.
  2. Remember to set the MediaSource sourceBuffer.mode to sequence. With this setting the video plays the buffers in the order they were appended rather than by their timestamps (see the snippet after this list).
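For point 2, the setting itself is one line, assuming a sourceBuffer created with mediaSource.addSourceBuffer as in the earlier sketch:

// 'sequence' makes the buffers play back in the order they are appended, ignoring
// their own timestamps; useful when segments arrive as a continuous live feed.
const sourceBuffer = mediaSource.addSourceBuffer(mime);
sourceBuffer.mode = 'sequence';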
