|
13 | 13 | body { |
14 | 14 | background-color: #666666; |
15 | 15 | } |
| 16 | + canvas { |
| 17 | + position: absolute; |
| 18 | + inset: 0; |
| 19 | + margin: auto; |
| 20 | + } |
16 | 21 | </style> |
17 | 22 | </head> |
18 | 23 | <body> |
|
35 | 40 |
|
36 | 41 | import * as THREE from 'three'; |
37 | 42 |
|
38 | | - import { OrbitControls } from 'three/addons/controls/OrbitControls.js'; |
39 | | - |
40 | 43 | import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js'; |
41 | 44 | import { KTX2Loader } from 'three/addons/loaders/KTX2Loader.js'; |
42 | 45 | import { MeshoptDecoder } from 'three/addons/libs/meshopt_decoder.module.js'; |
|
45 | 48 |
|
46 | 49 | // Mediapipe |
47 | 50 |
|
48 | | - import vision from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35'; |
49 | | - |
50 | | - const { FaceLandmarker, FilesetResolver } = vision; |
| 51 | + import { FaceLandmarker, FilesetResolver } from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35'; |
51 | 52 |
|
52 | 53 | const blendshapesMap = { |
53 | 54 | // '_neutral': '', |
|
105 | 106 | // '': 'tongueOut' |
106 | 107 | }; |
107 | 108 |
|
| 109 | + // MediaPipe returns the head pose in a metric 3D space that assumes a |
| 110 | + // fixed virtual camera: right-handed, at the origin, looking down -Z, with |
| 111 | + // units in centimeters and a vertical field of view of 63 degrees. The |
| 112 | + // camera, the video plane and the model all have to share that frame for |
| 113 | + // the rendered face to register with the webcam image. |
| 114 | + |
| 115 | + const MP_FOV = 63; // vertical field of view, in degrees |
| 116 | + const MP_NEAR = 1; // 1 cm |
| 117 | + const MP_FAR = 10000; // 100 m |
| 118 | + |
| 119 | + const VIDEO_DISTANCE = 100; // depth of the video plane, in cm |
| 120 | + |
108 | 121 | // |
109 | 122 |
|
110 | 123 | const renderer = new THREE.WebGLRenderer( { antialias: true } ); |
|
113 | 126 | renderer.toneMapping = THREE.ACESFilmicToneMapping; |
114 | 127 | document.body.appendChild( renderer.domElement ); |
115 | 128 |
|
116 | | - const camera = new THREE.PerspectiveCamera( 60, window.innerWidth / window.innerHeight, 1, 100 ); |
117 | | - camera.position.z = 5; |
| 129 | + // The render camera matches MediaPipe's virtual camera: at the origin, |
| 130 | + // looking down -Z. It must not be moved, otherwise the overlay drifts. Its |
| 131 | + // aspect switches to the video's once the webcam is running. |
| 132 | + const camera = new THREE.PerspectiveCamera( MP_FOV, window.innerWidth / window.innerHeight, MP_NEAR, MP_FAR ); |
118 | 133 |
|
119 | 134 | const scene = new THREE.Scene(); |
120 | 135 | scene.background = new THREE.Color( 0x666666 ); |
121 | | - scene.scale.x = - 1; |
| 136 | + scene.scale.x = - 1; // mirror the whole scene for a selfie view ( flips video and pose together ) |
122 | 137 |
|
123 | 138 | scene.add( new THREE.AmbientLight( 0xffffff, 5 ) ); |
124 | 139 |
|
125 | | - const controls = new OrbitControls( camera, renderer.domElement ); |
126 | | - |
127 | 140 | // Face |
128 | 141 |
|
129 | 142 | let face, eyeL, eyeR; |
130 | 143 | const eyeRotationLimit = THREE.MathUtils.degToRad( 30 ); |
131 | 144 |
|
| 145 | + // MediaPipe's facial transformation matrix is copied here verbatim. Until |
| 146 | + // the webcam delivers one, the face rests at a default frontal pose ( in |
| 147 | + // front of the camera, in centimeters ) so it is framed before tracking. |
| 148 | + const faceContainer = new THREE.Object3D(); |
| 149 | + faceContainer.matrixAutoUpdate = false; |
| 150 | + faceContainer.matrix.makeTranslation( 0, 0, - 50 ); |
| 151 | + faceContainer.matrixWorldNeedsUpdate = true; |
| 152 | + scene.add( faceContainer ); |
| 153 | + |
| 154 | + // The Face Cap model is not MediaPipe's canonical face mesh, so this fixed |
| 155 | + // transform registers it into the canonical frame ( centimeters, +Y up, |
| 156 | + // +Z out of the face ) before the pose matrix is applied. The values are |
| 157 | + // derived from the model's eye positions. |
| 158 | + const registration = new THREE.Object3D(); |
| 159 | + registration.scale.setScalar( 0.958 ); |
| 160 | + registration.rotation.x = Math.PI / 2; |
| 161 | + registration.position.set( 0, 0.12, 1.18 ); |
| 162 | + faceContainer.add( registration ); |
| 163 | + |
132 | 164 | const ktx2Loader = new KTX2Loader() |
133 | 165 | .detectSupport( renderer ); |
134 | 166 |
|
|
137 | 169 | .setMeshoptDecoder( MeshoptDecoder ) |
138 | 170 | .load( 'models/gltf/facecap.glb', ( gltf ) => { |
139 | 171 |
|
140 | | - const mesh = gltf.scene.children[ 0 ]; |
141 | | - scene.add( mesh ); |
| 172 | + // Reparent the head/eyes/teeth and drop the model's own scale rig. |
| 173 | + const group = gltf.scene.getObjectByName( 'grp_transform' ); |
| 174 | + registration.add( group ); |
142 | 175 |
|
143 | | - const head = mesh.getObjectByName( 'mesh_2' ); |
| 176 | + const head = group.getObjectByName( 'mesh_2' ); |
144 | 177 | head.material = new THREE.MeshNormalMaterial(); |
145 | 178 |
|
146 | | - const teeth = mesh.getObjectByName( 'mesh_3' ); |
| 179 | + const teeth = group.getObjectByName( 'mesh_3' ); |
147 | 180 | teeth.material = new THREE.MeshNormalMaterial(); |
148 | 181 |
|
149 | | - face = mesh.getObjectByName( 'mesh_2' ); |
150 | | - eyeL = mesh.getObjectByName( 'eyeLeft' ); |
151 | | - eyeR = mesh.getObjectByName( 'eyeRight' ); |
| 182 | + face = head; |
| 183 | + eyeL = group.getObjectByName( 'eyeLeft' ); |
| 184 | + eyeR = group.getObjectByName( 'eyeRight' ); |
152 | 185 |
|
153 | 186 | // GUI |
154 | 187 |
|
|
177 | 210 | texture.colorSpace = THREE.SRGBColorSpace; |
178 | 211 |
|
179 | 212 | const geometry = new THREE.PlaneGeometry( 1, 1 ); |
180 | | - const material = new THREE.MeshBasicMaterial( { map: texture, depthWrite: false } ); |
| 213 | + const material = new THREE.MeshBasicMaterial( { map: texture, depthTest: false, depthWrite: false } ); |
181 | 214 | const videomesh = new THREE.Mesh( geometry, material ); |
| 215 | + videomesh.position.z = - VIDEO_DISTANCE; |
| 216 | + videomesh.renderOrder = - 1; |
182 | 217 | scene.add( videomesh ); |
183 | 218 |
|
184 | 219 | // MediaPipe |
|
209 | 244 | } ) |
210 | 245 | .catch( function ( error ) { |
211 | 246 |
|
212 | | - console.error( 'Unable to access the camera/webcam.', error ); |
| 247 | + console.warn( 'Unable to access the camera/webcam.', error ); |
213 | 248 |
|
214 | 249 | } ); |
215 | 250 |
|
216 | 251 | } |
217 | 252 |
|
218 | | - const transform = new THREE.Object3D(); |
| 253 | + // The camera matches the video aspect; the canvas is sized to that aspect |
| 254 | + // and centered, so the grey body shows through as letterbox/pillarbox bars. |
219 | 255 |
|
220 | | - function animate() { |
| 256 | + video.addEventListener( 'loadedmetadata', function () { |
221 | 257 |
|
222 | | - if ( video.readyState >= HTMLMediaElement.HAVE_METADATA ) { |
| 258 | + const aspect = video.videoWidth / video.videoHeight; |
223 | 259 |
|
224 | | - const results = faceLandmarker.detectForVideo( video, Date.now() ); |
| 260 | + camera.aspect = aspect; |
| 261 | + camera.updateProjectionMatrix(); |
225 | 262 |
|
226 | | - if ( results.facialTransformationMatrixes.length > 0 ) { |
| 263 | + // Size the plane so it exactly fills the frustum at its depth. |
| 264 | + const height = 2 * VIDEO_DISTANCE * Math.tan( THREE.MathUtils.degToRad( MP_FOV / 2 ) ); |
| 265 | + videomesh.scale.set( height * aspect, height, 1 ); |
227 | 266 |
|
228 | | - const facialTransformationMatrixes = results.facialTransformationMatrixes[ 0 ].data; |
| 267 | + resize(); |
229 | 268 |
|
230 | | - transform.matrix.fromArray( facialTransformationMatrixes ); |
231 | | - transform.matrix.decompose( transform.position, transform.quaternion, transform.scale ); |
| 269 | + } ); |
232 | 270 |
|
233 | | - const object = scene.getObjectByName( 'grp_transform' ); |
| 271 | + function animate() { |
234 | 272 |
|
235 | | - object.position.x = transform.position.x; |
236 | | - object.position.y = transform.position.z + 40; |
237 | | - object.position.z = - transform.position.y; |
| 273 | + if ( video.readyState >= HTMLMediaElement.HAVE_METADATA ) { |
238 | 274 |
|
239 | | - object.rotation.x = transform.rotation.x; |
240 | | - object.rotation.y = transform.rotation.z; |
241 | | - object.rotation.z = - transform.rotation.y; |
| 275 | + const results = faceLandmarker.detectForVideo( video, Date.now() ); |
| 276 | + |
| 277 | + if ( results.facialTransformationMatrixes.length > 0 ) { |
| 278 | + |
| 279 | + // Apply MediaPipe's metric pose matrix directly. |
| 280 | + faceContainer.matrix.fromArray( results.facialTransformationMatrixes[ 0 ].data ); |
| 281 | + faceContainer.matrixWorldNeedsUpdate = true; |
242 | 282 |
|
243 | 283 | } |
244 | 284 |
|
245 | 285 | if ( results.faceBlendshapes.length > 0 ) { |
246 | | - |
| 286 | + |
247 | 287 | const faceBlendshapes = results.faceBlendshapes[ 0 ].categories; |
248 | | - |
| 288 | + |
249 | 289 | // Morph values do not exist on the eye meshes, so we map the eye blendshape scores into rotation values |
250 | 290 | const eyeScore = { |
251 | 291 | leftHorizontal: 0, |
252 | 292 | rightHorizontal: 0, |
253 | 293 | leftVertical: 0, |
254 | 294 | rightVertical: 0, |
255 | | - }; |
| 295 | + }; |
256 | 296 |
|
257 | 297 | for ( const blendshape of faceBlendshapes ) { |
258 | 298 |
|
|
304 | 344 | eyeR.rotation.z = eyeScore.rightHorizontal * eyeRotationLimit; |
305 | 345 | eyeL.rotation.x = eyeScore.leftVertical * eyeRotationLimit; |
306 | 346 | eyeR.rotation.x = eyeScore.rightVertical * eyeRotationLimit; |
307 | | - |
| 347 | + |
308 | 348 | } |
309 | 349 |
|
310 | 350 | } |
311 | 351 |
|
312 | | - videomesh.scale.x = video.videoWidth / 100; |
313 | | - videomesh.scale.y = video.videoHeight / 100; |
314 | | - |
315 | 352 | renderer.render( scene, camera ); |
316 | 353 |
|
317 | | - controls.update(); |
318 | | - |
319 | 354 | } |
320 | 355 |
|
321 | | - window.addEventListener( 'resize', function () { |
| 356 | + function resize() { |
322 | 357 |
|
323 | | - camera.aspect = window.innerWidth / window.innerHeight; |
324 | | - camera.updateProjectionMatrix(); |
| 358 | + // Largest video-aspect rectangle that fits inside the window. |
| 359 | + let width = window.innerWidth; |
| 360 | + let height = window.innerHeight; |
325 | 361 |
|
326 | | - renderer.setSize( window.innerWidth, window.innerHeight ); |
| 362 | + if ( width / height > camera.aspect ) { |
327 | 363 |
|
328 | | - } ); |
| 364 | + width = height * camera.aspect; |
| 365 | + |
| 366 | + } else { |
| 367 | + |
| 368 | + height = width / camera.aspect; |
| 369 | + |
| 370 | + } |
| 371 | + |
| 372 | + renderer.setSize( width, height ); |
| 373 | + |
| 374 | + } |
| 375 | + |
| 376 | + window.addEventListener( 'resize', resize ); |
329 | 377 |
|
330 | 378 | </script> |
331 | 379 | </body> |
|
0 commit comments