Skip to content

Commit aaaf63d

Browse files
mrdoob and claude authored
Examples: Improved face alignment in webgl_morphtargets_webcam (mrdoob#33690)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 7865a52 commit aaaf63d

2 files changed

Lines changed: 96 additions & 48 deletions

File tree

-6.6 KB
Loading

examples/webgl_morphtargets_webcam.html

Lines changed: 96 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,11 @@
1313
body {
1414
background-color: #666666;
1515
}
16+
canvas {
17+
position: absolute;
18+
inset: 0;
19+
margin: auto;
20+
}
1621
</style>
1722
</head>
1823
<body>
@@ -35,8 +40,6 @@
3540

3641
import * as THREE from 'three';
3742

38-
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
39-
4043
import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
4144
import { KTX2Loader } from 'three/addons/loaders/KTX2Loader.js';
4245
import { MeshoptDecoder } from 'three/addons/libs/meshopt_decoder.module.js';
@@ -45,9 +48,7 @@
4548

4649
// Mediapipe
4750

48-
import vision from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35';
49-
50-
const { FaceLandmarker, FilesetResolver } = vision;
51+
import { FaceLandmarker, FilesetResolver } from 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.35';
5152

5253
const blendshapesMap = {
5354
// '_neutral': '',
@@ -105,6 +106,18 @@
105106
// '': 'tongueOut'
106107
};
107108

109+
// MediaPipe returns the head pose in a metric 3D space that assumes a
110+
// fixed virtual camera: right-handed, at the origin, looking down -Z, with
111+
// units in centimeters and a vertical field of view of 63 degrees. The
112+
// camera, the video plane and the model all have to share that frame for
113+
// the rendered face to register with the webcam image.
114+
115+
const MP_FOV = 63; // vertical field of view, in degrees
116+
const MP_NEAR = 1; // 1 cm
117+
const MP_FAR = 10000; // 100 m
118+
119+
const VIDEO_DISTANCE = 100; // depth of the video plane, in cm
120+
108121
//
109122

110123
const renderer = new THREE.WebGLRenderer( { antialias: true } );
@@ -113,22 +126,41 @@
113126
renderer.toneMapping = THREE.ACESFilmicToneMapping;
114127
document.body.appendChild( renderer.domElement );
115128

116-
const camera = new THREE.PerspectiveCamera( 60, window.innerWidth / window.innerHeight, 1, 100 );
117-
camera.position.z = 5;
129+
// The render camera matches MediaPipe's virtual camera: at the origin,
130+
// looking down -Z. It must not be moved, otherwise the overlay drifts. Its
131+
// aspect switches to the video's once the webcam is running.
132+
const camera = new THREE.PerspectiveCamera( MP_FOV, window.innerWidth / window.innerHeight, MP_NEAR, MP_FAR );
118133

119134
const scene = new THREE.Scene();
120135
scene.background = new THREE.Color( 0x666666 );
121-
scene.scale.x = - 1;
136+
scene.scale.x = - 1; // mirror the whole scene for a selfie view ( flips video and pose together )
122137

123138
scene.add( new THREE.AmbientLight( 0xffffff, 5 ) );
124139

125-
const controls = new OrbitControls( camera, renderer.domElement );
126-
127140
// Face
128141

129142
let face, eyeL, eyeR;
130143
const eyeRotationLimit = THREE.MathUtils.degToRad( 30 );
131144

145+
// MediaPipe's facial transformation matrix is copied here verbatim. Until
146+
// the webcam delivers one, the face rests at a default frontal pose ( in
147+
// front of the camera, in centimeters ) so it is framed before tracking.
148+
const faceContainer = new THREE.Object3D();
149+
faceContainer.matrixAutoUpdate = false;
150+
faceContainer.matrix.makeTranslation( 0, 0, - 50 );
151+
faceContainer.matrixWorldNeedsUpdate = true;
152+
scene.add( faceContainer );
153+
154+
// The Face Cap model is not MediaPipe's canonical face mesh, so this fixed
155+
// transform registers it into the canonical frame ( centimeters, +Y up,
156+
// +Z out of the face ) before the pose matrix is applied. The values are
157+
// derived from the model's eye positions.
158+
const registration = new THREE.Object3D();
159+
registration.scale.setScalar( 0.958 );
160+
registration.rotation.x = Math.PI / 2;
161+
registration.position.set( 0, 0.12, 1.18 );
162+
faceContainer.add( registration );
163+
132164
const ktx2Loader = new KTX2Loader()
133165
.detectSupport( renderer );
134166

@@ -137,18 +169,19 @@
137169
.setMeshoptDecoder( MeshoptDecoder )
138170
.load( 'models/gltf/facecap.glb', ( gltf ) => {
139171

140-
const mesh = gltf.scene.children[ 0 ];
141-
scene.add( mesh );
172+
// Reparent the head/eyes/teeth and drop the model's own scale rig.
173+
const group = gltf.scene.getObjectByName( 'grp_transform' );
174+
registration.add( group );
142175

143-
const head = mesh.getObjectByName( 'mesh_2' );
176+
const head = group.getObjectByName( 'mesh_2' );
144177
head.material = new THREE.MeshNormalMaterial();
145178

146-
const teeth = mesh.getObjectByName( 'mesh_3' );
179+
const teeth = group.getObjectByName( 'mesh_3' );
147180
teeth.material = new THREE.MeshNormalMaterial();
148181

149-
face = mesh.getObjectByName( 'mesh_2' );
150-
eyeL = mesh.getObjectByName( 'eyeLeft' );
151-
eyeR = mesh.getObjectByName( 'eyeRight' );
182+
face = head;
183+
eyeL = group.getObjectByName( 'eyeLeft' );
184+
eyeR = group.getObjectByName( 'eyeRight' );
152185

153186
// GUI
154187

@@ -177,8 +210,10 @@
177210
texture.colorSpace = THREE.SRGBColorSpace;
178211

179212
const geometry = new THREE.PlaneGeometry( 1, 1 );
180-
const material = new THREE.MeshBasicMaterial( { map: texture, depthWrite: false } );
213+
const material = new THREE.MeshBasicMaterial( { map: texture, depthTest: false, depthWrite: false } );
181214
const videomesh = new THREE.Mesh( geometry, material );
215+
videomesh.position.z = - VIDEO_DISTANCE;
216+
videomesh.renderOrder = - 1;
182217
scene.add( videomesh );
183218

184219
// MediaPipe
@@ -209,50 +244,55 @@
209244
} )
210245
.catch( function ( error ) {
211246

212-
console.error( 'Unable to access the camera/webcam.', error );
247+
console.warn( 'Unable to access the camera/webcam.', error );
213248

214249
} );
215250

216251
}
217252

218-
const transform = new THREE.Object3D();
253+
// The camera matches the video aspect; the canvas is sized to that aspect
254+
// and centered, so the grey body shows through as letterbox/pillarbox bars.
219255

220-
function animate() {
256+
video.addEventListener( 'loadedmetadata', function () {
221257

222-
if ( video.readyState >= HTMLMediaElement.HAVE_METADATA ) {
258+
const aspect = video.videoWidth / video.videoHeight;
223259

224-
const results = faceLandmarker.detectForVideo( video, Date.now() );
260+
camera.aspect = aspect;
261+
camera.updateProjectionMatrix();
225262

226-
if ( results.facialTransformationMatrixes.length > 0 ) {
263+
// Size the plane so it exactly fills the frustum at its depth.
264+
const height = 2 * VIDEO_DISTANCE * Math.tan( THREE.MathUtils.degToRad( MP_FOV / 2 ) );
265+
videomesh.scale.set( height * aspect, height, 1 );
227266

228-
const facialTransformationMatrixes = results.facialTransformationMatrixes[ 0 ].data;
267+
resize();
229268

230-
transform.matrix.fromArray( facialTransformationMatrixes );
231-
transform.matrix.decompose( transform.position, transform.quaternion, transform.scale );
269+
} );
232270

233-
const object = scene.getObjectByName( 'grp_transform' );
271+
function animate() {
234272

235-
object.position.x = transform.position.x;
236-
object.position.y = transform.position.z + 40;
237-
object.position.z = - transform.position.y;
273+
if ( video.readyState >= HTMLMediaElement.HAVE_METADATA ) {
238274

239-
object.rotation.x = transform.rotation.x;
240-
object.rotation.y = transform.rotation.z;
241-
object.rotation.z = - transform.rotation.y;
275+
const results = faceLandmarker.detectForVideo( video, Date.now() );
276+
277+
if ( results.facialTransformationMatrixes.length > 0 ) {
278+
279+
// Apply MediaPipe's metric pose matrix directly.
280+
faceContainer.matrix.fromArray( results.facialTransformationMatrixes[ 0 ].data );
281+
faceContainer.matrixWorldNeedsUpdate = true;
242282

243283
}
244284

245285
if ( results.faceBlendshapes.length > 0 ) {
246-
286+
247287
const faceBlendshapes = results.faceBlendshapes[ 0 ].categories;
248-
288+
249289
// Morph values do not exist on the eye meshes, so we map the eyes blendshape score into rotation values
250290
const eyeScore = {
251291
leftHorizontal: 0,
252292
rightHorizontal: 0,
253293
leftVertical: 0,
254294
rightVertical: 0,
255-
};
295+
};
256296

257297
for ( const blendshape of faceBlendshapes ) {
258298

@@ -304,28 +344,36 @@
304344
eyeR.rotation.z = eyeScore.rightHorizontal * eyeRotationLimit;
305345
eyeL.rotation.x = eyeScore.leftVertical * eyeRotationLimit;
306346
eyeR.rotation.x = eyeScore.rightVertical * eyeRotationLimit;
307-
347+
308348
}
309349

310350
}
311351

312-
videomesh.scale.x = video.videoWidth / 100;
313-
videomesh.scale.y = video.videoHeight / 100;
314-
315352
renderer.render( scene, camera );
316353

317-
controls.update();
318-
319354
}
320355

321-
window.addEventListener( 'resize', function () {
356+
function resize() {
322357

323-
camera.aspect = window.innerWidth / window.innerHeight;
324-
camera.updateProjectionMatrix();
358+
// Largest video-aspect rectangle that fits inside the window.
359+
let width = window.innerWidth;
360+
let height = window.innerHeight;
325361

326-
renderer.setSize( window.innerWidth, window.innerHeight );
362+
if ( width / height > camera.aspect ) {
327363

328-
} );
364+
width = height * camera.aspect;
365+
366+
} else {
367+
368+
height = width / camera.aspect;
369+
370+
}
371+
372+
renderer.setSize( width, height );
373+
374+
}
375+
376+
window.addEventListener( 'resize', resize );
329377

330378
</script>
331379
</body>

0 commit comments

Comments
 (0)