huggingface · RuixiangMa · May 29, 2026 · May 30, 2026
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -353,6 +353,8 @@
         title: JoyImageEditTransformer3DModel
       - local: api/models/latte_transformer3d
         title: LatteTransformer3DModel
+      - local: api/models/lens_transformer2d
+        title: LensTransformer2DModel
       - local: api/models/longcat_image_transformer2d
         title: LongCatImageTransformer2DModel
       - local: api/models/ltx2_video_transformer3d
@@ -553,6 +555,8 @@
         title: Kandinsky 5.0 Image
       - local: api/pipelines/kolors
         title: Kolors
+      - local: api/pipelines/lens
+        title: Lens
       - local: api/pipelines/latent_consistency_models
         title: Latent Consistency Models
       - local: api/pipelines/latent_diffusion

diff --git a/docs/source/en/api/models/lens_transformer2d.md b/docs/source/en/api/models/lens_transformer2d.md
@@ -0,0 +1,23 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# LensTransformer2DModel
+
+A Transformer model for image-like data from [Lens](https://huggingface.co/microsoft/Lens).
+
+## LensTransformer2DModel
+
+[[autodoc]] LensTransformer2DModel
+
+## Transformer2DModelOutput
+
+[[autodoc]] models.modeling_outputs.Transformer2DModelOutput
diff --git a/docs/source/en/api/pipelines/lens.md b/docs/source/en/api/pipelines/lens.md
@@ -0,0 +1,52 @@
+<!--Copyright 2025 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Lens
+
+<div class="flex flex-wrap space-x-1">
+</div>
+
+Lens is a 3.8B-parameter foundational text-to-image model designed for efficient training and fast high-resolution generation. It combines dense-caption pre-training, mixed-resolution learning, GPT-OSS multi-layer text features, and the FLUX.2 semantic VAE to reach competitive quality with substantially less training compute than larger T2I models. For more details, please refer to the [model card](https://huggingface.co/microsoft/Lens).
+
+The abstract from the paper is:
+
+*Lens is a 3.8B-parameter foundational text-to-image model designed for efficient training and fast high-resolution generation. It combines dense-caption pre-training, mixed-resolution learning, GPT-OSS multi-layer text features, and the FLUX.2 semantic VAE to reach competitive quality with substantially less training compute than larger T2I models.*
+
+## Usage Example
+
+```python
+import torch
+from diffusers import LensPipeline
+
+pipe = LensPipeline.from_pretrained("microsoft/Lens", torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+
+image = pipe(
+    prompt="A cat holding a sign that says hello world",
+    height=1440,
+    width=1440,
+    num_inference_steps=20,
+    guidance_scale=5.0,
+).images[0]
+image.save("lens.png")
+```
+
+## LensPipeline
+
+[[autodoc]] LensPipeline
+  - all
+  - __call__
+
+
+## LensPipelineOutput
+
+[[autodoc]] pipelines.lens.pipeline_output.LensPipelineOutput
diff --git a/docs/source/en/api/pipelines/overview.md b/docs/source/en/api/pipelines/overview.md
@@ -50,6 +50,7 @@ The table below lists all the pipelines currently available in 🤗 Diffusers an
 | [Kandinsky 2.2](kandinsky_v22) | text2image, image2image, inpainting |
 | [Kandinsky 3](kandinsky3) | text2image, image2image |
 | [Kolors](kolors) | text2image |
+| [Lens](lens) | text2image |
 | [Latent Consistency Models](latent_consistency_models) | text2image |
 | [Latent Diffusion](latent_diffusion) | text2image, super-resolution |
 | [Latte](latte) | text2image |

diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
@@ -260,6 +260,7 @@
             "JoyImageEditTransformer3DModel",
             "Kandinsky3UNet",
             "Kandinsky5Transformer3DModel",
+            "LensTransformer2DModel",
             "LatteTransformer3DModel",
             "LongCatAudioDiTTransformer",
             "LongCatAudioDiTVae",
@@ -621,6 +622,7 @@
             "LatentConsistencyModelImg2ImgPipeline",
             "LatentConsistencyModelPipeline",
             "LattePipeline",
+            "LensPipeline",
             "LDMTextToImagePipeline",
             "LEditsPPPipelineStableDiffusion",
             "LEditsPPPipelineStableDiffusionXL",
@@ -1096,6 +1098,7 @@
             JoyImageEditTransformer3DModel,
             Kandinsky3UNet,
             Kandinsky5Transformer3DModel,
+            LensTransformer2DModel,
             LatteTransformer3DModel,
             LongCatAudioDiTTransformer,
             LongCatAudioDiTVae,
@@ -1432,6 +1435,7 @@
             LatentConsistencyModelImg2ImgPipeline,
             LatentConsistencyModelPipeline,
             LattePipeline,
+            LensPipeline,
             LDMTextToImagePipeline,
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,

diff --git a/src/diffusers/models/__init__.py b/src/diffusers/models/__init__.py
@@ -119,6 +119,7 @@
     _import_structure["transformers.transformer_hunyuanimage"] = ["HunyuanImageTransformer2DModel"]
     _import_structure["transformers.transformer_joyimage"] = ["JoyImageEditTransformer3DModel"]
     _import_structure["transformers.transformer_kandinsky"] = ["Kandinsky5Transformer3DModel"]
+    _import_structure["transformers.transformer_lens"] = ["LensTransformer2DModel"]
     _import_structure["transformers.transformer_longcat_audio_dit"] = ["LongCatAudioDiTTransformer"]
     _import_structure["transformers.transformer_longcat_image"] = ["LongCatImageTransformer2DModel"]
     _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
@@ -248,6 +249,7 @@
             HunyuanVideoTransformer3DModel,
             JoyImageEditTransformer3DModel,
             Kandinsky5Transformer3DModel,
+            LensTransformer2DModel,
             LatteTransformer3DModel,
             LongCatAudioDiTTransformer,
             LongCatImageTransformer2DModel,

diff --git a/src/diffusers/models/transformers/__init__.py b/src/diffusers/models/transformers/__init__.py
@@ -41,6 +41,7 @@
     from .transformer_hunyuanimage import HunyuanImageTransformer2DModel
     from .transformer_joyimage import JoyImageEditTransformer3DModel
     from .transformer_kandinsky import Kandinsky5Transformer3DModel
+    from .transformer_lens import LensTransformer2DModel
     from .transformer_longcat_audio_dit import LongCatAudioDiTTransformer
     from .transformer_longcat_image import LongCatImageTransformer2DModel
     from .transformer_ltx import LTXVideoTransformer3DModel