diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5b9530be4..797133111 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -39,6 +39,7 @@ Features:
 
 - Add ``options`` parameter to ``AudioResampler`` for passing ``libswresample`` options (e.g. ``resampler``, ``filter_size``, ``cutoff``) by :gh-user:`WyattBlue` (:issue:`2262`).
 - Support ``yuv420p10le`` in ``VideoFrame.to_ndarray`` and ``VideoFrame.from_ndarray`` by :gh-user:`WyattBlue` (:issue:`1981`).
+- Add ``at`` parameter to ``Graph.push`` and ``Graph.vpush`` to push a frame to a single buffer source by index (for multi-input filters like ``overlay``) by :gh-user:`WyattBlue`.
 
 Fixes:
 
diff --git a/av/container/core.pxd b/av/container/core.pxd
index badd84abb..bd298a7a0 100644
--- a/av/container/core.pxd
+++ b/av/container/core.pxd
@@ -34,7 +34,7 @@ cdef class Container:
     cdef HWAccel hwaccel
 
     cdef readonly StreamContainer streams
-    cdef readonly dict metadata
+    cdef dict _metadata
 
     # Private API.
     cdef uint8_t _myflag  # enum: writeable, input_was_opened, started, done, extradata_planned
diff --git a/av/container/core.py b/av/container/core.py
index 8aeaa7a3e..c6cdfe7fe 100755
--- a/av/container/core.py
+++ b/av/container/core.py
@@ -414,6 +414,14 @@ def flags(self, value: cython.int):
     def input_was_opened(self):
         return self._myflag & 2
 
+    @property
+    def metadata(self) -> dict:
+        """Container-level metadata dict, created on first access if unset."""
+        # Lazy: outputs that never use it skip the dict; inputs set it eagerly.
+        if self._metadata is None:
+            self._metadata = {}
+        return self._metadata
+
     def chapters(self):
         self._assert_open()
         result: list = []
diff --git a/av/container/input.py b/av/container/input.py
index 4881e720d..18558126c 100644
--- a/av/container/input.py
+++ b/av/container/input.py
@@ -101,7 +101,7 @@ def __cinit__(self, *args, **kwargs):
                 "Hardware accelerated decode requested but no stream is compatible"
             )
 
-        self.metadata = avdict_to_dict(
+        self._metadata = avdict_to_dict(
             self.ptr.metadata, self.metadata_encoding, self.metadata_errors
         )
 
diff --git a/av/container/output.py b/av/container/output.py
index 8c56ef080..4a9de9f73 100644
--- a/av/container/output.py
+++ b/av/container/output.py
@@ -67,7 +67,6 @@ def close_output(self: OutputContainer):
 class OutputContainer(Container):
     def __cinit__(self, *args, **kwargs):
         self.streams = StreamContainer()
-        self.metadata = {}
         self._extradata_bsfs = {}
         self._buffered_packets = []
         with cython.nogil:
diff --git a/av/filter/graph.py b/av/filter/graph.py
index a7648f967..8967cce5f 100644
--- a/av/filter/graph.py
+++ b/av/filter/graph.py
@@ -232,7 +232,7 @@ def set_audio_frame_size(self, frame_size):
                 cython.cast(FilterContext, sink).ptr, frame_size
             )
 
-    def push(self, frame):
+    def push(self, frame, at: cython.int = -1):
         if frame is None:
             contexts = self._get_context_by_type("buffer") + self._get_context_by_type(
                 "abuffer"
@@ -246,12 +246,29 @@ def push(self, frame):
                 f"can only AudioFrame, VideoFrame or None; got {type(frame)}"
             )
 
+        if at >= 0:
+            if at >= len(contexts):
+                raise IndexError(
+                    f"buffer source index {at} out of range; found {len(contexts)}"
+                )
+            contexts[at].push(frame)
+            return
+
         for ctx in contexts:
             ctx.push(frame)
 
-    def vpush(self, frame: VideoFrame | None):
-        """Like `push`, but only for VideoFrames."""
-        for ctx in self._get_context_by_type("buffer"):
+    def vpush(self, frame: VideoFrame | None, at: cython.int = -1):
+        """Like `push`, but only for VideoFrames; `at` >= 0 targets one source."""
+        contexts = self._get_context_by_type("buffer")
+        if at >= 0:
+            if at >= len(contexts):
+                raise IndexError(
+                    f"buffer source index {at} out of range; found {len(contexts)}"
+                )
+            contexts[at].push(frame)
+            return
+
+        for ctx in contexts:
             ctx.push(frame)
 
     # TODO: Test complex filter graphs, add `at: int = 0` arg to pull() and vpull().
diff --git a/av/filter/graph.pyi b/av/filter/graph.pyi
index 758813ea2..8f3231fc2 100644
--- a/av/filter/graph.pyi
+++ b/av/filter/graph.pyi
@@ -42,7 +42,7 @@ class Graph:
         time_base: Fraction | None = None,
     ) -> FilterContext: ...
     def set_audio_frame_size(self, frame_size: int) -> None: ...
-    def push(self, frame: None | AudioFrame | VideoFrame) -> None: ...
+    def push(self, frame: None | AudioFrame | VideoFrame, at: int = -1) -> None: ...
     def pull(self) -> VideoFrame | AudioFrame: ...
-    def vpush(self, frame: VideoFrame | None) -> None: ...
+    def vpush(self, frame: VideoFrame | None, at: int = -1) -> None: ...
     def vpull(self) -> VideoFrame: ...
diff --git a/scripts/build-deps b/scripts/build-deps
index 4449f2bc3..c2602e728 100755
--- a/scripts/build-deps
+++ b/scripts/build-deps
@@ -69,7 +69,7 @@ echo ./configure
     --disable-bsfs \
     --enable-bsf=chomp,extract_extradata,h264_mp4toannexb,setts \
     --disable-filters \
-    --enable-filter=abuffer,abuffersink,aformat,aresample,atempo,buffer,buffersink,bwdif,color,loudnorm,lutrgb,palettegen,scale,testsrc,vflip,volume \
+    --enable-filter=abuffer,abuffersink,aformat,aresample,atempo,buffer,buffersink,bwdif,color,loudnorm,lutrgb,overlay,palettegen,scale,testsrc,vflip,volume \
     --enable-sse \
     --enable-avx \
     --enable-avx2 \
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 51522e7de..97038828a 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -254,6 +254,50 @@ def test_EOF(self) -> None:
         assert palette_frame.width == 16
         assert palette_frame.height == 16
 
+    def test_push_at_index(self) -> None:
+        # overlay has two video buffer sources; `at` targets a single one,
+        # instead of broadcasting the same frame to both (like auto-editor's
+        # pushIdx/flushIdx). Both `push` and `vpush` are exercised.
+        width, height = 16, 16
+
+        base = VideoFrame(width, height, "yuv420p")
+        for plane in base.planes:
+            plane.update(bytes(plane.buffer_size))
+        base.pts = 0
+        base.time_base = Fraction(1, 30)
+
+        top = VideoFrame(width, height, "yuv420p")
+        for i, plane in enumerate(top.planes):
+            plane.update(bytes([200 if i == 0 else 128]) * plane.buffer_size)
+        top.pts = 0
+        top.time_base = Fraction(1, 30)
+
+        graph = Graph()
+        b0 = graph.add_buffer(
+            width=width, height=height, format=base.format, time_base=base.time_base
+        )
+        b1 = graph.add_buffer(
+            width=width, height=height, format=top.format, time_base=top.time_base
+        )
+        overlay = graph.add("overlay", "x=0:y=0")
+        sink = graph.add("buffersink")
+        b0.link_to(overlay, 0, 0)
+        b1.link_to(overlay, 0, 1)
+        overlay.link_to(sink)
+        graph.configure()
+
+        graph.push(base, at=0)
+        graph.vpush(top, at=1)
+        graph.push(None, at=0)
+        graph.vpush(None, at=1)
+
+        out = graph.vpull()
+        assert isinstance(out, av.VideoFrame)
+        assert (out.width, out.height) == (width, height)
+
+        with self.assertRaises(IndexError):
+            graph.push(base, at=2)
+
     def test_graph_threads(self) -> None:
         graph = Graph()
         assert graph.threads == 0