diff --git a/temporalio/bridge/Cargo.lock b/temporalio/bridge/Cargo.lock index 85793c0f3..b86ad6b16 100644 --- a/temporalio/bridge/Cargo.lock +++ b/temporalio/bridge/Cargo.lock @@ -473,7 +473,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -1914,7 +1914,7 @@ dependencies = [ "once_cell", "socket2 0.5.10", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2138,7 +2138,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -2468,7 +2468,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/temporalio/contrib/google_adk_agents/_model.py b/temporalio/contrib/google_adk_agents/_model.py index 80079433c..d5752d9a0 100644 --- a/temporalio/contrib/google_adk_agents/_model.py +++ b/temporalio/contrib/google_adk_agents/_model.py @@ -1,13 +1,30 @@ +import json +import logging from collections.abc import AsyncGenerator -from datetime import timedelta +from datetime import datetime, timedelta, timezone from google.adk.models import BaseLlm, LLMRegistry from google.adk.models.llm_request import LlmRequest from google.adk.models.llm_response import LlmResponse from temporalio import activity, workflow +from temporalio.contrib.pubsub import PubSubClient from temporalio.workflow import ActivityConfig +logger = logging.getLogger(__name__) + +EVENTS_TOPIC = "events" + + +def _make_event(event_type: str, **data: object) -> bytes: + return json.dumps( + { + "type": event_type, + "timestamp": datetime.now(timezone.utc).isoformat(), + "data": data, + } + ).encode() + @activity.defn async def invoke_model(llm_request: LlmRequest) -> list[LlmResponse]: @@ -35,20 +52,93 @@ async def invoke_model(llm_request: LlmRequest) -> 
list[LlmResponse]: ] +@activity.defn +async def invoke_model_streaming(llm_request: LlmRequest) -> list[LlmResponse]: + """Streaming-aware model activity. + + Calls the LLM with stream=True, publishes TEXT_DELTA events via + PubSubClient as tokens arrive, and returns the collected responses. + + The PubSubClient auto-detects the activity context to find the parent + workflow for publishing. + + Args: + llm_request: The LLM request containing model name and parameters. + + Returns: + List of LLM responses from the model. + """ + if llm_request.model is None: + raise ValueError("No model name provided, could not create LLM.") + + llm = LLMRegistry.new_llm(llm_request.model) + if not llm: + raise ValueError(f"Failed to create LLM for model: {llm_request.model}") + + pubsub = PubSubClient.create(batch_interval=0.1) + responses: list[LlmResponse] = [] + text_buffer = "" + + async with pubsub: + pubsub.publish(EVENTS_TOPIC, _make_event("LLM_CALL_START"), priority=True) + + async for response in llm.generate_content_async( + llm_request=llm_request, stream=True + ): + activity.heartbeat() + responses.append(response) + + if response.content and response.content.parts: + for part in response.content.parts: + if part.text: + text_buffer += part.text + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_DELTA", delta=part.text), + ) + if part.function_call: + pubsub.publish( + EVENTS_TOPIC, + _make_event( + "TOOL_CALL_START", + tool_name=part.function_call.name, + ), + ) + + if text_buffer: + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_COMPLETE", text=text_buffer), + priority=True, + ) + pubsub.publish( + EVENTS_TOPIC, _make_event("LLM_CALL_COMPLETE"), priority=True + ) + + return responses + + class TemporalModel(BaseLlm): """A Temporal-based LLM model that executes model invocations as activities.""" def __init__( - self, model_name: str, activity_config: ActivityConfig | None = None + self, + model_name: str, + activity_config: ActivityConfig | None = None, + 
streaming: bool = False, ) -> None: """Initialize the TemporalModel. Args: model_name: The name of the model to use. activity_config: Configuration options for the activity execution. + streaming: When True, the model activity uses the streaming LLM + endpoint and publishes token events via PubSubClient. The + workflow is unaffected -- it still receives complete responses. """ super().__init__(model=model_name) self._model_name = model_name + self._streaming = streaming self._activity_config = ActivityConfig( start_to_close_timeout=timedelta(seconds=60) ) @@ -62,15 +152,23 @@ async def generate_content_async( Args: llm_request: The LLM request containing model parameters and content. - stream: Whether to stream the response (currently ignored). + stream: Whether to stream the response (currently ignored; use the + ``streaming`` constructor parameter instead). Yields: The responses from the model. """ - responses = await workflow.execute_activity( - invoke_model, - args=[llm_request], - **self._activity_config, - ) + if self._streaming: + responses = await workflow.execute_activity( + invoke_model_streaming, + args=[llm_request], + **self._activity_config, + ) + else: + responses = await workflow.execute_activity( + invoke_model, + args=[llm_request], + **self._activity_config, + ) for response in responses: yield response diff --git a/temporalio/contrib/google_adk_agents/_plugin.py b/temporalio/contrib/google_adk_agents/_plugin.py index 03cb78998..52504e78f 100644 --- a/temporalio/contrib/google_adk_agents/_plugin.py +++ b/temporalio/contrib/google_adk_agents/_plugin.py @@ -8,7 +8,10 @@ from temporalio import workflow from temporalio.contrib.google_adk_agents._mcp import TemporalMcpToolSetProvider -from temporalio.contrib.google_adk_agents._model import invoke_model +from temporalio.contrib.google_adk_agents._model import ( + invoke_model, + invoke_model_streaming, +) from temporalio.contrib.pydantic import ( PydanticPayloadConverter as 
_DefaultPydanticPayloadConverter, ) @@ -94,7 +97,7 @@ def workflow_runner(runner: WorkflowRunner | None) -> WorkflowRunner: ) return runner - new_activities = [invoke_model] + new_activities = [invoke_model, invoke_model_streaming] if toolset_providers is not None: for toolset_provider in toolset_providers: new_activities.extend(toolset_provider._get_activities()) diff --git a/temporalio/contrib/openai_agents/_invoke_model_activity.py b/temporalio/contrib/openai_agents/_invoke_model_activity.py index 945a05ec6..c29ef2dc9 100644 --- a/temporalio/contrib/openai_agents/_invoke_model_activity.py +++ b/temporalio/contrib/openai_agents/_invoke_model_activity.py @@ -4,8 +4,10 @@ """ import enum +import json +import logging from dataclasses import dataclass -from datetime import timedelta +from datetime import datetime, timedelta, timezone from typing import Any from agents import ( @@ -24,6 +26,7 @@ RunContextWrapper, Tool, TResponseInputItem, + Usage, UserError, WebSearchTool, ) @@ -31,13 +34,29 @@ APIStatusError, AsyncOpenAI, ) +from openai.types.responses import ResponseCompletedEvent from openai.types.responses.tool_param import Mcp from typing_extensions import Required, TypedDict from temporalio import activity from temporalio.contrib.openai_agents._heartbeat_decorator import _auto_heartbeater +from temporalio.contrib.pubsub import PubSubClient from temporalio.exceptions import ApplicationError +logger = logging.getLogger(__name__) + +EVENTS_TOPIC = "events" + + +def _make_event(event_type: str, **data: object) -> bytes: + return json.dumps( + { + "type": event_type, + "timestamp": datetime.now(timezone.utc).isoformat(), + "data": data, + } + ).encode() + @dataclass class HandoffInput: @@ -263,3 +282,201 @@ def make_tool(tool: ToolInput) -> Tool: non_retryable=True, next_retry_delay=retry_after, ) from e + + @activity.defn + @_auto_heartbeater + async def invoke_model_activity_streaming( + self, input: ActivityModelInput + ) -> ModelResponse: + """Streaming-aware 
model activity. + + Calls model.stream_response(), publishes token events via PubSubClient, + and returns the complete ModelResponse constructed from the + ResponseCompletedEvent at the end of the stream. + """ + model = self._model_provider.get_model(input.get("model_name")) + + async def empty_on_invoke_tool( + _ctx: RunContextWrapper[Any], _input: str + ) -> str: + return "" + + async def empty_on_invoke_handoff( + _ctx: RunContextWrapper[Any], _input: str + ) -> Any: + return None + + def make_tool(tool: ToolInput) -> Tool: + if isinstance( + tool, + ( + FileSearchTool, + WebSearchTool, + ImageGenerationTool, + CodeInterpreterTool, + ), + ): + return tool + elif isinstance(tool, HostedMCPToolInput): + return HostedMCPTool(tool_config=tool.tool_config) + elif isinstance(tool, FunctionToolInput): + return FunctionTool( + name=tool.name, + description=tool.description, + params_json_schema=tool.params_json_schema, + on_invoke_tool=empty_on_invoke_tool, + strict_json_schema=tool.strict_json_schema, + ) + else: + raise UserError(f"Unknown tool type: {tool.name}") # type:ignore[reportUnreachable] + + tools = [make_tool(x) for x in input.get("tools", [])] + handoffs: list[Handoff[Any, Any]] = [ + Handoff( + tool_name=x.tool_name, + tool_description=x.tool_description, + input_json_schema=x.input_json_schema, + agent_name=x.agent_name, + strict_json_schema=x.strict_json_schema, + on_invoke_handoff=empty_on_invoke_handoff, + ) + for x in input.get("handoffs", []) + ] + + pubsub = PubSubClient.create(batch_interval=0.1) + final_response = None + text_buffer = "" + thinking_buffer = "" + thinking_active = False + + try: + async with pubsub: + pubsub.publish( + EVENTS_TOPIC, _make_event("LLM_CALL_START"), priority=True + ) + + async for event in model.stream_response( + system_instructions=input.get("system_instructions"), + input=input["input"], + model_settings=input["model_settings"], + tools=tools, + output_schema=input.get("output_schema"), + handoffs=handoffs, + 
tracing=ModelTracing(input["tracing"]), + previous_response_id=input.get("previous_response_id"), + conversation_id=input.get("conversation_id"), + prompt=input.get("prompt"), + ): + activity.heartbeat() + etype = getattr(event, "type", None) + + if etype == "response.output_text.delta": + text_buffer += event.delta + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_DELTA", delta=event.delta), + ) + elif etype == "response.reasoning_summary_text.delta": + if not thinking_active: + thinking_active = True + pubsub.publish( + EVENTS_TOPIC, _make_event("THINKING_START") + ) + thinking_buffer += event.delta + pubsub.publish( + EVENTS_TOPIC, + _make_event("THINKING_DELTA", delta=event.delta), + ) + elif etype == "response.reasoning_summary_text.done": + if thinking_active: + pubsub.publish( + EVENTS_TOPIC, + _make_event( + "THINKING_COMPLETE", + content=thinking_buffer, + ), + priority=True, + ) + thinking_buffer = "" + thinking_active = False + elif etype == "response.output_item.added": + item = event.item + if getattr(item, "type", None) == "function_call": + pubsub.publish( + EVENTS_TOPIC, + _make_event( + "TOOL_CALL_START", tool_name=item.name + ), + ) + elif isinstance(event, ResponseCompletedEvent): + final_response = event.response + + if text_buffer: + pubsub.publish( + EVENTS_TOPIC, + _make_event("TEXT_COMPLETE", text=text_buffer), + priority=True, + ) + pubsub.publish( + EVENTS_TOPIC, + _make_event("LLM_CALL_COMPLETE"), + priority=True, + ) + + except APIStatusError as e: + retry_after = None + retry_after_ms_header = e.response.headers.get("retry-after-ms") + if retry_after_ms_header is not None: + retry_after = timedelta(milliseconds=float(retry_after_ms_header)) + + if retry_after is None: + retry_after_header = e.response.headers.get("retry-after") + if retry_after_header is not None: + retry_after = timedelta(seconds=float(retry_after_header)) + + should_retry_header = e.response.headers.get("x-should-retry") + if should_retry_header == "true": + raise 
e + if should_retry_header == "false": + raise ApplicationError( + "Non retryable OpenAI error", + non_retryable=True, + next_retry_delay=retry_after, + ) from e + + if ( + e.response.status_code in [408, 409, 429] + or e.response.status_code >= 500 + ): + raise ApplicationError( + f"Retryable OpenAI status code: {e.response.status_code}", + non_retryable=False, + next_retry_delay=retry_after, + ) from e + + raise ApplicationError( + f"Non retryable OpenAI status code: {e.response.status_code}", + non_retryable=True, + next_retry_delay=retry_after, + ) from e + + if final_response is None: + raise ApplicationError( + "Stream ended without ResponseCompletedEvent", + non_retryable=True, + ) + + usage = Usage( + requests=1, + input_tokens=final_response.usage.input_tokens + if final_response.usage + else 0, + output_tokens=final_response.usage.output_tokens + if final_response.usage + else 0, + ) + return ModelResponse( + output=final_response.output, + usage=usage, + response_id=final_response.id, + ) diff --git a/temporalio/contrib/openai_agents/_model_parameters.py b/temporalio/contrib/openai_agents/_model_parameters.py index 55827e0d5..d5b757a4e 100644 --- a/temporalio/contrib/openai_agents/_model_parameters.py +++ b/temporalio/contrib/openai_agents/_model_parameters.py @@ -68,3 +68,9 @@ class ModelActivityParameters: use_local_activity: bool = False """Whether to use a local activity. If changed during a workflow execution, that would break determinism.""" + + enable_streaming: bool = False + """When True, the model activity uses the streaming LLM endpoint and + publishes token events via PubSubClient. The workflow is unaffected -- + it still receives a complete ModelResponse. 
Incompatible with + use_local_activity (local activities do not support heartbeats).""" diff --git a/temporalio/contrib/openai_agents/_temporal_model_stub.py b/temporalio/contrib/openai_agents/_temporal_model_stub.py index f55821309..adacd9ecb 100644 --- a/temporalio/contrib/openai_agents/_temporal_model_stub.py +++ b/temporalio/contrib/openai_agents/_temporal_model_stub.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from datetime import timedelta from temporalio import workflow from temporalio.contrib.openai_agents._model_parameters import ModelActivityParameters @@ -154,7 +155,28 @@ def make_tool_info(tool: Tool) -> ToolInput: else: summary = None - if self.model_params.use_local_activity: + if self.model_params.enable_streaming: + if self.model_params.use_local_activity: + raise ValueError( + "Streaming is incompatible with local activities " + "(local activities do not support heartbeats)." + ) + return await workflow.execute_activity_method( + ModelActivity.invoke_model_activity_streaming, + activity_input, + summary=summary, + task_queue=self.model_params.task_queue, + schedule_to_close_timeout=self.model_params.schedule_to_close_timeout, + schedule_to_start_timeout=self.model_params.schedule_to_start_timeout, + start_to_close_timeout=self.model_params.start_to_close_timeout, + heartbeat_timeout=self.model_params.heartbeat_timeout + or timedelta(seconds=30), + retry_policy=self.model_params.retry_policy, + cancellation_type=self.model_params.cancellation_type, + versioning_intent=self.model_params.versioning_intent, + priority=self.model_params.priority, + ) + elif self.model_params.use_local_activity: return await workflow.execute_local_activity_method( ModelActivity.invoke_model_activity, activity_input, diff --git a/temporalio/contrib/openai_agents/_temporal_openai_agents.py b/temporalio/contrib/openai_agents/_temporal_openai_agents.py index 39168d0fd..b35853781 100644 --- a/temporalio/contrib/openai_agents/_temporal_openai_agents.py 
+++ b/temporalio/contrib/openai_agents/_temporal_openai_agents.py @@ -195,7 +195,11 @@ def add_activities( if not register_activities: return activities or [] - new_activities = [ModelActivity(model_provider).invoke_model_activity] + model_activity = ModelActivity(model_provider) + new_activities = [ + model_activity.invoke_model_activity, + model_activity.invoke_model_activity_streaming, + ] server_names = [server.name for server in mcp_server_providers] if len(server_names) != len(set(server_names)): diff --git a/temporalio/contrib/pubsub/DESIGN-ADDENDUM-CAN.md b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-CAN.md new file mode 100644 index 000000000..55650db56 --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-CAN.md @@ -0,0 +1,272 @@ +# Continue-As-New Addendum + +Addendum to [DESIGN.md](./DESIGN.md). Addresses the continue-as-new (CAN) gap +identified in section 10 ("Event retention"). + +## Problem + +The pub/sub mixin accumulates workflow history through two channels: + +1. **Signals** — each `__pubsub_publish` signal adds a `WorkflowSignaled` event + plus the serialized `PublishInput` payload. +2. **Updates** — each `__pubsub_poll` response serializes the returned + `PollResult` (including all matched items) into the history as an update + completion event. + +Over a streaming agent session, a subscriber polling every few seconds +accumulates many update-completion events, each containing a slice of the log. +These are redundant copies of data already held in `_pubsub_log`. The history +grows toward the ~50K event warning threshold, at which point Temporal forces +termination. + +Continue-as-new resets the history. By serializing the full log into the CAN +input, we carry a single canonical copy forward and discard all the redundant +history entries from prior signals, updates, and queries. 
+ +## Design + +### `PubSubState` type + +New dataclass in `_types.py`: + +```python +@dataclass +class PubSubState: + """Serializable snapshot of pub/sub state for continue-as-new.""" + log: list[PubSubItem] = field(default_factory=list) +``` + +The offset counter is not stored — it is derived as `len(log)`. This avoids +any possibility of the counter and log diverging. + +Exported from `__init__.py`. + +### Mixin changes + +New and modified methods on `PubSubMixin`: + +```python +def init_pubsub(self, prior_state: PubSubState | None = None) -> None: + """Initialize pub/sub state. + + Args: + prior_state: State from a previous run (via get_pubsub_state()). + Pass None on the first run. + """ + if prior_state is not None: + self._pubsub_log = list(prior_state.log) + else: + self._pubsub_log = [] + self._pubsub_draining = False + +def get_pubsub_state(self) -> PubSubState: + """Return a serializable snapshot of pub/sub state. + + Call this when building your continue-as-new arguments. + """ + return PubSubState(log=list(self._pubsub_log)) +``` + +The mixin does **not** trigger CAN itself. The parent workflow decides when to +continue-as-new (typically by checking `workflow.info().is_continue_as_new_suggested()` +at a safe point in its main loop). + +### Draining: `drain_pubsub()` + update validator + +A long-poll `__pubsub_poll` handler can block for up to 300 seconds waiting for +new items. We cannot let that block continue-as-new indefinitely. Conversely, a +naive drain that unblocks waiting polls but doesn't reject new ones creates a +race: the client receives an empty result, immediately sends a new poll, the new +poll is accepted, and `all_handlers_finished()` never stabilizes. This is +because `await workflow.wait_condition(workflow.all_handlers_finished)` yields, +allowing the SDK to process new events — including new update acceptances — in +the same or subsequent workflow tasks. + +The solution is two mechanisms working together: + +1. 
**A drain flag** that unblocks all waiting poll handlers. +2. **An update validator** that rejects new polls once draining is set. + +```python +def drain_pubsub(self) -> None: + """Unblock all waiting poll handlers and reject new polls. + + Call this before waiting for all_handlers_finished() and + continue_as_new(). + """ + self._pubsub_draining = True + +@workflow.update(name="__pubsub_poll") +async def _pubsub_poll(self, input: PollInput) -> PollResult: + await workflow.wait_condition( + lambda: len(self._pubsub_log) > input.from_offset + or self._pubsub_draining, + timeout=input.timeout, + ) + # Return whatever items are available (possibly empty if drain-only) + all_new = self._pubsub_log[input.from_offset:] + next_offset = len(self._pubsub_log) + if input.topics: + topic_set = set(input.topics) + filtered = [item for item in all_new if item.topic in topic_set] + else: + filtered = list(all_new) + return PollResult(items=filtered, next_offset=next_offset) + +@_pubsub_poll.validator +def _validate_pubsub_poll(self, input: PollInput) -> None: + if self._pubsub_draining: + raise RuntimeError("Workflow is draining for continue-as-new") +``` + +The validator is read-only (checks a flag, raises to reject) — this satisfies +the Temporal constraint that validators must not mutate state or block. + +**CAN sequence in the parent workflow:** + +```python +self.drain_pubsub() +await workflow.wait_condition(workflow.all_handlers_finished) +workflow.continue_as_new(args=[...]) +``` + +What happens: + +1. `drain_pubsub()` sets `_pubsub_draining = True`. +2. All blocked `__pubsub_poll` handlers unblock (the `or self._pubsub_draining` + clause becomes true) and return their current items. +3. The validator rejects any new `__pubsub_poll` updates — they are never + accepted, so no new handlers start. +4. `all_handlers_finished()` becomes true and **stays** true. +5. `continue_as_new()` proceeds. + +On the client side, the rejected poll surfaces as an error. 
The subscriber +detects CAN via `describe()`, follows the chain, and resumes from the same +offset against the new run. + +### Client-side CAN resilience + +The current `subscribe()` method catches `CancelledError` and +`WorkflowUpdateRPCTimeoutOrCancelledError`, then stops iteration. It has no +CAN awareness. + +#### New behavior + +`subscribe()` gains a `follow_continues` parameter (default `True`): + +```python +async def subscribe( + self, + topics: list[str] | None = None, + from_offset: int = 0, + *, + follow_continues: bool = True, +) -> AsyncIterator[PubSubItem]: +``` + +When an `execute_update` call fails and `follow_continues` is `True`, the +client: + +1. Calls `describe()` on the current handle to check execution status. +2. If the status is `CONTINUED_AS_NEW`, replaces `self._handle` with a fresh + handle for the same workflow ID (no pinned `run_id`), then retries the poll + from the same offset. +3. If the status is anything else, re-raises the original error. + +```python +async def _follow_continue_as_new(self) -> bool: + """Check if the workflow continued-as-new and update the handle. + + Returns True if the handle was updated (caller should retry). + """ + try: + desc = await self._handle.describe() + except Exception: + return False + if desc.status == WorkflowExecutionStatus.CONTINUED_AS_NEW: + self._handle = self._handle._client.get_workflow_handle( + self._handle.id + ) + return True + return False +``` + +The retry succeeds because the new run's log contains all items from the +previous run. Polling from the same offset returns the expected items. + +#### Why this works with `activity_pubsub_client()` + +`activity_pubsub_client()` creates handles via +`activity.client().get_workflow_handle(workflow_id)` — no `run_id` pinned. +Signals and updates already route to the current run, so activity-side +publishing is CAN-friendly without changes. 
+ +## Offset Continuity + +Since the full log is carried forward: + +- Pre-CAN: offsets `0..N-1`, `len(log) == N`. +- Post-CAN: `init_pubsub(prior_state)` restores the same N items. New appends + start at offset N. +- A subscriber at offset K (where K < N) polls the new run and gets items + `K..N-1` from the carried-forward log, then continues with new items. + +No offset remapping. No sentinel values. No coordination protocol. + +## Usage Example + +```python +@dataclass +class WorkflowInput: + # ... application fields ... + pubsub_state: PubSubState | None = None + +@workflow.defn +class AgentWorkflow(PubSubMixin): + @workflow.run + async def run(self, input: WorkflowInput) -> None: + self.init_pubsub(prior_state=input.pubsub_state) + + while True: + await workflow.wait_condition( + lambda: self._pending_message or self._closed + ) + if self._closed: + return + + await self._run_turn(self._pending_message) + + if workflow.info().is_continue_as_new_suggested(): + self.drain_pubsub() + await workflow.wait_condition(workflow.all_handlers_finished) + workflow.continue_as_new(args=[WorkflowInput( + # ... application fields ... + pubsub_state=self.get_pubsub_state(), + )]) +``` + +## Edge Cases + +### Payload size limit + +The full log serialized into CAN input could approach Temporal's default 2 MB +payload limit for very long sessions with large payloads. This is an inherent +constraint of the full-history approach. + +Mitigation: the snapshot + truncate extension described in DESIGN.md section 10 +addresses this by discarding consumed entries before CAN. That extension becomes +the natural next step if payload size becomes a problem in practice. + +### Signal delivery during CAN + +A `PubSubClient` in publish mode sending signals mid-CAN may get errors if +its handle is pinned to the old run. The publishing side does **not** +auto-follow CAN — the parent workflow should ensure activities complete (and +therefore stop publishing) before triggering CAN. 
+ +### Concurrent subscribers + +Multiple subscribers independently follow the CAN chain. Each maintains its +own offset. Sharing a `PubSubClient` instance across concurrent `subscribe()` +calls is safe — they all want to target the latest run, and the handle is +effectively just a workflow ID reference. diff --git a/temporalio/contrib/pubsub/DESIGN-ADDENDUM-DEDUP.md b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-DEDUP.md new file mode 100644 index 000000000..7c838f9b3 --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-DEDUP.md @@ -0,0 +1,224 @@ +# Exactly-Once Publish Delivery — Addendum + +Addendum to [DESIGN.md](./DESIGN.md). Addresses the signal delivery gap: the +original design has no deduplication, so a retry after a failed signal can +produce duplicate entries in the log. + +## Problem + +The `PubSubClient.flush()` method sends buffered items to the workflow via a +Temporal signal. If the signal call raises an exception (e.g., network timeout +on the response after the server accepted the signal), the client cannot +distinguish "signal was delivered" from "signal was not delivered." Without +deduplication, the client must choose: + +- **Clear buffer before sending (swap pattern).** Items are lost if the signal + truly fails. At-most-once. +- **Clear buffer after sending.** Items are re-sent on the next flush if the + signal was delivered but the response failed. At-least-once with silent + duplication. + +Neither is acceptable for a pub/sub log where subscribers expect exactly-once +delivery and stable offsets. + +## Options Considered + +### Option 1: Batch UUID + +Each flush assigns a `uuid4` to the batch. The workflow maintains a set of seen +batch IDs and skips duplicates. + +- **Pro:** Simple to implement. +- **Con:** The seen-IDs set grows without bound. Must be carried through + continue-as-new or periodically pruned. Pruning requires knowing which IDs + can never be retried — which is unknowable without additional protocol. 
+ +### Option 2: Offset-based dedup + +The publisher includes the expected log offset in the signal. The workflow +rejects if items at that offset already exist. + +- **Pro:** No additional state — dedup is implicit in the log structure. +- **Con:** The publisher does not know the current log offset. It would need to + query first, introducing a read-before-write round-trip and a race between + the query and the signal. Multiple concurrent publishers would conflict. + +### Option 3: Publisher ID + sequence number + +Each `PubSubClient` generates a UUID on creation (the publisher ID). Each flush +increments a monotonic sequence counter. The signal payload includes +`(publisher_id, sequence)`. The workflow tracks the highest seen sequence per +publisher and rejects any signal with a sequence ≤ the recorded value. + +- **Pro:** Dedup state is `dict[str, int]` — bounded by the number of + publishers (typically 1–2), not the number of flushes. The workflow can + detect gaps (missing sequence numbers) as a diagnostic signal. Naturally + survives continue-as-new if carried in state. No unbounded set. No + read-before-write round-trip. +- **Con:** Requires the publisher to maintain a sequence counter (trivial) and + the workflow to carry `publisher_sequences` through CAN (small dict). + +### Option 4: Temporal idempotency keys + +Temporal does not currently provide built-in signal deduplication or idempotency +keys for signals. This option is not available. + +## Design Decision: Publisher ID + sequence number (Option 3) + +Option 3 is adopted. The dedup state is minimal, bounded, and self-cleaning +(old publishers' entries can be removed after a timeout or on CAN). It aligns +with how Kafka producers achieve exactly-once: each producer has an ID and a +monotonic sequence, and the broker deduplicates on the pair. 
+ +## Wire Changes + +### `PublishInput` + +```python +@dataclass +class PublishInput: + items: list[PublishEntry] = field(default_factory=list) + publisher_id: str = "" + sequence: int = 0 +``` + +Both fields default to empty/zero for backward compatibility. If `publisher_id` +is empty, the workflow skips deduplication (legacy behavior). + +### `PubSubClient` changes + +```python +class PubSubClient: + def __init__(self, handle, ...): + ... + self._publisher_id: str = uuid.uuid4().hex + self._sequence: int = 0 + + async def flush(self) -> None: + async with self._flush_lock: + if self._buffer: + self._sequence += 1 + batch = self._buffer + self._buffer = [] + try: + await self._handle.signal( + "__pubsub_publish", + PublishInput( + items=batch, + publisher_id=self._publisher_id, + sequence=self._sequence, + ), + ) + except Exception: + # Restore items for retry. Sequence number is already + # incremented — the next attempt uses the same sequence, + # so the workflow deduplicates if the first signal was + # actually delivered. + self._sequence -= 1 + self._buffer = batch + self._buffer + raise +``` + +Key behaviors: + +- **Buffer swap before send.** Items are moved out of the buffer before the + signal await. New `publish()` calls during the await write to the fresh + buffer and are not affected by a retry. +- **Sequence advances on failure.** If the signal raises, the sequence counter + is NOT decremented. The failed batch is restored to the buffer, but the next + flush uses a new sequence number. This prevents data loss: if the original + signal was delivered but the client saw an error, items published during the + failed await would be merged into the retry batch. With the old sequence, + the workflow would deduplicate the entire merged batch, silently dropping + the newly-published items. With a new sequence, the retry is treated as a + fresh batch. 
The tradeoff is that the original items may be delivered twice + (at-least-once), but the workflow-side dedup catches the common case where + the batch is retried unchanged. +- **Lock for coalescing.** An `asyncio.Lock` serializes flushes. Multiple + concurrent `flush()` callers queue on the lock; by the time each enters, + later items have accumulated. This naturally coalesces N flush calls into + fewer signals. + +## Workflow Changes + +### Signal handler + +```python +@workflow.signal(name="__pubsub_publish") +def _pubsub_publish(self, input: PublishInput) -> None: + self._check_initialized() + if input.publisher_id: + last_seq = self._publisher_sequences.get(input.publisher_id, 0) + if input.sequence <= last_seq: + return # duplicate — skip + self._publisher_sequences[input.publisher_id] = input.sequence + for entry in input.items: + self._pubsub_log.append(PubSubItem(topic=entry.topic, data=entry.data)) +``` + +If `publisher_id` is empty (legacy or workflow-internal publish), dedup is +skipped. Otherwise, the workflow compares the incoming sequence against the +highest seen for that publisher. If it's ≤, the entire batch is dropped as a +duplicate. + +### Internal state + +```python +self._publisher_sequences: dict[str, int] = {} +``` + +Initialized in `init_pubsub()` from `PubSubState.publisher_sequences`. + +## Continue-as-New State + +`PubSubState` gains a `publisher_sequences` field: + +```python +@dataclass +class PubSubState: + log: list[PubSubItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) +``` + +This is carried through CAN so that dedup survives across runs. The dict is +small — one entry per publisher that has ever sent to this workflow, typically +1–2 entries. + +### Cleanup on CAN + +Stale publisher entries (from publishers that are no longer active) accumulate +but are harmless — they're just `str: int` pairs. 
If cleanup is desired, the +workflow can remove entries for publishers that haven't sent in N runs, but this +is not required for correctness. + +## Sequence Gap Detection + +If the workflow receives sequence N+2 without seeing N+1, it indicates a lost +signal. The current design does **not** act on this — it processes the batch +normally and records the new high-water mark. Gaps are expected to be rare +(they require a signal to be truly lost, not just slow), and the publisher will +retry with the same sequence if it didn't get an ack. + +A future extension could log a warning on gap detection for observability. + +## Properties + +- **Exactly-once delivery.** Each `(publisher_id, sequence)` pair is processed + at most once. Combined with at-least-once retry on the client, this achieves + exactly-once. +- **Bounded dedup state.** One `int` per publisher. Does not grow with the + number of flushes. +- **No read-before-write.** The publisher does not need to query the workflow + before sending. +- **Backward compatible.** Empty `publisher_id` disables dedup. Existing code + without the field works as before. +- **CAN-safe.** Publisher sequences survive continue-as-new in `PubSubState`. + +## Relationship to Other Addenda + +- [Continue-as-new addendum](./DESIGN-ADDENDUM-CAN.md): `PubSubState` shape + updated with `publisher_sequences`. Drain/validator mechanics unaffected. +- [Topic offsets addendum](./DESIGN-ADDENDUM-TOPICS.md): Unaffected. Dedup + operates on the publish path; offsets and cursors operate on the subscribe + path. diff --git a/temporalio/contrib/pubsub/DESIGN-ADDENDUM-ITEM-OFFSET.md b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-ITEM-OFFSET.md new file mode 100644 index 000000000..5cb992cea --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-ITEM-OFFSET.md @@ -0,0 +1,175 @@ +# Per-Item Offsets — Addendum + +Addendum to [DESIGN-ADDENDUM-TOPICS.md](./DESIGN-ADDENDUM-TOPICS.md). 
Revisits +the decision that `PubSubItem` does not carry an offset, based on experience +with the voice-terminal agent where the subscriber needs to track consumption +progress at item granularity. + +## Problem + +The voice-terminal agent streams TTS audio chunks through the pub/sub log. +Audio chunks are large (~50-100KB base64 each) and must not be truncated +from the workflow log until they have been **played** by the client, not merely +**received**. + +The current API exposes offsets only at poll-batch granularity via +`PollResult.next_offset`. The subscriber cannot determine which global offset +corresponds to a specific item within the batch. This makes it impossible to +report fine-grained consumption progress back to the workflow for truncation. + +### Why batch-level offsets are insufficient + +The subscriber's consumption model has two stages: + +1. **Receive**: items are yielded by `subscribe()` and buffered locally + (e.g., audio enqueued into a playback buffer). +2. **Consume**: the local consumer finishes processing the item (e.g., the + speaker finishes playing the audio). + +The subscriber needs to signal the workflow: "I have consumed through offset N, +you may truncate up to N." This requires knowing the offset of each item, not +just the offset at the end of a poll batch. + +Without per-item offsets, the subscriber can only report the batch boundary. +If the subscriber crashes after receiving a batch but before consuming all +items, truncation based on the batch boundary discards unconsumed items. + +### Why this matters for continue-as-new + +Before continue-as-new, the workflow must serialize the pub/sub log into the +workflow input. Audio chunks make the log large (observed 3.6MB, exceeding +Temporal's payload size limit). The workflow needs to truncate consumed items +before serialization, but can only safely truncate items the subscriber has +actually consumed — which requires per-item offset tracking. 
+ +### Workaround: count items from `from_offset` + +When the subscriber requests all topics (no filtering), items map 1:1 to +consecutive global offsets. The subscriber can compute `from_offset + i` for +each item. This works for the voice-terminal (which subscribes to all topics) +but is fragile — it breaks silently if topic filtering is introduced or if a +third topic is added to the workflow without updating the subscription. + +## Proposed Change + +Add an `offset` field to `PubSubItem` and `_WireItem`, populated by the poll +handler from the item's position in the log. No new storage in the workflow — +the offset is computed at poll time. + +### Wire types (revised) + +```python +@dataclass +class PubSubItem: + topic: str + data: bytes + offset: int = 0 + +@dataclass +class _WireItem: + topic: str + data: str # base64-encoded bytes + offset: int = 0 +``` + +### Poll handler change + +The poll handler already iterates the log slice. It annotates each item with +its global offset before returning: + +```python +all_new = self._pubsub_log[log_offset:] +next_offset = self._pubsub_base_offset + len(self._pubsub_log) +if input.topics: + topic_set = set(input.topics) + filtered = [ + (self._pubsub_base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + if item.topic in topic_set + ] +else: + filtered = [ + (self._pubsub_base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + ] +return PollResult( + items=[ + _WireItem(topic=item.topic, data=encode_data(item.data), offset=off) + for off, item in filtered + ], + next_offset=next_offset, +) +``` + +### `subscribe()` change + +The client passes the offset through to the yielded `PubSubItem`: + +```python +for wire_item in result.items: + yield PubSubItem( + topic=wire_item.topic, + data=decode_data(wire_item.data), + offset=wire_item.offset, + ) +``` + +### Backward compatibility + +The `offset` field defaults to `0` on both `PubSubItem` and `_WireItem`. 
+Existing subscribers that don't use the field are unaffected. Workflows
+running old code that doesn't populate the field will return `0` for all
+items — subscribers must treat `offset=0` as "unknown" if they depend on it.
The global offset is no longer purely +internal — it is exposed per-item to enable consumption tracking. The offset +model (global, monotonic, single log) is unchanged. The BFF containment +strategy for end-client leakage is also unchanged — the BFF still assigns its +own SSE event IDs. diff --git a/temporalio/contrib/pubsub/DESIGN-ADDENDUM-TOPICS.md b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-TOPICS.md new file mode 100644 index 000000000..a99bf91d4 --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-ADDENDUM-TOPICS.md @@ -0,0 +1,272 @@ +# Topic Offsets and Cursor Design — Addendum + +Addendum to [DESIGN.md](./DESIGN.md). Revises section 3 ("Global monotonic +offsets, not per-topic") after evaluating per-topic offset models. Concludes +that global offsets are the right choice for workflow-scoped pub/sub, with +information leakage addressed at the BFF layer rather than the pub/sub API. + +## Problem + +The original design assigns every log entry a global monotonic offset regardless +of topic. A single-topic subscriber sees gaps in offset numbers — e.g., offsets +0, 3, 7, 12. These gaps leak information about activity on other topics. A +subscriber to `"events"` can infer the volume of traffic on `"thinking"` or +`"status"` from the size of the gaps, even though it has no direct access to +those topics. + +This is an information leakage concern, not a correctness bug. + +## Industry Survey + +We surveyed offset/cursor models across major pub/sub and streaming systems to +inform the design. + +| System | Cursor Scope | Unified Multi-Topic Cursor? 
| +|---|---|---| +| Kafka | Per-partition offset (int64) | No — separate offset per partition per topic | +| Redis Streams | Per-stream entry ID (timestamp-seq) | No — separate ID per stream | +| NATS JetStream | Per-stream sequence (uint64) | Yes — one stream captures multiple subjects | +| PubNub | Per-channel timetoken (nanosecond timestamp) | Yes — single timestamp spans channels | +| Google Pub/Sub | Per-subscription ack set | No | +| RabbitMQ Streams | Per-stream offset (uint64) | No | +| Amazon SQS/SNS | Ack-and-delete (no offset) | No | + +**Key finding:** No major system provides a true global offset across +independent topics. The two that offer unified multi-topic cursors do it +differently: + +- **NATS JetStream** defines a single stream that captures messages from + multiple subjects (via wildcards). The stream has one sequence counter. + Interleaving happens at write time. This is closest to our design. + +- **PubNub** uses a wall-clock nanosecond timestamp as the cursor, so a single + timetoken naturally spans channels. The tradeoff is timestamp-based ordering + rather than sequence-based. + +Every other system requires the consumer to maintain independent cursors per +topic/partition/stream. + +## Options Considered + +### Option A: Per-topic item count as cursor + +The subscriber's cursor represents "I've seen N items matching my filter." The +workflow translates that back to a global log position internally. + +- **Pro:** Zero information leakage. Total ordering preserved internally. +- **Con:** Resume requires translating per-topic offset → global log position. + Either O(n) scan on every poll, or a per-topic index that adds state to + manage through continue-as-new. Also, the cursor is coupled to the topic + filter — a cursor from `subscribe(["events"])` is meaningless if you later + call `subscribe(["events", "status"])`. + +### Option B: Opaque cursor wrapping the global offset + +Cursor is typed as `str`, documented as opaque. 
Internally contains the global +offset. + +- **Pro:** Zero internal complexity. O(1) resume. Cursor works regardless of + topic filter changes. +- **Con:** Information leakage remains observable to anyone who inspects cursor + values across polls. "Opaque" is a social contract, not a technical one. + Gaps in the underlying numbers are still visible. + +### Option C: Encrypted/HMAC'd global offset + +Same as B but cryptographically opaque. + +- **Pro:** Leakage is technically unobservable. +- **Con:** Requires a stable key across continue-as-new. Introduces crypto into + workflow code (determinism concerns). Complexity disproportionate to the + threat model — the subscriber already has access to its own data. + +### Option D: Per-topic offsets everywhere + +Separate log per topic. Each topic has its own 0-based sequence. + +- **Pro:** No leakage by construction. Simplest mental model per topic. +- **Con:** Loses total cross-topic ordering. Multi-topic subscription requires + merging N streams with no defined interleaving. More internal state. More + complex continue-as-new serialization. + +### Option E: Accept the leakage + +Keep global offsets exposed as-is (original design). + +- **Pro:** Simplest implementation. Offset = list index. +- **Con:** The information leakage identified above. + +### Option F: Per-topic offsets with cursor hints + +Per-topic offsets on the wire, single global log internally, opaque cursors +carrying a global position hint for efficient resume. + +- **Pro:** Zero information leakage. Global insertion order preserved. Efficient + resume via hints. Graceful degradation if hints are stale. +- **Con:** Cursor parsing/formatting logic. `topic_counts` dict that survives + continue-as-new. Multi-cursor alignment algorithm. Cursors are per-topic, + not portable across filter changes. Complexity unjustified for expected log + sizes (thousands of items where a filtered slice is microseconds). 
+ +### Summary + +| | Leakage | Ordering | Resume cost | Complexity | Cursor portability | +|---|---|---|---|---|---| +| A. Per-topic count | None | Preserved | O(n) or extra state | Medium | Coupled to filter | +| B. Opaque global | Observable | Preserved | O(1) | Minimal | Filter-independent | +| C. Encrypted global | None | Preserved | O(1) | High | Filter-independent | +| D. Per-topic lists | None | **Lost** | O(1) | High | N/A | +| E. Accept it | Yes | Preserved | O(1) | None | Filter-independent | +| F. Per-topic + hints | None | Preserved | O(new items) | Medium-High | Per-topic only | + +## Design Decision: Global offsets with BFF-layer containment + +We evaluated per-topic offset models (Options A, D, F) and concluded that the +complexity is not justified. The information leakage concern is real but is +better addressed at the trust boundary (the BFF) than in the pub/sub API itself. + +### Why not per-topic offsets? + +The subscriber in our architecture is the BFF — trusted server-side code that +could just as easily subscribe to all topics. The threat model for information +leakage assumes untrusted multi-tenant subscribers (Kafka's world: separate +consumers for separate services). That does not apply to workflow-scoped +pub/sub, where one workflow serves one subscriber through a server-side proxy. + +Per-topic cursors (Option F) also sacrifice cursor portability. A global offset +is a stream position that works regardless of which topics you filter on. +Changing your topic filter does not invalidate your cursor. Per-topic cursors +are coupled to the filter — you need a separate cursor per topic, and adding a +topic to your subscription requires starting that topic from the beginning. + +### Why not just accept the leakage (Option E)? + +We accept the leakage **within the pub/sub API** (between workflow and BFF) but +contain it there. The global offset must not leak to the end client (browser). 
+The BFF is the trust boundary: it consumes global offsets from the workflow and +presents a clean, opaque interface to the browser. + +### The NATS JetStream model + +Our design follows the NATS JetStream model: one stream, multiple subjects, one +sequence counter. The industry survey identified this as the closest analogue, +and we adopt it directly. Topics are labels for server-side filtering, not +independent streams with independent cursors. + +### Information leakage containment at the BFF + +The BFF assigns its own gapless sequence numbers to SSE events using the +standard SSE `id` field. The browser sees `id: 1`, `id: 2`, `id: 3` — no gaps, +no global offsets, no information about other topics. + +On reconnect, the browser sends `Last-Event-ID` (built into the SSE spec). The +BFF maps that back to a global offset internally and resumes the subscription. + +This keeps: +- The **workflow API** simple (global offsets, single integer cursor) +- The **browser API** clean (SSE event IDs, no workflow internals) +- The **mapping** where it belongs (the BFF, which is the trust boundary) + +### Final design + +**Global offsets internally and on the pub/sub wire. Single append-only log. +BFF contains the leakage by assigning SSE event IDs at the trust boundary.** + +### Wire types + +```python +@dataclass +class PubSubItem: + topic: str + data: bytes + +@dataclass +class PollInput: + topics: list[str] = field(default_factory=list) + from_offset: int = 0 + timeout: float = 300.0 + +@dataclass +class PollResult: + items: list[PubSubItem] + next_offset: int = 0 +``` + +`PubSubItem` does not carry an offset. The global offset is an internal detail +exposed only through `PollResult.next_offset` and the `get_offset()` query. + +### `get_offset()` remains public + +The `__pubsub_offset` query returns the current log length (next offset). 
This +is essential for the "snapshot the watermark, then subscribe from there" pattern +used by the BFF: + +```python +start_offset = await pubsub.get_offset() # capture position before starting work +# ... start the agent turn ... +async for item in pubsub.subscribe(topics=["events"], from_offset=start_offset): + yield sse_event(item) +``` + +### Internal state + +```python +self._pubsub_log: list[PubSubItem] # single ordered log, all topics +self._base_offset: int = 0 # global offset of log[0] +``` + +The `base_offset` is 0 today. It exists to support future log truncation: when +a prefix of the log is discarded (e.g., after continue-as-new compaction), the +base offset advances so that global offsets remain monotonic across the +workflow's lifetime. All log access uses `self._pubsub_log[offset - self._base_offset]`. +If `offset < self._base_offset`, the subscriber has fallen behind the +truncation point — this is an error. + +Log truncation and compaction are deferred to a future design iteration. Until +then, the log grows without bound and `base_offset` remains 0. + +### Poll algorithm + +Given `from_offset = 4702`: + +1. Compute log index: `start = from_offset - self._base_offset`. +2. If `start < 0`, the subscriber fell behind truncation — raise error. +3. Slice: `self._pubsub_log[start:]`. +4. Filter to requested topics (if any). +5. Return filtered items plus `next_offset = self._base_offset + len(self._pubsub_log)`. + +**Efficiency:** O(new items since last poll). The global offset points directly +to where the last poll left off. No scanning, no alignment, no cursor parsing. + +### Continue-as-new state + +```python +@dataclass +class PubSubState: + log: list[PubSubItem] = field(default_factory=list) + base_offset: int = 0 +``` + +The full log is carried through continue-as-new. Truncation (discarding a +prefix and advancing `base_offset`) is deferred to a future iteration. 
+ +### Properties + +- **No leakage to end clients.** Global offsets stay between workflow and BFF. + The browser sees SSE event IDs assigned by the BFF. +- **Global insertion order preserved.** Poll responses return items in the order + they were published, across all requested topics. +- **Efficient resume.** O(new items) — the offset points directly to the + resume position. +- **Cursor portability.** The global offset works regardless of topic filter. + Change your topic filter without invalidating your cursor. +- **Simple internal state.** One list, one integer. No auxiliary data structures, + no per-topic indices, no cursor parsing. +- **Truncation-ready.** `base_offset` supports future log prefix removal + without changing the offset model or the external API. + +## Relationship to Other Addenda + +The [continue-as-new addendum](./DESIGN-ADDENDUM-CAN.md) remains valid. The +CAN state shape is `PubSubState` with `log` and `base_offset`. The +drain/validator/follow-CAN-chain mechanisms are unaffected. diff --git a/temporalio/contrib/pubsub/DESIGN-v2.md b/temporalio/contrib/pubsub/DESIGN-v2.md new file mode 100644 index 000000000..0a5739d01 --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN-v2.md @@ -0,0 +1,634 @@ +# Temporal Workflow Pub/Sub — Design Document v2 + +Consolidated design document reflecting the current implementation. +Supersedes [DESIGN.md](./DESIGN.md) and its addenda +([CAN](./DESIGN-ADDENDUM-CAN.md), [Topics](./DESIGN-ADDENDUM-TOPICS.md), +[Dedup](./DESIGN-ADDENDUM-DEDUP.md)), which are preserved as historical +records of the design exploration. + +## Overview + +A reusable pub/sub module for Temporal workflows. The workflow acts as the +message broker — it holds an append-only log of `(topic, data)` entries. +External clients (activities, starters, other services) publish and subscribe +through the workflow handle using Temporal primitives (signals, updates, +queries). 
+ +The module ships as `temporalio.contrib.pubsub` in the Python SDK and is +designed to be cross-language compatible. Payloads are opaque byte strings — +the workflow does not interpret them. + +## Architecture + +``` + ┌──────────────────────────────────┐ + │ Temporal Workflow │ + │ (PubSubMixin) │ + │ │ + │ ┌─────────────────────────────┐ │ + │ │ Append-only log │ │ + │ │ [(topic, data), ...] │ │ + │ │ base_offset: int │ │ + │ │ publisher_sequences: {} │ │ + │ └─────────────────────────────┘ │ + │ │ + signal ──────────►│ __pubsub_publish (with dedup) │ + update ──────────►│ __pubsub_poll (long-poll) │◄── subscribe() + query ──────────►│ __pubsub_offset │ + │ │ + │ publish() ── workflow-side │ + └──────────────────────────────────┘ + │ + │ continue-as-new + ▼ + ┌──────────────────────────────────┐ + │ PubSubState carries: │ + │ log, base_offset, │ + │ publisher_sequences │ + └──────────────────────────────────┘ +``` + +## API Surface + +### Workflow side — `PubSubMixin` + +A mixin class that adds signal, update, and query handlers to any workflow. + +```python +from temporalio import workflow +from temporalio.contrib.pubsub import PubSubMixin + +@workflow.defn +class MyWorkflow(PubSubMixin): + @workflow.init + def __init__(self, input: MyInput) -> None: + self.init_pubsub() + + @workflow.run + async def run(self, input: MyInput) -> None: + self.publish("status", b"started") + await do_work() + self.publish("status", b"done") +``` + +Call `init_pubsub()` in `__init__` for fresh workflows. When accepting +continue-as-new state, call it in `run()` with the `prior_state` argument +(see [Continue-as-New](#continue-as-new)). + +| Method / Handler | Kind | Description | +|---|---|---| +| `init_pubsub(prior_state=None)` | instance method | Initialize internal state. Must be called before use. | +| `publish(topic, data)` | instance method | Append to the log from workflow code. | +| `get_pubsub_state(publisher_ttl=900)` | instance method | Snapshot for CAN. 
Prunes dedup entries older than TTL. | +| `drain_pubsub()` | instance method | Unblock polls and reject new ones for CAN. | +| `truncate_pubsub(up_to_offset)` | instance method | Discard log entries before offset. | +| `__pubsub_publish` | `@workflow.signal` | Receives publications from external clients (with dedup). | +| `__pubsub_poll` | `@workflow.update` | Long-poll subscription: blocks until new items or drain. | +| `__pubsub_offset` | `@workflow.query` | Returns the current global offset. | + +### Client side — `PubSubClient` + +Used by activities, starters, and any code with a workflow handle. + +```python +from temporalio.contrib.pubsub import PubSubClient + +# Preferred: factory method (enables CAN following + activity auto-detect) +client = PubSubClient.create(temporal_client, workflow_id) + +# --- Publishing (with batching) --- +async with client: + client.publish("events", b'{"type":"TEXT_DELTA","delta":"hello"}') + client.publish("events", b'{"type":"TEXT_DELTA","delta":" world"}') + client.publish("events", b'{"type":"TEXT_COMPLETE"}', priority=True) + +# --- Subscribing --- +async for item in client.subscribe(["events"], from_offset=0): + print(item.topic, item.data) + if is_done(item): + break +``` + +| Method | Description | +|---|---| +| `PubSubClient.create(client?, wf_id?)` | Factory (preferred). Auto-detects activity context if args omitted. | +| `PubSubClient(handle)` | From handle directly (no CAN following). | +| `publish(topic, data, priority=False)` | Buffer a message. Priority triggers immediate flush (fire-and-forget). | +| `subscribe(topics, from_offset, poll_cooldown=0.1)` | Async iterator. Always follows CAN chains when created via `create`. | +| `get_offset()` | Query current global offset. | + +Use as `async with` for batched publishing with automatic flush on exit. +There is no public `flush()` method — use `priority=True` on `publish()` +for immediate delivery, or rely on the background flusher and context +manager exit flush. 
+ +#### Activity convenience + +When called from within an activity, `client` and `workflow_id` can be +omitted from `create()` — they are inferred from the activity context: + +```python +@activity.defn +async def stream_events() -> None: + client = PubSubClient.create(batch_interval=2.0) + async with client: + for chunk in generate_chunks(): + client.publish("events", chunk) + activity.heartbeat() +``` + +## Data Types + +```python +@dataclass +class PubSubItem: + topic: str # Topic string + data: bytes # Opaque payload + +@dataclass +class PublishEntry: + topic: str + data: bytes + +@dataclass +class PublishInput: + items: list[PublishEntry] + publisher_id: str = "" # For exactly-once dedup + sequence: int = 0 # Monotonic per publisher + +@dataclass +class PollInput: + topics: list[str] # Filter (empty = all) + from_offset: int = 0 # Global offset to resume from + +@dataclass +class PollResult: + items: list[PubSubItem] + next_offset: int = 0 # Offset for next poll + +@dataclass +class PubSubState: + log: list[PubSubItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) # For TTL pruning +``` + +`PubSubItem` does not carry an offset field. The global offset is derived +from the item's position in the log plus `base_offset`. It is exposed only +through `PollResult.next_offset` and the `__pubsub_offset` query. + +The containing workflow input must type the field as `PubSubState | None`, +not `Any` — `Any`-typed fields deserialize as plain dicts, losing the type. + +## Design Decisions + +### 1. Topics are plain strings, no hierarchy + +Topics are exact-match strings. No prefix matching, no wildcards. A subscriber +provides a list of topic strings to filter on; an empty list means "all topics." + +### 2. Items are opaque byte strings + +The workflow does not interpret payloads. This enables cross-language +compatibility. 
The pub/sub layer is transport; application semantics belong +in the application. + +### 3. Global offsets, NATS JetStream model + +Every entry gets a global offset from a single counter. Subscribers filter by +topic but advance through the global offset space. + +We surveyed offset models across Kafka, Redis Streams, NATS JetStream, PubNub, +Google Pub/Sub, RabbitMQ Streams, and Amazon SQS/SNS. No major system provides +a true global offset across independent topics. The two closest: + +- **NATS JetStream**: one stream captures multiple subjects via wildcards, with + a single sequence counter. This is our model. +- **PubNub**: wall-clock nanosecond timestamp as cursor across channels. + +We evaluated six alternatives for handling the information leakage that global +offsets create (a single-topic subscriber can infer other-topic activity from +gaps): per-topic counts, opaque cursors, encrypted cursors, per-topic lists, +per-topic offsets with cursor hints, and accepting the leakage. See +[DESIGN-ADDENDUM-TOPICS.md](./DESIGN-ADDENDUM-TOPICS.md) for the full +analysis. + +**Decision:** Global offsets are the right choice for workflow-scoped pub/sub. + +**Why not per-topic offsets?** The most sophisticated alternative — per-topic +offsets with opaque cursors carrying global position hints (Option F in the +addendum) — was rejected for three reasons: + +1. **The threat model doesn't apply.** Information leakage assumes untrusted + multi-tenant subscribers who shouldn't learn about each other's traffic + volumes. That's Kafka's world — separate consumers for separate services. + In workflow-scoped pub/sub, the subscriber is the BFF: trusted server-side + code that could just as easily subscribe to all topics. + +2. **Cursor portability.** A global offset is a stream position that works + regardless of which topics you filter on. You can subscribe to `["events"]`, + then later subscribe to `["events", "thinking"]` with the same offset. 
+ Per-topic cursors are coupled to the filter — you need a separate cursor per + topic, and adding a topic to your subscription requires starting it from the + beginning. + +3. **Unjustified complexity.** Per-topic cursors require cursor + parsing/formatting, a `topic_counts` dict that survives continue-as-new, a + multi-cursor alignment algorithm, and stale-hint fallback paths. For log + sizes of thousands of items where a filtered slice is microseconds, this + machinery adds cost without measurable benefit. + +**Leakage is contained at the BFF trust boundary.** The global offset stays +between workflow and BFF. The BFF assigns its own gapless SSE event IDs to the +browser. The global offset never reaches the end client. See +[Information Leakage and the BFF](#information-leakage-and-the-bff) for the +full mechanism. + +### 4. No topic creation + +Topics are implicit. Publishing to a topic creates it. Subscribing to a +nonexistent topic returns no items and waits for new ones. + +### 5. Priority forces flush, does not reorder + +`priority=True` causes the client to immediately flush its buffer. It does NOT +reorder items — the priority item appears in its natural position after any +previously-buffered items. The purpose is latency-sensitive delivery, not +importance ranking. + +### 6. Session ordering + +Publications from a single client are ordered. This relies on two Temporal +guarantees: (1) signals sent sequentially from the same client appear in +workflow history in send order, and (2) signal handlers are invoked in +history order. The `PubSubClient` flush lock ensures signals are never in +flight concurrently, so both guarantees apply. + +Concurrent publishers get a total order in the log (the workflow serializes +all signal processing), but the interleaving is nondeterministic — it depends +on arrival order at the server. Per-publisher ordering is preserved. This is +formally verified as `OrderPreservedPerPublisher` in `PubSubDedupTTL.tla`. 
+ +Once items are in the log, their order is stable — reads are repeatable. + +### 7. Batching is built into the client + +`PubSubClient` includes a Nagle-like batcher (buffer + timer). The async +context manager starts a background flush task; exiting cancels it and does a +final flush. Batching amortizes Temporal signal overhead. + +Parameters: +- `batch_interval` (default 2.0s): timer between automatic flushes. +- `max_batch_size` (optional): auto-flush when buffer reaches this size. + +### 8. Subscription is poll-based, exposed as async iterator + +The primitive is `__pubsub_poll` (a Temporal update with `wait_condition`). +`subscribe()` wraps this in an `AsyncIterator` with a configurable +`poll_interval` (default 0.1s) to rate-limit polls. + +Temporal has no server-push to external clients. Updates with `wait_condition` +are the closest thing — the workflow blocks until data is available. + +**Poll efficiency.** The poll slices `self._pubsub_log[from_offset - base_offset:]` +and filters by topic. The common case — single topic, continuing from last +poll — is O(new items since last poll). The global offset points directly to +the resume position with no scanning or cursor alignment. Multi-topic polls +are the same cost: one slice, one filter pass. The worst case is a poll from +offset 0 (full log scan), which only happens on first connection or after the +subscriber falls behind. + +### 9. Workflow can publish but should not subscribe + +Workflow code can call `self.publish()` directly — this is deterministic. +Reading from the log within workflow code is possible but breaks the +failure-free abstraction because external publishers send data via signals +(non-deterministic inputs), and branching on signal content creates +replay-sensitive code paths. + +### 10. `base_offset` for future truncation + +The log carries a `base_offset` (0 today). All offset arithmetic uses +`offset - base_offset` to index into the log. 
This supports future log +truncation: discard a prefix of consumed entries, advance `base_offset`, +and global offsets remain monotonic. If `offset < base_offset`, the +subscriber has fallen behind truncation — the poll raises an error. + +Truncation is deferred to a future iteration. Until then, the log grows +without bound within a run and is compacted only through continue-as-new. + +### 11. No timeout on long-poll + +`wait_condition` in the poll handler has no timeout. The poll blocks +indefinitely until one of three things happens: + +1. **New data arrives** — the `len(log) > offset` condition fires. +2. **Draining for continue-as-new** — `drain_pubsub()` sets the flag. +3. **Client disconnects** — the BFF drops the SSE connection, cancels the + update RPC, and the handler becomes an inert coroutine cleaned up at + the next drain cycle. + +A previous design used a 5-minute timeout as a defensive "don't block +forever" mechanism. This was removed because: + +- **It adds unnecessary history events.** Every poll creates a `TimerStarted` + event. For a streaming session doing hundreds of polls, this doubles the + history event count and accelerates approach to the ~50K event CAN threshold. +- **The drain mechanism already handles cleanup.** `drain_pubsub()` unblocks + all waiting polls, and the update validator rejects new polls, so + `all_handlers_finished()` converges without timers. +- **Zombie polls are harmless.** If a client crashes without cancelling, its + poll handler is just an in-memory coroutine waiting on a condition. It + consumes no Temporal actions and is cleaned up at the next CAN cycle. + +## Exactly-Once Publish Delivery + +External publishers get exactly-once delivery through publisher ID + sequence +number deduplication, following the Kafka producer model. + +### Problem + +`flush()` sends items via a Temporal signal. 
If the signal call raises after +the server accepted it (e.g., network timeout on the response), the client +cannot distinguish delivered from not-delivered. Without dedup, the client +must choose between at-most-once (data loss) and at-least-once (silent +duplication). + +### Solution + +Each `PubSubClient` instance generates a UUID (`publisher_id`) on creation. +Each `flush()` increments a monotonic `sequence` counter. The signal payload +includes both. The workflow tracks the highest seen sequence per publisher in +`_publisher_sequences: dict[str, int]` and rejects any signal with +`sequence <= last_seen`. + +``` +Client Workflow + │ │ + │ signal(publisher_id, seq=1) │ + │───────────────────────────────────►│ seq 1 > 0 → accept, record seq=1 + │ │ + │ signal(publisher_id, seq=1) │ (retry after timeout) + │───────────────────────────────────►│ seq 1 <= 1 → reject (duplicate) + │ │ + │ signal(publisher_id, seq=2) │ + │───────────────────────────────────►│ seq 2 > 1 → accept, record seq=2 +``` + +### Client-side flush (TLA+-verified algorithm) + +The flush algorithm has been formally verified using TLA+ model checking. +See `verification/PROOF.md` for the full correctness proof and +`verification/PubSubDedup.tla` for the spec. 
+
+```python
+async def _flush(self) -> None:
+    async with self._flush_lock:
+        if self._pending is not None:
+            # Retry failed batch with same sequence
+            batch = self._pending
+            seq = self._pending_seq
+        elif self._buffer:
+            # New batch
+            seq = self._sequence + 1
+            batch = self._buffer
+            self._buffer = []
+            self._pending = batch
+            self._pending_seq = seq
+        else:
+            return
+        try:
+            await self._handle.signal(
+                "__pubsub_publish",
+                PublishInput(items=batch, publisher_id=self._publisher_id,
+                    sequence=seq),
+            )
+            self._sequence = seq  # advance confirmed sequence
+            self._pending = None  # clear pending
+        except Exception:
+            # pending stays for retry
+            raise
+```
+
+- **Separate pending from buffer**: failed batches stay in `_pending`, not
+  restored to `_buffer`. New `publish()` calls during retry go to the fresh
+  buffer. This prevents the data-loss bug where items would be merged into a
+  retry batch under a different sequence number.
+- **Retry with same sequence**: on failure, the next `_flush()` retries the
+  same `_pending` with the same `_pending_seq`. If the signal was delivered
+  but the client saw an error, the workflow deduplicates the retry.
+- **Sequence advances only on success**: `_sequence` (confirmed) is updated
+  only after the signal call returns without error.
+- **Lock for coalescing**: concurrent `_flush()` callers queue on the lock.
+- **max_retry_duration**: if set, the client gives up retrying after this
+  duration and raises `TimeoutError`. Must be less than the workflow's
+  `publisher_ttl` to preserve exactly-once guarantees.
+
+### Dedup state and TTL pruning
+
+`publisher_sequences` is `dict[str, int]` — bounded by number of publishers
+(typically 1-2), not number of flushes. Carried through continue-as-new in
+`PubSubState`. If `publisher_id` is empty (workflow-internal publish or legacy
+client), dedup is skipped.
+
+`publisher_last_seen` tracks the last `workflow.time()` each publisher was
+seen. 
During `get_pubsub_state(publisher_ttl=900)`, entries older than TTL +are pruned to bound memory across long-lived workflow chains. + +**Safety constraint**: `publisher_ttl` must exceed the client's +`max_retry_duration`. If a publisher's dedup entry is pruned while it still +has a pending retry, the retry could be accepted as new, creating duplicates. +This is formally verified in `verification/PubSubDedupTTL.tla` — TLC finds +the counterexample for unsafe pruning and confirms safe pruning preserves +NoDuplicates. + +## Continue-as-New + +### Problem + +The pub/sub mixin accumulates workflow history through signals (each +`__pubsub_publish`) and updates (each `__pubsub_poll` response). Over a +streaming session, history grows toward the ~50K event threshold. CAN resets +the history while carrying the canonical log copy forward. + +### State + +```python +@dataclass +class PubSubState: + log: list[PubSubItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) +``` + +`init_pubsub(prior_state)` restores all four fields. `get_pubsub_state()` +snapshots them. + +### Draining + +A long-poll `__pubsub_poll` blocks indefinitely until new data arrives. To +allow CAN to proceed, draining uses two mechanisms: + +1. **`drain_pubsub()`** sets a flag that unblocks all waiting poll handlers + (the `or self._pubsub_draining` clause in `wait_condition`). +2. **Update validator** rejects new polls when draining, so no new handlers + start and `all_handlers_finished()` stabilizes. + +```python +# CAN sequence in the parent workflow: +self.drain_pubsub() +await workflow.wait_condition(workflow.all_handlers_finished) +workflow.continue_as_new(args=[WorkflowInput( + pubsub_state=self.get_pubsub_state(), +)]) +``` + +### Client-side CAN following + +`subscribe()` always follows CAN chains when the client was created via +`for_workflow()`. 
When a poll fails with +`WorkflowUpdateRPCTimeoutOrCancelledError`, the client calls `describe()` on +the handle. If the status is `CONTINUED_AS_NEW`, it gets a fresh handle for +the same workflow ID (targeting the latest run) and retries the poll from the +same offset. + +```python +async def _follow_continue_as_new(self) -> bool: + if self._client is None: + return False + try: + desc = await self._handle.describe() + except Exception: + return False + if desc.status == WorkflowExecutionStatus.CONTINUED_AS_NEW: + self._handle = self._client.get_workflow_handle(self._workflow_id) + return True + return False +``` + +The `describe()` check prevents infinite loops: if the workflow completed or +failed (not CAN), the subscriber stops instead of retrying. + +### Offset continuity + +Since the full log is carried forward: + +- Pre-CAN: offsets `0..N-1`, log length N. +- Post-CAN: `init_pubsub(prior_state)` restores N items. New appends start + at offset N. +- A subscriber at offset K resumes seamlessly against the new run. + +### Edge cases + +**Payload size limit.** The full log in CAN input could approach Temporal's +2 MB limit for very long sessions. Mitigation: truncation (discarding consumed +entries before CAN) is the natural extension, supported by `base_offset`. + +**Signal delivery during CAN.** A publisher sending mid-CAN may get errors if +its handle is pinned to the old run. The workflow should ensure activities +complete before triggering CAN. + +**Concurrent subscribers.** Each maintains its own offset. Sharing a +`PubSubClient` across concurrent `subscribe()` calls is safe. + +## Information Leakage and the BFF + +Global offsets leak cross-topic activity (a single-topic subscriber sees gaps). +This is acceptable within the pub/sub API because the subscriber is the BFF — +trusted server-side code. The leakage must not reach the end client (browser). 
+ +### The problem + +If the BFF forwarded `PollResult.next_offset` to the browser (e.g., as an SSE +reconnection cursor), the browser could observe gaps and infer activity on +topics it is not subscribed to. Even if the offset is "opaque," a monotonic +integer with gaps is trivially inspectable. + +### Options considered + +We evaluated four approaches for browser-side reconnection: + +1. **BFF tracks the cursor server-side.** The BFF maintains a per-session + `session_id → last_offset` mapping. The browser reconnects with just the + session ID. On BFF restart, cursors are lost — fall back to replaying from + turn start. + +2. **Opaque token from the BFF.** The BFF wraps the global offset in an + encoded or encrypted token. The browser passes it back on reconnect. + `base64(offset)` is trivially reversible (security theater); real encryption + needs a key and adds a layer for marginal benefit over option 1. + +3. **BFF assigns SSE event IDs with `Last-Event-ID`.** The BFF emits SSE + events with `id: 1`, `id: 2`, `id: 3` (a BFF-local counter per stream). + On reconnect, the browser sends `Last-Event-ID` (built into the SSE spec). + The BFF maps that back to a global offset internally. + +4. **No mid-stream resume.** Browser reconnects, BFF replays from start of + the current turn. Frontend deduplicates. Simplest, but replays more data + than necessary. + +### Decision: SSE event IDs (option 3) + +The BFF assigns gapless integer IDs to SSE events and maintains a small +mapping from SSE event index to global offset. The browser never sees the +workflow's offset — it sees the BFF's event numbering. + +```python +sse_id = 0 +sse_id_to_offset: dict[int, int] = {} + +start_offset = await pubsub.get_offset() +async for item in pubsub.subscribe(topics=["events"], from_offset=start_offset): + sse_id += 1 + sse_id_to_offset[sse_id] = item_global_offset + yield f"id: {sse_id}\ndata: {item.data}\n\n" +``` + +On reconnect, the browser sends `Last-Event-ID: 47`. 
The BFF looks up the +corresponding global offset and resumes the subscription from there. + +The BFF is already per-session and stateful (it holds the SSE connection). +The `sse_id → global_offset` mapping is negligible additional state. On BFF +restart, the mapping is lost — fall back to replaying from turn start (option +4), which is acceptable because agent turns produce modest event volumes and +the frontend reducer is idempotent. + +This uses the SSE spec as designed: `Last-Event-ID` exists for exactly this +reconnection pattern. + +## Cross-Language Protocol + +Any Temporal client in any language can interact with a pub/sub workflow by: + +1. **Publishing**: Signal `__pubsub_publish` with `PublishInput` payload +2. **Subscribing**: Execute update `__pubsub_poll` with `PollInput`, loop +3. **Checking offset**: Query `__pubsub_offset` + +Double-underscore prefix on handler names avoids collisions with application +signals/updates. The payload types are simple composites of strings, bytes, +and ints — representable in every Temporal SDK's default data converter. 
+ +## File Layout + +``` +temporalio/contrib/pubsub/ +├── __init__.py # Public API exports +├── _mixin.py # PubSubMixin (workflow-side) +├── _client.py # PubSubClient (external-side) +├── _types.py # Shared data types +├── README.md # Usage documentation +├── DESIGN-v2.md # This document +├── DESIGN.md # Historical: original design +├── DESIGN-ADDENDUM-CAN.md # Historical: CAN exploration +├── DESIGN-ADDENDUM-TOPICS.md # Historical: offset model exploration +├── DESIGN-ADDENDUM-DEDUP.md # Historical: dedup exploration +└── verification/ # TLA+ formal verification + ├── README.md # Overview and running instructions + ├── PROOF.md # Full correctness proof + ├── PubSubDedup.tla # Correct single-publisher protocol + ├── PubSubDedupInductive.tla # Inductive invariant (unbounded proof) + ├── PubSubDedupTTL.tla # Multi-publisher + TTL pruning + └── PubSubDedupBroken.tla # Old (broken) algorithm — counterexample +``` diff --git a/temporalio/contrib/pubsub/DESIGN.md b/temporalio/contrib/pubsub/DESIGN.md new file mode 100644 index 000000000..da5914664 --- /dev/null +++ b/temporalio/contrib/pubsub/DESIGN.md @@ -0,0 +1,299 @@ +# Temporal Workflow Pub/Sub — Design Document + +## Overview + +A reusable pub/sub module for Temporal workflows. The workflow acts as the message +broker — it holds an append-only log of `(offset, topic, data)` entries. External +clients (activities, starters, other services) publish and subscribe through the +workflow handle using Temporal primitives (signals, updates, queries). + +The module ships as `temporalio.contrib.pubsub` in the Python SDK and is designed +to be cross-language compatible. Payloads are opaque byte strings — the workflow +does not interpret them. + +## API Surface + +### Workflow side — `PubSubMixin` + +A mixin class that adds signal, update, and query handlers to any workflow. 
+ +```python +from temporalio.contrib.pubsub import PubSubMixin + +@workflow.defn +class MyWorkflow(PubSubMixin): + @workflow.run + async def run(self, input: MyInput) -> MyOutput: + self.init_pubsub() + # The workflow is now a pub/sub broker. + # It can also publish directly: + self.publish("status", b"started") + await do_work() + self.publish("status", b"done") +``` + +`PubSubMixin` provides: + +| Method / Handler | Kind | Description | +|---|---|---| +| `init_pubsub()` | instance method | Initialize internal state. Must be called before use. | +| `publish(topic, data, priority=False)` | instance method | Append to the log from workflow code. | +| `__pubsub_publish` | `@workflow.signal` | Receives publications from external clients. | +| `__pubsub_poll` | `@workflow.update` | Long-poll subscription: blocks until new items or completion. | +| `__pubsub_offset` | `@workflow.query` | Returns the current log length (next offset). | + +Double-underscore prefix on handler names avoids collisions with application signals/updates. + +### Client side — `PubSubClient` + +Used by activities, starters, and any code with a workflow handle. + +```python +from temporalio.contrib.pubsub import PubSubClient + +client = PubSubClient(workflow_handle, batch_interval=2.0) + +# --- Publishing --- +async with client: + client.publish("events", b'{"type":"TEXT_DELTA","delta":"hello"}') + client.publish("events", b'{"type":"TEXT_DELTA","delta":" world"}') + client.publish("events", b'{"type":"TEXT_COMPLETE"}', priority=True) + # priority=True forces an immediate flush + # context manager exit flushes remaining buffer + +# --- Subscribing --- +async for item in client.subscribe(["events"], from_offset=0): + print(item.offset, item.topic, item.data) + if is_done(item): + break +``` + +### `PubSubClient` details + +| Method | Description | +|---|---| +| `publish(topic, data, priority=False)` | Buffer a message. If `priority=True`, flush immediately. 
| +| `flush()` | Send all buffered messages to the workflow via signal. | +| `subscribe(topics, from_offset=0)` | Returns an `AsyncIterator[PubSubItem]`. Internally polls via the `__pubsub_poll` update. | +| `get_offset()` | Query the current log offset. | + +Constructor parameters: + +| Parameter | Default | Description | +|---|---|---| +| `handle` | required | `WorkflowHandle` to the broker workflow. | +| `batch_interval` | `2.0` | Seconds between automatic flushes. | + +The client implements `AsyncContextManager`. Entering starts the background flush +timer; exiting cancels it and does a final flush. + +### Activity convenience + +```python +from temporalio.contrib.pubsub import PubSubClient +from temporalio import activity + +async def get_pubsub_client(**kwargs) -> PubSubClient: + """Create a PubSubClient for the current activity's parent workflow.""" + info = activity.info() + handle = activity.client().get_workflow_handle(info.workflow_id) + return PubSubClient(handle, **kwargs) +``` + +## Data Types + +All types use standard Temporal serialization (default data converter) for +cross-language compatibility. + +```python +@dataclass +class PubSubItem: + offset: int # Global monotonic offset + topic: str # Topic string + data: bytes # Opaque payload + +@dataclass +class PublishInput: + items: list[PublishEntry] + +@dataclass +class PublishEntry: + topic: str + data: bytes + priority: bool = False + +@dataclass +class PollInput: + topics: list[str] # Filter to these topics (empty = all) + from_offset: int # Start reading from this global offset + timeout: float = 300.0 # Server-side wait timeout + +@dataclass +class PollResult: + items: list[PubSubItem] + next_offset: int # Offset for next poll call +``` + +## Design Decisions + +### 1. Topics are plain strings, no hierarchy + +Topics are exact-match strings. No prefix matching, no wildcards. A subscriber +provides a list of topic strings to filter on; an empty list means "all topics." 
+ +**Rationale**: Simplicity. Prefix matching adds implementation complexity and is +rarely needed for the streaming use cases this targets. + +### 2. Items are opaque byte strings + +The workflow does not interpret payloads. This enables cross-language +compatibility — each SDK's client serializes/deserializes in its own language. + +**Rationale**: The pub/sub layer is transport. Application semantics belong in the +application. + +### 3. Global monotonic offsets, not per-topic + +Every entry gets a global offset from a single counter. Subscribers filter by topic +but advance through the global offset space. + +**Rationale**: Simpler implementation. Global ordering means a subscriber to +multiple topics sees a consistent interleaving. The tradeoff is that a +single-topic subscriber may see gaps in offset numbers — but `next_offset` in +`PollResult` handles continuation cleanly. + +### 4. No topic creation + +Topics are implicit. Publishing to a topic creates it. Subscribing to a +nonexistent topic returns no items (and waits for new ones). + +**Rationale**: Eliminates a management API and lifecycle concerns. Matches the +lightweight "just strings" philosophy. + +### 5. Priority forces flush, does not reorder + +Setting `priority=True` on a publish causes the client to immediately flush its +buffer. It does NOT reorder items in the log — the priority item appears in its +natural position after any previously-buffered items. + +**Rationale**: Reordering would break the append-only log invariant and complicate +offset semantics. The purpose of priority is latency-sensitive delivery (e.g., +"thinking complete" events), not importance ranking. + +### 6. Session ordering + +Publications from a single client are ordered. The workflow serializes all signal +processing, so concurrent publishers get a total order (though the interleaving is +nondeterministic). Once items are in the log, their order is stable — reads are +repeatable. + +### 7. 
Batching is built into the client + +The `PubSubClient` includes a Nagle-like batcher (buffer + timer). This is the +same pattern as the existing `EventBatcher` but generalized. Batching amortizes +Temporal signal overhead — instead of one signal per token, a 2-second window +batches hundreds of tokens into a single signal. + +### 8. Subscription is poll-based, exposed as async iterator + +The primitive is `__pubsub_poll` (a Temporal update with `wait_condition`). The +`subscribe()` method wraps this in an `AsyncIterator` that handles polling, +reconnection, and yielding items one at a time. + +**Why poll, not push**: Temporal has no server-push to external clients. Updates +with `wait_condition` are the closest thing — the workflow blocks until data is +available, so the client doesn't busy-wait. + +**Why async iterator**: Idiomatic Python. Matches what users expect from +Kafka consumers, Redis XREAD, NATS subscriptions, etc. + +### 9. Workflow can publish but should not subscribe + +Workflow code can call `self.publish()` directly — this is deterministic (appends +to a list). Reading from the log within workflow code is also possible via +`self._pubsub_log` but breaks the failure-free abstraction because: + +- External publishers send data via signals, which are non-deterministic inputs +- Branching on signal content creates replay-sensitive code paths + +If a workflow needs to react to published data, it should do so in signal handlers, +not by polling its own log. + +### 10. Event retention: full log for workflow lifetime (future: snapshot + truncate) + +For now, the log grows unbounded for the workflow's lifetime. This is acceptable +for the target use cases (streaming agent sessions lasting minutes to hours). + +**Future extension — snapshot + truncate**: + +1. `snapshot(topic)` → serialize current subscriber state as a special log entry +2. `truncate(before_offset)` → discard entries before the offset +3. Offsets remain monotonic (never reset) +4. 
New subscribers start from the snapshot entry +5. Natural integration with `continue_as_new()` — carry the snapshot forward + +This follows the event sourcing pattern (snapshot + event replay) and is analogous +to Kafka's log compaction. We note it here as a planned extension but do not +implement it in v1. + +## Signal / Update / Query Names + +For cross-language interop, the handler names are fixed strings: + +| Handler | Temporal name | Kind | +|---|---|---| +| `__pubsub_publish` | `__pubsub_publish` | signal | +| `__pubsub_poll` | `__pubsub_poll` | update | +| `__pubsub_offset` | `__pubsub_offset` | query | + +Other language SDKs implementing the same protocol must use these exact names. + +## Cross-Language Protocol + +Any Temporal client in any language can interact with a pub/sub workflow by: + +1. **Publishing**: Send signal `__pubsub_publish` with `PublishInput` payload +2. **Subscribing**: Execute update `__pubsub_poll` with `PollInput`, loop +3. **Checking offset**: Query `__pubsub_offset` + +The payload types are simple composites of strings, bytes, ints, and bools — all +representable in every Temporal SDK's default data converter. + +## File Layout + +``` +temporalio/contrib/pubsub/ +├── __init__.py # Public API exports +├── _mixin.py # PubSubMixin (workflow-side) +├── _client.py # PubSubClient (external-side, includes batcher) +├── _types.py # Shared data types +└── README.md # Usage documentation +``` + +## Local Development + +To use the local sdk-python with temporal-streaming-agents-samples: + +```toml +# In temporal-streaming-agents-samples/backend-temporal/pyproject.toml +[tool.uv.sources] +temporalio = { path = "../../../sdk-python", editable = true } +``` + +This requires `maturin develop` to have been run at least once (for the Rust +bridge), but subsequent Python-only changes are reflected immediately. 
+ +## Migration Plan (temporal-streaming-agents-samples) + +The existing streaming code maps directly to the new contrib: + +| Current code | Replaces with | +|---|---| +| `EventBatcher` | `PubSubClient` (with batching) | +| `receive_events` signal | `__pubsub_publish` signal (from mixin) | +| `poll_events` update | `__pubsub_poll` update (from mixin) | +| `get_event_count` query | `__pubsub_offset` query (from mixin) | +| `_event_list` state | `PubSubMixin._pubsub_log` | +| `_get_batcher()` helper | `get_pubsub_client()` or `PubSubClient(handle)` | +| `ActivityEventsInput` | `PublishInput` | +| `PollEventsInput/Result` | `PollInput/PollResult` | diff --git a/temporalio/contrib/pubsub/README.md b/temporalio/contrib/pubsub/README.md new file mode 100644 index 000000000..a18e2024b --- /dev/null +++ b/temporalio/contrib/pubsub/README.md @@ -0,0 +1,165 @@ +# Temporal Workflow Pub/Sub + +Reusable pub/sub for Temporal workflows. The workflow acts as a message broker +with an append-only log. External clients (activities, starters, other services) +publish and subscribe through the workflow handle using Temporal primitives. + +Payloads are base64-encoded byte strings for cross-language compatibility. + +## Quick Start + +### Workflow side + +Add `PubSubMixin` to your workflow and call `init_pubsub()`: + +```python +from temporalio import workflow +from temporalio.contrib.pubsub import PubSubMixin + +@workflow.defn +class MyWorkflow(PubSubMixin): + @workflow.init + def __init__(self, input: MyInput) -> None: + self.init_pubsub() + + @workflow.run + async def run(self, input: MyInput) -> None: + self.publish("status", b"started") + await do_work() + self.publish("status", b"done") +``` + +### Activity side (publishing) + +Use `PubSubClient.for_workflow()` with the async context manager for batched +publishing. 
When called from within an activity, the client and workflow ID
+are inferred automatically:
+
+```python
+from temporalio import activity
+from temporalio.contrib.pubsub import PubSubClient
+
+@activity.defn
+async def stream_events() -> None:
+    client = PubSubClient.create(batch_interval=2.0)
+    async with client:
+        for chunk in generate_chunks():
+            client.publish("events", chunk)
+            activity.heartbeat()
+    # Buffer is flushed automatically on context manager exit
+```
+
+Use `priority=True` to flush immediately for latency-sensitive events:
+
+```python
+client.publish("events", data, priority=True)
+```
+
+### Subscribing
+
+Use `PubSubClient.create()` and the `subscribe()` async iterator:
+
+```python
+from temporalio.contrib.pubsub import PubSubClient
+
+client = PubSubClient.create(temporal_client, workflow_id)
+async for item in client.subscribe(["events"], from_offset=0):
+    print(item.topic, item.data)
+    if is_done(item):
+        break
+```
+
+## Topics
+
+Topics are plain strings with exact matching. No hierarchy or wildcards.
+
+- Publish to one topic at a time
+- Subscribe to a list of topics (empty list = all topics)
+- Publishing to a topic implicitly creates it
+
+## Continue-as-new
+
+Carry pub/sub state across continue-as-new boundaries:
+
+```python
+from dataclasses import dataclass
+from temporalio import workflow
+from temporalio.contrib.pubsub import PubSubMixin, PubSubState
+
+@dataclass
+class WorkflowInput:
+    pubsub_state: PubSubState | None = None
+
+@workflow.defn
+class MyWorkflow(PubSubMixin):
+    @workflow.init
+    def __init__(self, input: WorkflowInput) -> None:
+        self.init_pubsub(prior_state=input.pubsub_state)
+
+    @workflow.run
+    async def run(self, input: WorkflowInput) -> None:
+        # ... do work ... 
+
+        if workflow.info().is_continue_as_new_suggested():
+            self.drain_pubsub()
+            await workflow.wait_condition(workflow.all_handlers_finished)
+            workflow.continue_as_new(args=[WorkflowInput(
+                pubsub_state=self.get_pubsub_state(),
+            )])
+```
+
+`drain_pubsub()` unblocks waiting subscribers and rejects new polls so
+`all_handlers_finished` can stabilize. Subscribers created via
+`PubSubClient.create()` automatically follow continue-as-new chains.
+
+**Important:** Type the pubsub_state field as `PubSubState | None`, not `Any`.
+`Any`-typed fields deserialize as plain dicts, which breaks `init_pubsub()`.
+
+## Exactly-Once Delivery
+
+External publishers (via `PubSubClient`) get exactly-once delivery through
+publisher ID + sequence number deduplication. Each client instance generates
+a unique publisher ID and increments a monotonic sequence on each flush.
+The workflow tracks the highest seen sequence per publisher and rejects
+duplicates. See `DESIGN-ADDENDUM-DEDUP.md` for details.
+
+## API Reference
+
+### PubSubMixin
+
+| Method | Description |
+|---|---|
+| `init_pubsub(prior_state=None)` | Initialize state. Call in `__init__` for fresh workflows, or in `run()` when accepting CAN state. |
+| `publish(topic, data)` | Append to the log from workflow code. |
+| `get_pubsub_state()` | Snapshot for continue-as-new. |
+| `drain_pubsub()` | Unblock polls and reject new ones. |
+
+Handlers added automatically:
+
+| Handler | Kind | Name |
+|---|---|---|
+| Signal | `__pubsub_publish` | Receive external publications (with dedup) |
+| Update | `__pubsub_poll` | Long-poll subscription |
+| Query | `__pubsub_offset` | Current global offset |
+
+### PubSubClient
+
+| Method | Description |
+|---|---|
+| `PubSubClient.create(client, wf_id)` | Factory (preferred). Auto-detects activity context if args omitted. |
+| `PubSubClient(handle)` | From handle (no CAN follow). |
+| `publish(topic, data, priority=False)` | Buffer a message. 
| +| `flush()` | Send buffered messages (with dedup). | +| `subscribe(topics, from_offset, poll_interval=0.1)` | Async iterator. Always follows CAN chains when created via `for_workflow`. | +| `get_offset()` | Query current global offset. | + +Use as `async with` for batched publishing with automatic flush. + +## Cross-Language Protocol + +Any Temporal client can interact with a pub/sub workflow using these +fixed handler names: + +1. **Publish:** Signal `__pubsub_publish` with `PublishInput` +2. **Subscribe:** Update `__pubsub_poll` with `PollInput` -> `PollResult` +3. **Offset:** Query `__pubsub_offset` -> `int` diff --git a/temporalio/contrib/pubsub/__init__.py b/temporalio/contrib/pubsub/__init__.py new file mode 100644 index 000000000..b9978f94a --- /dev/null +++ b/temporalio/contrib/pubsub/__init__.py @@ -0,0 +1,31 @@ +"""Pub/sub support for Temporal workflows. + +This module provides a reusable pub/sub pattern where a workflow acts as a +message broker. External clients (activities, starters, other services) publish +and subscribe through the workflow handle using Temporal primitives. + +Payloads are opaque bytes. Base64 encoding is used on the wire for +cross-language compatibility, but users work with native byte types. +""" + +from temporalio.contrib.pubsub._client import PubSubClient +from temporalio.contrib.pubsub._mixin import PubSubMixin +from temporalio.contrib.pubsub._types import ( + PollInput, + PollResult, + PubSubItem, + PubSubState, + PublishEntry, + PublishInput, +) + +__all__ = [ + "PollInput", + "PollResult", + "PubSubClient", + "PubSubItem", + "PubSubMixin", + "PubSubState", + "PublishEntry", + "PublishInput", +] diff --git a/temporalio/contrib/pubsub/_client.py b/temporalio/contrib/pubsub/_client.py new file mode 100644 index 000000000..c316c005a --- /dev/null +++ b/temporalio/contrib/pubsub/_client.py @@ -0,0 +1,312 @@ +"""External-side pub/sub client. 
+ +Used by activities, starters, and any code with a workflow handle to publish +messages and subscribe to topics on a pub/sub workflow. +""" + +from __future__ import annotations + +import asyncio +import time +import uuid +from collections.abc import AsyncIterator +from typing import Self + +from temporalio import activity +from temporalio.client import ( + Client, + WorkflowExecutionStatus, + WorkflowHandle, + WorkflowUpdateFailedError, + WorkflowUpdateRPCTimeoutOrCancelledError, +) + +from ._types import ( + PollInput, + PollResult, + PubSubItem, + PublishEntry, + PublishInput, + decode_data, + encode_data, +) + + +class PubSubClient: + """Client for publishing to and subscribing from a pub/sub workflow. + + Create via :py:meth:`create` (preferred) or by passing a handle + directly to the constructor. + + For publishing, use as an async context manager to get automatic batching:: + + client = PubSubClient.create(temporal_client, workflow_id) + async with client: + client.publish("events", b"hello") + client.publish("events", b"world", priority=True) + + For subscribing:: + + client = PubSubClient.create(temporal_client, workflow_id) + async for item in client.subscribe(["events"], from_offset=0): + process(item) + """ + + def __init__( + self, + handle: WorkflowHandle, + *, + batch_interval: float = 2.0, + max_batch_size: int | None = None, + max_retry_duration: float = 600.0, + ) -> None: + """Create a pub/sub client from a workflow handle. + + Prefer :py:meth:`create` when you need continue-as-new + following in ``subscribe()``. + + Args: + handle: Workflow handle to the pub/sub workflow. + batch_interval: Seconds between automatic flushes. + max_batch_size: Auto-flush when buffer reaches this size. + max_retry_duration: Maximum seconds to retry a failed flush + before raising TimeoutError. Must be less than the + workflow's ``publisher_ttl`` (default 900s) to preserve + exactly-once delivery. Default: 600s. 
+ """ + self._handle = handle + self._client: Client | None = None + self._workflow_id = handle.id + self._batch_interval = batch_interval + self._max_batch_size = max_batch_size + self._max_retry_duration = max_retry_duration + self._buffer: list[PublishEntry] = [] + self._flush_event = asyncio.Event() + self._flush_task: asyncio.Task[None] | None = None + self._flush_lock = asyncio.Lock() + self._publisher_id: str = uuid.uuid4().hex[:16] + self._sequence: int = 0 + self._pending: list[PublishEntry] | None = None + self._pending_seq: int = 0 + self._pending_since: float | None = None + + @classmethod + def create( + cls, + client: Client | None = None, + workflow_id: str | None = None, + *, + batch_interval: float = 2.0, + max_batch_size: int | None = None, + max_retry_duration: float = 600.0, + ) -> PubSubClient: + """Create a pub/sub client from a Temporal client and workflow ID. + + This is the preferred constructor. It enables continue-as-new + following in ``subscribe()``. + + If called from within an activity, ``client`` and ``workflow_id`` + can be omitted — they are inferred from the activity context. + + Args: + client: Temporal client. If None and in an activity, uses + ``activity.client()``. + workflow_id: ID of the pub/sub workflow. If None and in an + activity, uses the activity's parent workflow ID. + batch_interval: Seconds between automatic flushes. + max_batch_size: Auto-flush when buffer reaches this size. + max_retry_duration: Maximum seconds to retry a failed flush + before raising TimeoutError. Default: 600s. 
+ """ + if client is None or workflow_id is None: + info = activity.info() + if client is None: + client = activity.client() + if workflow_id is None: + wf_id = info.workflow_id + assert wf_id is not None, ( + "activity must be called from within a workflow" + ) + workflow_id = wf_id + handle = client.get_workflow_handle(workflow_id) + instance = cls( + handle, + batch_interval=batch_interval, + max_batch_size=max_batch_size, + max_retry_duration=max_retry_duration, + ) + instance._client = client + return instance + + async def __aenter__(self) -> Self: + self._flush_task = asyncio.create_task(self._run_flusher()) + return self + + async def __aexit__(self, *_exc: object) -> None: + if self._flush_task: + self._flush_task.cancel() + try: + await self._flush_task + except asyncio.CancelledError: + pass + self._flush_task = None + await self._flush() + + def publish(self, topic: str, data: bytes, priority: bool = False) -> None: + """Buffer a message for publishing. + + Args: + topic: Topic string. + data: Opaque byte payload. + priority: If True, wake the flusher to send immediately + (fire-and-forget — does not block the caller). + """ + self._buffer.append(PublishEntry(topic=topic, data=encode_data(data))) + if priority or ( + self._max_batch_size is not None + and len(self._buffer) >= self._max_batch_size + ): + self._flush_event.set() + + async def _flush(self) -> None: + """Send buffered or pending messages to the workflow via signal. + + On failure, the pending batch and sequence are kept for retry. + Only advances the confirmed sequence on success. + """ + async with self._flush_lock: + if self._pending is not None: + # Retry path: check max_retry_duration + if ( + self._pending_since is not None + and time.monotonic() - self._pending_since + > self._max_retry_duration + ): + self._pending = None + self._pending_seq = 0 + self._pending_since = None + raise TimeoutError( + f"Flush retry exceeded max_retry_duration " + f"({self._max_retry_duration}s). 
Pending batch dropped. " + f"If the signal was delivered, items are in the log. " + f"If not, they are lost." + ) + batch = self._pending + seq = self._pending_seq + elif self._buffer: + # New batch path + seq = self._sequence + 1 + batch = self._buffer + self._buffer = [] + self._pending = batch + self._pending_seq = seq + self._pending_since = time.monotonic() + else: + return + + try: + await self._handle.signal( + "__pubsub_publish", + PublishInput( + items=batch, + publisher_id=self._publisher_id, + sequence=seq, + ), + ) + # Success: advance confirmed sequence, clear pending + self._sequence = seq + self._pending = None + self._pending_seq = 0 + self._pending_since = None + except Exception: + # Pending stays set for retry on the next _flush() call + raise + + async def _run_flusher(self) -> None: + """Background task: wait for timer OR priority wakeup, then flush.""" + while True: + try: + await asyncio.wait_for( + self._flush_event.wait(), timeout=self._batch_interval + ) + except asyncio.TimeoutError: + pass + self._flush_event.clear() + await self._flush() + + async def subscribe( + self, + topics: list[str] | None = None, + from_offset: int = 0, + *, + poll_cooldown: float = 0.1, + ) -> AsyncIterator[PubSubItem]: + """Async iterator that polls for new items. + + Automatically follows continue-as-new chains when the client + was created via :py:meth:`create`. + + Args: + topics: Topic filter. None or empty list means all topics. + from_offset: Global offset to start reading from. + poll_cooldown: Minimum seconds between polls to avoid + overwhelming the workflow when items arrive faster than + the poll round-trip. Defaults to 0.1. + + Yields: + PubSubItem for each matching item. 
+ """ + offset = from_offset + while True: + try: + result: PollResult = await self._handle.execute_update( + "__pubsub_poll", + PollInput(topics=topics or [], from_offset=offset), + result_type=PollResult, + ) + except asyncio.CancelledError: + return + except WorkflowUpdateFailedError as e: + if ( + e.cause + and getattr(e.cause, "type", None) == "TruncatedOffset" + ): + # Subscriber fell behind truncation. Retry from offset 0 + # which the mixin treats as "from the beginning of + # whatever exists" (i.e., from base_offset). + offset = 0 + continue + raise + except WorkflowUpdateRPCTimeoutOrCancelledError: + if await self._follow_continue_as_new(): + continue + return + for wire_item in result.items: + yield PubSubItem( + topic=wire_item.topic, + data=decode_data(wire_item.data), + offset=wire_item.offset, + ) + offset = result.next_offset + if poll_cooldown > 0: + await asyncio.sleep(poll_cooldown) + + async def _follow_continue_as_new(self) -> bool: + """Check if the workflow continued-as-new and re-target the handle. + + Returns True if the handle was updated (caller should retry). + """ + if self._client is None: + return False + try: + desc = await self._handle.describe() + except Exception: + return False + if desc.status == WorkflowExecutionStatus.CONTINUED_AS_NEW: + self._handle = self._client.get_workflow_handle(self._workflow_id) + return True + return False + + async def get_offset(self) -> int: + """Query the current global offset (base_offset + log length).""" + return await self._handle.query("__pubsub_offset", result_type=int) diff --git a/temporalio/contrib/pubsub/_mixin.py b/temporalio/contrib/pubsub/_mixin.py new file mode 100644 index 000000000..35f683863 --- /dev/null +++ b/temporalio/contrib/pubsub/_mixin.py @@ -0,0 +1,241 @@ +"""Workflow-side pub/sub mixin. + +Add PubSubMixin as a base class to any workflow to get pub/sub signal, update, +and query handlers. 
+ +Call ``init_pubsub()`` in ``__init__`` for fresh workflows, or in ``run()`` +when accepting ``prior_state`` from continue-as-new arguments. +""" + +from __future__ import annotations + +from temporalio import workflow +from temporalio.exceptions import ApplicationError + +from ._types import ( + PollInput, + PollResult, + PubSubItem, + PubSubState, + PublishInput, + _WireItem, + decode_data, + encode_data, +) + + +class PubSubMixin: + """Mixin that turns a workflow into a pub/sub broker. + + Provides: + - ``publish(topic, data)`` for workflow-side publishing + - ``__pubsub_publish`` signal for external publishing (with dedup) + - ``__pubsub_poll`` update for long-poll subscription + - ``__pubsub_offset`` query for current log length + - ``drain_pubsub()`` / ``get_pubsub_state()`` for continue-as-new + - ``truncate_pubsub(offset)`` for log prefix truncation + """ + + _pubsub_log: list[PubSubItem] + _pubsub_base_offset: int + _pubsub_publisher_sequences: dict[str, int] + _pubsub_publisher_last_seen: dict[str, float] + _pubsub_draining: bool + + def init_pubsub(self, prior_state: PubSubState | None = None) -> None: + """Initialize pub/sub state. + + Args: + prior_state: State carried from a previous run via + ``get_pubsub_state()`` through continue-as-new. Pass None + on the first run. 
+ """ + if prior_state is not None: + self._pubsub_log = [ + PubSubItem(topic=item.topic, data=decode_data(item.data)) + for item in prior_state.log + ] + self._pubsub_base_offset = prior_state.base_offset + self._pubsub_publisher_sequences = dict( + prior_state.publisher_sequences + ) + self._pubsub_publisher_last_seen = dict( + prior_state.publisher_last_seen + ) + else: + self._pubsub_log = [] + self._pubsub_base_offset = 0 + self._pubsub_publisher_sequences = {} + self._pubsub_publisher_last_seen = {} + self._pubsub_draining = False + + def get_pubsub_state( + self, *, publisher_ttl: float = 900.0 + ) -> PubSubState: + """Return a serializable snapshot of pub/sub state for continue-as-new. + + Prunes publisher dedup entries older than ``publisher_ttl`` seconds. + The TTL must exceed the ``max_retry_duration`` of any client that + may still be retrying a failed flush. + + Args: + publisher_ttl: Seconds after which a publisher's dedup entry + is pruned. Default 900 (15 minutes). + """ + self._check_initialized() + now = workflow.time() + + # Determine which publishers to retain. Publishers with timestamps + # are pruned by TTL. Publishers without timestamps (legacy state + # from before publisher_last_seen was added) are always retained + # to avoid silently dropping dedup entries on upgrade. + active_sequences: dict[str, int] = {} + active_last_seen: dict[str, float] = {} + for pid, seq in self._pubsub_publisher_sequences.items(): + ts = self._pubsub_publisher_last_seen.get(pid) + if ts is None or now - ts < publisher_ttl: + active_sequences[pid] = seq + if ts is not None: + active_last_seen[pid] = ts + + return PubSubState( + log=[ + _WireItem(topic=item.topic, data=encode_data(item.data)) + for item in self._pubsub_log + ], + base_offset=self._pubsub_base_offset, + publisher_sequences=active_sequences, + publisher_last_seen=active_last_seen, + ) + + def drain_pubsub(self) -> None: + """Unblock all waiting poll handlers and reject new polls. 
+
+        Call this before ``await workflow.wait_condition(workflow.all_handlers_finished)``
+        and ``workflow.continue_as_new()``.
+        """
+        self._check_initialized()
+        self._pubsub_draining = True
+
+    def truncate_pubsub(self, up_to_offset: int) -> None:
+        """Discard log entries before ``up_to_offset``.
+
+        After truncation, polls requesting an offset before the new
+        base fail with an ``ApplicationError`` of type
+        ``TruncatedOffset``. All global offsets remain monotonic.
+
+        Args:
+            up_to_offset: The global offset to truncate up to (exclusive).
+                Entries at offsets ``[base_offset, up_to_offset)`` are
+                discarded.
+        """
+        self._check_initialized()
+        log_index = up_to_offset - self._pubsub_base_offset
+        if log_index <= 0:
+            return
+        if log_index > len(self._pubsub_log):
+            raise ValueError(
+                f"Cannot truncate to offset {up_to_offset}: "
+                f"only {self._pubsub_base_offset + len(self._pubsub_log)} "
+                f"items exist"
+            )
+        self._pubsub_log = self._pubsub_log[log_index:]
+        self._pubsub_base_offset = up_to_offset
+
+    def _check_initialized(self) -> None:
+        if not hasattr(self, "_pubsub_log"):
+            raise RuntimeError(
+                "PubSubMixin not initialized. Call self.init_pubsub() in "
+                "your workflow's __init__ or at the start of run()."
+            )
+
+    def publish(self, topic: str, data: bytes) -> None:
+        """Publish an item from within workflow code. Deterministic — just appends."""
+        self._check_initialized()
+        self._pubsub_log.append(PubSubItem(topic=topic, data=data))
+
+    @workflow.signal(name="__pubsub_publish")
+    def _pubsub_publish(self, input: PublishInput) -> None:
+        """Receive publications from external clients (activities, starters).
+
+        Deduplicates using (publisher_id, sequence). If publisher_id is set
+        and the sequence is <= the last seen sequence for that publisher,
+        the entire batch is dropped as a duplicate. Batches are atomic:
+        the dedup decision applies to the whole batch, not individual items.
+ """ + self._check_initialized() + if input.publisher_id: + last_seq = self._pubsub_publisher_sequences.get( + input.publisher_id, 0 + ) + if input.sequence <= last_seq: + return + self._pubsub_publisher_sequences[input.publisher_id] = ( + input.sequence + ) + self._pubsub_publisher_last_seen[input.publisher_id] = ( + workflow.time() + ) + for entry in input.items: + self._pubsub_log.append( + PubSubItem(topic=entry.topic, data=decode_data(entry.data)) + ) + + @workflow.update(name="__pubsub_poll") + async def _pubsub_poll(self, input: PollInput) -> PollResult: + """Long-poll: block until new items available or draining, then return.""" + self._check_initialized() + log_offset = input.from_offset - self._pubsub_base_offset + if log_offset < 0: + if input.from_offset == 0: + # "From the beginning" — start at whatever is available. + log_offset = 0 + else: + # Subscriber had a specific position that's been truncated. + # ApplicationError fails this update (client gets the error) + # without crashing the workflow task — avoids a poison pill + # during replay. + raise ApplicationError( + f"Requested offset {input.from_offset} has been truncated. 
" + f"Current base offset is {self._pubsub_base_offset}.", + type="TruncatedOffset", + non_retryable=True, + ) + await workflow.wait_condition( + lambda: len(self._pubsub_log) > log_offset + or self._pubsub_draining, + ) + all_new = self._pubsub_log[log_offset:] + next_offset = self._pubsub_base_offset + len(self._pubsub_log) + if input.topics: + topic_set = set(input.topics) + filtered = [ + (self._pubsub_base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + if item.topic in topic_set + ] + else: + filtered = [ + (self._pubsub_base_offset + log_offset + i, item) + for i, item in enumerate(all_new) + ] + return PollResult( + items=[ + _WireItem(topic=item.topic, data=encode_data(item.data), offset=off) + for off, item in filtered + ], + next_offset=next_offset, + ) + + @_pubsub_poll.validator + def _validate_pubsub_poll(self, input: PollInput) -> None: # noqa: A002 + """Reject new polls when draining for continue-as-new.""" + self._check_initialized() + if self._pubsub_draining: + raise RuntimeError("Workflow is draining for continue-as-new") + + @workflow.query(name="__pubsub_offset") + def _pubsub_offset(self) -> int: + """Return the current global offset (base_offset + log length).""" + self._check_initialized() + return self._pubsub_base_offset + len(self._pubsub_log) diff --git a/temporalio/contrib/pubsub/_types.py b/temporalio/contrib/pubsub/_types.py new file mode 100644 index 000000000..69cc5f431 --- /dev/null +++ b/temporalio/contrib/pubsub/_types.py @@ -0,0 +1,100 @@ +"""Shared data types for the pub/sub contrib module.""" + +from __future__ import annotations + +import base64 +from dataclasses import dataclass, field + + +def encode_data(data: bytes) -> str: + """Encode bytes to base64 string for wire format.""" + return base64.b64encode(data).decode("ascii") + + +def decode_data(data: str) -> bytes: + """Decode base64 string from wire format to bytes.""" + return base64.b64decode(data) + + +@dataclass +class PubSubItem: + """A single 
item in the pub/sub log. + + The ``offset`` field is populated at poll time from the item's position + in the global log. It defaults to 0 ("unknown") for backward compatibility. + See DESIGN-ADDENDUM-ITEM-OFFSET.md. + """ + + topic: str + data: bytes + offset: int = 0 + + +@dataclass +class PublishEntry: + """A single entry to publish via signal (wire type). + + The ``data`` field is a base64-encoded string for cross-language + compatibility over Temporal's JSON payload converter. + """ + + topic: str + data: str # base64-encoded bytes + + +@dataclass +class PublishInput: + """Signal payload: batch of entries to publish. + + Includes publisher_id and sequence for exactly-once deduplication. + See DESIGN-ADDENDUM-DEDUP.md. + """ + + items: list[PublishEntry] = field(default_factory=list) + publisher_id: str = "" + sequence: int = 0 + + +@dataclass +class PollInput: + """Update payload: request to poll for new items.""" + + topics: list[str] = field(default_factory=list) + from_offset: int = 0 + + +@dataclass +class _WireItem: + """Wire representation of a PubSubItem (base64 data).""" + + topic: str + data: str # base64-encoded bytes + offset: int = 0 + + +@dataclass +class PollResult: + """Update response: items matching the poll request. + + Items use base64-encoded data for cross-language wire compatibility. + """ + + items: list[_WireItem] = field(default_factory=list) + next_offset: int = 0 + + +@dataclass +class PubSubState: + """Serializable snapshot of pub/sub state for continue-as-new. + + The containing workflow input must type the field as + ``PubSubState | None``, not ``Any``, so that the default data converter + can reconstruct the dataclass from JSON. + + The log items use base64-encoded data for serialization stability. 
+ """ + + log: list[_WireItem] = field(default_factory=list) + base_offset: int = 0 + publisher_sequences: dict[str, int] = field(default_factory=dict) + publisher_last_seen: dict[str, float] = field(default_factory=dict) diff --git a/temporalio/contrib/pubsub/docs/end-to-end-dedup-analysis.md b/temporalio/contrib/pubsub/docs/end-to-end-dedup-analysis.md new file mode 100644 index 000000000..a6de76028 --- /dev/null +++ b/temporalio/contrib/pubsub/docs/end-to-end-dedup-analysis.md @@ -0,0 +1,190 @@ +# Analysis: End-to-End Principle Applied to Deduplication + +Should pub/sub dedup live in the workflow (middle layer), or should +consumers handle it at the edges? This analysis applies the end-to-end +argument to the different types of duplicates in the system. + +## The End-to-End Argument + +Saltzer, Reed, and Clark (1984): a function can be correctly and +completely implemented only with the knowledge and help of the +application standing at the endpoints. Putting it in the middle layer +may improve performance but cannot guarantee correctness — the endpoints +must still handle the failure cases themselves. + +Applied here: if the consumer must handle duplicates anyway (because some +duplicates originate above or below the transport layer), then dedup in +the pub/sub workflow is redundant complexity. + +## The Pipeline + +``` +LLM API --> Activity --> PubSubClient --> Workflow Log --> BFF/SSE --> Browser + (1) (2) (3) (4) (5) (6) +``` + +Duplicates can arise at stages 1, 3, and 5. Each has different +characteristics. + +## Types of Duplicates + +### Type A: Duplicate LLM Responses (Stage 1) + +**Cause**: Activity retries. If an activity calling an LLM times out but +the LLM actually completed, the retry produces a second, semantically +equivalent but textually different response. + +**Nature**: The two responses have *different content*. They are not +byte-identical duplicates — they are duplicate *requests* that produce +duplicate *work*. 
+ +**Why this doesn't belong in pub/sub**: Not because pub/sub can't detect +it — in principle, you could fingerprint content or track LLM request +IDs in the workflow. The real reason is that **data escapes to the +application before you know whether dedup will be needed.** The activity +streams the first LLM response through the pub/sub log as tokens arrive. +The subscriber consumes them. The BFF forwards them to the browser. The +user sees them rendered. All of this happens during the first LLM call, +before any retry occurs. + +By the time the activity fails and retries, the first response's tokens +are already consumed, rendered, and acted upon. The duplicate LLM +response hasn't been produced yet — it doesn't exist until the retry +completes. So there is no point during the first call where the pub/sub +layer could suppress it, because at that point there is nothing to +suppress. + +When the retry does produce a second response, the application must +decide what to do: discard it, replace the first, merge them, show both. +That decision depends on application semantics that the pub/sub layer +has no knowledge of. The correct place for this dedup is the activity +(don't retry completed LLM calls), the orchestrating workflow (use +activity idempotency keys), or the application's own recovery logic. + +**End-to-end verdict**: Type A dedup belongs at the application layer, +not because pub/sub lacks the capability, but because the data has +already escaped before the duplicate exists. + +### Type B: Duplicate Signal Batches (Stage 3) + +**Cause**: `PubSubClient._flush()` sends a signal. The server accepts it +but the client sees a network error. The client retries, sending the +same batch again. The workflow receives both signals. + +**Nature**: Byte-identical duplicate batches with the same +`(publisher_id, sequence)`. + +**Why this belongs in pub/sub**: Two reasons. 
+ +First, **encapsulation**: the fact that publishing goes through batched +signals is an implementation detail of the pub/sub transport. The +consumer shouldn't need to know about `(publisher_id, sequence)`, batch +boundaries, or signal retry semantics. Leaking batch-level dedup to the +consumer would couple it to the transport mechanism. If we later switch +to updates, change the batching strategy, or introduce a different +transport, the consumer's dedup logic would break. + +Second, **the consumer cannot do it correctly**. The subscriber sees +`PubSubItem(topic, data)` — items have no unique ID. If the workflow +accepts a duplicate batch, it assigns *new* offsets to the duplicate +items, making them indistinguishable from originals. Content-based dedup +has false positives (an LLM legitimately produces the same token twice; +a status event like `{"type":"THINKING_START"}` is repeated across +turns). The consumer would need to implement a fragile, heuristic dedup +that still misses edge cases. + +The pub/sub layer, by contrast, can detect these duplicates cheaply and +precisely: `sequence <= last_seen` is a single integer comparison per +batch. The sequence number is generated and validated within the same +control boundary (publisher client + workflow handler). This is not a +"middle layer redundantly implementing endpoint functionality" — it is +the only layer with sufficient context to do it correctly. + +**End-to-end verdict**: Type B dedup is properly placed in the workflow. +It preserves transport encapsulation and is the only correct +implementation. + +### Type C: Duplicate SSE Delivery (Stage 5) + +**Cause**: Browser reconnection. The SSE connection drops, the browser +reconnects with `Last-Event-ID`, and the BFF replays from that offset. +If the BFF replays too far back, the browser sees duplicate events. + +**Nature**: Exact replay of previously-delivered events. + +**Where dedup must live**: The **BFF** (stage 5) and/or the **browser** +(stage 6). 
The BFF must track SSE event IDs and resume from the correct +point. The browser/frontend reducer should be idempotent — applying the +same event twice should not corrupt state (e.g., append a text delta +twice). + +**End-to-end verdict**: Pub/sub dedup is irrelevant for Type C. This +duplicate exists below the pub/sub layer, in the SSE transport. + +## Summary Table + +| Type | Cause | Why not in pub/sub? | Where dedup belongs | +|---|---|---|---| +| A: Duplicate LLM work | Activity retry | Data escapes before duplicate exists | Activity / workflow orchestration | +| B: Duplicate batches | Signal retry | *Does* belong in pub/sub | Workflow (pub/sub layer) | +| C: Duplicate SSE events | Browser reconnect | Below the pub/sub layer | BFF / browser | + +## Proper Layering + +Each layer handles the duplicates it introduces: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Application layer (activity / workflow orchestration) │ +│ Handles: Type A — duplicate LLM work │ +│ Mechanism: activity idempotency keys, don't retry │ +│ completed LLM calls, application recovery logic │ +├─────────────────────────────────────────────────────────┤ +│ Transport layer (pub/sub workflow) │ +│ Handles: Type B — duplicate signal batches │ +│ Mechanism: (publisher_id, sequence) dedup │ +│ Encapsulates: batching, signals, retry semantics │ +├─────────────────────────────────────────────────────────┤ +│ Delivery layer (BFF / SSE / browser) │ +│ Handles: Type C — duplicate SSE events │ +│ Mechanism: Last-Event-ID, idempotent reducers │ +└─────────────────────────────────────────────────────────┘ +``` + +Each layer is self-contained. The application doesn't know about signal +batches. The pub/sub layer doesn't know about LLM semantics. The SSE +layer doesn't know about either. Duplicates are resolved at the layer +that introduces them, with the context needed to resolve them correctly. + +## Does the Consumer Need Type B Dedup Anyway? 
+ +The end-to-end argument would apply if consumers needed Type B dedup +regardless of what the workflow does. They don't: + +1. **Consumers cannot detect Type B duplicates.** Items have no unique + ID. Offsets are assigned by the workflow — if it accepts a duplicate + batch, the duplicates get fresh offsets and are indistinguishable. + +2. **Consumers already handle Type C independently.** SSE reconnection + and idempotent reducers are standard patterns that exist regardless + of what the pub/sub layer does. + +3. **Type A is handled above.** The activity/workflow prevents duplicate + work from being published in the first place. + +The consumer does *not* need Type B dedup. The layers are clean. + +## Conclusion + +The `(publisher_id, sequence)` dedup protocol is correctly placed in the +pub/sub workflow. It handles the one type of duplicate that originates +within the transport layer, using context that only the transport layer +has, without leaking transport implementation details to the consumer. + +What the pub/sub layer should *not* attempt: +- Type A dedup (duplicate LLM work) — data has already escaped to the + application before the duplicate exists; resolution requires + application semantics +- Type C dedup (SSE reconnection) — below the pub/sub layer +- General-purpose content dedup — false positive risk, wrong abstraction + level diff --git a/temporalio/contrib/pubsub/docs/signal-vs-update-dedup-analysis.md b/temporalio/contrib/pubsub/docs/signal-vs-update-dedup-analysis.md new file mode 100644 index 000000000..de17e0eb3 --- /dev/null +++ b/temporalio/contrib/pubsub/docs/signal-vs-update-dedup-analysis.md @@ -0,0 +1,198 @@ +# Analysis: Signal vs Update for Publishing — Deduplication Tradeoffs + +Should pub/sub publishing use signals (current) or updates? This analysis +examines what Temporal provides natively for deduplication and whether +application-level dedup can be eliminated. 
+ +## What Temporal Provides + +### Signals + +- **Delivery guarantee**: at-least-once. +- **Request-level dedup**: the gRPC layer attaches a random `request_id` to + each RPC. If the SDK's internal retry resends the *same* RPC (e.g., due to + a transient gRPC error), the server deduplicates it. This is transparent + and not controllable by the application. +- **No application-level dedup key**: there is no way to attach an + idempotency key to a signal. If the client makes a *new* signal call with + the same logical content (a retry after a timeout where the outcome is + unknown), Temporal treats it as a distinct signal and delivers it. +- **Official guidance**: "For Signals, you should use a custom idempotency + key that you send as part of your own signal inputs, implementing the + deduplication in your Workflow code." + ([docs](https://docs.temporal.io/handling-messages#exactly-once-message-processing)) + +### Updates + +- **Delivery guarantee**: exactly-once *per workflow run*, via Update ID. +- **Update ID**: defaults to a random UUID but can be set by the caller. The + server deduplicates accepted updates by Update ID within a single workflow + execution. +- **Cross-CAN boundary**: Update ID dedup state does *not* persist across + continue-as-new. A retry that lands on a new run is treated as a new + update. +- **Known bug (temporal/temporal#6375)**: `CompleteUpdate` is sometimes not + honored when in the same WFT completion as CAN. The frontend retries and + the update can be delivered to the post-CAN run as a distinct update. + This makes cross-CAN dedup unreliable even for updates. +- **Official guidance**: "If you are using Updates with Continue-As-New you + should implement the deduplication in your Workflow code, since Update ID + deduplication by the server is per Workflow run." 
+ +### Summary + +| | Signals (current) | Updates | +|---|---|---| +| Per-run dedup | None (app must provide) | Built-in via Update ID | +| Cross-CAN dedup | None (app must provide) | None (app must provide) | +| App-level dedup needed? | **Yes** | **Yes** (for CAN workflows) | + +Since pub/sub workflows use continue-as-new, **application-level dedup is +required regardless of whether we use signals or updates for publishing.** + +**Pragmatic view**: The cross-CAN update dedup gap (temporal/temporal#6375) +is a known issue that Temporal will likely fix. If we used updates for +publishing and accepted this edge case as a temporary platform limitation, +we could eventually drop application-level dedup entirely once the fix +ships. With signals, application-level dedup is a permanent requirement — +there are no plans to add signal idempotency keys to the platform. + +## Tradeoffs Beyond Dedup + +### Latency and blocking + +| | Signals | Updates | +|---|---|---| +| Client blocks? | No — fire-and-forget | Yes — until workflow processes it | +| Flush latency | ~0 (signal enqueued at server) | Round-trip to worker + processing | +| Caller impact | `publish()` never blocks | Flush blocks for ~10-50ms | + +With signals, the flush is non-blocking. The client can immediately continue +buffering new items. With updates, the flush would block until the workflow +worker processes the batch and returns a result. + +For high-throughput publishing from activities (e.g., streaming LLM tokens), +the non-blocking property matters. The activity can buffer tokens at whatever +rate they arrive without being throttled by the workflow's processing speed. + +### Backpressure + +| | Signals | Updates | +|---|---|---| +| Natural backpressure | No | Yes | +| Overflow risk | Workflow history grows unbounded | Client slows to workflow speed | + +Updates provide natural backpressure: a fast publisher automatically slows +down because each flush blocks. 
With signals, a fast publisher can +overwhelm the workflow's event history (each signal adds events). The +current mitigation is batching (amortizes signal count) and relying on the +workflow to CAN before history gets too large. + +### Batching + +Batching works identically with either approach. The client-side buffer/swap/ +flush logic is unchanged — only the flush transport differs: + +```python +# Signal (current) +await self._handle.signal("__pubsub_publish", PublishInput(...)) + +# Update (alternative) +await self._handle.execute_update("__pubsub_publish", PublishInput(...)) +``` + +My earlier claim that batching would be "awkward" with updates was wrong. + +### Return value + +Updates can return a result. A publish-via-update could return the assigned +offsets, confirmation of delivery, or the current log length. With signals, +the client has no way to learn the outcome without a separate query. + +### Event history cost + +Each signal adds `WorkflowSignalReceived` to history (1 event). Each update +adds `WorkflowExecutionUpdateAccepted` + `WorkflowExecutionUpdateCompleted` +(2 events). Updates consume history faster, bringing CAN sooner. + +### Concurrency limits + +Temporal Cloud has [per-workflow update limits](https://docs.temporal.io/cloud/limits#per-workflow-execution-update-limits). +Signals have no equivalent limit. For very high-throughput scenarios, signals +may be the only option. + +## Recommendation + +**Keep signals for publishing.** The non-blocking property is the decisive +factor for the streaming use case. The application-level dedup +(`publisher_id` + `sequence`) is a permanent requirement for signals and +is already implemented with TLA+ verification. + +**Alternative worth revisiting**: If the non-blocking property were less +important (e.g., lower-throughput use case), updates would be attractive. 
+Once temporalio/temporal#6375 is fixed, update-based publishing with CAN
+would get platform-native exactly-once with no application dedup needed.
+The tradeoff is blocking flush + 2x history events per batch.
+
+For the current streaming use case, signals remain the right choice.
+
+**Keep updates for polling.** The `__pubsub_poll` update is the correct
+choice for subscription: the caller needs a result (the items), and blocking
+is the desired behavior (long-poll semantics).
+
+## What Would Change If We Switched
+
+For completeness, here's what a switch to update-based publishing would
+require:
+
+1. Replace signal handler `__pubsub_publish` with an update handler
+2. The publish handler becomes synchronous (just appends to log) — fast
+3. Client flush changes from `handle.signal(...)` to
+   `handle.execute_update(...)`
+4. Background flusher blocks on the update call instead of fire-and-forget
+5. Application-level dedup stays (CAN requirement)
+6. Update validator could reject publishes during drain (already done for
+   polls)
+7. Return type could include assigned offsets
+
+The dedup protocol, TLA+ specs, and mixin-side handler logic would be
+essentially unchanged. The change is mechanical, not architectural.
+
+## Signal Ordering Guarantee
+
+Temporal guarantees that signals from a single client, sent sequentially
+(each signal call completes before the next is sent), are delivered in order:
+
+> "Signals are delivered in the order they are received by the Cluster and
+> written to History."
+> ([docs](https://docs.temporal.io/workflows#signal))
+
+The guarantee breaks down only for *concurrent* signals — if two signal RPCs
+are in flight simultaneously, their order in history is nondeterministic.
+
+The pub/sub client's `_flush_lock` ensures signals are never sent
+concurrently from a single `PubSubClient` instance. The sequence is:
+
+1. Acquire lock
+2. `await handle.signal(...)` — blocks until server writes to history
+3. 
Release lock
+
+This means batches from a single publisher are ordered in the workflow log.
+Combined with the workflow's single-threaded signal processing (the
+`_pubsub_publish` handler is synchronous — no `await`), items within and
+across batches preserve their publish order.
+
+**Cross-publisher ordering** is nondeterministic. If publisher A and
+publisher B send signals concurrently, the interleaving in history depends
+on arrival order at the server. Within each publisher's stream, ordering is
+preserved. This matches the `OrderPreservedPerPublisher` invariant verified
+in `PubSubDedupTTL.tla`.
+
+## Sources
+
+- [Temporal docs: Message handler patterns — exactly-once processing](https://docs.temporal.io/handling-messages#exactly-once-message-processing)
+- [Temporal docs: Signals vs Updates decision table](https://docs.temporal.io/encyclopedia/workflow-message-passing)
+- [temporalio/temporal#6375: CompleteUpdate not honored during CAN](https://github.com/temporalio/temporal/issues/6375)
+- [Community: Deduping workflow signals](https://community.temporal.io/t/deduping-workflow-signals/5547)
+- [Community: Idempotent signals investigation](https://community.temporal.io/t/preliminary-investigation-into-idempotent-signals/13694)
+- [Slack: request_id is for client call dedup, not application dedup](https://temporalio.slack.com/archives/C012SHMPDDZ/p1729554260821239)
diff --git a/temporalio/contrib/pubsub/verification/PROOF.md b/temporalio/contrib/pubsub/verification/PROOF.md
new file mode 100644
index 000000000..9562822ed
--- /dev/null
+++ b/temporalio/contrib/pubsub/verification/PROOF.md
@@ -0,0 +1,322 @@
+# Proof of Exactly-Once Delivery
+
+Formal verification that the pub/sub dedup protocol guarantees no duplicates
+and no data loss, for any number of published items.
+
+## Protocol
+
+A client flushes batches of items to a workflow via Temporal signals:
+
+1. **Buffer swap**: `pending = buffer; buffer = []`
+2. 
**Assign sequence**: `pending_seq = confirmed_seq + 1` +3. **Send signal** with `(publisher_id, pending_seq, pending)` +4. **On success**: `confirmed_seq = pending_seq; pending = None` +5. **On failure**: keep `pending` and `pending_seq` for retry + +The workflow deduplicates: reject if `sequence <= last_seen_seq[publisher_id]`. + +The network is non-deterministic: a signal may be delivered to the workflow +but the client may see a failure (e.g., network timeout on the response). + +## Properties + +- **NoDuplicates** (safety): each item appears at most once in the workflow log. +- **OrderPreserved** (safety): items appear in the log in the order they were + published. This is stronger than within-batch ordering — it covers + cross-batch ordering too. +- **AllItemsDelivered** (liveness): under fairness, every published item + eventually reaches the log. Note: the TLA+ spec models a protocol without + `max_retry_duration`. The implementation intentionally sacrifices this + liveness property by dropping pending batches after a timeout to bound + resource usage. This is a design choice — when a batch is dropped, items + may be lost if the signal was not delivered. + +## Bounded Model Checking + +`PubSubDedup.tla` models the protocol with TLC model checking: + +| MaxItems | States Generated | Distinct States | Depth | Result | +|----------|-----------------|-----------------|-------|--------| +| 4 | 320 | 175 | 19 | Pass | +| 6 | 1,202 | 609 | 27 | Pass | + +NoDuplicates, OrderPreserved (invariants) and AllItemsDelivered (liveness +under weak fairness) all pass. + +## Inductive Invariant (Unbounded Argument) + +Bounded model checking proves correctness for specific MaxItems values. +To extend to all N, we define a strengthened invariant `IndInv` in +`PubSubDedupInductive.tla` and verify that it holds for all reachable +states under the standard specification. 
+ +Note: TLC checks `IndInv` as a reachable-state invariant of `Spec` +(i.e., `Init => IndInv` and preservation along all reachable behaviors), +not as a true inductive invariant from arbitrary `IndInv` states. +The per-action proof sketch below argues inductiveness informally. +Since the invariant's clauses are structural relationships independent +of N, verification at MaxItems=6 gives high confidence in the general +case. + +### Definition + +`IndInv` has 13 clauses organized into 5 groups: + +**Uniqueness (C1-C3):** Items are unique within each container. +- C1: `Unique(wf_log)` — no duplicates in the log +- C2: `Unique(buffer)` — no duplicates in the buffer +- C3: `Unique(pending)` — no duplicates in the pending batch + +**Disjointness (C4-C5):** Buffer items are always fresh. +- C4: `Disjoint(buffer, pending)` +- C5: `Disjoint(buffer, wf_log)` + +**Dedup relationship (C6-C7):** The critical property linking pending to the log. +- C6: If `pending_seq > wf_last_seq` (not yet delivered), then `Disjoint(pending, wf_log)` +- C7: If `pending_seq <= wf_last_seq` (already delivered), then `IsSubseq(pending, wf_log)` + +**Sequence consistency (C8-C11):** Sequence numbers track delivery correctly. +- C8: `confirmed_seq <= wf_last_seq` +- C9: `pending = <<>> => confirmed_seq = wf_last_seq` +- C10: `pending = <<>> <=> pending_seq = 0` +- C11: `pending /= <<>> => pending_seq = confirmed_seq + 1` + +**Bounds (C12-C13):** All item IDs are in `1..item_counter`. + +### IndInv implies NoDuplicates + +Trivially: NoDuplicates is clause C1. + +### Init implies IndInv + +All containers are empty, all counters are 0. Every clause is vacuously true +or directly satisfied. + +### IndInv is preserved by every action + +**Publish:** Adds `item_counter + 1` to buffer. This ID is fresh — not in +any container (by C12, all existing IDs are in `1..item_counter`). Uniqueness +and disjointness are preserved. `item_counter` increments, so C12 holds for +the new ID. 
+ +**StartFlush (retry):** No changes to buffer, pending, or wf_log. Only +`flushing` and `delivered` change. All structural properties preserved. + +**StartFlush (new):** Requires `pending = <<>>`. By C9, `confirmed_seq = wf_last_seq`. +So `pending_seq' = confirmed_seq + 1 = wf_last_seq + 1 > wf_last_seq`. +Buffer moves to pending: C2 (buffer unique) transfers to C3 (pending unique). +C5 (buffer disjoint from log) transfers to C6 (pending disjoint from log, +since `pending_seq' > wf_last_seq`). New buffer is `<<>>`, satisfying C4-C5 +vacuously. + +**Deliver (accepted, `pending_seq > wf_last_seq`):** Appends pending to wf_log. +By C6, pending is disjoint from wf_log. Combined with C1 (log unique) and +C3 (pending unique), the extended log has no duplicates → C1 preserved. +Sets `wf_last_seq' = pending_seq`, so now `pending_seq <= wf_last_seq'`. +Pending items are in the new log → C7 satisfied. C5 preserved: buffer was +disjoint from both pending and old log, so disjoint from new log. + +**Deliver (rejected, `pending_seq <= wf_last_seq`):** wf_log unchanged. +Sets `delivered = TRUE`. All properties trivially preserved. + +**FlushSuccess:** Requires `delivered = TRUE` (so Deliver has fired). Sets +`confirmed_seq' = pending_seq`, `pending' = <<>>`. By C11, +`pending_seq = confirmed_seq + 1`. The Deliver action that set +`delivered = TRUE` either accepted (setting `wf_last_seq = pending_seq`) +or rejected (leaving `wf_last_seq` unchanged, which means +`pending_seq <= wf_last_seq` was already true — but since +`pending_seq = confirmed_seq + 1` and `confirmed_seq <= wf_last_seq` (C8), +we need `wf_last_seq >= confirmed_seq + 1 = pending_seq`). In both cases, +`wf_last_seq >= pending_seq` after Deliver. FlushSuccess requires +`delivered = TRUE`, meaning Deliver fired. If Deliver accepted, +`wf_last_seq = pending_seq`. If Deliver rejected, `pending_seq <= wf_last_seq` +was already true. 
So `confirmed_seq' = pending_seq <= wf_last_seq`, and +since `confirmed_seq <= wf_last_seq` is C8 (not strict equality), C8 is +preserved. C9 requires `pending = <<>> => confirmed_seq = wf_last_seq`. +After FlushSuccess, `pending' = <<>>` and `confirmed_seq' = pending_seq`. +If Deliver accepted: `wf_last_seq = pending_seq = confirmed_seq'` → C9 holds. +If Deliver rejected: `pending_seq <= wf_last_seq`, so `confirmed_seq' <= wf_last_seq`. +But can `confirmed_seq' < wf_last_seq`? Only if another delivery advanced +`wf_last_seq` past `pending_seq` — but there is only one publisher, so no. +In the single-publisher model, `wf_last_seq` is only set by Deliver for +this publisher's `pending_seq`, so after acceptance `wf_last_seq = pending_seq`. +If rejected, `wf_last_seq` was already `>= pending_seq`, but since only +this publisher writes to `wf_last_seq`, and the last accepted sequence was +`confirmed_seq` (by C9 before StartFlush), and `pending_seq = confirmed_seq + 1`, +we have `wf_last_seq >= confirmed_seq + 1 = pending_seq`. If Deliver rejected, +it means `wf_last_seq >= pending_seq` already, but the only way `wf_last_seq` +could exceed `confirmed_seq` is from a previous delivered-but-not-confirmed +flush — which is exactly `pending_seq`. So `wf_last_seq = pending_seq`, +and C9 holds. Clearing pending makes C3, C4, C6, C7 vacuously true. + +**FlushFail:** Sets `flushing' = FALSE`. No changes to buffer, pending, +wf_log, or sequences. All properties preserved. + +### Why this generalizes beyond MaxItems + +The 13 clauses of IndInv are structural relationships between containers +(uniqueness, disjointness, subset, sequence ordering). None depends on the +value of MaxItems or the total number of items published. The per-action +preservation arguments above use only these structural properties, not any +bound on N. + +TLC verifies IndInv for all 609 reachable states at MaxItems=6. 
The +proof sketch above argues inductiveness informally — since the clauses +are structural relationships independent of N, this gives high +confidence in the general case. + +## Order Preservation + +`OrderPreserved` states that items appear in the log in ascending order of +their IDs. This is verified as an invariant alongside NoDuplicates. + +The property follows from the protocol structure: + +1. `Publish` assigns monotonically increasing IDs (`item_counter + 1`) +2. `StartFlush` moves the entire buffer to pending, preserving order +3. `Deliver` appends the entire pending sequence to the log, preserving order +4. Retries re-send the same pending with the same order; dedup ensures only + one copy appears in the log +5. The flush lock serializes batches, so all items in batch N have lower IDs + than all items in batch N+1 + +For multi-publisher scenarios (`PubSubDedupTTL.tla`), ordering is preserved +**per publisher** but not globally across publishers, since concurrent +publishers interleave non-deterministically. The `OrderPreservedPerPublisher` +invariant verifies this. + +## TTL-Based Pruning of Dedup Entries + +### Problem + +`publisher_sequences` grows with each distinct publisher. During +continue-as-new, stale entries (from publishers that are no longer active) +waste space. TTL-based pruning removes entries that haven't been updated +within a time window. + +### Safety Constraint + +`PubSubDedupTTL.tla` models two publishers with a `Prune` action that +resets a publisher's `wf_last` to 0 (forgetting its dedup history). + +**Unsafe pruning** (prune any publisher at any time) violates NoDuplicates. +TLC finds the counterexample in 9 states: + +``` +1. Publisher A sends batch [1,3] with seq=1 +2. Delivered to workflow (log=[1,3], wf_last[A]=1) +3. Client sees failure, keeps pending for retry +4. Retry starts (same pending, same seq=1) +5. PruneUnsafe: wf_last[A] reset to 0 (TTL expired!) +6. 
Deliver: seq=1 > 0 → accepted → log=[1,3,1,3] — DUPLICATE +``` + +The root cause: the publisher still has an in-flight retry, but the workflow +has forgotten its dedup entry. + +**Safe pruning** (prune only when the publisher has no pending batch and is +not flushing) preserves NoDuplicates. TLC verifies this across 7,635 states +with 2 publishers and MaxItemsPerPub=2. + +### Implementation Constraint + +The TLA+ safety condition `pend[p] = <<>> /\ ~flush_active[p]` translates +to a real-world constraint: **TTL must exceed the maximum time a publisher +might retry a failed flush.** In practice: + +- `PubSubClient` instances are ephemeral (activity-scoped or request-scoped) +- When the activity completes, the client is gone — no more retries +- A 15-minute TTL exceeds any reasonable activity execution time +- During CAN, `get_pubsub_state()` prunes entries older than TTL +- The workflow should wait for activities to complete before triggering CAN + +### Multi-Publisher Protocol + +The base multi-publisher protocol (without pruning) also passes all +properties: NoDuplicates, OrderPreservedPerPublisher, and AllItemsDelivered. +5,143 states explored with 2 publishers and MaxItemsPerPub=2. + +## Scope and Limitations + +The TLA+ specs model the core dedup protocol. The following implementation +paths are not modeled: + +- **`max_retry_duration` timeout**: The implementation drops pending batches + after a timeout. This sacrifices `AllItemsDelivered` (liveness) for bounded + resource usage. `NoDuplicates` (safety) is not affected — dropping a batch + cannot create duplicates. + +- **Late delivery after client failure**: The model only allows `Deliver` + while `flushing = TRUE`. In practice, a signal could be delivered after the + client observes failure and stops flushing. This cannot cause duplicates: + if the signal is delivered between FlushFail and the next retry StartFlush, + `wf_last_seq` advances to `pending_seq`. 
When the retry fires, Deliver + sees `pending_seq <= wf_last_seq` and rejects (dedup). If the signal was + already delivered before FlushFail, the retry is also rejected. + +- **Legacy `publisher_id = ""` (dedup bypass)**: When `publisher_id` is empty, + the workflow skips dedup entirely. This path is not modeled — it's + intentionally at-least-once for backward compatibility. + +- **Workflow-internal `publish()`**: Deterministic, no signal involved, no + dedup needed. Not modeled because there's no concurrency to verify. + +- **TTL pruning is assumption-dependent**: `PruneSafe` in the TLA+ spec + requires `pend[p] = <<>> /\ ~flush_active[p]`. The implementation + approximates this via timestamps (`publisher_ttl > max_retry_duration`). + Safety depends on the user aligning these two settings. + +- **Publisher ID uniqueness**: The TLA+ model uses fixed publisher identities + (`{"A", "B"}`). The implementation uses random 64-bit UUIDs + (`uuid.uuid4().hex[:16]`). If two client instances received the same + publisher ID and the first's dedup entry was pruned, the second could + have its sequence 1 accepted even though the first's sequence 1 was + already delivered. Collision probability is ~2^-64, making this + practically impossible, but the safety argument implicitly relies on + publisher ID uniqueness across the TTL window. + +## Counterexample: Broken Algorithm + +`PubSubDedupBroken.tla` models the old algorithm where on failure the client: +- Restores items to the main buffer +- Advances the sequence number + +TLC finds a NoDuplicates violation in 10 states: + +``` +State 1: Initial (empty) +State 2: Publish item 1 +State 3: StartFlush: in_flight=[1], seq=1, buffer=[] +State 4-6: Publish items 2,3,4 (arrive during flush) +State 7: Deliver: wf_log=[1], wf_last_seq=1 (signal delivered) +State 8: FlushFail: buffer=[1,2,3,4], confirmed_seq=1 (BUG: item 1 restored) +State 9: StartFlush: in_flight=[1,2,3,4], seq=2 +State 10: Deliver: wf_log=[1,1,2,3,4] — DUPLICATE! 
+``` + +The root cause: item 1 was delivered (in the log) but also restored to the +buffer under a new sequence number, bypassing the workflow's dedup check. + +The correct algorithm prevents this by keeping the failed batch **separate** +(`pending`) and retrying with the **same** sequence number. If the signal was +already delivered, the retry is deduplicated (same sequence). If it wasn't, +the retry delivers it. + +## Correspondence to Implementation + +| TLA+ Variable | Python Implementation | +|---|---| +| `buffer` | `PubSubClient._buffer` | +| `pending` | `PubSubClient._pending` | +| `pending_seq` | `PubSubClient._pending_seq` | +| `confirmed_seq` | `PubSubClient._sequence` | +| `wf_last_seq` | `PubSubMixin._pubsub_publisher_sequences[publisher_id]` | + +| TLA+ Action | Python Code | +|---|---| +| `Publish` | `PubSubClient.publish()` appends to `_buffer` | +| `StartFlush` (retry) | `_flush()` detects `_pending is not None` | +| `StartFlush` (new) | `_flush()` swaps: `batch = _buffer; _buffer = []` | +| `Deliver` | Temporal signal delivery + `_pubsub_publish` handler | +| `FlushSuccess` | Signal call returns without exception | +| `FlushFail` | Signal call raises; `_pending` retained for retry | diff --git a/temporalio/contrib/pubsub/verification/PubSubDedup.cfg b/temporalio/contrib/pubsub/verification/PubSubDedup.cfg new file mode 100644 index 000000000..859346ed3 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedup.cfg @@ -0,0 +1,14 @@ +SPECIFICATION FairSpec + +CONSTANTS + MaxItems = 4 + +INVARIANTS + NoDuplicates + OrderPreserved + +PROPERTIES + AllItemsDelivered + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedup.tla b/temporalio/contrib/pubsub/verification/PubSubDedup.tla new file mode 100644 index 000000000..ba939f4e6 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedup.tla @@ -0,0 +1,205 @@ +--------------------------- MODULE PubSubDedup ---------------------------- +(* + * Formal 
verification of the pub/sub exactly-once delivery protocol.
+ *
+ * Models a single publisher flushing batches to a workflow via Temporal
+ * signals, with non-deterministic network behavior (signals may be
+ * delivered but the client sees a failure).
+ *
+ * The protocol:
+ * - Client swaps buffer → pending batch, assigns sequence = confirmed + 1
+ * - Client sends signal with (publisher_id, sequence, batch)
+ * - On confirmed success: advance confirmed_seq, clear pending
+ * - On failure: keep pending batch + sequence for retry (DO NOT advance)
+ * - Workflow deduplicates: reject if sequence <= last_seen_seq
+ *
+ * Verified properties:
+ * - NoDuplicates: each item appears at most once in the workflow log
+ * - AllItemsDelivered: every published item eventually reaches the log
+ *   (liveness, under fairness)
+ * - OrderPreserved: items appear in the log in publish order, within
+ *   and across batches
+ *)
+EXTENDS Integers, Sequences, FiniteSets
+
+CONSTANTS
+    MaxItems \* Upper bound on items published (for finite model checking)
+
+VARIABLES
+    (* === Client state === *)
+    buffer, \* Seq of item IDs waiting to be flushed
+    pending, \* Seq of item IDs in the current pending batch (<<>> if none)
+    pending_seq, \* Sequence number assigned to the pending batch
+    confirmed_seq, \* Last sequence number confirmed delivered
+    flushing, \* TRUE when a signal send is in-flight
+
+    (* === Network state === *)
+    delivered, \* TRUE if the current in-flight signal reached the workflow
+
+    (* === Workflow state === *)
+    wf_log, \* Append-only log of item IDs
+    wf_last_seq, \* Highest accepted sequence for this publisher
+
+    (* === Bookkeeping === *)
+    item_counter \* Monotonic counter for generating unique item IDs
+
+vars == <<buffer, pending, pending_seq, confirmed_seq, flushing,
+          delivered, wf_log, wf_last_seq, item_counter>>
+
+------------------------------------------------------------------------
+(* Initial state *)
+
+Init ==
+    /\ buffer = <<>>
+    /\ pending = <<>>
+    /\ pending_seq = 0
+    /\ confirmed_seq = 0
+    /\ flushing = FALSE
+    /\ delivered = FALSE
+    /\ wf_log = <<>>
+    /\ wf_last_seq = 0
+    /\ item_counter = 0
+
+------------------------------------------------------------------------ +(* Client actions *) + +\* Publish a new item into the buffer. +\* Can happen at any time, including while a flush is in-flight. +\* This models the buffer swap: new items go to the fresh buffer, +\* not the pending batch. +Publish == + /\ item_counter < MaxItems + /\ item_counter' = item_counter + 1 + /\ buffer' = Append(buffer, item_counter + 1) + /\ UNCHANGED <> + +\* Start a flush attempt. +\* - If there is a pending batch (from a prior failure), retry it. +\* - Otherwise, swap buffer into pending with a new sequence number. +\* - If nothing to send, this action is not enabled. +StartFlush == + /\ ~flushing + /\ \/ (* Case 1: retry a failed batch *) + /\ pending /= <<>> + /\ flushing' = TRUE + /\ delivered' = FALSE + /\ UNCHANGED <> + \/ (* Case 2: new batch from buffer *) + /\ pending = <<>> + /\ buffer /= <<>> + /\ pending' = buffer + /\ buffer' = <<>> + /\ pending_seq' = confirmed_seq + 1 + /\ flushing' = TRUE + /\ delivered' = FALSE + /\ UNCHANGED <> + +------------------------------------------------------------------------ +(* Network / Workflow actions *) + +\* The signal reaches the workflow. The workflow applies dedup logic: +\* - If pending_seq > wf_last_seq: accept (append items, update last_seq) +\* - Otherwise: reject (duplicate) +\* +\* This may or may not happen before the client observes a result. +\* Non-determinism is captured by allowing Deliver to fire or not. +Deliver == + /\ flushing + /\ ~delivered + /\ IF pending_seq > wf_last_seq + THEN /\ wf_log' = wf_log \o pending + /\ wf_last_seq' = pending_seq + ELSE /\ UNCHANGED <> + /\ delivered' = TRUE + /\ UNCHANGED <> + +------------------------------------------------------------------------ +(* Client observes result *) + +\* Client sees success. This can only happen if the signal was delivered +\* (you cannot get a success response for an undelivered signal). 
+FlushSuccess == + /\ flushing + /\ delivered + /\ flushing' = FALSE + /\ confirmed_seq' = pending_seq + /\ pending' = <<>> + /\ pending_seq' = 0 + /\ UNCHANGED <> + +\* Client sees failure. The signal may or may not have been delivered. +\* Pending batch and sequence are kept for retry. +FlushFail == + /\ flushing + /\ flushing' = FALSE + /\ UNCHANGED <> + +------------------------------------------------------------------------ +(* State machine *) + +Next == + \/ Publish + \/ StartFlush + \/ Deliver + \/ FlushSuccess + \/ FlushFail + +Spec == Init /\ [][Next]_vars + +\* Fairness: under weak fairness, every continuously enabled action +\* eventually executes. This ensures the system makes progress. +Fairness == + /\ WF_vars(StartFlush) + /\ WF_vars(Deliver) + /\ WF_vars(FlushSuccess) + /\ WF_vars(FlushFail) + +FairSpec == Spec /\ Fairness + +------------------------------------------------------------------------ +(* Safety properties *) + +\* Every item ID in wf_log is unique — no duplicates. +NoDuplicates == + \A i, j \in 1..Len(wf_log) : + (i /= j) => (wf_log[i] /= wf_log[j]) + +\* Global ordering: items appear in the log in the order they were +\* published (ascending item IDs). This is stronger than within-batch +\* ordering — it covers cross-batch ordering too. +\* +\* This holds because: +\* 1. Publish appends item_counter+1 (monotonically increasing) +\* 2. StartFlush moves the entire buffer to pending (preserving order) +\* 3. Deliver appends the entire pending sequence (preserving order) +\* 4. Retries re-send the same pending (same order), and dedup +\* means the log only contains one copy +\* 5. The flush lock serializes batches, so batch N's items all +\* have lower IDs than batch N+1's items +OrderPreserved == + \A i, j \in 1..Len(wf_log) : + (i < j) => (wf_log[i] < wf_log[j]) + +------------------------------------------------------------------------ +(* Liveness properties *) + +\* Every published item eventually appears in the workflow log. 
+\* This requires fairness (otherwise the system can stutter forever). +\* +\* Stated as: it is always the case that eventually all published items +\* are in the log (assuming the system keeps running). +AllItemsDelivered == + <>(\A id \in 1..item_counter : + \E i \in 1..Len(wf_log) : wf_log[i] = id) + +\* The system does not deadlock: some action is always enabled. +\* (Not strictly a liveness property but useful to check.) +NoDeadlock == + \/ item_counter < MaxItems \* Can still publish + \/ buffer /= <<>> \* Can flush + \/ pending /= <<>> \* Can retry + \/ flushing \* Waiting for network result + +======================================================================== diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupBroken.cfg b/temporalio/contrib/pubsub/verification/PubSubDedupBroken.cfg new file mode 100644 index 000000000..7a376151d --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupBroken.cfg @@ -0,0 +1,10 @@ +SPECIFICATION FairSpec + +CONSTANTS + MaxItems = 4 + +INVARIANTS + NoDuplicates + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupBroken.tla b/temporalio/contrib/pubsub/verification/PubSubDedupBroken.tla new file mode 100644 index 000000000..43475b417 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupBroken.tla @@ -0,0 +1,120 @@ +------------------------ MODULE PubSubDedupBroken ------------------------- +(* + * BROKEN version of the dedup protocol: advances sequence on failure + * and restores items to the main buffer. + * + * This models the OLD algorithm. TLC should find a NoDuplicates or + * data loss violation, confirming the bug that motivated the redesign. 
+ * + * The broken behavior: + * - On failure: restore items to buffer, advance sequence anyway + * - Next flush merges restored + new items under a new sequence + * - If the original signal WAS delivered, the merged batch creates + * duplicates (original items appear twice in the log) + *) +EXTENDS Integers, Sequences, FiniteSets + +CONSTANTS + MaxItems + +VARIABLES + buffer, + confirmed_seq, + flushing, + in_flight_batch, \* The batch currently being sent + in_flight_seq, \* Its sequence number + delivered, + wf_log, + wf_last_seq, + item_counter + +vars == <> + +Init == + /\ buffer = <<>> + /\ confirmed_seq = 0 + /\ flushing = FALSE + /\ in_flight_batch = <<>> + /\ in_flight_seq = 0 + /\ delivered = FALSE + /\ wf_log = <<>> + /\ wf_last_seq = 0 + /\ item_counter = 0 + +Publish == + /\ item_counter < MaxItems + /\ item_counter' = item_counter + 1 + /\ buffer' = Append(buffer, item_counter + 1) + /\ UNCHANGED <> + +\* BROKEN: always takes from buffer (no separate pending/retry) +StartFlush == + /\ ~flushing + /\ buffer /= <<>> + /\ in_flight_seq' = confirmed_seq + 1 + /\ in_flight_batch' = buffer + /\ buffer' = <<>> + /\ flushing' = TRUE + /\ delivered' = FALSE + /\ UNCHANGED <> + +Deliver == + /\ flushing + /\ ~delivered + /\ IF in_flight_seq > wf_last_seq + THEN /\ wf_log' = wf_log \o in_flight_batch + /\ wf_last_seq' = in_flight_seq + ELSE /\ UNCHANGED <> + /\ delivered' = TRUE + /\ UNCHANGED <> + +FlushSuccess == + /\ flushing + /\ delivered + /\ flushing' = FALSE + /\ confirmed_seq' = in_flight_seq + /\ in_flight_batch' = <<>> + /\ in_flight_seq' = 0 + /\ UNCHANGED <> + +\* BROKEN: On failure, restore items to front of buffer AND advance sequence. +\* This is the bug: if the signal was delivered, the next flush will +\* re-send these items under a new sequence, creating duplicates. 
+FlushFail == + /\ flushing + /\ flushing' = FALSE + /\ confirmed_seq' = in_flight_seq \* <-- BUG: advance anyway + /\ buffer' = in_flight_batch \o buffer \* <-- BUG: restore to buffer + /\ in_flight_batch' = <<>> + /\ in_flight_seq' = 0 + /\ UNCHANGED <> + +Next == + \/ Publish + \/ StartFlush + \/ Deliver + \/ FlushSuccess + \/ FlushFail + +Spec == Init /\ [][Next]_vars + +Fairness == + /\ WF_vars(StartFlush) + /\ WF_vars(Deliver) + /\ WF_vars(FlushSuccess) + /\ WF_vars(FlushFail) + +FairSpec == Spec /\ Fairness + +NoDuplicates == + \A i, j \in 1..Len(wf_log) : + (i /= j) => (wf_log[i] /= wf_log[j]) + +AllItemsDelivered == + <>(\A id \in 1..item_counter : + \E i \in 1..Len(wf_log) : wf_log[i] = id) + +======================================================================== diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.bin b/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.bin new file mode 100644 index 000000000..0d1676142 Binary files /dev/null and b/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.bin differ diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.tla b/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.tla new file mode 100644 index 000000000..e130026cb --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupBroken_TTrace_1775536423.tla @@ -0,0 +1,187 @@ +---- MODULE PubSubDedupBroken_TTrace_1775536423 ---- +EXTENDS Sequences, TLCExt, Toolbox, Naturals, TLC, PubSubDedupBroken + +_expression == + LET PubSubDedupBroken_TEExpression == INSTANCE PubSubDedupBroken_TEExpression + IN PubSubDedupBroken_TEExpression!expression +---- + +_trace == + LET PubSubDedupBroken_TETrace == INSTANCE PubSubDedupBroken_TETrace + IN PubSubDedupBroken_TETrace!trace +---- + +_inv == + ~( + TLCGet("level") = Len(_TETrace) + /\ + item_counter = (4) + /\ + in_flight_batch = (<<1, 2, 3, 4>>) + /\ + wf_last_seq = 
(2) + /\ + delivered = (TRUE) + /\ + flushing = (TRUE) + /\ + buffer = (<<>>) + /\ + in_flight_seq = (2) + /\ + wf_log = (<<1, 1, 2, 3, 4>>) + /\ + confirmed_seq = (1) + ) +---- + +_init == + /\ wf_log = _TETrace[1].wf_log + /\ flushing = _TETrace[1].flushing + /\ in_flight_batch = _TETrace[1].in_flight_batch + /\ in_flight_seq = _TETrace[1].in_flight_seq + /\ buffer = _TETrace[1].buffer + /\ item_counter = _TETrace[1].item_counter + /\ confirmed_seq = _TETrace[1].confirmed_seq + /\ wf_last_seq = _TETrace[1].wf_last_seq + /\ delivered = _TETrace[1].delivered +---- + +_next == + /\ \E i,j \in DOMAIN _TETrace: + /\ \/ /\ j = i + 1 + /\ i = TLCGet("level") + /\ wf_log = _TETrace[i].wf_log + /\ wf_log' = _TETrace[j].wf_log + /\ flushing = _TETrace[i].flushing + /\ flushing' = _TETrace[j].flushing + /\ in_flight_batch = _TETrace[i].in_flight_batch + /\ in_flight_batch' = _TETrace[j].in_flight_batch + /\ in_flight_seq = _TETrace[i].in_flight_seq + /\ in_flight_seq' = _TETrace[j].in_flight_seq + /\ buffer = _TETrace[i].buffer + /\ buffer' = _TETrace[j].buffer + /\ item_counter = _TETrace[i].item_counter + /\ item_counter' = _TETrace[j].item_counter + /\ confirmed_seq = _TETrace[i].confirmed_seq + /\ confirmed_seq' = _TETrace[j].confirmed_seq + /\ wf_last_seq = _TETrace[i].wf_last_seq + /\ wf_last_seq' = _TETrace[j].wf_last_seq + /\ delivered = _TETrace[i].delivered + /\ delivered' = _TETrace[j].delivered + +\* Uncomment the ASSUME below to write the states of the error trace +\* to the given file in Json format. Note that you can pass any tuple +\* to `JsonSerialize`. For example, a sub-sequence of _TETrace. 
+ \* ASSUME + \* LET J == INSTANCE Json + \* IN J!JsonSerialize("PubSubDedupBroken_TTrace_1775536423.json", _TETrace) + +============================================================================= + + Note that you can extract this module `PubSubDedupBroken_TEExpression` + to a dedicated file to reuse `expression` (the module in the + dedicated `PubSubDedupBroken_TEExpression.tla` file takes precedence + over the module `PubSubDedupBroken_TEExpression` below). + +---- MODULE PubSubDedupBroken_TEExpression ---- +EXTENDS Sequences, TLCExt, Toolbox, Naturals, TLC, PubSubDedupBroken + +expression == + [ + \* To hide variables of the `PubSubDedupBroken` spec from the error trace, + \* remove the variables below. The trace will be written in the order + \* of the fields of this record. + wf_log |-> wf_log + ,flushing |-> flushing + ,in_flight_batch |-> in_flight_batch + ,in_flight_seq |-> in_flight_seq + ,buffer |-> buffer + ,item_counter |-> item_counter + ,confirmed_seq |-> confirmed_seq + ,wf_last_seq |-> wf_last_seq + ,delivered |-> delivered + + \* Put additional constant-, state-, and action-level expressions here: + \* ,_stateNumber |-> _TEPosition + \* ,_wf_logUnchanged |-> wf_log = wf_log' + + \* Format the `wf_log` variable as Json value. + \* ,_wf_logJson |-> + \* LET J == INSTANCE Json + \* IN J!ToJson(wf_log) + + \* Lastly, you may build expressions over arbitrary sets of states by + \* leveraging the _TETrace operator. For example, this is how to + \* count the number of times a spec variable changed up to the current + \* state in the trace. + \* ,_wf_logModCount |-> + \* LET F[s \in DOMAIN _TETrace] == + \* IF s = 1 THEN 0 + \* ELSE IF _TETrace[s].wf_log # _TETrace[s-1].wf_log + \* THEN 1 + F[s-1] ELSE F[s-1] + \* IN F[_TEPosition - 1] + ] + +============================================================================= + + + +Parsing and semantic processing can take forever if the trace below is long. 
+ In this case, it is advised to uncomment the module below to deserialize the + trace from a generated binary file. + +\* +\*---- MODULE PubSubDedupBroken_TETrace ---- +\*EXTENDS IOUtils, TLC, PubSubDedupBroken +\* +\*trace == IODeserialize("PubSubDedupBroken_TTrace_1775536423.bin", TRUE) +\* +\*============================================================================= +\* + +---- MODULE PubSubDedupBroken_TETrace ---- +EXTENDS TLC, PubSubDedupBroken + +trace == + << + ([item_counter |-> 0,in_flight_batch |-> <<>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<>>,in_flight_seq |-> 0,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 1,in_flight_batch |-> <<>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<1>>,in_flight_seq |-> 0,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 1,in_flight_batch |-> <<1>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<>>,in_flight_seq |-> 1,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 2,in_flight_batch |-> <<1>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<2>>,in_flight_seq |-> 1,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 3,in_flight_batch |-> <<1>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<2, 3>>,in_flight_seq |-> 1,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,in_flight_batch |-> <<1>>,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<2, 3, 4>>,in_flight_seq |-> 1,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,in_flight_batch |-> <<1>>,wf_last_seq |-> 1,delivered |-> TRUE,flushing |-> TRUE,buffer |-> <<2, 3, 4>>,in_flight_seq |-> 1,wf_log |-> <<1>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,in_flight_batch |-> <<>>,wf_last_seq |-> 1,delivered |-> TRUE,flushing |-> FALSE,buffer |-> <<1, 2, 3, 4>>,in_flight_seq |-> 0,wf_log |-> <<1>>,confirmed_seq |-> 1]), + ([item_counter |-> 4,in_flight_batch |-> <<1, 2, 3, 
4>>,wf_last_seq |-> 1,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<>>,in_flight_seq |-> 2,wf_log |-> <<1>>,confirmed_seq |-> 1]), + ([item_counter |-> 4,in_flight_batch |-> <<1, 2, 3, 4>>,wf_last_seq |-> 2,delivered |-> TRUE,flushing |-> TRUE,buffer |-> <<>>,in_flight_seq |-> 2,wf_log |-> <<1, 1, 2, 3, 4>>,confirmed_seq |-> 1]) + >> +---- + + +============================================================================= + +---- CONFIG PubSubDedupBroken_TTrace_1775536423 ---- +CONSTANTS + MaxItems = 4 + +INVARIANT + _inv + +CHECK_DEADLOCK + \* CHECK_DEADLOCK off because of PROPERTY or INVARIANT above. + FALSE + +INIT + _init + +NEXT + _next + +CONSTANT + _TETrace <- _trace + +ALIAS + _expression +============================================================================= +\* Generated on Mon Apr 06 21:33:43 PDT 2026 \ No newline at end of file diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupInductive.cfg b/temporalio/contrib/pubsub/verification/PubSubDedupInductive.cfg new file mode 100644 index 000000000..789d9e80d --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupInductive.cfg @@ -0,0 +1,25 @@ +\* Verify IndInv holds for all reachable states of the standard spec. +\* +\* This checks: +\* 1. Init => IndInv +\* 2. IndInv is preserved along all reachable behaviors +\* +\* This is reachable-state invariant checking, not full inductiveness +\* checking (which would require IndSpec with all IndInv states as +\* initial states — not feasible with TLC for sequence-valued state). +\* The per-action proof sketch in the .tla file argues inductiveness +\* informally. Since the invariant's clauses are structural relationships +\* between containers — not functions of MaxItems — verification at +\* small N gives high confidence in the general case. 
+ +SPECIFICATION Spec + +CONSTANTS + MaxItems = 6 + +INVARIANTS + IndInv + OrderPreserved + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupInductive.tla b/temporalio/contrib/pubsub/verification/PubSubDedupInductive.tla new file mode 100644 index 000000000..ddf5787c6 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupInductive.tla @@ -0,0 +1,244 @@ +---------------------- MODULE PubSubDedupInductive ------------------------- +(* + * Inductive invariant for the pub/sub dedup protocol. + * + * A strengthened invariant that implies NoDuplicates. If IndInv is + * preserved by every action (i.e., it is inductive), then NoDuplicates + * holds for ALL reachable states regardless of MaxItems. + * + * TLC checks IndInv as a reachable-state invariant of the standard + * Spec (Init /\ [][Next]_vars). This verifies Init => IndInv and + * preservation along all reachable behaviors, but does not check + * inductiveness from arbitrary IndInv states (which would require + * enumerating all sequence-valued states satisfying IndInv — not + * feasible with TLC). The per-action proof sketch below argues + * inductiveness informally. + * + * Proof sketch for each action preserving IndInv: + * + * Publish: Adds item_counter+1 (fresh, not in any container). + * All uniqueness/disjointness clauses preserved since the new + * item is unique. item_counter increments, keeping Bounded. + * + * StartFlush (retry): pending/buffer/wf_log unchanged. + * Only flushing and delivered change. All structural properties + * preserved trivially. + * + * StartFlush (new): Moves buffer -> pending, buffer becomes <<>>. + * pending_seq = confirmed_seq + 1. By SeqConsistency, + * pending = <<>> before this step implies confirmed_seq = wf_last_seq, + * so pending_seq = wf_last_seq + 1 > wf_last_seq. Since buffer was + * Disjoint from wf_log (by BufferDisjointLog), pending is now + * Disjoint from wf_log. Buffer uniqueness transfers to pending. 
+ *
+ * Deliver (accepted, pending_seq > wf_last_seq): Appends pending
+ * to wf_log. By PendingLogRelation, pending is Disjoint from
+ * wf_log. Combined with NoDuplicates and PendingUnique, the
+ * extended log has no duplicates. Sets wf_last_seq = pending_seq,
+ * so now pending_seq <= wf_last_seq, and SubsetWhenDelivered
+ * is satisfied (pending items are in the new wf_log).
+ *
+ * Deliver (rejected, pending_seq <= wf_last_seq): wf_log unchanged.
+ * All properties trivially preserved.
+ *
+ * FlushSuccess: Sets pending = <<>>, confirmed_seq = pending_seq.
+ * Since Deliver already set wf_last_seq = pending_seq, we get
+ * confirmed_seq = wf_last_seq, satisfying SeqConsistency.
+ * Clearing pending satisfies all pending-related clauses vacuously.
+ *
+ * FlushFail: Only sets flushing = FALSE. All structural state
+ * (buffer, pending, wf_log, sequences) unchanged.
+ *)
+EXTENDS Integers, Sequences, FiniteSets
+
+CONSTANTS
+    MaxItems
+
+VARIABLES
+    buffer, pending, pending_seq, confirmed_seq, flushing,
+    delivered, wf_log, wf_last_seq, item_counter
+
+vars == <<buffer, pending, pending_seq, confirmed_seq, flushing,
+          delivered, wf_log, wf_last_seq, item_counter>>
+
+------------------------------------------------------------------------
+(* Import the protocol definition *)
+
+Init ==
+    /\ buffer = <<>>
+    /\ pending = <<>>
+    /\ pending_seq = 0
+    /\ confirmed_seq = 0
+    /\ flushing = FALSE
+    /\ delivered = FALSE
+    /\ wf_log = <<>>
+    /\ wf_last_seq = 0
+    /\ item_counter = 0
+
+Publish ==
+    /\ item_counter < MaxItems
+    /\ item_counter' = item_counter + 1
+    /\ buffer' = Append(buffer, item_counter + 1)
+    /\ UNCHANGED <<pending, pending_seq, confirmed_seq, flushing,
+                   delivered, wf_log, wf_last_seq>>
+
+StartFlush ==
+    /\ ~flushing
+    /\ \/ /\ pending /= <<>>
+          /\ flushing' = TRUE
+          /\ delivered' = FALSE
+          /\ UNCHANGED <<buffer, pending, pending_seq, confirmed_seq,
+                         wf_log, wf_last_seq, item_counter>>
+       \/ /\ pending = <<>>
+          /\ buffer /= <<>>
+          /\ pending' = buffer
+          /\ buffer' = <<>>
+          /\ pending_seq' = confirmed_seq + 1
+          /\ flushing' = TRUE
+          /\ delivered' = FALSE
+          /\ UNCHANGED <<confirmed_seq, wf_log, wf_last_seq, item_counter>>
+
+Deliver ==
+    /\ flushing
+    /\ ~delivered
+    /\ IF pending_seq > wf_last_seq
+       THEN /\ wf_log' = wf_log \o pending
+            /\ wf_last_seq' = 
pending_seq
+       ELSE /\ UNCHANGED <<wf_log, wf_last_seq>>
+    /\ delivered' = TRUE
+    /\ UNCHANGED <<buffer, pending, pending_seq, confirmed_seq,
+                   flushing, item_counter>>
+
+FlushSuccess ==
+    /\ flushing
+    /\ delivered
+    /\ flushing' = FALSE
+    /\ confirmed_seq' = pending_seq
+    /\ pending' = <<>>
+    /\ pending_seq' = 0
+    /\ UNCHANGED <<buffer, delivered, wf_log, wf_last_seq, item_counter>>
+
+FlushFail ==
+    /\ flushing
+    /\ flushing' = FALSE
+    /\ UNCHANGED <<buffer, pending, pending_seq, confirmed_seq,
+                   delivered, wf_log, wf_last_seq, item_counter>>
+
+Next ==
+    \/ Publish
+    \/ StartFlush
+    \/ Deliver
+    \/ FlushSuccess
+    \/ FlushFail
+
+------------------------------------------------------------------------
+(* Helper operators *)
+
+\* Set of elements in a sequence
+SeqToSet(s) == {s[i] : i \in 1..Len(s)}
+
+\* All elements of a sequence are distinct
+Unique(s) ==
+    \A i, j \in 1..Len(s) : (i /= j) => (s[i] /= s[j])
+
+\* Two sequences share no elements
+Disjoint(s1, s2) ==
+    SeqToSet(s1) \cap SeqToSet(s2) = {}
+
+\* All elements of s1 appear in s2
+IsSubseq(s1, s2) ==
+    SeqToSet(s1) \subseteq SeqToSet(s2)
+
+------------------------------------------------------------------------
+(* The inductive invariant *)
+
+IndInv ==
+    (* --- Uniqueness within each container --- *)
+    \* C1: No duplicates in the workflow log
+    /\ Unique(wf_log)
+    \* C2: No duplicates in the buffer
+    /\ Unique(buffer)
+    \* C3: No duplicates in the pending batch
+    /\ Unique(pending)
+
+    (* --- Disjointness between containers --- *)
+    \* C4: Buffer items are not in the pending batch
+    /\ Disjoint(buffer, pending)
+    \* C5: Buffer items are not in the log
+    /\ Disjoint(buffer, wf_log)
+
+    (* --- Pending-log relationship (key dedup property) --- *)
+    \* C6: If pending hasn't been delivered yet, its items are not in the log
+    /\ (pending /= <<>> /\ pending_seq > wf_last_seq)
+        => Disjoint(pending, wf_log)
+    \* C7: If pending WAS already delivered, its items are in the log
+    \* (so a re-delivery would be a no-op)
+    /\ (pending /= <<>> /\ pending_seq <= wf_last_seq)
+        => IsSubseq(pending, wf_log)
+
+    (* --- Sequence consistency --- *)
+    \* C8: confirmed_seq never exceeds wf_last_seq
+    /\ confirmed_seq <= wf_last_seq
+    \* C9: When no 
pending batch, confirmed and wf sequences are in sync. + \* This ensures StartFlush (new) always produces pending_seq > wf_last_seq. + /\ (pending = <<>>) => (confirmed_seq = wf_last_seq) + \* C10: pending_seq is 0 iff pending is empty + /\ (pending = <<>>) <=> (pending_seq = 0) + \* C11: pending_seq is bounded by confirmed_seq + 1 + /\ (pending /= <<>>) => (pending_seq = confirmed_seq + 1) + + (* --- Item ID bounds --- *) + \* C12: All item IDs are in 1..item_counter + /\ \A i \in 1..Len(wf_log) : wf_log[i] \in 1..item_counter + /\ \A i \in 1..Len(buffer) : buffer[i] \in 1..item_counter + /\ \A i \in 1..Len(pending) : pending[i] \in 1..item_counter + + (* --- Non-negative sequences --- *) + /\ confirmed_seq >= 0 + /\ wf_last_seq >= 0 + /\ item_counter >= 0 + +------------------------------------------------------------------------ +(* Safety properties implied by IndInv *) + +NoDuplicates == Unique(wf_log) +THEOREM IndInv => NoDuplicates \* Trivially: NoDuplicates is conjunct C1 + +\* Global ordering: items appear in ascending order of their IDs. +\* This follows from C12 (bounded IDs), C1 (unique), and the fact that +\* Publish assigns monotonically increasing IDs, StartFlush preserves +\* buffer order, and Deliver appends in order. +OrderPreserved == + \A i, j \in 1..Len(wf_log) : + (i < j) => (wf_log[i] < wf_log[j]) + +------------------------------------------------------------------------ +(* Specification for checking inductiveness: + * Initial states = ALL states satisfying IndInv (within type bounds). + * If IndInv is an invariant of this spec, then IndInv is inductive. 
*) + +\* Type constraint to bound the state space for TLC +TypeOK == + /\ item_counter \in 0..MaxItems + /\ confirmed_seq \in 0..MaxItems + /\ wf_last_seq \in 0..MaxItems + /\ pending_seq \in 0..MaxItems + /\ flushing \in BOOLEAN + /\ delivered \in BOOLEAN + /\ Len(buffer) <= MaxItems + /\ Len(pending) <= MaxItems + /\ Len(wf_log) <= MaxItems \* Conservative bound for TLC state enumeration + /\ \A i \in 1..Len(buffer) : buffer[i] \in 1..MaxItems + /\ \A i \in 1..Len(pending) : pending[i] \in 1..MaxItems + /\ \A i \in 1..Len(wf_log) : wf_log[i] \in 1..MaxItems + +\* For inductiveness checking: all IndInv states as initial states +IndInit == TypeOK /\ IndInv + +\* The inductiveness-checking specification +IndSpec == IndInit /\ [][Next]_vars + +\* The standard specification (for reference) +Spec == Init /\ [][Next]_vars + +======================================================================== diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL.tla b/temporalio/contrib/pubsub/verification/PubSubDedupTTL.tla new file mode 100644 index 000000000..d105cc391 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupTTL.tla @@ -0,0 +1,203 @@ +--------------------------- MODULE PubSubDedupTTL -------------------------- +(* + * Verification of TTL-based pruning of publisher dedup entries. + * + * When a workflow continues-as-new, it can prune stale publisher_sequences + * entries to bound memory. This spec verifies: + * + * 1. UNSAFE pruning (prune any publisher at any time) allows duplicates. + * TLC finds the counterexample. + * + * 2. SAFE pruning (prune only publishers with no pending batch) preserves + * NoDuplicates. This models the real constraint: TTL must exceed the + * maximum time a publisher might retry a failed flush. + * + * The spec models two publishers (A and B) sharing a single workflow log. + * Each publisher has independent buffer/pending/sequence state. The workflow + * tracks per-publisher last_seq in a function. 
+ *
+ * The pruning action models what happens during continue-as-new when a
+ * publisher's TTL has expired: the workflow "forgets" that publisher's
+ * last_seq, resetting it to 0.
+ *)
+EXTENDS Integers, Sequences, FiniteSets
+
+CONSTANTS
+    MaxItemsPerPub   \* Max items each publisher can create
+
+Publishers == {"A", "B"}
+
+VARIABLES
+    (* === Per-publisher client state === *)
+    buf,             \* buf[p]: buffer for publisher p
+    pend,            \* pend[p]: pending batch for publisher p
+    pend_seq,        \* pend_seq[p]: sequence of pending batch
+    conf_seq,        \* conf_seq[p]: last confirmed sequence
+    flush_active,    \* flush_active[p]: TRUE when flush in-flight
+    delivered_flag,  \* delivered_flag[p]: TRUE if current signal delivered
+
+    (* === Workflow state === *)
+    wf_log,          \* Shared append-only log
+    wf_last,         \* wf_last[p]: last accepted seq for publisher p
+
+    (* === Bookkeeping === *)
+    ctr              \* ctr[p]: item counter per publisher
+
+vars == <<buf, pend, pend_seq, conf_seq, flush_active, delivered_flag,
+          wf_log, wf_last, ctr>>
+
+------------------------------------------------------------------------
+(* Initial state *)
+
+Init ==
+    /\ buf = [p \in Publishers |-> <<>>]
+    /\ pend = [p \in Publishers |-> <<>>]
+    /\ pend_seq = [p \in Publishers |-> 0]
+    /\ conf_seq = [p \in Publishers |-> 0]
+    /\ flush_active = [p \in Publishers |-> FALSE]
+    /\ delivered_flag = [p \in Publishers |-> FALSE]
+    /\ wf_log = <<>>
+    /\ wf_last = [p \in Publishers |-> 0]
+    /\ ctr = [p \in Publishers |-> 0]
+
+------------------------------------------------------------------------
+(* Per-publisher actions, parameterized by publisher p *)
+
+\* Unique item IDs: publisher A gets odd numbers, B gets even numbers.
+\* This ensures global uniqueness without a shared counter.
+ItemId(p, n) ==
+    IF p = "A" THEN 2 * n - 1 ELSE 2 * n
+
+Publish(p) ==
+    /\ ctr[p] < MaxItemsPerPub
+    /\ ctr' = [ctr EXCEPT ![p] = @ + 1]
+    /\ buf' = [buf EXCEPT ![p] = Append(@, ItemId(p, ctr[p] + 1))]
+    /\ UNCHANGED <<pend, pend_seq, conf_seq, flush_active, delivered_flag,
+                   wf_log, wf_last>>
+
+StartFlush(p) ==
+    /\ ~flush_active[p]
+    /\ \/ (* Retry *)
+          /\ pend[p] /= <<>>
+          /\ flush_active' = [flush_active EXCEPT ![p] = TRUE]
+          /\ delivered_flag' = [delivered_flag EXCEPT ![p] = FALSE]
+          /\ UNCHANGED <<buf, pend, pend_seq, conf_seq, wf_log, wf_last, ctr>>
+       \/ (* New batch *)
+          /\ pend[p] = <<>>
+          /\ buf[p] /= <<>>
+          /\ pend' = [pend EXCEPT ![p] = buf[p]]
+          /\ buf' = [buf EXCEPT ![p] = <<>>]
+          /\ pend_seq' = [pend_seq EXCEPT ![p] = conf_seq[p] + 1]
+          /\ flush_active' = [flush_active EXCEPT ![p] = TRUE]
+          /\ delivered_flag' = [delivered_flag EXCEPT ![p] = FALSE]
+          /\ UNCHANGED <<conf_seq, wf_log, wf_last, ctr>>
+
+Deliver(p) ==
+    /\ flush_active[p]
+    /\ ~delivered_flag[p]
+    /\ IF pend_seq[p] > wf_last[p]
+       THEN /\ wf_log' = wf_log \o pend[p]
+            /\ wf_last' = [wf_last EXCEPT ![p] = pend_seq[p]]
+       ELSE /\ UNCHANGED <<wf_log, wf_last>>
+    /\ delivered_flag' = [delivered_flag EXCEPT ![p] = TRUE]
+    /\ UNCHANGED <<buf, pend, pend_seq, conf_seq, flush_active, ctr>>
+
+FlushSuccess(p) ==
+    /\ flush_active[p]
+    /\ delivered_flag[p]
+    /\ flush_active' = [flush_active EXCEPT ![p] = FALSE]
+    /\ conf_seq' = [conf_seq EXCEPT ![p] = pend_seq[p]]
+    /\ pend' = [pend EXCEPT ![p] = <<>>]
+    /\ pend_seq' = [pend_seq EXCEPT ![p] = 0]
+    /\ UNCHANGED <<buf, delivered_flag, wf_log, wf_last, ctr>>
+
+FlushFail(p) ==
+    /\ flush_active[p]
+    /\ flush_active' = [flush_active EXCEPT ![p] = FALSE]
+    /\ UNCHANGED <<buf, pend, pend_seq, conf_seq, delivered_flag,
+                   wf_log, wf_last, ctr>>
+
+------------------------------------------------------------------------
+(* TTL Pruning actions *)
+
+\* UNSAFE: Prune any publisher's dedup entry at any time.
+\* This models setting TTL too short — the publisher might still retry.
+PruneUnsafe(p) ==
+    /\ wf_last[p] > 0   \* Has a dedup entry to prune
+    /\ wf_last' = [wf_last EXCEPT ![p] = 0]
+    /\ UNCHANGED <<buf, pend, pend_seq, conf_seq, flush_active,
+                   delivered_flag, wf_log, ctr>>
+
+\* SAFE: Prune only when the publisher has no pending batch.
+\* This models the correct TTL constraint: the publisher has finished
+\* all retries before the entry is pruned. 
In practice, this means
+\* TTL > max activity/client lifetime.
+PruneSafe(p) ==
+    /\ wf_last[p] > 0      \* Has a dedup entry to prune
+    /\ pend[p] = <<>>      \* Publisher has no in-flight batch
+    /\ ~flush_active[p]    \* Not currently flushing
+    /\ wf_last' = [wf_last EXCEPT ![p] = 0]
+    /\ UNCHANGED <<buf, pend, pend_seq, conf_seq, flush_active,
+                   delivered_flag, wf_log, ctr>>
+
+------------------------------------------------------------------------
+(* Specifications *)
+
+\* Base actions (no pruning) — for verifying the multi-publisher protocol
+BaseNext ==
+    \E p \in Publishers :
+        \/ Publish(p)
+        \/ StartFlush(p)
+        \/ Deliver(p)
+        \/ FlushSuccess(p)
+        \/ FlushFail(p)
+
+\* With unsafe pruning — should FAIL NoDuplicates
+UnsafeNext ==
+    \/ BaseNext
+    \/ \E p \in Publishers : PruneUnsafe(p)
+
+\* With safe pruning — should PASS NoDuplicates
+SafeNext ==
+    \/ BaseNext
+    \/ \E p \in Publishers : PruneSafe(p)
+
+BaseSpec == Init /\ [][BaseNext]_vars
+UnsafeSpec == Init /\ [][UnsafeNext]_vars
+SafeSpec == Init /\ [][SafeNext]_vars
+
+\* Fairness for liveness checking
+BaseFairness ==
+    \A p \in Publishers :
+        /\ WF_vars(StartFlush(p))
+        /\ WF_vars(Deliver(p))
+        /\ WF_vars(FlushSuccess(p))
+        /\ WF_vars(FlushFail(p))
+
+BaseFairSpec == BaseSpec /\ BaseFairness
+SafeFairSpec == SafeSpec /\ BaseFairness
+
+------------------------------------------------------------------------
+(* Properties *)
+
+NoDuplicates ==
+    \A i, j \in 1..Len(wf_log) :
+        (i /= j) => (wf_log[i] /= wf_log[j])
+
+OrderPreservedPerPublisher ==
+    \* Within each publisher's items, order is preserved.
+    \* (Global order across publishers is non-deterministic.)
+ \A p \in Publishers : + \A i, j \in 1..Len(wf_log) : + /\ wf_log[i] \in {ItemId(p, n) : n \in 1..MaxItemsPerPub} + /\ wf_log[j] \in {ItemId(p, n) : n \in 1..MaxItemsPerPub} + /\ i < j + => wf_log[i] < wf_log[j] + +\* All published items eventually appear in the log (under fairness) +AllItemsDelivered == + <>(\A p \in Publishers : + \A n \in 1..ctr[p] : + \E i \in 1..Len(wf_log) : wf_log[i] = ItemId(p, n)) + +======================================================================== diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Base.cfg b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Base.cfg new file mode 100644 index 000000000..55b378e2e --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Base.cfg @@ -0,0 +1,17 @@ +\* Multi-publisher protocol without pruning. +\* Verifies NoDuplicates and OrderPreservedPerPublisher. + +SPECIFICATION BaseFairSpec + +CONSTANTS + MaxItemsPerPub = 2 + +INVARIANTS + NoDuplicates + OrderPreservedPerPublisher + +PROPERTIES + AllItemsDelivered + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Safe.cfg b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Safe.cfg new file mode 100644 index 000000000..04dd20c9c --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Safe.cfg @@ -0,0 +1,17 @@ +\* Safe pruning: prune only when publisher has no pending batch and is not flushing. +\* Should PASS NoDuplicates — confirms the TTL safety constraint. 
+ +SPECIFICATION SafeFairSpec + +CONSTANTS + MaxItemsPerPub = 2 + +INVARIANTS + NoDuplicates + OrderPreservedPerPublisher + +PROPERTIES + AllItemsDelivered + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.bin b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.bin new file mode 100644 index 000000000..4f2c39ea0 Binary files /dev/null and b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.bin differ diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.tla b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.tla new file mode 100644 index 000000000..ee25c0a00 --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_TTrace_1775536996.tla @@ -0,0 +1,186 @@ +---- MODULE PubSubDedupTTL_TTrace_1775536996 ---- +EXTENDS Sequences, TLCExt, Toolbox, Naturals, TLC, PubSubDedupTTL + +_expression == + LET PubSubDedupTTL_TEExpression == INSTANCE PubSubDedupTTL_TEExpression + IN PubSubDedupTTL_TEExpression!expression +---- + +_trace == + LET PubSubDedupTTL_TETrace == INSTANCE PubSubDedupTTL_TETrace + IN PubSubDedupTTL_TETrace!trace +---- + +_inv == + ~( + TLCGet("level") = Len(_TETrace) + /\ + ctr = ([A |-> 2, B |-> 0]) + /\ + buf = ([A |-> <<>>, B |-> <<>>]) + /\ + conf_seq = ([A |-> 0, B |-> 0]) + /\ + pend_seq = ([A |-> 1, B |-> 0]) + /\ + wf_last = ([A |-> 1, B |-> 0]) + /\ + flush_active = ([A |-> TRUE, B |-> FALSE]) + /\ + wf_log = (<<1, 3, 1, 3>>) + /\ + delivered_flag = ([A |-> TRUE, B |-> FALSE]) + /\ + pend = ([A |-> <<1, 3>>, B |-> <<>>]) + ) +---- + +_init == + /\ delivered_flag = _TETrace[1].delivered_flag + /\ flush_active = _TETrace[1].flush_active + /\ wf_log = _TETrace[1].wf_log + /\ ctr = _TETrace[1].ctr + /\ pend_seq = _TETrace[1].pend_seq + /\ buf = _TETrace[1].buf + /\ pend = _TETrace[1].pend + /\ wf_last = _TETrace[1].wf_last + /\ conf_seq = _TETrace[1].conf_seq +---- + +_next == + /\ 
\E i,j \in DOMAIN _TETrace: + /\ \/ /\ j = i + 1 + /\ i = TLCGet("level") + /\ delivered_flag = _TETrace[i].delivered_flag + /\ delivered_flag' = _TETrace[j].delivered_flag + /\ flush_active = _TETrace[i].flush_active + /\ flush_active' = _TETrace[j].flush_active + /\ wf_log = _TETrace[i].wf_log + /\ wf_log' = _TETrace[j].wf_log + /\ ctr = _TETrace[i].ctr + /\ ctr' = _TETrace[j].ctr + /\ pend_seq = _TETrace[i].pend_seq + /\ pend_seq' = _TETrace[j].pend_seq + /\ buf = _TETrace[i].buf + /\ buf' = _TETrace[j].buf + /\ pend = _TETrace[i].pend + /\ pend' = _TETrace[j].pend + /\ wf_last = _TETrace[i].wf_last + /\ wf_last' = _TETrace[j].wf_last + /\ conf_seq = _TETrace[i].conf_seq + /\ conf_seq' = _TETrace[j].conf_seq + +\* Uncomment the ASSUME below to write the states of the error trace +\* to the given file in Json format. Note that you can pass any tuple +\* to `JsonSerialize`. For example, a sub-sequence of _TETrace. + \* ASSUME + \* LET J == INSTANCE Json + \* IN J!JsonSerialize("PubSubDedupTTL_TTrace_1775536996.json", _TETrace) + +============================================================================= + + Note that you can extract this module `PubSubDedupTTL_TEExpression` + to a dedicated file to reuse `expression` (the module in the + dedicated `PubSubDedupTTL_TEExpression.tla` file takes precedence + over the module `PubSubDedupTTL_TEExpression` below). + +---- MODULE PubSubDedupTTL_TEExpression ---- +EXTENDS Sequences, TLCExt, Toolbox, Naturals, TLC, PubSubDedupTTL + +expression == + [ + \* To hide variables of the `PubSubDedupTTL` spec from the error trace, + \* remove the variables below. The trace will be written in the order + \* of the fields of this record. 
+ delivered_flag |-> delivered_flag + ,flush_active |-> flush_active + ,wf_log |-> wf_log + ,ctr |-> ctr + ,pend_seq |-> pend_seq + ,buf |-> buf + ,pend |-> pend + ,wf_last |-> wf_last + ,conf_seq |-> conf_seq + + \* Put additional constant-, state-, and action-level expressions here: + \* ,_stateNumber |-> _TEPosition + \* ,_delivered_flagUnchanged |-> delivered_flag = delivered_flag' + + \* Format the `delivered_flag` variable as Json value. + \* ,_delivered_flagJson |-> + \* LET J == INSTANCE Json + \* IN J!ToJson(delivered_flag) + + \* Lastly, you may build expressions over arbitrary sets of states by + \* leveraging the _TETrace operator. For example, this is how to + \* count the number of times a spec variable changed up to the current + \* state in the trace. + \* ,_delivered_flagModCount |-> + \* LET F[s \in DOMAIN _TETrace] == + \* IF s = 1 THEN 0 + \* ELSE IF _TETrace[s].delivered_flag # _TETrace[s-1].delivered_flag + \* THEN 1 + F[s-1] ELSE F[s-1] + \* IN F[_TEPosition - 1] + ] + +============================================================================= + + + +Parsing and semantic processing can take forever if the trace below is long. + In this case, it is advised to uncomment the module below to deserialize the + trace from a generated binary file. 
+ +\* +\*---- MODULE PubSubDedupTTL_TETrace ---- +\*EXTENDS IOUtils, TLC, PubSubDedupTTL +\* +\*trace == IODeserialize("PubSubDedupTTL_TTrace_1775536996.bin", TRUE) +\* +\*============================================================================= +\* + +---- MODULE PubSubDedupTTL_TETrace ---- +EXTENDS TLC, PubSubDedupTTL + +trace == + << + ([ctr |-> [A |-> 0, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 0, B |-> 0],wf_last |-> [A |-> 0, B |-> 0],flush_active |-> [A |-> FALSE, B |-> FALSE],wf_log |-> <<>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<>>, B |-> <<>>]]), + ([ctr |-> [A |-> 1, B |-> 0],buf |-> [A |-> <<1>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 0, B |-> 0],wf_last |-> [A |-> 0, B |-> 0],flush_active |-> [A |-> FALSE, B |-> FALSE],wf_log |-> <<>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<1, 3>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 0, B |-> 0],wf_last |-> [A |-> 0, B |-> 0],flush_active |-> [A |-> FALSE, B |-> FALSE],wf_log |-> <<>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 0],wf_last |-> [A |-> 0, B |-> 0],flush_active |-> [A |-> TRUE, B |-> FALSE],wf_log |-> <<>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 0],wf_last |-> [A |-> 1, B |-> 0],flush_active |-> [A |-> TRUE, B |-> FALSE],wf_log |-> <<1, 3>>,delivered_flag |-> [A |-> TRUE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 
0],wf_last |-> [A |-> 1, B |-> 0],flush_active |-> [A |-> FALSE, B |-> FALSE],wf_log |-> <<1, 3>>,delivered_flag |-> [A |-> TRUE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 0],wf_last |-> [A |-> 1, B |-> 0],flush_active |-> [A |-> TRUE, B |-> FALSE],wf_log |-> <<1, 3>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 0],wf_last |-> [A |-> 0, B |-> 0],flush_active |-> [A |-> TRUE, B |-> FALSE],wf_log |-> <<1, 3>>,delivered_flag |-> [A |-> FALSE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]), + ([ctr |-> [A |-> 2, B |-> 0],buf |-> [A |-> <<>>, B |-> <<>>],conf_seq |-> [A |-> 0, B |-> 0],pend_seq |-> [A |-> 1, B |-> 0],wf_last |-> [A |-> 1, B |-> 0],flush_active |-> [A |-> TRUE, B |-> FALSE],wf_log |-> <<1, 3, 1, 3>>,delivered_flag |-> [A |-> TRUE, B |-> FALSE],pend |-> [A |-> <<1, 3>>, B |-> <<>>]]) + >> +---- + + +============================================================================= + +---- CONFIG PubSubDedupTTL_TTrace_1775536996 ---- +CONSTANTS + MaxItemsPerPub = 2 + +INVARIANT + _inv + +CHECK_DEADLOCK + \* CHECK_DEADLOCK off because of PROPERTY or INVARIANT above. 
+ FALSE + +INIT + _init + +NEXT + _next + +CONSTANT + _TETrace <- _trace + +ALIAS + _expression +============================================================================= +\* Generated on Mon Apr 06 21:43:16 PDT 2026 \ No newline at end of file diff --git a/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Unsafe.cfg b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Unsafe.cfg new file mode 100644 index 000000000..4420da7ef --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedupTTL_Unsafe.cfg @@ -0,0 +1,13 @@ +\* Unsafe pruning: prune any publisher's dedup entry at any time. +\* Should FAIL NoDuplicates — confirms that unbounded pruning is dangerous. + +SPECIFICATION UnsafeSpec + +CONSTANTS + MaxItemsPerPub = 2 + +INVARIANTS + NoDuplicates + +CHECK_DEADLOCK + FALSE diff --git a/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.bin b/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.bin new file mode 100644 index 000000000..e7461f615 Binary files /dev/null and b/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.bin differ diff --git a/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.tla b/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.tla new file mode 100644 index 000000000..8fd999a5b --- /dev/null +++ b/temporalio/contrib/pubsub/verification/PubSubDedup_TTrace_1775536362.tla @@ -0,0 +1,185 @@ +---- MODULE PubSubDedup_TTrace_1775536362 ---- +EXTENDS Sequences, TLCExt, PubSubDedup, Toolbox, Naturals, TLC + +_expression == + LET PubSubDedup_TEExpression == INSTANCE PubSubDedup_TEExpression + IN PubSubDedup_TEExpression!expression +---- + +_trace == + LET PubSubDedup_TETrace == INSTANCE PubSubDedup_TETrace + IN PubSubDedup_TETrace!trace +---- + +_inv == + ~( + TLCGet("level") = Len(_TETrace) + /\ + item_counter = (4) + /\ + pending = (<<>>) + /\ + pending_seq = (0) + /\ + wf_last_seq = (1) + /\ + delivered = (TRUE) + /\ + flushing = (FALSE) + 
/\ + buffer = (<<>>) + /\ + wf_log = (<<1, 2, 3, 4>>) + /\ + confirmed_seq = (1) + ) +---- + +_init == + /\ pending = _TETrace[1].pending + /\ wf_log = _TETrace[1].wf_log + /\ flushing = _TETrace[1].flushing + /\ pending_seq = _TETrace[1].pending_seq + /\ buffer = _TETrace[1].buffer + /\ item_counter = _TETrace[1].item_counter + /\ confirmed_seq = _TETrace[1].confirmed_seq + /\ wf_last_seq = _TETrace[1].wf_last_seq + /\ delivered = _TETrace[1].delivered +---- + +_next == + /\ \E i,j \in DOMAIN _TETrace: + /\ \/ /\ j = i + 1 + /\ i = TLCGet("level") + /\ pending = _TETrace[i].pending + /\ pending' = _TETrace[j].pending + /\ wf_log = _TETrace[i].wf_log + /\ wf_log' = _TETrace[j].wf_log + /\ flushing = _TETrace[i].flushing + /\ flushing' = _TETrace[j].flushing + /\ pending_seq = _TETrace[i].pending_seq + /\ pending_seq' = _TETrace[j].pending_seq + /\ buffer = _TETrace[i].buffer + /\ buffer' = _TETrace[j].buffer + /\ item_counter = _TETrace[i].item_counter + /\ item_counter' = _TETrace[j].item_counter + /\ confirmed_seq = _TETrace[i].confirmed_seq + /\ confirmed_seq' = _TETrace[j].confirmed_seq + /\ wf_last_seq = _TETrace[i].wf_last_seq + /\ wf_last_seq' = _TETrace[j].wf_last_seq + /\ delivered = _TETrace[i].delivered + /\ delivered' = _TETrace[j].delivered + +\* Uncomment the ASSUME below to write the states of the error trace +\* to the given file in Json format. Note that you can pass any tuple +\* to `JsonSerialize`. For example, a sub-sequence of _TETrace. + \* ASSUME + \* LET J == INSTANCE Json + \* IN J!JsonSerialize("PubSubDedup_TTrace_1775536362.json", _TETrace) + +============================================================================= + + Note that you can extract this module `PubSubDedup_TEExpression` + to a dedicated file to reuse `expression` (the module in the + dedicated `PubSubDedup_TEExpression.tla` file takes precedence + over the module `PubSubDedup_TEExpression` below). 
+ +---- MODULE PubSubDedup_TEExpression ---- +EXTENDS Sequences, TLCExt, PubSubDedup, Toolbox, Naturals, TLC + +expression == + [ + \* To hide variables of the `PubSubDedup` spec from the error trace, + \* remove the variables below. The trace will be written in the order + \* of the fields of this record. + pending |-> pending + ,wf_log |-> wf_log + ,flushing |-> flushing + ,pending_seq |-> pending_seq + ,buffer |-> buffer + ,item_counter |-> item_counter + ,confirmed_seq |-> confirmed_seq + ,wf_last_seq |-> wf_last_seq + ,delivered |-> delivered + + \* Put additional constant-, state-, and action-level expressions here: + \* ,_stateNumber |-> _TEPosition + \* ,_pendingUnchanged |-> pending = pending' + + \* Format the `pending` variable as Json value. + \* ,_pendingJson |-> + \* LET J == INSTANCE Json + \* IN J!ToJson(pending) + + \* Lastly, you may build expressions over arbitrary sets of states by + \* leveraging the _TETrace operator. For example, this is how to + \* count the number of times a spec variable changed up to the current + \* state in the trace. + \* ,_pendingModCount |-> + \* LET F[s \in DOMAIN _TETrace] == + \* IF s = 1 THEN 0 + \* ELSE IF _TETrace[s].pending # _TETrace[s-1].pending + \* THEN 1 + F[s-1] ELSE F[s-1] + \* IN F[_TEPosition - 1] + ] + +============================================================================= + + + +Parsing and semantic processing can take forever if the trace below is long. + In this case, it is advised to uncomment the module below to deserialize the + trace from a generated binary file. 
+ +\* +\*---- MODULE PubSubDedup_TETrace ---- +\*EXTENDS IOUtils, PubSubDedup, TLC +\* +\*trace == IODeserialize("PubSubDedup_TTrace_1775536362.bin", TRUE) +\* +\*============================================================================= +\* + +---- MODULE PubSubDedup_TETrace ---- +EXTENDS PubSubDedup, TLC + +trace == + << + ([item_counter |-> 0,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 1,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<1>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 2,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<1, 2>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 3,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<1, 2, 3>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> FALSE,buffer |-> <<1, 2, 3, 4>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,pending |-> <<1, 2, 3, 4>>,pending_seq |-> 1,wf_last_seq |-> 0,delivered |-> FALSE,flushing |-> TRUE,buffer |-> <<>>,wf_log |-> <<>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,pending |-> <<1, 2, 3, 4>>,pending_seq |-> 1,wf_last_seq |-> 1,delivered |-> TRUE,flushing |-> TRUE,buffer |-> <<>>,wf_log |-> <<1, 2, 3, 4>>,confirmed_seq |-> 0]), + ([item_counter |-> 4,pending |-> <<>>,pending_seq |-> 0,wf_last_seq |-> 1,delivered |-> TRUE,flushing |-> FALSE,buffer |-> <<>>,wf_log |-> <<1, 2, 3, 4>>,confirmed_seq |-> 1]) + >> +---- + + +============================================================================= + +---- CONFIG PubSubDedup_TTrace_1775536362 ---- +CONSTANTS + MaxItems = 4 + +INVARIANT + _inv + +CHECK_DEADLOCK + \* CHECK_DEADLOCK off because of PROPERTY or 
INVARIANT above. + FALSE + +INIT + _init + +NEXT + _next + +CONSTANT + _TETrace <- _trace + +ALIAS + _expression +============================================================================= +\* Generated on Mon Apr 06 21:32:43 PDT 2026 \ No newline at end of file diff --git a/temporalio/contrib/pubsub/verification/README.md b/temporalio/contrib/pubsub/verification/README.md new file mode 100644 index 000000000..0a6a3d50c --- /dev/null +++ b/temporalio/contrib/pubsub/verification/README.md @@ -0,0 +1,52 @@ +# Pub/Sub Dedup Verification + +TLA+ specifications for the exactly-once delivery protocol. +See [PROOF.md](./PROOF.md) for the full correctness argument. + +## Files + +| File | Purpose | +|---|---| +| `PubSubDedup.tla` | Correct algorithm — bounded model checking (safety + liveness) | +| `PubSubDedupInductive.tla` | Strengthened invariant — reachable-state verification + informal induction argument | +| `PubSubDedupTTL.tla` | Multi-publisher + TTL pruning (safe vs unsafe) | +| `PubSubDedupBroken.tla` | Old (broken) algorithm — TLC finds the duplicate bug | +| `PROOF.md` | Full proof: invariant, order preservation, TTL safety, counterexamples | + +## Verified Properties + +| Property | Type | Spec | +|---|---|---| +| NoDuplicates | safety | all specs | +| OrderPreserved | safety | single-publisher | +| OrderPreservedPerPublisher | safety | multi-publisher | +| AllItemsDelivered | liveness | all specs (under fairness) | +| TTL safe pruning | safety | PubSubDedupTTL | + +## Running + +```bash +curl -sL -o /tmp/tla2tools.jar \ + https://github.com/tlaplus/tlaplus/releases/download/v1.8.0/tla2tools.jar + +# Single-publisher bounded model checking +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedup -workers auto + +# Inductive invariant (unbounded) +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedupInductive -workers auto + +# Multi-publisher base protocol +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedupTTL \ + -config PubSubDedupTTL_Base.cfg -workers auto + +# TTL 
unsafe pruning (should FAIL) +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedupTTL \ + -config PubSubDedupTTL_Unsafe.cfg -workers auto + +# TTL safe pruning (should PASS) +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedupTTL \ + -config PubSubDedupTTL_Safe.cfg -workers auto + +# Broken algorithm (should FAIL) +java -cp /tmp/tla2tools.jar tlc2.TLC PubSubDedupBroken -workers auto +``` diff --git a/tests/contrib/google_adk_agents/test_adk_streaming.py b/tests/contrib/google_adk_agents/test_adk_streaming.py new file mode 100644 index 000000000..a6c964544 --- /dev/null +++ b/tests/contrib/google_adk_agents/test_adk_streaming.py @@ -0,0 +1,198 @@ +"""Integration tests for ADK streaming support. + +Verifies that the streaming model activity publishes TEXT_DELTA events via +PubSubMixin and that non-streaming mode remains backward-compatible. +""" + +import asyncio +import json +import logging +import uuid +from collections.abc import AsyncGenerator +from datetime import timedelta + +import pytest +from google.adk import Agent +from google.adk.models import BaseLlm, LLMRegistry +from google.adk.models.llm_request import LlmRequest +from google.adk.models.llm_response import LlmResponse +from google.adk.runners import InMemoryRunner +from google.genai.types import Content, Part + +from temporalio import workflow +from temporalio.client import Client +from temporalio.contrib.google_adk_agents import GoogleAdkPlugin, TemporalModel +from temporalio.contrib.pubsub import PubSubClient, PubSubMixin +from temporalio.worker import Worker + +logger = logging.getLogger(__name__) + + +class StreamingTestModel(BaseLlm): + """Test model that yields multiple partial responses to simulate streaming.""" + + @classmethod + def supported_models(cls) -> list[str]: + return ["streaming_test_model"] + + async def generate_content_async( + self, llm_request: LlmRequest, stream: bool = False + ) -> AsyncGenerator[LlmResponse, None]: + yield LlmResponse( + content=Content(role="model", 
parts=[Part(text="Hello ")]) + ) + yield LlmResponse( + content=Content(role="model", parts=[Part(text="world!")]) + ) + + +@workflow.defn +class StreamingAdkWorkflow(PubSubMixin): + """Test workflow that uses streaming TemporalModel with PubSubMixin.""" + + @workflow.init + def __init__(self, prompt: str) -> None: + self.init_pubsub() + + @workflow.run + async def run(self, prompt: str) -> str: + model = TemporalModel("streaming_test_model", streaming=True) + agent = Agent( + name="test_agent", + model=model, + instruction="You are a test agent.", + ) + + runner = InMemoryRunner(agent=agent, app_name="test-app") + session = await runner.session_service.create_session( + app_name="test-app", user_id="test" + ) + + final_text = "" + async for event in runner.run_async( + user_id="test", + session_id=session.id, + new_message=Content(role="user", parts=[Part(text=prompt)]), + ): + if event.content and event.content.parts: + for part in event.content.parts: + if part.text: + final_text = part.text + + return final_text + + +@workflow.defn +class NonStreamingAdkWorkflow: + """Test workflow without streaming -- verifies backward compatibility.""" + + @workflow.run + async def run(self, prompt: str) -> str: + model = TemporalModel("streaming_test_model", streaming=False) + agent = Agent( + name="test_agent", + model=model, + instruction="You are a test agent.", + ) + + runner = InMemoryRunner(agent=agent, app_name="test-app") + session = await runner.session_service.create_session( + app_name="test-app", user_id="test" + ) + + final_text = "" + async for event in runner.run_async( + user_id="test", + session_id=session.id, + new_message=Content(role="user", parts=[Part(text=prompt)]), + ): + if event.content and event.content.parts: + for part in event.content.parts: + if part.text: + final_text = part.text + + return final_text + + +@pytest.mark.asyncio +async def test_streaming_publishes_events(client: Client): + """Verify that streaming activity publishes TEXT_DELTA 
events via pubsub.""" + LLMRegistry.register(StreamingTestModel) + + new_config = client.config() + new_config["plugins"] = [GoogleAdkPlugin()] + client = Client(**new_config) + + workflow_id = f"adk-streaming-test-{uuid.uuid4()}" + + async with Worker( + client, + task_queue="adk-streaming-test", + workflows=[StreamingAdkWorkflow], + max_cached_workflows=0, + ): + handle = await client.start_workflow( + StreamingAdkWorkflow.run, + "Hello", + id=workflow_id, + task_queue="adk-streaming-test", + execution_timeout=timedelta(seconds=30), + ) + + # Subscribe concurrently while the workflow is running + pubsub = PubSubClient.create(client, workflow_id) + events: list[dict] = [] + + async def collect_events() -> None: + async for item in pubsub.subscribe( + ["events"], from_offset=0, poll_cooldown=0.05 + ): + event = json.loads(item.data) + events.append(event) + if event["type"] == "LLM_CALL_COMPLETE": + break + + collect_task = asyncio.create_task(collect_events()) + result = await handle.result() + + # Wait for event collection with a timeout + await asyncio.wait_for(collect_task, timeout=10.0) + + assert result is not None + + event_types = [e["type"] for e in events] + assert "LLM_CALL_START" in event_types, f"Expected LLM_CALL_START, got: {event_types}" + assert "TEXT_DELTA" in event_types, f"Expected TEXT_DELTA, got: {event_types}" + assert "LLM_CALL_COMPLETE" in event_types, ( + f"Expected LLM_CALL_COMPLETE, got: {event_types}" + ) + + text_deltas = [e["data"]["delta"] for e in events if e["type"] == "TEXT_DELTA"] + assert len(text_deltas) >= 1, f"Expected at least 1 TEXT_DELTA, got: {text_deltas}" + + +@pytest.mark.asyncio +async def test_non_streaming_backward_compatible(client: Client): + """Verify non-streaming mode still works (backward compatibility).""" + LLMRegistry.register(StreamingTestModel) + + new_config = client.config() + new_config["plugins"] = [GoogleAdkPlugin()] + client = Client(**new_config) + + async with Worker( + client, + 
task_queue="adk-non-streaming-test", + workflows=[NonStreamingAdkWorkflow], + max_cached_workflows=0, + ): + handle = await client.start_workflow( + NonStreamingAdkWorkflow.run, + "Hello", + id=f"adk-non-streaming-test-{uuid.uuid4()}", + task_queue="adk-non-streaming-test", + execution_timeout=timedelta(seconds=30), + ) + result = await handle.result() + + assert result is not None diff --git a/tests/contrib/openai_agents/test_openai_streaming.py b/tests/contrib/openai_agents/test_openai_streaming.py new file mode 100644 index 000000000..ca90eb3f3 --- /dev/null +++ b/tests/contrib/openai_agents/test_openai_streaming.py @@ -0,0 +1,287 @@ +"""Integration tests for OpenAI Agents streaming support. + +Verifies that the streaming model activity publishes TEXT_DELTA events via +PubSubMixin and that the workflow returns the correct final result. +""" + +import asyncio +import json +import logging +import uuid +from collections.abc import AsyncIterator +from datetime import timedelta +from typing import Any + +import pytest +from agents import ( + Agent, + AgentOutputSchemaBase, + Handoff, + Model, + ModelResponse, + ModelSettings, + ModelTracing, + Runner, + Tool, + TResponseInputItem, + Usage, +) +from agents.items import TResponseStreamEvent +from openai.types.responses import ( + Response, + ResponseCompletedEvent, + ResponseOutputMessage, + ResponseOutputText, + ResponseTextDeltaEvent, +) + +from temporalio import workflow +from temporalio.client import Client +from temporalio.contrib.openai_agents import ModelActivityParameters +from temporalio.contrib.openai_agents.testing import AgentEnvironment +from temporalio.contrib.pubsub import PubSubClient, PubSubMixin +from tests.helpers import new_worker + +logger = logging.getLogger(__name__) + + +class StreamingTestModel(Model): + """Test model that yields text deltas followed by a ResponseCompletedEvent.""" + + __test__ = False + + async def get_response( + self, + system_instructions: str | None, + input: str | 
list[TResponseInputItem], + model_settings: ModelSettings, + tools: list[Tool], + output_schema: AgentOutputSchemaBase | None, + handoffs: list[Handoff], + tracing: ModelTracing, + **kwargs: Any, + ) -> ModelResponse: + return ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_test", + content=[ + ResponseOutputText( + text="Hello world!", + annotations=[], + type="output_text", + logprobs=[], + ) + ], + role="assistant", + status="completed", + type="message", + ) + ], + usage=Usage(), + response_id=None, + ) + + async def stream_response( + self, + system_instructions: str | None, + input: str | list[TResponseInputItem], + model_settings: ModelSettings, + tools: list[Tool], + output_schema: AgentOutputSchemaBase | None, + handoffs: list[Handoff], + tracing: ModelTracing, + **kwargs: Any, + ) -> AsyncIterator[TResponseStreamEvent]: + # Yield text deltas + yield ResponseTextDeltaEvent( + content_index=0, + delta="Hello ", + item_id="item1", + output_index=0, + sequence_number=0, + type="response.output_text.delta", + logprobs=[], + ) + yield ResponseTextDeltaEvent( + content_index=0, + delta="world!", + item_id="item1", + output_index=0, + sequence_number=1, + type="response.output_text.delta", + logprobs=[], + ) + + # Yield the final completed event + response = Response( + id="resp_test", + created_at=0, + error=None, + incomplete_details=None, + instructions=None, + metadata={}, + model="test", + object="response", + output=[ + ResponseOutputMessage( + id="msg_test", + content=[ + ResponseOutputText( + text="Hello world!", + annotations=[], + type="output_text", + logprobs=[], + ) + ], + role="assistant", + status="completed", + type="message", + ) + ], + parallel_tool_calls=True, + temperature=1.0, + tool_choice="auto", + tools=[], + top_p=1.0, + status="completed", + text={"format": {"type": "text"}}, + truncation="disabled", + usage={ + "input_tokens": 10, + "output_tokens": 5, + "total_tokens": 15, + "input_tokens_details": {"cached_tokens": 0}, + 
"output_tokens_details": {"reasoning_tokens": 0}, + }, + ) + yield ResponseCompletedEvent( + response=response, sequence_number=2, type="response.completed" + ) + + +@workflow.defn +class StreamingOpenAIWorkflow(PubSubMixin): + """Test workflow that uses streaming model activity with PubSubMixin.""" + + @workflow.init + def __init__(self, prompt: str) -> None: + self.init_pubsub() + + @workflow.run + async def run(self, prompt: str) -> str: + agent = Agent[None]( + name="Assistant", + instructions="You are a test agent.", + ) + result = await Runner.run(starting_agent=agent, input=prompt) + return result.final_output + + +@workflow.defn +class NonStreamingOpenAIWorkflow: + """Test workflow without streaming -- verifies backward compatibility.""" + + @workflow.run + async def run(self, prompt: str) -> str: + agent = Agent[None]( + name="Assistant", + instructions="You are a test agent.", + ) + result = await Runner.run(starting_agent=agent, input=prompt) + return result.final_output + + +@pytest.mark.asyncio +async def test_streaming_publishes_events(client: Client): + """Verify that streaming activity publishes TEXT_DELTA events via pubsub.""" + model = StreamingTestModel() + async with AgentEnvironment( + model=model, + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=30), + enable_streaming=True, + ), + ) as env: + client = env.applied_on_client(client) + + workflow_id = f"openai-streaming-test-{uuid.uuid4()}" + + async with new_worker( + client, + StreamingOpenAIWorkflow, + max_cached_workflows=0, + ) as worker: + handle = await client.start_workflow( + StreamingOpenAIWorkflow.run, + "Hello", + id=workflow_id, + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + + # Subscribe concurrently while the workflow is running + pubsub = PubSubClient.create(client, workflow_id) + events: list[dict] = [] + + async def collect_events() -> None: + async for item in pubsub.subscribe( + ["events"], from_offset=0, 
poll_cooldown=0.05 + ): + event = json.loads(item.data) + events.append(event) + if event["type"] == "LLM_CALL_COMPLETE": + break + + collect_task = asyncio.create_task(collect_events()) + result = await handle.result() + + # Wait for event collection with a timeout + await asyncio.wait_for(collect_task, timeout=10.0) + + assert result is not None + + event_types = [e["type"] for e in events] + assert "LLM_CALL_START" in event_types, ( + f"Expected LLM_CALL_START, got: {event_types}" + ) + assert "TEXT_DELTA" in event_types, ( + f"Expected TEXT_DELTA, got: {event_types}" + ) + assert "LLM_CALL_COMPLETE" in event_types, ( + f"Expected LLM_CALL_COMPLETE, got: {event_types}" + ) + + text_deltas = [e["data"]["delta"] for e in events if e["type"] == "TEXT_DELTA"] + assert len(text_deltas) >= 1, f"Expected at least 1 TEXT_DELTA, got: {text_deltas}" + assert "Hello " in text_deltas + assert "world!" in text_deltas + + +@pytest.mark.asyncio +async def test_non_streaming_backward_compatible(client: Client): + """Verify non-streaming mode still works (backward compatibility).""" + model = StreamingTestModel() + async with AgentEnvironment( + model=model, + model_params=ModelActivityParameters( + start_to_close_timeout=timedelta(seconds=30), + enable_streaming=False, + ), + ) as env: + client = env.applied_on_client(client) + + async with new_worker( + client, + NonStreamingOpenAIWorkflow, + max_cached_workflows=0, + ) as worker: + result = await client.execute_workflow( + NonStreamingOpenAIWorkflow.run, + "Hello", + id=f"openai-non-streaming-test-{uuid.uuid4()}", + task_queue=worker.task_queue, + execution_timeout=timedelta(seconds=30), + ) + + assert result == "Hello world!" 
diff --git a/tests/contrib/pubsub/__init__.py b/tests/contrib/pubsub/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/contrib/pubsub/test_pubsub.py b/tests/contrib/pubsub/test_pubsub.py new file mode 100644 index 000000000..e0154035e --- /dev/null +++ b/tests/contrib/pubsub/test_pubsub.py @@ -0,0 +1,1333 @@ +"""E2E integration tests for temporalio.contrib.pubsub.""" + +from __future__ import annotations + +import asyncio +import uuid +from datetime import timedelta + +import pytest + +from typing import Any + +from dataclasses import dataclass + +from temporalio import activity, workflow +from temporalio.client import Client +from temporalio.contrib.pubsub import ( + PollInput, + PollResult, + PubSubClient, + PubSubItem, + PubSubMixin, + PubSubState, + PublishEntry, + PublishInput, +) +from temporalio.contrib.pubsub._types import encode_data +from tests.helpers import assert_eq_eventually, new_worker + + +# --------------------------------------------------------------------------- +# Test workflows (must be module-level, not local classes) +# --------------------------------------------------------------------------- + + +@workflow.defn +class BasicPubSubWorkflow(PubSubMixin): + @workflow.init + def __init__(self) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class ActivityPublishWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_items", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.publish("status", b"activity_done") + await 
workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class WorkflowSidePublishWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + for i in range(count): + self.publish("events", f"item-{i}".encode()) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class MultiTopicWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_multi_topic", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class InterleavedWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + self.publish("status", b"started") + await workflow.execute_activity( + "publish_items", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.publish("status", b"done") + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class PriorityWorkflow(PubSubMixin): + @workflow.init + def __init__(self) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self) -> None: + await workflow.execute_activity( + "publish_with_priority", + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await 
workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class FlushOnExitWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_batch_test", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class MaxBatchWorkflow(PubSubMixin): + @workflow.init + def __init__(self, count: int) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.run + async def run(self, count: int) -> None: + await workflow.execute_activity( + "publish_with_max_batch", + count, + start_to_close_timeout=timedelta(seconds=30), + heartbeat_timeout=timedelta(seconds=10), + ) + self.publish("status", b"activity_done") + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class MixinCoexistenceWorkflow(PubSubMixin): + @workflow.init + def __init__(self) -> None: + self.init_pubsub() + self._app_data: list[str] = [] + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.signal + def app_signal(self, value: str) -> None: + self._app_data.append(value) + + @workflow.query + def app_query(self) -> list[str]: + return self._app_data + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +# --------------------------------------------------------------------------- +# Activities +# --------------------------------------------------------------------------- + + +@activity.defn(name="publish_items") +async def publish_items(count: int) -> None: + client = PubSubClient.create(batch_interval=0.5) + async with client: + for i in 
range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + + +@activity.defn(name="publish_multi_topic") +async def publish_multi_topic(count: int) -> None: + topics = ["a", "b", "c"] + client = PubSubClient.create(batch_interval=0.5) + async with client: + for i in range(count): + activity.heartbeat() + topic = topics[i % len(topics)] + client.publish(topic, f"{topic}-{i}".encode()) + + +@activity.defn(name="publish_with_priority") +async def publish_with_priority() -> None: + client = PubSubClient.create(batch_interval=60.0) + async with client: + client.publish("events", b"normal-0") + client.publish("events", b"normal-1") + client.publish("events", b"priority", priority=True) + # Give the flusher time to wake and flush + await asyncio.sleep(0.5) + + +@activity.defn(name="publish_batch_test") +async def publish_batch_test(count: int) -> None: + client = PubSubClient.create(batch_interval=60.0) + async with client: + for i in range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + + +@activity.defn(name="publish_with_max_batch") +async def publish_with_max_batch(count: int) -> None: + client = PubSubClient.create(batch_interval=60.0, max_batch_size=3) + async with client: + for i in range(count): + activity.heartbeat() + client.publish("events", f"item-{i}".encode()) + # Long batch_interval ensures only max_batch_size triggers flushes + # Context manager exit flushes any remainder + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _is_different_run(old_handle, new_handle) -> bool: + """Check if new_handle points to a different run than old_handle.""" + try: + desc = await new_handle.describe() + return desc.run_id != old_handle.result_run_id + except Exception: + return False + + +async def collect_items( + handle, + topics: list[str] | None, + from_offset: int, + 
expected_count: int, + timeout: float = 15.0, +) -> list[PubSubItem]: + """Subscribe and collect exactly expected_count items, with timeout.""" + client = PubSubClient(handle) + items: list[PubSubItem] = [] + try: + async with asyncio.timeout(timeout): + async for item in client.subscribe( + topics=topics, from_offset=from_offset, poll_cooldown=0 + ): + items.append(item) + if len(items) >= expected_count: + break + except asyncio.TimeoutError: + pass + return items + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_activity_publish_and_subscribe(client: Client) -> None: + """Activity publishes items, external client subscribes and receives them.""" + count = 10 + async with new_worker( + client, + ActivityPublishWorkflow, + activities=[publish_items], + ) as worker: + handle = await client.start_workflow( + ActivityPublishWorkflow.run, + count, + id=f"pubsub-basic-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # Collect activity items + the "activity_done" status item + items = await collect_items(handle, None, 0, count + 1) + assert len(items) == count + 1 + + # Check activity items + for i in range(count): + assert items[i].topic == "events" + assert items[i].data == f"item-{i}".encode() + + # Check workflow-side status item + assert items[count].topic == "status" + assert items[count].data == b"activity_done" + + await handle.signal(ActivityPublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_topic_filtering(client: Client) -> None: + """Publish to multiple topics, subscribe with filter.""" + count = 9 # 3 per topic + async with new_worker( + client, + MultiTopicWorkflow, + activities=[publish_multi_topic], + ) as worker: + handle = await client.start_workflow( + MultiTopicWorkflow.run, + count, + id=f"pubsub-filter-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # 
Subscribe to topic "a" only — should get 3 items + a_items = await collect_items(handle, ["a"], 0, 3) + assert len(a_items) == 3 + assert all(item.topic == "a" for item in a_items) + + # Subscribe to ["a", "c"] — should get 6 items + ac_items = await collect_items(handle, ["a", "c"], 0, 6) + assert len(ac_items) == 6 + assert all(item.topic in ("a", "c") for item in ac_items) + + # Subscribe to all (None) — should get all 9 + all_items = await collect_items(handle, None, 0, 9) + assert len(all_items) == 9 + + await handle.signal(MultiTopicWorkflow.close) + + +@pytest.mark.asyncio +async def test_subscribe_from_offset(client: Client) -> None: + """Subscribe from a non-zero offset.""" + count = 5 + async with new_worker( + client, + WorkflowSidePublishWorkflow, + ) as worker: + handle = await client.start_workflow( + WorkflowSidePublishWorkflow.run, + count, + id=f"pubsub-offset-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Subscribe from offset 3 — should get items 3, 4 + items = await collect_items(handle, None, 3, 2) + assert len(items) == 2 + assert items[0].data == b"item-3" + assert items[1].data == b"item-4" + + # Subscribe from offset 0 — should get all 5 + all_items = await collect_items(handle, None, 0, 5) + assert len(all_items) == 5 + + await handle.signal(WorkflowSidePublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_per_item_offsets(client: Client) -> None: + """Each yielded PubSubItem carries its correct global offset.""" + count = 5 + async with new_worker( + client, + WorkflowSidePublishWorkflow, + ) as worker: + handle = await client.start_workflow( + WorkflowSidePublishWorkflow.run, + count, + id=f"pubsub-item-offset-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + items = await collect_items(handle, None, 0, count) + assert len(items) == count + for i, item in enumerate(items): + assert item.offset == i, f"item {i} has offset {item.offset}" + + # Subscribe from offset 3 — offsets should be 3, 4 + later_items = 
await collect_items(handle, None, 3, 2) + assert len(later_items) == 2 + assert later_items[0].offset == 3 + assert later_items[1].offset == 4 + + await handle.signal(WorkflowSidePublishWorkflow.close) + + +@pytest.mark.asyncio +async def test_per_item_offsets_with_topic_filter(client: Client) -> None: + """Per-item offsets are global (not per-topic) even when filtering.""" + count = 9 # 3 per topic (a, b, c round-robin) + async with new_worker( + client, + MultiTopicWorkflow, + activities=[publish_multi_topic], + ) as worker: + handle = await client.start_workflow( + MultiTopicWorkflow.run, + count, + id=f"pubsub-item-offset-filter-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Subscribe to topic "a" only — items are at global offsets 0, 3, 6 + a_items = await collect_items(handle, ["a"], 0, 3) + assert len(a_items) == 3 + assert a_items[0].offset == 0 + assert a_items[1].offset == 3 + assert a_items[2].offset == 6 + + # Subscribe to topic "b" — items are at global offsets 1, 4, 7 + b_items = await collect_items(handle, ["b"], 0, 3) + assert len(b_items) == 3 + assert b_items[0].offset == 1 + assert b_items[1].offset == 4 + assert b_items[2].offset == 7 + + await handle.signal(MultiTopicWorkflow.close) + + +@pytest.mark.asyncio +async def test_per_item_offsets_after_truncation(client: Client) -> None: + """Per-item offsets remain correct after log truncation.""" + async with new_worker( + client, + TruncateSignalWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateSignalWorkflow.run, + id=f"pubsub-item-offset-trunc-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items + await handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(f"item-{i}".encode())) + for i in range(5) + ]), + ) + await asyncio.sleep(0.5) + + # Truncate up to offset 3 + await handle.signal("truncate", 3) + await asyncio.sleep(0.3) + + # Items 3, 4 should have offsets 3, 4 + items = await 
collect_items(handle, None, 3, 2) + assert len(items) == 2 + assert items[0].offset == 3 + assert items[1].offset == 4 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_poll_truncated_offset_returns_application_error(client: Client) -> None: + """Polling a truncated offset raises ApplicationError (not ValueError) + and does not crash the workflow task.""" + async with new_worker( + client, + TruncateSignalWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateSignalWorkflow.run, + id=f"pubsub-trunc-error-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items + await handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(f"item-{i}".encode())) + for i in range(5) + ]), + ) + await asyncio.sleep(0.5) + + # Truncate up to offset 3 + await handle.signal("truncate", 3) + await asyncio.sleep(0.3) + + # Poll from offset 1 (truncated) — should get ApplicationError, + # NOT crash the workflow task. 
+ from temporalio.client import WorkflowUpdateFailedError + with pytest.raises(WorkflowUpdateFailedError): + await handle.execute_update( + "__pubsub_poll", + PollInput(topics=[], from_offset=1), + result_type=PollResult, + ) + + # Workflow should still be usable — poll from valid offset 3 + items = await collect_items(handle, None, 3, 2) + assert len(items) == 2 + assert items[0].offset == 3 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_poll_offset_zero_after_truncation(client: Client) -> None: + """Polling from offset 0 after truncation returns items from base_offset.""" + async with new_worker( + client, + TruncateSignalWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateSignalWorkflow.run, + id=f"pubsub-trunc-zero-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items, truncate first 3 + await handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(f"item-{i}".encode())) + for i in range(5) + ]), + ) + await asyncio.sleep(0.5) + await handle.signal("truncate", 3) + await asyncio.sleep(0.3) + + # Poll from offset 0 — should get items starting from base_offset (3) + items = await collect_items(handle, None, 0, 2) + assert len(items) == 2 + assert items[0].offset == 3 + assert items[1].offset == 4 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_subscribe_recovers_from_truncation(client: Client) -> None: + """subscribe() auto-recovers when offset falls behind truncation.""" + async with new_worker( + client, + TruncateSignalWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateSignalWorkflow.run, + id=f"pubsub-trunc-recover-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items + await handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(f"item-{i}".encode())) + for i in range(5) + ]), + ) + await asyncio.sleep(0.5) + + # 
Truncate first 3 + await handle.signal("truncate", 3) + await asyncio.sleep(0.3) + + # subscribe from offset 1 (truncated) — should auto-recover + # and deliver items from base_offset (3) + pubsub = PubSubClient(handle) + items: list[PubSubItem] = [] + try: + async with asyncio.timeout(5): + async for item in pubsub.subscribe( + from_offset=1, poll_cooldown=0 + ): + items.append(item) + if len(items) >= 2: + break + except asyncio.TimeoutError: + pass + assert len(items) == 2 + assert items[0].offset == 3 + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_workflow_and_activity_publish_interleaved(client: Client) -> None: + """Workflow publishes status events around activity publishing.""" + count = 5 + async with new_worker( + client, + InterleavedWorkflow, + activities=[publish_items], + ) as worker: + handle = await client.start_workflow( + InterleavedWorkflow.run, + count, + id=f"pubsub-interleave-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Total: 1 (started) + count (activity) + 1 (done) = count + 2 + items = await collect_items(handle, None, 0, count + 2) + assert len(items) == count + 2 + + # First item is workflow-side "started" + assert items[0].topic == "status" + assert items[0].data == b"started" + + # Middle items are from activity + for i in range(count): + assert items[i + 1].topic == "events" + assert items[i + 1].data == f"item-{i}".encode() + + # Last item is workflow-side "done" + assert items[count + 1].topic == "status" + assert items[count + 1].data == b"done" + + await handle.signal(InterleavedWorkflow.close) + + +@pytest.mark.asyncio +async def test_priority_flush(client: Client) -> None: + """Priority publish triggers immediate flush without waiting for timer.""" + async with new_worker( + client, + PriorityWorkflow, + activities=[publish_with_priority], + ) as worker: + handle = await client.start_workflow( + PriorityWorkflow.run, + id=f"pubsub-priority-{uuid.uuid4()}", + task_queue=worker.task_queue, 
+ ) + + # If priority works, we get all 3 items quickly despite 60s batch interval + items = await collect_items(handle, None, 0, 3, timeout=10.0) + assert len(items) == 3 + assert items[2].data == b"priority" + + await handle.signal(PriorityWorkflow.close) + + +@pytest.mark.asyncio +async def test_iterator_cancellation(client: Client) -> None: + """Cancelling a subscription iterator completes cleanly.""" + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-cancel-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + pubsub_client = PubSubClient(handle) + + async def subscribe_and_collect(): + items = [] + async for item in pubsub_client.subscribe( + from_offset=0, poll_cooldown=0 + ): + items.append(item) + return items + + task = asyncio.create_task(subscribe_and_collect()) + await asyncio.sleep(0.5) + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_context_manager_flushes_on_exit(client: Client) -> None: + """Context manager exit flushes all buffered items.""" + count = 5 + async with new_worker( + client, + FlushOnExitWorkflow, + activities=[publish_batch_test], + ) as worker: + handle = await client.start_workflow( + FlushOnExitWorkflow.run, + count, + id=f"pubsub-flush-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Despite 60s batch interval, all items arrive because __aexit__ flushes + items = await collect_items(handle, None, 0, count, timeout=15.0) + assert len(items) == count + for i in range(count): + assert items[i].data == f"item-{i}".encode() + + await handle.signal(FlushOnExitWorkflow.close) + + +@pytest.mark.asyncio +async def test_concurrent_subscribers(client: Client) -> None: + """Two subscribers on different topics receive correct items concurrently.""" + count = 6 # 2 per topic + async with new_worker( + client, + 
MultiTopicWorkflow, + activities=[publish_multi_topic], + ) as worker: + handle = await client.start_workflow( + MultiTopicWorkflow.run, + count, + id=f"pubsub-concurrent-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + a_task = asyncio.create_task(collect_items(handle, ["a"], 0, 2)) + b_task = asyncio.create_task(collect_items(handle, ["b"], 0, 2)) + + a_items, b_items = await asyncio.gather(a_task, b_task) + + assert len(a_items) == 2 + assert all(item.topic == "a" for item in a_items) + assert len(b_items) == 2 + assert all(item.topic == "b" for item in b_items) + + await handle.signal(MultiTopicWorkflow.close) + + +@pytest.mark.asyncio +async def test_mixin_coexistence(client: Client) -> None: + """PubSubMixin works alongside application signals and queries.""" + async with new_worker( + client, + MixinCoexistenceWorkflow, + ) as worker: + handle = await client.start_workflow( + MixinCoexistenceWorkflow.run, + id=f"pubsub-coexist-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Use application signal + await handle.signal(MixinCoexistenceWorkflow.app_signal, "hello") + await handle.signal(MixinCoexistenceWorkflow.app_signal, "world") + + # Use pub/sub signal + await handle.signal( + "__pubsub_publish", + PublishInput(items=[PublishEntry(topic="events", data=encode_data(b"test-item"))]), + ) + + # Give signals time to be processed + await asyncio.sleep(0.5) + + # Query application state + app_data = await handle.query(MixinCoexistenceWorkflow.app_query) + assert app_data == ["hello", "world"] + + # Query pub/sub offset + pubsub_client = PubSubClient(handle) + offset = await pubsub_client.get_offset() + assert offset == 1 + + # Subscribe to pub/sub + items = await collect_items(handle, None, 0, 1) + assert len(items) == 1 + assert items[0].topic == "events" + + await handle.signal(MixinCoexistenceWorkflow.close) + + +@pytest.mark.asyncio +async def test_max_batch_size(client: Client) -> None: + """max_batch_size triggers auto-flush without 
waiting for timer.""" + count = 7 # with max_batch_size=3: flushes at 3, 6, then remainder 1 on exit + async with new_worker( + client, + MaxBatchWorkflow, + activities=[publish_with_max_batch], + max_cached_workflows=0, + ) as worker: + handle = await client.start_workflow( + MaxBatchWorkflow.run, + count, + id=f"pubsub-maxbatch-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # count items from activity + 1 "activity_done" from workflow + items = await collect_items(handle, None, 0, count + 1, timeout=15.0) + assert len(items) == count + 1 + for i in range(count): + assert items[i].data == f"item-{i}".encode() + await handle.signal(MaxBatchWorkflow.close) + + +@pytest.mark.asyncio +async def test_replay_safety(client: Client) -> None: + """Pub/sub mixin survives workflow replay (max_cached_workflows=0).""" + async with new_worker( + client, + InterleavedWorkflow, + activities=[publish_items], + max_cached_workflows=0, + ) as worker: + handle = await client.start_workflow( + InterleavedWorkflow.run, + 5, + id=f"pubsub-replay-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + # 1 (started) + 5 (activity) + 1 (done) = 7 + items = await collect_items(handle, None, 0, 7) + assert len(items) == 7 + assert items[0].data == b"started" + assert items[6].data == b"done" + await handle.signal(InterleavedWorkflow.close) + + +@pytest.mark.asyncio +async def test_flush_keeps_pending_on_signal_failure(client: Client) -> None: + """If flush signal fails, items stay in _pending for retry with same sequence. + + This matches the TLA+-verified algorithm (PubSubDedup.tla): on failure, + the pending batch and sequence are kept so the next _flush() retries with + the SAME sequence number. The confirmed sequence (_sequence) does NOT + advance until delivery is confirmed. 
+ """ + bogus_handle = client.get_workflow_handle("nonexistent-workflow-id") + pubsub = PubSubClient(bogus_handle) + + pubsub.publish("events", b"item-0") + pubsub.publish("events", b"item-1") + assert len(pubsub._buffer) == 2 + + # flush should fail (workflow doesn't exist) + with pytest.raises(Exception): + await pubsub._flush() + + # Items moved to _pending (not restored to _buffer) + assert len(pubsub._buffer) == 0 + assert pubsub._pending is not None + assert len(pubsub._pending) == 2 + assert pubsub._pending[0].data == encode_data(b"item-0") + assert pubsub._pending[1].data == encode_data(b"item-1") + # Pending sequence is set, confirmed sequence is NOT advanced + assert pubsub._pending_seq == 1 + assert pubsub._sequence == 0 + + # New items published during failure go to _buffer (not _pending) + pubsub.publish("events", b"item-2") + assert len(pubsub._buffer) == 1 + assert pubsub._pending is not None # Still set for retry + + # Next flush retries the pending batch with the same sequence + with pytest.raises(Exception): + await pubsub._flush() + assert pubsub._pending_seq == 1 # Same sequence on retry + assert pubsub._sequence == 0 # Still not advanced + + +@pytest.mark.asyncio +async def test_max_retry_duration_expiry(client: Client) -> None: + """Flush raises TimeoutError when max_retry_duration is exceeded.""" + bogus_handle = client.get_workflow_handle("nonexistent-workflow-id") + pubsub = PubSubClient(bogus_handle, max_retry_duration=0.1) + + pubsub.publish("events", b"item-0") + + # First flush fails, sets pending + with pytest.raises(Exception, match="not found"): + await pubsub._flush() + assert pubsub._pending is not None + + # Wait for retry duration to expire + await asyncio.sleep(0.2) + + # Next flush should raise TimeoutError and clear pending + with pytest.raises(TimeoutError, match="max_retry_duration"): + await pubsub._flush() + assert pubsub._pending is None + assert pubsub._sequence == 0 + + +@pytest.mark.asyncio +async def 
test_dedup_rejects_duplicate_signal(client: Client) -> None: + """Workflow deduplicates signals with the same publisher_id + sequence.""" + async with new_worker( + client, + BasicPubSubWorkflow, + ) as worker: + handle = await client.start_workflow( + BasicPubSubWorkflow.run, + id=f"pubsub-dedup-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Send a batch with publisher_id and sequence + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=encode_data(b"item-0"))], + publisher_id="test-pub", + sequence=1, + ), + ) + + # Send the same sequence again — should be deduped + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=encode_data(b"duplicate"))], + publisher_id="test-pub", + sequence=1, + ), + ) + + # Send a new sequence — should go through + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=encode_data(b"item-1"))], + publisher_id="test-pub", + sequence=2, + ), + ) + + await asyncio.sleep(0.5) + + # Should have 2 items, not 3 + items = await collect_items(handle, None, 0, 2) + assert len(items) == 2 + assert items[0].data == b"item-0" + assert items[1].data == b"item-1" + + # Verify offset is 2 (not 3) + pubsub_client = PubSubClient(handle) + offset = await pubsub_client.get_offset() + assert offset == 2 + + await handle.signal(BasicPubSubWorkflow.close) + + +@pytest.mark.asyncio +async def test_truncate_pubsub(client: Client) -> None: + """truncate_pubsub discards prefix and adjusts base_offset.""" + async with new_worker( + client, + TruncateSignalWorkflow, + ) as worker: + handle = await client.start_workflow( + TruncateSignalWorkflow.run, + id=f"pubsub-truncate-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 5 items via signal + await handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(f"item-{i}".encode())) + for i in 
range(5) + ]), + ) + await asyncio.sleep(0.5) + + # Verify all 5 items + items = await collect_items(handle, None, 0, 5) + assert len(items) == 5 + + # Truncate up to offset 3 (discard items 0, 1, 2) + await handle.signal("truncate", 3) + await asyncio.sleep(0.3) + + # Offset should still be 5 + pubsub_client = PubSubClient(handle) + offset = await pubsub_client.get_offset() + assert offset == 5 + + # Reading from offset 3 should work (items 3, 4) + items_after = await collect_items(handle, None, 3, 2) + assert len(items_after) == 2 + assert items_after[0].data == b"item-3" + assert items_after[1].data == b"item-4" + + await handle.signal("close") + + +@pytest.mark.asyncio +async def test_ttl_pruning_in_get_pubsub_state(client: Client) -> None: + """get_pubsub_state prunes stale publisher entries based on TTL.""" + async with new_worker( + client, + TTLTestWorkflow, + ) as worker: + handle = await client.start_workflow( + TTLTestWorkflow.run, + id=f"pubsub-ttl-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish from two different publishers + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=encode_data(b"from-a"))], + publisher_id="pub-a", + sequence=1, + ), + ) + await handle.signal( + "__pubsub_publish", + PublishInput( + items=[PublishEntry(topic="events", data=encode_data(b"from-b"))], + publisher_id="pub-b", + sequence=1, + ), + ) + await asyncio.sleep(0.5) + + # Query state with a very long TTL — both publishers retained + state = await handle.query(TTLTestWorkflow.get_state_with_ttl, 9999.0) + assert "pub-a" in state.publisher_sequences + assert "pub-b" in state.publisher_sequences + + # Query state with TTL=0 — both publishers pruned + state_pruned = await handle.query(TTLTestWorkflow.get_state_with_ttl, 0.0) + assert "pub-a" not in state_pruned.publisher_sequences + assert "pub-b" not in state_pruned.publisher_sequences + + # Items are still in the log regardless of pruning + assert 
len(state_pruned.log) == 2 + + await handle.signal("close") + + +# --------------------------------------------------------------------------- +# Truncate and TTL test workflows +# --------------------------------------------------------------------------- + + +@workflow.defn +class TruncateSignalWorkflow(PubSubMixin): + """Workflow that accepts a truncate signal for testing.""" + + @workflow.init + def __init__(self) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.signal + def truncate(self, up_to_offset: int) -> None: + self.truncate_pubsub(up_to_offset) + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +@workflow.defn +class TTLTestWorkflow(PubSubMixin): + """Workflow that exposes get_pubsub_state via query for TTL testing.""" + + @workflow.init + def __init__(self) -> None: + self.init_pubsub() + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.query + def get_state_with_ttl(self, ttl: float) -> PubSubState: + return self.get_pubsub_state(publisher_ttl=ttl) + + @workflow.run + async def run(self) -> None: + await workflow.wait_condition(lambda: self._closed) + + +# --------------------------------------------------------------------------- +# Continue-as-new workflow and test +# --------------------------------------------------------------------------- + + +@dataclass +class CANWorkflowInputAny: + """Uses Any typing — reproduces the pitfall.""" + pubsub_state: Any = None + + +@dataclass +class CANWorkflowInputTyped: + """Uses proper typing.""" + pubsub_state: PubSubState | None = None + + +@workflow.defn +class ContinueAsNewAnyWorkflow(PubSubMixin): + """CAN workflow using Any-typed pubsub_state (reproduces samples pattern).""" + + @workflow.init + def __init__(self, input: CANWorkflowInputAny) -> None: + self.init_pubsub(prior_state=input.pubsub_state) + 
self._should_continue = False + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.signal + def trigger_continue(self) -> None: + self._should_continue = True + + @workflow.run + async def run(self, input: CANWorkflowInputAny) -> None: + while True: + await workflow.wait_condition( + lambda: self._should_continue or self._closed + ) + if self._closed: + return + if self._should_continue: + self._should_continue = False + self.drain_pubsub() + await workflow.wait_condition(workflow.all_handlers_finished) + workflow.continue_as_new(args=[CANWorkflowInputAny( + pubsub_state=self.get_pubsub_state(), + )]) + + +@workflow.defn +class ContinueAsNewTypedWorkflow(PubSubMixin): + """CAN workflow using properly-typed pubsub_state.""" + + @workflow.init + def __init__(self, input: CANWorkflowInputTyped) -> None: + self.init_pubsub(prior_state=input.pubsub_state) + self._should_continue = False + self._closed = False + + @workflow.signal + def close(self) -> None: + self._closed = True + + @workflow.signal + def trigger_continue(self) -> None: + self._should_continue = True + + @workflow.run + async def run(self, input: CANWorkflowInputTyped) -> None: + while True: + await workflow.wait_condition( + lambda: self._should_continue or self._closed + ) + if self._closed: + return + if self._should_continue: + self._should_continue = False + self.drain_pubsub() + await workflow.wait_condition(workflow.all_handlers_finished) + workflow.continue_as_new(args=[CANWorkflowInputTyped( + pubsub_state=self.get_pubsub_state(), + )]) + + +async def _run_can_test(can_client: Client, workflow_cls, input_cls) -> None: + """Shared CAN test logic: publish, CAN, verify items survive.""" + async with new_worker( + can_client, + workflow_cls, + ) as worker: + handle = await can_client.start_workflow( + workflow_cls.run, + input_cls(), + id=f"pubsub-can-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + # Publish 3 items via signal + await 
handle.signal( + "__pubsub_publish", + PublishInput(items=[ + PublishEntry(topic="events", data=encode_data(b"item-0")), + PublishEntry(topic="events", data=encode_data(b"item-1")), + PublishEntry(topic="events", data=encode_data(b"item-2")), + ]), + ) + + # Verify items are there + items_before = await collect_items(handle, None, 0, 3) + assert len(items_before) == 3 + + # Trigger continue-as-new + await handle.signal(workflow_cls.trigger_continue) + + # Wait for new run to start (poll, don't sleep) + new_handle = can_client.get_workflow_handle(handle.id) + await assert_eq_eventually( + True, + lambda: _is_different_run(handle, new_handle), + ) + + # The 3 items from before CAN should still be readable + items_after = await collect_items(new_handle, None, 0, 3) + assert len(items_after) == 3 + assert items_after[0].data == b"item-0" + assert items_after[1].data == b"item-1" + assert items_after[2].data == b"item-2" + + # New items should get offset 3+ + await new_handle.signal( + "__pubsub_publish", + PublishInput(items=[PublishEntry(topic="events", data=encode_data(b"item-3"))]), + ) + items_all = await collect_items(new_handle, None, 0, 4) + assert len(items_all) == 4 + assert items_all[3].data == b"item-3" + + await new_handle.signal(workflow_cls.close) + + +@pytest.mark.asyncio +async def test_continue_as_new_any_typed_fails(client: Client) -> None: + """Any-typed pubsub_state does NOT survive CAN — documents the pitfall. + + The default data converter deserializes Any fields as plain dicts, losing + the PubSubState type. Use ``PubSubState | None`` instead. 
+ """ + async with new_worker( + client, + ContinueAsNewAnyWorkflow, + ) as worker: + handle = await client.start_workflow( + ContinueAsNewAnyWorkflow.run, + CANWorkflowInputAny(), + id=f"pubsub-can-any-{uuid.uuid4()}", + task_queue=worker.task_queue, + ) + + await handle.signal( + "__pubsub_publish", + PublishInput(items=[PublishEntry(topic="events", data=encode_data(b"item-0"))]), + ) + items = await collect_items(handle, None, 0, 1) + assert len(items) == 1 + + # Trigger CAN — the new run will fail to deserialize pubsub_state + await handle.signal(ContinueAsNewAnyWorkflow.trigger_continue) + + # Wait for CAN to happen + new_handle = client.get_workflow_handle(handle.id) + await assert_eq_eventually( + True, + lambda: _is_different_run(handle, new_handle), + ) + + # The new run should be broken — items are NOT accessible + items_after = await collect_items(new_handle, None, 0, 1, timeout=3.0) + assert len(items_after) == 0 # fails because workflow can't start + + +@pytest.mark.asyncio +async def test_continue_as_new_properly_typed(client: Client) -> None: + """CAN with PubSubState-typed pubsub_state field.""" + await _run_can_test(client, ContinueAsNewTypedWorkflow, CANWorkflowInputTyped) diff --git a/uv.lock b/uv.lock index c63faefad..df900573a 100644 --- a/uv.lock +++ b/uv.lock @@ -8,6 +8,10 @@ resolution-markers = [ "python_full_version < '3.11'", ] +[options] +exclude-newer = "2026-03-30T03:37:56.787253Z" +exclude-newer-span = "P7D" + [[package]] name = "aioboto3" version = "15.5.0" @@ -1768,7 +1772,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = 
"https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1776,7 +1779,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = 
"https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1785,7 +1787,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = 
"https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1794,7 +1795,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = 
"https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1803,7 +1803,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1812,7 +1811,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = 
"https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -4857,7 +4855,7 @@ requires-dist = [ { name = "pydantic", marker = "extra == 'pydantic'", specifier = ">=2.0.0,<3" }, { name = "python-dateutil", marker = "python_full_version < '3.11'", specifier = ">=2.8.2,<3" }, { name = "types-aioboto3", extras = ["s3"], marker = "extra == 'aioboto3'", specifier = ">=10.4.0" }, - { name = "types-protobuf", specifier = ">=3.20" }, 
+ { name = "types-protobuf", specifier = ">=3.20,<7.0.0" }, { name = "typing-extensions", specifier = ">=4.2.0,<5" }, ] provides-extras = ["grpc", "opentelemetry", "pydantic", "openai-agents", "google-adk", "aioboto3"]